summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- AUTHORS 1
-rw-r--r-- COPYING 338
-rw-r--r-- ChangeLog 251
-rw-r--r-- INSTALL 84
-rw-r--r-- Makefile.in 167
-rw-r--r-- NEWS 35
-rw-r--r-- README 93
-rw-r--r-- alone_to_lz.cc 150
-rw-r--r-- arg_parser.cc 197
-rw-r--r-- arg_parser.h 110
-rw-r--r-- byte_repair.cc 520
-rw-r--r-- common.h 48
-rwxr-xr-x configure 198
-rw-r--r-- decoder.cc 291
-rw-r--r-- decoder.h 387
-rw-r--r-- doc/lziprecover.1 152
-rw-r--r-- doc/lziprecover.info 1536
-rw-r--r-- doc/lziprecover.texi 1617
-rw-r--r-- dump_remove.cc 365
-rw-r--r-- list.cc 124
-rw-r--r-- lunzcrash.cc 374
-rw-r--r-- lzip.h 538
-rw-r--r-- lzip_index.cc 366
-rw-r--r-- lzip_index.h 96
-rw-r--r-- main.cc 1126
-rw-r--r-- main_common.cc 197
-rw-r--r-- md5.cc 206
-rw-r--r-- md5.h 61
-rw-r--r-- merge.cc 658
-rw-r--r-- mtester.cc 373
-rw-r--r-- mtester.h 395
-rw-r--r-- nrep_stats.cc 121
-rw-r--r-- range_dec.cc 186
-rw-r--r-- reproduce.cc 783
-rw-r--r-- split.cc 142
-rwxr-xr-x testsuite/check.sh 1538
-rw-r--r-- testsuite/fox.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox6.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_bad1.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_bad1.txt 4
-rw-r--r-- testsuite/fox6_bad2.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_bad3.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_bad4.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_bad5.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_bad6.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_mark.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_sc1.lz bin 0 -> 480 bytes
-rw-r--r-- testsuite/fox6_sc2.lz bin 0 -> 500 bytes
-rw-r--r-- testsuite/fox6_sc3.lz bin 0 -> 500 bytes
-rw-r--r-- testsuite/fox6_sc4.lz bin 0 -> 500 bytes
-rw-r--r-- testsuite/fox6_sc5.lz bin 0 -> 500 bytes
-rw-r--r-- testsuite/fox6_sc6.lz bin 0 -> 500 bytes
-rw-r--r-- testsuite/fox_bcrc.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox_crc0.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox_das46.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox_de20.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox_mes81.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox_s11.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/fox_v2.lz bin 0 -> 80 bytes
-rw-r--r-- testsuite/numbers.lz bin 0 -> 369 bytes
-rw-r--r-- testsuite/numbersbt.lz bin 0 -> 392 bytes
-rw-r--r-- testsuite/test.txt 676
-rw-r--r-- testsuite/test.txt.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test.txt.lzma bin 0 -> 7363 bytes
-rw-r--r-- testsuite/test21723.txt 7
-rw-r--r-- testsuite/test_3m.txt.lz.md5 1
-rw-r--r-- testsuite/test_bad1.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad2.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad3.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad4.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad5.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad6.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad6.txt 26
-rw-r--r-- testsuite/test_bad7.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad7.txt 215
-rw-r--r-- testsuite/test_bad8.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad8.txt 3
-rw-r--r-- testsuite/test_bad9.lz bin 0 -> 7376 bytes
-rw-r--r-- testsuite/test_bad9.txt 5
-rw-r--r-- testsuite/test_em.txt.lz bin 0 -> 14024 bytes
-rw-r--r-- unzcrash.cc 631
81 files changed, 15392 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..e6cadef
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Lziprecover was written by Antonio Diaz Diaz.
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..4ad17ae
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,338 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..c0737a1
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,251 @@
+2024-01-20 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.24 released.
+ * New options '--empty-error', '--marking-error', '--clear-marking'.
+ * dump_remove.cc, main.cc: Accept 'empty' in --dump, --remove, --strip.
+ * main.cc: Rename '--repair' to '--byte-repair'.
+ Rename '--debug-repair' to '--debug-byte-repair'.
+ Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'.
+ (show_option_error): New function showing argument and option name.
+ (main): Make -o preserve date/mode/owner if 1 input file.
+ (open_outstream): Create missing intermediate directories.
+ * lzip.h: Rename verify_* to check_*.
+ * repair.cc: Rename to byte_repair.cc.
+ * unzcrash.cc: Rename '--no-verify' to '--no-check'.
+ * configure, Makefile.in: New variable 'MAKEINFO'.
+ * Makefile.in: Remove target 'install-as-lzip'; -e needs a real lzip.
+ * testsuite: New test files test_3m.txt.lz.md5, fox6_mark.lz.
+
+2022-01-21 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.23 released.
+ * Decompression time has been reduced by 5-12% depending on the file.
+ * main_common.cc (getnum): Show option name and valid range if error.
+ * dump_remove.cc (dump_members): Check tty except for --dump=tdata.
+ * Option '-U, --unzcrash' now takes an argument ('1' or 'B<size>').
+ * mtester.cc (duplicate_buffer): Use an external buffer.
+ * repair.cc (debug_decompress): Continue decoding on CRC mismatch.
+ * unzcrash.cc: Make zcmp_command a string of unlimited size.
+ Use execvp instead of popen to avoid invoking /bin/sh.
+ Print byte or block position in messages.
+ * New file common.h.
+ * Improve several descriptions in manual, '--help', and man page.
+ * lziprecover.texi: Change GNU Texinfo category to 'Compression'.
+ (Reported by Alfred M. Szmidt).
+
+2021-01-02 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.22 released.
+ * New options '-e, --reproduce', '--lzip-level', '--lzip-name',
+ '--reference-file', and '-E, --debug-reproduce'.
+ * Remove '--dump-tdata', '--remove-tdata', and '--strip-tdata'.
+ * main.cc (main): Report an error if a file name is empty.
+ Make '-o' behave like '-c', but writing to file.
+ Make '-c' and '-o' check whether the output is a terminal only once.
+ Do not open output if input is a terminal.
+ * main.cc (decompress): With '-i', ignore data errors, keep files.
+ * range_dec.cc: '-i -D' now decompresses a truncated last member.
+ * '-i -D' now returns 0 if only ignored errors are found.
+ * '-i' now considers any block > 36 with header a member, not a gap.
+ * Replace 'decompressed', 'compressed' with 'out', 'in' in output.
+ * Fix several compiler warnings. (Reported by Nissanka Gooneratne).
+ * lzip_index.cc: Improve messages for corruption in last header.
+ * New debug options '-M, --md5sum' and '-U, --unzcrash'.
+ * main.cc: Set a valid invocation_name even if argc == 0.
+ * Document extraction from tar.lz in manual, '--help', and man page.
+ * New files lunzcrash.cc, md5.h, md5.cc, nrep_stats.cc, reproduce.cc.
+ * lziprecover.texi: New chapter 'Reproducing one sector'.
+ New sections 'Merging with a backup' and 'Reproducing a mailbox'.
+ Document the debug options for experts.
+ * check.sh: Lzip 1.16 or newer is required to run the tests.
+ * testsuite: Add 9 new test files.
+
+2019-01-04 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.21 released.
+ * Rename File_* to Lzip_*.
+ * New options '--dump', '--remove', and '--strip'. They
+ replace '--dump-tdata', '--remove-tdata', and '--strip-tdata',
+ which are now aliases and will be removed in version 1.22.
+ * lzip.h (Lzip_trailer): New function 'verify_consistency'.
+ * lzip_index.cc: Lzip_index now detects gaps between members,
+ some kinds of corrupt trailers and
+ some fake trailers embedded in trailing data.
+ * split.cc: Use Lzip_index to split members, gaps and trailing data.
+ * split.cc: Check last member before writing anything.
+ * list.cc (list_files): With '-i', ignore format errors, show gaps.
+ * range_dec.cc: With '-i', ignore a truncated last member.
+ * main.cc (main): Check return value of close( infd ).
+ * Improve and add new diagnostic messages.
+ * Print '\n' instead of '\r' if !isatty( 1 ) in merge, repair.
+ * main.cc: Compile on DOS with DJGPP.
+ * lziprecover.texi: New chapter 'Tarlz'.
+ * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'.
+ * INSTALL: Document use of CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
+ * testsuite: New test files fox.lz, fox6_sc[1-6].lz.
+
+2018-02-12 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.20 released.
+ * split.cc: Fix splitting of files > 64 KiB broken since 1.16.
+ * New options '--dump-tdata', '--remove-tdata', '--strip-tdata', and
+ '--loose-trailing'.
+ * Improve corrupt header detection to HD=3.
+ * main.cc: Show corrupt or truncated header in multimember file.
+ * Replace 'bits/byte' with inverse compression ratio in output.
+ * Show progress of decompression at verbosity level 2 (-vv).
+ * Show progress of decompression only if stderr is a terminal.
+ * main.cc: Show final diagnostic when testing multiple files.
+ * decoder.cc (verify_trailer): Show stored sizes also in hex.
+ Show dictionary size at verbosity level 4 (-vvvv).
+
+2017-04-10 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.19 released.
+ * merge.cc: Fix members with thousands of scattered errors.
+ * Option '-a' now works with '-l' and '-D'.
+ * The output of option '-l, --list' has been simplified.
+ * main.cc: Continue testing if any input file is a terminal.
+ * main.cc: Show trailing data in both hexadecimal and ASCII.
+ * lzip_index.cc: Improve detection of bad dict and trailing data.
+ * lzip_index.cc: Skip trailing data more efficiently.
+ * lzip.h: Unify messages for bad magic, trailing data, etc.
+ * New struct Bad_byte allows delta and flip modes for bad_value.
+ * unzcrash.cc: New option '-e, --set-byte'.
+
+2016-05-12 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.18 released.
+ * New option '-a, --trailing-error'.
+ * merge.cc (open_input_files): Use CRC to test identical files.
+ * repair.cc (repair_file): Detect gross damage before repairing.
+ * repair.cc: Repair a damaged dictionary size in the header.
+ * repair.cc: Try bytes at offsets 7 to 11 first.
+ * Decompression time has been reduced by 2%.
+ * main.cc (decompress): Print up to 6 bytes of trailing data
+ when '-tvvvv' is specified.
+ * decoder.cc (verify_trailer): Remove test of final code.
+ * main.cc (main): Delete '--output' file if infd is a terminal.
+ * main.cc (main): Don't use stdin more than once.
+ * Use 'close_and_set_permissions' and 'set_signals' in all modes.
+ * range_dec.cc (list_file): Show dictionary size and size of
+ trailing data (if any) with '-lv'.
+ * New options '-A, --alone-to-lz', '-W, --debug-decompress', and
+ '-X, --show-packets'.
+ * Change short name of option '--debug-delay' to '-Y'.
+ * Change short name of option '--debug-repair' to '-Z'.
+ * unzcrash.cc: New options '-B, --block', '-d, --delta',
+ '-t, --truncate', and '-z, --zcmp'.
+ * unzcrash.cc: Read files as large as RAM allows.
+ * unzcrash.cc: Compare output using zcmp if decompressor returns 0.
+ * unzcrash.cc: Accept negative position and size.
+ * lziprecover.texi: New chapter 'Trailing data'.
+ * configure: Avoid warning on some shells when testing for g++.
+ * Makefile.in: Detect the existence of install-info.
+ * check.sh: Don't check error messages.
+ * check.sh: A POSIX shell is required to run the tests.
+
+2015-05-28 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.17 released.
+ * New block selection algorithm makes merge up to 100 times faster.
+ * repair.cc: Repair time has been reduced by 15%.
+ * New options '-y, --debug-delay' and '-z, --debug-repair'.
+ * Makefile.in: New targets 'install*-compress'.
+ * testsuite/unzcrash.cc: Move to top directory.
+ * lziprecover.texi: New chapter 'File names'.
+
+2014-08-29 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.16 released.
+ * New class LZ_mtester makes repair up to 10 times faster.
+ * main.cc (close_and_set_permissions): Behave like 'cp -p'.
+ * lziprecover.texinfo: Rename to lziprecover.texi.
+ * Change license to GPL version 2 or later.
+
+2013-09-14 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.15 released.
+ * repair.cc: Repair multimember files with up to one byte error
+ per member.
+ * merge.cc: Merge multimember files.
+ * main.cc (show_header): Don't show header version.
+ * lziprecover.texinfo: New chapters 'Repairing files',
+ 'Merging files', and 'Unzcrash'.
+
+2013-05-31 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.14 released.
+ * New option '-i, --ignore-errors'.
+ * Option '-l, --list' now accepts more than one file.
+ * Decompression time has been reduced by 12%.
+ * split.cc: Use as few digits as possible in file names.
+ * split.cc: In verbose mode show names of files being created.
+ * main.cc (show_header): Show header version if verbosity >= 4.
+ * configure: Options now accept a separate argument.
+ * Makefile.in: New targets 'install-as-lzip' and 'install-bin'.
+ * main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2.
+
+2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.13 released.
+ * Lziprecover is now distributed in its own package. Until
+ version 1.12 it was included in the lzip package. Previous
+ entries in this file are taken from there.
+ * lziprecover.cc: Rename to main.cc.
+ * New files merge.cc, repair.cc, split.cc, and range_dec.cc.
+ * main.cc: Add decompressor options (-c, -d, -k, -t) so that
+ an external decompressor is not needed for recovery nor for
+ "make check".
+ * New option '-D, --range-decompress', which extracts a range of
+ bytes decompressing only the members containing the desired data.
+ * New option '-l, --list', which prints correct total file sizes
+ even for multimember files.
+ * merge.cc, repair.cc: Remove output file if recovery fails.
+ * Change quote characters in messages as advised by GNU Standards.
+ * split.cc: Use Boyer-Moore algorithm to search for headers.
+ * configure: Rename 'datadir' to 'datarootdir'.
+
+2011-04-30 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.12 released.
+ * lziprecover.cc: If '-v' is not specified show errors only.
+ * unzcrash.cc: Use Arg_parser.
+ * unzcrash.cc: New options '-b, --bits', '-p, --position', and
+ '-s, --size'.
+
+2010-09-16 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.11 released.
+ * lziprecover.cc: New option '-m, --merge', which tries to produce a
+ correct file by merging the good parts of two or more damaged copies.
+ * lziprecover.cc: New option '-R, --repair' for repairing a
+ 1-byte error in single-member files.
+ * decoder.cc (decode_member): Detect file errors earlier to improve
+ efficiency of lziprecover's new repair capability.
+ This change also prevents (harmless) access to uninitialized
+ memory when decompressing a corrupt file.
+ * lziprecover.cc: New options '-f, --force' and '-o, --output'.
+ * lziprecover.cc: New option '-s, --split' to select the until
+ now only operation of splitting multimember files.
+ * lziprecover.cc: If no operation is specified, warn the user and do
+ nothing.
+
+2009-06-22 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.6 released.
+ * lziprecover.1: New man page.
+ * check.sh: Test lziprecover.
+
+2009-01-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.4 released.
+ * Add 'lziprecover', a member recoverer program.
+ * unzcrash.cc: Test all 1-byte errors.
+
+
+Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+This file is a collection of facts, and thus it is not copyrightable,
+but just in case, you have unlimited permission to copy, distribute, and
+modify it.
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..8cb9bdd
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,84 @@
+Requirements
+------------
+You will need a C++98 compiler with support for 'long long'.
+(gcc 3.3.6 or newer is recommended).
+I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
+compliant compiler.
+Gcc is available at http://gcc.gnu.org.
+
+Lzip 1.16 (or clzip 1.6) or newer is required to run the tests.
+
+Unzcrash needs a 'zcmp' program able to understand the format being tested.
+For example the zcmp provided by zutils.
+Zutils is available at http://www.nongnu.org/zutils/zutils.html
+
+The operating system must allow signal handlers read access to objects with
+static storage duration so that the cleanup handler for Control-C can delete
+the partial output file.
+
+
+Procedure
+---------
+1. Unpack the archive if you have not done so already:
+
+ tar -xf lziprecover[version].tar.lz
+or
+ lzip -cd lziprecover[version].tar.lz | tar -xf -
+
+This creates the directory ./lziprecover[version] containing the source code
+extracted from the archive.
+
+2. Change to the lziprecover directory and run configure.
+ (Try 'configure --help' for usage instructions).
+
+ cd lziprecover[version]
+ ./configure
+
+ If you are compiling on MinGW, use:
+
+ ./configure CXXFLAGS+='-D __USE_MINGW_ANSI_STDIO'
+
+3. Run make.
+
+ make
+
+4. Optionally, type 'make check' to run the tests that come with lziprecover.
+
+ If you have clzip installed (instead of lzip), use:
+
+ make LZIP_NAME=clzip check
+
+5. Type 'make install' to install the program and any data files and
+ documentation. You need root privileges to install into a prefix owned
+ by root.
+
+ Or type 'make install-compress', which additionally compresses the
+ info manual and the man page after installation.
+ (Installing compressed docs may become the default in the future).
+
+ You can install only the program, the info manual, or the man page by
+ typing 'make install-bin', 'make install-info', or 'make install-man'
+ respectively.
+
+
+Another way
+-----------
+You can also compile lziprecover into a separate directory.
+To do this, you must use a version of 'make' that supports the variable
+'VPATH', such as GNU 'make'. 'cd' to the directory where you want the
+object files and executables to go and run the 'configure' script.
+'configure' automatically checks for the source code in '.', in '..', and
+in the directory that 'configure' is in.
+
+'configure' recognizes the option '--srcdir=DIR' to control where to look
+for the source code. Usually 'configure' can determine that directory
+automatically.
+
+After running 'configure', you can run 'make' and 'make install' as
+explained above.
+
+
+Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+This file is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 0000000..8a7b3a9
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,167 @@
+
+DISTNAME = $(pkgname)-$(pkgversion)
+INSTALL = install
+INSTALL_PROGRAM = $(INSTALL) -m 755
+INSTALL_DATA = $(INSTALL) -m 644
+INSTALL_DIR = $(INSTALL) -d -m 755
+SHELL = /bin/sh
+CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
+
+objs = arg_parser.o alone_to_lz.o lzip_index.o list.o byte_repair.o \
+ dump_remove.o lunzcrash.o md5.o merge.o mtester.o nrep_stats.o \
+ range_dec.o reproduce.o split.o decoder.o main.o
+unzobjs = arg_parser.o unzcrash.o
+
+
+.PHONY : all install install-bin install-info install-man \
+ install-strip install-compress install-strip-compress \
+ install-bin-strip install-info-compress install-man-compress \
+ uninstall uninstall-bin uninstall-info uninstall-man \
+ doc info man check dist clean distclean
+
+all : $(progname)
+
+$(progname) : $(objs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs)
+
+unzcrash : $(unzobjs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(unzobjs)
+
+main.o : main.cc
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+
+unzcrash.o : unzcrash.cc
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+
+%.o : %.cc
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+
+# prevent 'make' from trying to remake source files
+$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ;
+%.h %.cc : ;
+
+$(objs) : Makefile
+alone_to_lz.o : lzip.h common.h mtester.h
+arg_parser.o : arg_parser.h
+byte_repair.o : lzip.h common.h mtester.h lzip_index.h
+decoder.o : lzip.h common.h decoder.h
+dump_remove.o : lzip.h common.h lzip_index.h
+list.o : lzip.h common.h lzip_index.h
+lunzcrash.o : lzip.h common.h md5.h mtester.h lzip_index.h
+lzip_index.o : lzip.h common.h lzip_index.h
+main.o : arg_parser.h lzip.h common.h decoder.h main_common.cc
+md5.o : md5.h
+merge.o : lzip.h common.h decoder.h lzip_index.h
+mtester.o : lzip.h common.h md5.h mtester.h
+nrep_stats.o : lzip.h common.h lzip_index.h
+range_dec.o : lzip.h common.h decoder.h lzip_index.h
+reproduce.o : lzip.h common.h md5.h mtester.h lzip_index.h
+split.o : lzip.h common.h lzip_index.h
+unzcrash.o : Makefile arg_parser.h common.h main_common.cc
+
+doc : info man
+
+info : $(VPATH)/doc/$(pkgname).info
+
+$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
+ cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi
+
+man : $(VPATH)/doc/$(progname).1
+
+$(VPATH)/doc/$(progname).1 : $(progname)
+ help2man -n 'recovers data from damaged lzip files' -o $@ ./$(progname)
+
+Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
+ ./config.status
+
+check : all
+ @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
+
+install : install-bin install-info install-man
+install-strip : install-bin-strip install-info install-man
+install-compress : install-bin install-info-compress install-man-compress
+install-strip-compress : install-bin-strip install-info-compress install-man-compress
+
+install-bin : all
+ if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
+ $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
+
+install-bin-strip : all
+ $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin
+
+install-info :
+ if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
+ -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
+ $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
+
+install-info-compress : install-info
+ lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
+
+install-man :
+ if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
+ -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
+ $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
+
+install-man-compress : install-man
+ lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1"
+
+uninstall : uninstall-man uninstall-info uninstall-bin
+
+uninstall-bin :
+ -rm -f "$(DESTDIR)$(bindir)/$(progname)"
+
+uninstall-info :
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
+ -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
+
+uninstall-man :
+ -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
+
+dist : doc
+ ln -sf $(VPATH) $(DISTNAME)
+ tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \
+ $(DISTNAME)/AUTHORS \
+ $(DISTNAME)/COPYING \
+ $(DISTNAME)/ChangeLog \
+ $(DISTNAME)/INSTALL \
+ $(DISTNAME)/Makefile.in \
+ $(DISTNAME)/NEWS \
+ $(DISTNAME)/README \
+ $(DISTNAME)/configure \
+ $(DISTNAME)/doc/$(progname).1 \
+ $(DISTNAME)/doc/$(pkgname).info \
+ $(DISTNAME)/doc/$(pkgname).texi \
+ $(DISTNAME)/*.h \
+ $(DISTNAME)/*.cc \
+ $(DISTNAME)/testsuite/check.sh \
+ $(DISTNAME)/testsuite/fox6_bad1.txt \
+ $(DISTNAME)/testsuite/test.txt \
+ $(DISTNAME)/testsuite/test21723.txt \
+ $(DISTNAME)/testsuite/test_bad[6-9].txt \
+ $(DISTNAME)/testsuite/test_3m.txt.lz.md5 \
+ $(DISTNAME)/testsuite/fox.lz \
+ $(DISTNAME)/testsuite/fox_*.lz \
+ $(DISTNAME)/testsuite/fox6.lz \
+ $(DISTNAME)/testsuite/fox6_sc[1-6].lz \
+ $(DISTNAME)/testsuite/fox6_bad[1-6].lz \
+ $(DISTNAME)/testsuite/fox6_mark.lz \
+ $(DISTNAME)/testsuite/numbers.lz \
+ $(DISTNAME)/testsuite/numbersbt.lz \
+ $(DISTNAME)/testsuite/test.txt.lz \
+ $(DISTNAME)/testsuite/test.txt.lzma \
+ $(DISTNAME)/testsuite/test_bad[1-9].lz \
+ $(DISTNAME)/testsuite/test_em.txt.lz
+ rm -f $(DISTNAME)
+ lzip -v -9 $(DISTNAME).tar
+
+clean :
+ -rm -f $(progname) $(objs)
+ -rm -f unzcrash unzcrash.o
+
+distclean : clean
+ -rm -f Makefile config.status *.tar *.tar.lz
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..2ac8da5
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,35 @@
+Changes in version 1.24:
+
+The option '--empty-error', which forces exit status 2 if any empty member
+is found, has been added.
+
+The option '--marking-error', which forces exit status 2 if the first LZMA
+byte is non-zero in any member, has been added.
+
+The option '--clear-marking', which sets to zero the first LZMA byte of each
+member, has been added.
+
+The keyword 'empty' is now recognized in the argument of '--dump',
+'--remove', and '--strip'.
+
+The option '--repair' has been renamed to '--byte-repair'.
+
+The option '--debug-repair' has been renamed to '--debug-byte-repair'.
+
+File diagnostics have been reformatted as 'PROGRAM: FILE: MESSAGE'.
+
+Diagnostics caused by invalid arguments to command-line options now show the
+argument and the name of the option.
+
+The option '-o, --output' now preserves dates, permissions, and ownership of
+the file, when decompressing exactly one file.
+
+The option '-o, --output' now creates missing intermediate directories when
+writing to a file.
+
+The option '--no-verify' of unzcrash has been renamed to '--no-check'.
+
+The variable MAKEINFO has been added to configure and Makefile.in.
+
+The makefile target 'install-as-lzip' has been removed because '--reproduce'
+needs a lzip compressor (not just a decompressor) named 'lzip' by default.
diff --git a/README b/README
new file mode 100644
index 0000000..97e37ba
--- /dev/null
+++ b/README
@@ -0,0 +1,93 @@
+Description
+
+Lziprecover is a data recovery tool and decompressor for files in the lzip
+compressed data format (.lz). Lziprecover is able to repair slightly damaged
+files (up to one single-byte error per member), produce a correct file by
+merging the good parts of two or more damaged copies, reproduce a missing
+(zeroed) sector using a reference file, extract data from damaged files,
+decompress files, and test integrity of files.
+
+Lziprecover can remove the damaged members from multimember files, for
+example multimember tar.lz archives.
+
+Lziprecover provides random access to the data in multimember files; it only
+decompresses the members containing the desired data.
+
+Lziprecover facilitates the management of metadata stored as trailing data
+in lzip files.
+
+Lziprecover is not a replacement for regular backups, but a last line of
+defense for the case where the backups are also damaged.
+
+The lzip file format is designed for data sharing and long-term archiving,
+taking into account both data integrity and decoder availability:
+
+ * The lzip format provides very safe integrity checking and some data
+ recovery means. The program lziprecover can repair bit flip errors
+ (one of the most common forms of data corruption) in lzip files, and
+ provides data recovery capabilities, including error-checked merging
+ of damaged copies of a file.
+
+ * The lzip format is as simple as possible (but not simpler). The lzip
+ manual provides the source code of a simple decompressor along with a
+ detailed explanation of how it works, so that with only the help of the
+ lzip manual it would be possible for a digital archaeologist to extract
+ the data from a lzip file long after quantum computers eventually
+ render LZMA obsolete.
+
+ * Additionally the lzip reference implementation is copylefted, which
+ guarantees that it will remain free forever.
+
+A nice feature of the lzip format is that a corrupt byte is easier to repair
+the nearer it is to the beginning of the file. Therefore, with the help of
+lziprecover, losing an entire archive just because of a corrupt byte near
+the beginning is a thing of the past.
+
+Compression may be good for long-term archiving. For compressible data,
+multiple compressed copies may provide redundancy in a more useful form and
+may have a better chance of surviving intact than one uncompressed copy
+using the same amount of storage space. This is especially true if the
+format provides recovery capabilities like those of lziprecover, which is
+able to find and combine the good parts of several damaged copies.
+
+Lziprecover is able to recover or decompress files produced by any of the
+compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
+pdlzip.
+
+If the cause of file corruption is a damaged medium, the combination
+GNU ddrescue + lziprecover is the recommended option for recovering data
+from damaged lzip files.
+
+If a file is too damaged for lziprecover to repair it, all the recoverable
+data in all members of the file can be extracted in one step with the
+command 'lziprecover -cd --ignore-errors file.lz > file'.
+
+When recovering data, lziprecover takes as arguments the names of the
+damaged files and writes zero or more recovered files depending on the
+operation selected and whether the recovery succeeded or not. The damaged
+files themselves are kept unchanged.
+
+When decompressing or testing file integrity, lziprecover behaves like lzip
+or lunzip.
+
+To give you an idea of its possibilities, when merging two copies, each of
+them with one damaged area affecting 1 percent of the copy, the probability
+of obtaining a correct file is about 98 percent. With three such copies the
+probability rises to 99.97 percent. For large files (a few MB) with small
+errors (one sector damaged per copy), the probability approaches 100 percent
+even with only two copies. (Supposing that the errors are randomly located
+inside each copy).
+
+The lziprecover package also includes unzcrash, a program written to test
+robustness to decompression of corrupted data, inspired by unzcrash.c from
+Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
+directory to build it. Then try 'unzcrash --help'.
+
+
+Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+This file is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
+
+The file Makefile.in is a data file used by configure to produce the Makefile.
+It has the same copyright owner and permissions as configure itself.
diff --git a/alone_to_lz.cc b/alone_to_lz.cc
new file mode 100644
index 0000000..d67ea5c
--- /dev/null
+++ b/alone_to_lz.cc
@@ -0,0 +1,150 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "mtester.h"
+
+
+namespace {
+
+/* Return the address of a malloc'd buffer containing the file data and
+ the file size in '*file_sizep'. The buffer is at least 20 bytes larger.
+ In case of error, return 0 and do not modify '*file_sizep'.
+*/
+uint8_t * read_file( const int infd, long * const file_sizep,
+ const char * const filename ) // 'filename' is used only in the diagnostics printed below
+ {
+ long buffer_size = 65536; // initial capacity; grown below while the usable part keeps filling up
+ uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
+ if( !buffer ) throw std::bad_alloc();
+
+ long file_size = readblock( infd, buffer, buffer_size - 20 ); // the last 20 bytes of the buffer are never filled by reads
+ while( file_size >= buffer_size - 20 && !errno ) // usable part is full and no error seen: there may be more data
+ {
+ if( buffer_size >= LONG_MAX ) // the buffer can't grow any further
+ { show_file_error( filename, "Input file is larger than LONG_MAX." );
+ std::free( buffer ); return 0; }
+ buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; // double the size, saturating at LONG_MAX to avoid overflow
+ uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
+ if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); } // realloc failure leaves the old block allocated, so free it before throwing
+ buffer = tmp;
+ file_size +=
+ readblock( infd, buffer + file_size, buffer_size - 20 - file_size ); // append after the data already read, again keeping 20 spare bytes
+ }
+ if( errno ) // NOTE(review): relies on readblock (defined elsewhere) leaving errno at 0 when no error occurs - confirm
+ {
+ show_file_error( filename, "Error reading input file", errno );
+ std::free( buffer ); return 0;
+ }
+ *file_sizep = file_size; // written only on success
+ return buffer; // the caller owns the buffer and must release it with std::free
+ }
+
+
+bool validate_ds( unsigned * const dictionary_size ) // clamp '*dictionary_size' into [min_dictionary_size, max_dictionary_size]
+ {
+ if( *dictionary_size < min_dictionary_size )
+ { *dictionary_size = min_dictionary_size; return false; } // too small: raised to the minimum
+ if( *dictionary_size > max_dictionary_size )
+ { *dictionary_size = max_dictionary_size; return false; } // too large: lowered to the maximum
+ return true; // already in range; value left untouched
+ }
+
+} // end namespace
+
+
+int alone_to_lz( const int infd, const Pretty_print & pp ) // convert the LZMA-alone data read from 'infd' to a lzip member written to 'outfd'; returns 0 on success, 1 on read/write error, 2 on unsuitable or corrupt input
+ {
+ enum { lzma_header_size = 13, offset = lzma_header_size - Lzip_header::size }; // the lzip header is written at buffer + offset, overlaying the end of the 13-byte input header
+ long file_size = 0;
+ uint8_t * const buffer = read_file( infd, &file_size, pp.name() );
+ if( !buffer ) return 1; // read_file has already printed the diagnostic
+ if( file_size < lzma_header_size )
+ { show_file_error( pp.name(), "Input file is too short." );
+ std::free( buffer ); return 2; }
+
+ if( buffer[0] != 93 ) // (45 * 2) + (9 * 0) + 3
+ {
+ const Lzip_header & header = *(const Lzip_header *)buffer; // reinterpret the start of the file as a lzip header just to choose the message
+ if( header.check() )
+ show_file_error( pp.name(), "Input file is already in lzip format." );
+ else
+ show_file_error( pp.name(), "Input file has non-default LZMA properties." );
+ std::free( buffer ); return 2;
+ }
+ for( int i = 5; i < 13; ++i ) if( buffer[i] != 0xFF ) // header bytes 5..12 must all be 0xFF, else the file is rejected as non-streamed
+ { show_file_error( pp.name(), "Input file is non-streamed." );
+ std::free( buffer ); return 2; }
+
+ if( verbosity >= 1 ) pp();
+ unsigned dictionary_size = 0;
+ for( int i = 4; i > 0; --i ) // assemble header bytes 1..4 as a little-endian 32-bit value
+ { dictionary_size <<= 8; dictionary_size += buffer[i]; }
+ const unsigned orig_dictionary_size = dictionary_size; // kept to report an oversized dictionary after the clamp below
+ validate_ds( &dictionary_size ); // clamp into the valid range; the return value is deliberately ignored
+ Lzip_header & header = *(Lzip_header *)( buffer + offset );
+ header.set_magic();
+ header.dictionary_size( dictionary_size );
+ for( int i = 0; i < Lzip_trailer::size; ++i ) buffer[file_size++] = 0; // append a zeroed trailer; assumes Lzip_trailer::size fits in the 20 spare bytes read_file reserves - TODO confirm
+ // compute and fill trailer
+ {
+ LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size );
+ const int result = mtester.test_member(); // NOTE(review): result codes are defined in mtester; 3 is presumably the trailer mismatch expected with the zeroed trailer - confirm
+ if( result == 1 && orig_dictionary_size > max_dictionary_size )
+ { pp( "dictionary size is too large" ); std::free( buffer ); return 2; }
+ if( result != 3 || !mtester.finished() )
+ { pp( "file is corrupt" ); std::free( buffer ); return 2; }
+ if( mtester.max_distance() < dictionary_size &&
+ dictionary_size > min_dictionary_size )
+ { // shrink the dictionary size stored in the header to the largest distance reported, but never below the minimum
+ dictionary_size =
+ std::max( mtester.max_distance(), (unsigned)min_dictionary_size );
+ header.dictionary_size( dictionary_size );
+ }
+ Lzip_trailer & trailer =
+ *(Lzip_trailer *)( buffer + file_size - trailer.size ); // the trailer occupies the last bytes of the buffer contents
+ trailer.data_crc( mtester.crc() );
+ trailer.data_size( mtester.data_position() );
+ trailer.member_size( mtester.member_position() );
+ }
+ // check converted member
+ LZ_mtester mtester( buffer + offset, file_size - offset, dictionary_size ); // fresh tester, run over the member with its final header and trailer
+ if( mtester.test_member() != 0 || !mtester.finished() )
+ { pp( "conversion failed" ); std::free( buffer ); return 2; }
+ if( writeblock( outfd, buffer + offset, file_size - offset ) != file_size - offset ) // the first 'offset' bytes of the buffer are not part of the lzip member
+ {
+ show_error( "Error writing output file", errno );
+ std::free( buffer ); return 1;
+ }
+ std::free( buffer );
+ if( verbosity >= 1 ) std::fputs( "done\n", stderr );
+ return 0;
+ }
diff --git a/arg_parser.cc b/arg_parser.cc
new file mode 100644
index 0000000..0c04d8e
--- /dev/null
+++ b/arg_parser.cc
@@ -0,0 +1,197 @@
+/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
+ Copyright (C) 2006-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "arg_parser.h"
+
+
+bool Arg_parser::parse_long_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind ) // parse 'opt' ("--name" or "--name=value"); 'arg' is the following token or null; returns false with error_ set on failure
+ {
+ unsigned len; // length of the option name, not counting the leading "--" nor any "=value" part
+ int index = -1; // index in 'options' of the matching entry, -1 while none is found
+ bool exact = false, ambig = false;
+
+ for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; // empty body: just measure the name
+
+ // Test all long options for either exact match or abbreviated matches.
+ for( int i = 0; options[i].code != 0; ++i )
+ if( options[i].long_name &&
+ std::strncmp( options[i].long_name, &opt[2], len ) == 0 ) // the typed name is a prefix of this long name
+ {
+ if( std::strlen( options[i].long_name ) == len ) // Exact match found
+ { index = i; exact = true; break; }
+ else if( index < 0 ) index = i; // First nonexact match found
+ else if( options[index].code != options[i].code ||
+ options[index].has_arg != options[i].has_arg )
+ ambig = true; // Second or later nonexact match found
+ }
+
+ if( ambig && !exact )
+ {
+ error_ = "option '"; error_ += opt; error_ += "' is ambiguous";
+ return false;
+ }
+
+ if( index < 0 ) // nothing found
+ {
+ error_ = "unrecognized option '"; error_ += opt; error_ += '\'';
+ return false;
+ }
+
+ ++argind; // the option token itself is consumed
+ data.push_back( Record( options[index].code, options[index].long_name ) ); // recorded under the full long name, even if abbreviated on input
+
+ if( opt[len+2] ) // '--<long_option>=<argument>' syntax
+ {
+ if( options[index].has_arg == no )
+ {
+ error_ = "option '--"; error_ += options[index].long_name;
+ error_ += "' doesn't allow an argument";
+ return false;
+ }
+ if( options[index].has_arg == yes && !opt[len+3] ) // "--name=" with nothing after the '='
+ {
+ error_ = "option '--"; error_ += options[index].long_name;
+ error_ += "' requires an argument";
+ return false;
+ }
+ data.back().argument = &opt[len+3]; // text after the '='
+ return true;
+ }
+
+ if( options[index].has_arg == yes ) // required argument must come from the next token
+ {
+ if( !arg || !arg[0] )
+ {
+ error_ = "option '--"; error_ += options[index].long_name;
+ error_ += "' requires an argument";
+ return false;
+ }
+ ++argind; data.back().argument = arg; // the next token is consumed too
+ return true;
+ }
+
+ return true; // option without argument ('maybe' options take one only via the '=' syntax)
+ }
+
+
+bool Arg_parser::parse_short_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind ) // parse the short options clustered in 'opt' (e.g. "-abc" or "-ovalue"); 'arg' is the following token or null; returns false with error_ set on failure
+ {
+ int cind = 1; // character index in opt
+
+ while( cind > 0 ) // cind is reset to 0 once the whole token has been consumed
+ {
+ int index = -1; // index in 'options' of the entry matching the current character
+ const unsigned char c = opt[cind];
+
+ if( c != 0 )
+ for( int i = 0; options[i].code; ++i )
+ if( c == options[i].code )
+ { index = i; break; }
+
+ if( index < 0 )
+ {
+ error_ = "invalid option -- '"; error_ += c; error_ += '\'';
+ return false;
+ }
+
+ data.push_back( Record( c ) );
+ if( opt[++cind] == 0 ) { ++argind; cind = 0; } // opt finished
+
+ if( options[index].has_arg != no && cind > 0 && opt[cind] ) // the rest of the token is this option's argument
+ {
+ data.back().argument = &opt[cind]; ++argind; cind = 0;
+ }
+ else if( options[index].has_arg == yes ) // required argument must come from the next token
+ {
+ if( !arg || !arg[0] )
+ {
+ error_ = "option requires an argument -- '"; error_ += c;
+ error_ += '\'';
+ return false;
+ }
+ data.back().argument = arg; ++argind; cind = 0; // second increment of argind: the next token is consumed too
+ }
+ }
+ return true;
+ }
+
+
+Arg_parser::Arg_parser( const int argc, const char * const argv[],
+ const Option options[], const bool in_order ) // parse a whole command line into 'data'; on error 'data' is emptied and error_ holds the message
+ {
+ if( argc < 2 || !argv || !options ) return; // nothing to parse
+
+ std::vector< const char * > non_options; // skipped non-options
+ int argind = 1; // index in argv
+
+ while( argind < argc )
+ {
+ const unsigned char ch1 = argv[argind][0];
+ const unsigned char ch2 = ch1 ? argv[argind][1] : 0; // don't read past the terminator of an empty string
+
+ if( ch1 == '-' && ch2 ) // we found an option
+ {
+ const char * const opt = argv[argind];
+ const char * const arg = ( argind + 1 < argc ) ? argv[argind+1] : 0; // candidate argument: the next token, if any
+ if( ch2 == '-' )
+ {
+ if( !argv[argind][2] ) { ++argind; break; } // we found "--"
+ else if( !parse_long_option( opt, arg, options, argind ) ) break; // stop at the first error
+ }
+ else if( !parse_short_option( opt, arg, options, argind ) ) break; // stop at the first error
+ }
+ else // a lone "-" is also treated as a non-option
+ {
+ if( in_order ) data.push_back( Record( argv[argind++] ) ); // keep the position the user typed it in
+ else non_options.push_back( argv[argind++] ); // defer until all options have been recorded
+ }
+ }
+ if( !error_.empty() ) data.clear(); // discard partial results on error
+ else
+ {
+ for( unsigned i = 0; i < non_options.size(); ++i ) // deferred non-options go after the options
+ data.push_back( Record( non_options[i] ) );
+ while( argind < argc ) // everything after "--" is a non-option
+ data.push_back( Record( argv[argind++] ) );
+ }
+ }
+
+
+Arg_parser::Arg_parser( const char * const opt, const char * const arg,
+ const Option options[] ) // parse the single token 'opt' with optional argument 'arg'; on error 'data' is emptied and error_ holds the message
+ {
+ if( !opt || !opt[0] || !options ) return; // nothing to parse
+
+ if( opt[0] == '-' && opt[1] ) // we found an option
+ {
+ int argind = 1; // dummy
+ if( opt[1] == '-' )
+ { if( opt[2] ) parse_long_option( opt, arg, options, argind ); } // a bare "--" produces no record
+ else
+ parse_short_option( opt, arg, options, argind );
+ if( !error_.empty() ) data.clear(); // discard partial results on error
+ }
+ else data.push_back( Record( opt ) ); // anything else (including a lone "-") is a non-option
+ }
diff --git a/arg_parser.h b/arg_parser.h
new file mode 100644
index 0000000..1eeec9a
--- /dev/null
+++ b/arg_parser.h
@@ -0,0 +1,110 @@
+/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
+ Copyright (C) 2006-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+/* Arg_parser reads the arguments in 'argv' and creates a number of
+ option codes, option arguments, and non-option arguments.
+
+ In case of error, 'error' returns a non-empty error message.
+
+ 'options' is an array of 'struct Option' terminated by an element
+ containing a code which is zero. A null long_name means a short-only
+ option. A code value outside the unsigned char range means a long-only
+ option.
+
+ Arg_parser normally makes it appear as if all the option arguments
+ were specified before all the non-option arguments for the purposes
+ of parsing, even if the user of your program intermixed option and
+ non-option arguments. If you want the arguments in the exact order
+ the user typed them, call 'Arg_parser' with 'in_order' = true.
+
+ The argument '--' terminates all options; any following arguments are
+ treated as non-option arguments, even if they begin with a hyphen.
+
+ The syntax for optional option arguments is '-<short_option><argument>'
+ (without whitespace), or '--<long_option>=<argument>'.
+*/
+
+class Arg_parser
+ {
+public:
+ enum Has_arg { no, yes, maybe }; // the option takes no argument, requires one, or accepts an optional one
+
+ struct Option
+ {
+ int code; // Short option letter or code ( code != 0 )
+ const char * long_name; // Long option name (maybe null)
+ Has_arg has_arg;
+ };
+
+private:
+ struct Record // one parsed item: an option (code != 0) or a non-option (code == 0)
+ {
+ int code;
+ std::string parsed_name;
+ std::string argument;
+ explicit Record( const unsigned char c ) // short option: parsed_name becomes "-" followed by the letter
+ : code( c ), parsed_name( "-" ) { parsed_name += c; }
+ Record( const int c, const char * const long_name ) // long option: parsed_name becomes "--" followed by the long name
+ : code( c ), parsed_name( "--" ) { parsed_name += long_name; }
+ explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} // non-option: the text is stored as 'argument'
+ };
+
+ const std::string empty_arg; // returned by reference when an index is out of range
+ std::string error_; // empty unless parsing failed
+ std::vector< Record > data; // parsed items in the order produced by the parser
+
+ bool parse_long_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind );
+ bool parse_short_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind );
+
+public:
+ Arg_parser( const int argc, const char * const argv[],
+ const Option options[], const bool in_order = false );
+
+ // Restricted constructor. Parses a single token and argument (if any).
+ Arg_parser( const char * const opt, const char * const arg,
+ const Option options[] );
+
+ const std::string & error() const { return error_; }
+
+ // The number of arguments parsed. May be different from argc.
+ int arguments() const { return data.size(); }
+
+ /* If code( i ) is 0, argument( i ) is a non-option.
+ Else argument( i ) is the option's argument (or empty). */
+ int code( const int i ) const
+ {
+ if( i >= 0 && i < arguments() ) return data[i].code;
+ else return 0; // out of range
+ }
+
+ // Full name of the option parsed (short or long).
+ const std::string & parsed_name( const int i ) const
+ {
+ if( i >= 0 && i < arguments() ) return data[i].parsed_name;
+ else return empty_arg; // out of range
+ }
+
+ const std::string & argument( const int i ) const
+ {
+ if( i >= 0 && i < arguments() ) return data[i].argument;
+ else return empty_arg; // out of range
+ }
+ };
diff --git a/byte_repair.cc b/byte_repair.cc
new file mode 100644
index 0000000..370738b
--- /dev/null
+++ b/byte_repair.cc
@@ -0,0 +1,520 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "mtester.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+bool pending_newline = false;	// set after printing a progress line
+
+// Finish a pending progress line with a newline, unless 'terminator' is
+// itself a newline. The pending flag is always cleared.
+void print_pending_newline( const char terminator )
+  {
+  const bool needs_newline = pending_newline && terminator != '\n';
+  pending_newline = false;
+  if( needs_newline ) std::fputc( '\n', stdout );
+  }
+
+
+// Return true if the member in 'mbuffer' contains, between its header and
+// the last (trailer size + max_run) bytes, a byte followed by at least
+// max_run identical bytes.
+bool gross_damage( const uint8_t * const mbuffer, const long msize )
+  {
+  enum { max_run = 7 };		// max number of consecutive identical bytes
+  const long limit = msize - Lzip_trailer::size - max_run;
+  long idx = Lzip_header::size;
+  while( idx < limit )
+    {
+    const uint8_t first = mbuffer[idx];
+    int repeats = 0;			// does not count the first byte
+    for( ++idx; mbuffer[idx] == first; ++idx )
+      {
+      ++repeats;
+      if( repeats >= max_run ) return true;
+      }
+    }
+  return false;
+  }
+
+
+/* Check whether the dictionary size stored in the header of the member in
+   'mbuffer' may be wrong. If a candidate size makes the member pass
+   test_member, store that size in the header.
+   Return value: 0 = no change, 5 = repaired pos */
+int repair_dictionary_size( uint8_t * const mbuffer, const long msize )
+  {
+  Lzip_header & header = *(Lzip_header *)mbuffer;
+  unsigned dictionary_size = header.dictionary_size();
+  // The trailer size is named through the class. A reference must not be
+  // used inside its own initializer.
+  const Lzip_trailer & trailer =
+    *(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size );
+  const unsigned long long data_size = trailer.data_size();
+  const bool valid_ds = isvalid_ds( dictionary_size );
+  if( valid_ds && dictionary_size >= data_size ) return 0;	// can't be bad
+
+  const unsigned long long dictionary_size_9 = 1 << 25;	// dict size of opt -9
+  if( !valid_ds || dictionary_size < dictionary_size_9 )
+    {
+    // First candidate: the data size, kept between min_dictionary_size
+    // and the dictionary size of option -9.
+    dictionary_size = std::min( data_size, dictionary_size_9 );
+    if( dictionary_size < min_dictionary_size )
+      dictionary_size = min_dictionary_size;
+    LZ_mtester mtester( mbuffer, msize, dictionary_size );
+    const int result = mtester.test_member();
+    if( result == 0 )
+      { header.dictionary_size( dictionary_size ); return 5; }	// fix DS
+    // Try the larger candidate only if the test returned 1 and found a
+    // distance larger than the candidate but not larger than the maximum.
+    if( result != 1 || mtester.max_distance() <= dictionary_size ||
+        mtester.max_distance() > max_dictionary_size ) return 0;
+    }
+  if( data_size > dictionary_size_9 )
+    {
+    // Second candidate: the data size, limited to max_dictionary_size.
+    dictionary_size =
+      std::min( data_size, (unsigned long long)max_dictionary_size );
+    LZ_mtester mtester( mbuffer, msize, dictionary_size );
+    if( mtester.test_member() == 0 )
+      { header.dictionary_size( dictionary_size ); return 5; }	// fix DS
+    }
+  return 0;
+  }
+
+
+/* Create a tester for the member in 'buffer' and run it up to 'pos_limit'.
+   Returns the new tester (owned by the caller, who must delete it) if
+   test_member( pos_limit ) returns -1, else deletes it and returns 0.
+   (-1 presumably means that pos_limit was reached without error; confirm
+   in mtester.cc.) */
+const LZ_mtester * prepare_master( const uint8_t * const buffer,
+                                   const long buffer_size,
+                                   const unsigned long pos_limit,
+                                   const unsigned dictionary_size )
+  {
+  LZ_mtester * const candidate =
+    new LZ_mtester( buffer, buffer_size, dictionary_size );
+  const bool usable = ( candidate->test_member( pos_limit ) == -1 );
+  if( !usable ) { delete candidate; return 0; }
+  return candidate;
+  }
+
+
+/* Test the rest of the member with a copy of 'master' that uses 'buffer2'
+   as its buffer. Returns true if the copy tests OK and is finished. Else
+   returns false and, if 'failure_posp' is not null, stores in it the
+   member position reached by the copy. */
+bool test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
+                       long * const failure_posp = 0 )
+  {
+  LZ_mtester mtester( master );		// tester with external buffer
+  mtester.duplicate_buffer( buffer2 );
+  const bool passed = ( mtester.test_member() == 0 ) && mtester.finished();
+  if( !passed && failure_posp ) *failure_posp = mtester.member_position();
+  return passed;
+  }
+
+
+/* Try the 255 alternative values of mbuffer[pos]. Returns true, leaving
+   the good value in place, as soon as the rest of the member tests OK.
+   Else restores the original value and returns false. */
+bool try_byte_values( uint8_t * const mbuffer, const long pos,
+                      const LZ_mtester & master, uint8_t * const buffer2 )
+  {
+  for( int tries = 0; tries < 255; ++tries )
+    {
+    ++mbuffer[pos];
+    if( test_member_rest( master, buffer2 ) ) return true;
+    }
+  ++mbuffer[pos];		// the 256th increment wraps to the original value
+  return false;
+  }
+
+
+/* Search backwards from 'end' down to 'begin' (but no more than 50000
+   positions) for a byte whose replacement makes the member test OK.
+   A new master is prepared for every group of up to 101 positions.
+   Return value: -1 = master failed, 0 = begin reached, > 0 = repaired pos */
+long repair_member( uint8_t * const mbuffer, const long long mpos,
+                    const long msize, const long begin, const long end,
+                    const unsigned dictionary_size, const char terminator )
+  {
+  uint8_t * const buffer2 = new uint8_t[dictionary_size];
+  long result = 0;
+  bool repaired = false;
+  long pos = end;
+  while( !repaired && pos >= begin && pos > end - 50000 )
+    {
+    const long min_pos = std::max( begin, pos - 100 );
+    const unsigned long pos_limit = std::max( min_pos - 16, 0L );
+    const LZ_mtester * const master =
+      prepare_master( mbuffer, msize, pos_limit, dictionary_size );
+    if( !master ) { result = -1; break; }
+    while( pos >= min_pos )
+      {
+      if( verbosity >= 2 )
+        {
+        std::printf( " Trying position %llu %c", mpos + pos, terminator );
+        std::fflush( stdout ); pending_newline = true;
+        }
+      if( try_byte_values( mbuffer, pos, *master, buffer2 ) )
+        { result = pos; repaired = true; break; }
+      --pos;
+      }
+    delete master;
+    }
+  delete[] buffer2;
+  return result;
+  }
+
+} // end namespace
+
+
+/* Write 'size' bytes from 'buf' to 'fd' at file position 'pos'.
+   Returns the value returned by writeblock, or 0 if the seek fails. */
+long seek_write( const int fd, const uint8_t * const buf, const long size,
+                 const long long pos )
+  {
+  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
+  return positioned ? writeblock( fd, buf, size ) : 0;
+  }
+
+
+/* Read the 'msize' bytes of the member at file position 'mpos' into a
+   newly allocated buffer. Returns the buffer (the caller must delete[]
+   it), or 0 after reporting the error if the size is out of range, the
+   seek fails, or fewer than 'msize' bytes can be read. */
+uint8_t * read_member( const int infd, const long long mpos,
+                       const long long msize, const char * const filename )
+  {
+  const bool size_ok = ( msize > 0 && msize <= LONG_MAX );
+  if( !size_ok )
+    {
+    show_file_error( filename,
+                     "Input file contains member larger than LONG_MAX." );
+    return 0;
+    }
+  if( !safe_seek( infd, mpos, filename ) ) return 0;
+
+  uint8_t * const buffer = new uint8_t[msize];
+  if( readblock( infd, buffer, msize ) == msize ) return buffer;
+  show_file_error( filename, "Error reading input file", errno );
+  delete[] buffer;
+  return 0;
+  }
+
+
+/* Try to repair every damaged member of 'input_filename', assuming that
+   the damage in each member is a single wrong byte, and write a repaired
+   copy of the whole file. The copy is named 'default_output_filename' if
+   that is not empty, else the name returned by insert_fixed. The copy is
+   created only when the first member is repaired.
+   Return value: 0 = copy repaired or input has no errors, 1 = error, or
+   the retval of Lzip_index if the index can't be built. Members that
+   can't be repaired end in cleanup_and_fail (presumably it does not
+   return; confirm in main.cc). */
+int byte_repair( const std::string & input_filename,
+                 const std::string & default_output_filename,
+                 const Cl_options & cl_opts,
+                 const char terminator, const bool force )
+  {
+  const char * const filename = input_filename.c_str();
+  struct stat in_stats;
+  const int infd = open_instream( filename, &in_stats, false, true );
+  if( infd < 0 ) return 1;
+
+  const Lzip_index lzip_index( infd, cl_opts, true );
+  if( lzip_index.retval() != 0 )
+    { show_file_error( filename, lzip_index.error().c_str() );
+      return lzip_index.retval(); }
+
+  const bool to_file = default_output_filename.size();
+  output_filename =
+    to_file ? default_output_filename : insert_fixed( input_filename );
+  if( !force && output_file_exists() ) return 1;
+  outfd = -1;				// output not yet created
+  for( long i = 0; i < lzip_index.members(); ++i )
+    {
+    const long long mpos = lzip_index.mblock( i ).pos();
+    const long long msize = lzip_index.mblock( i ).size();
+    if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 );
+    long long failure_pos = 0;
+    if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue;
+    if( failure_pos < Lzip_header::size )		// End Of File
+      { show_error( "Can't repair error in input file." );
+        cleanup_and_fail( 2 ); }
+    // Keep the search limit before the last 8 bytes of the member.
+    if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
+
+    if( verbosity >= 2 )		// damaged member found
+      {
+      std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n",
+                   i + 1, lzip_index.members(), mpos + failure_pos );
+      std::fflush( stdout );
+      }
+    uint8_t * const mbuffer = read_member( infd, mpos, msize, filename );
+    if( !mbuffer ) cleanup_and_fail( 1 );
+    const Lzip_header & header = *(const Lzip_header *)mbuffer;
+    const unsigned dictionary_size = header.dictionary_size();
+    long pos = 0;
+    if( !gross_damage( mbuffer, msize ) )
+      {
+      // Try the dictionary size first, then the 6 bytes after the header,
+      // then the bytes from there up to the failure position.
+      pos = repair_dictionary_size( mbuffer, msize );
+      if( pos == 0 )
+        pos = repair_member( mbuffer, mpos, msize, header.size + 1,
+                             header.size + 6, dictionary_size, terminator );
+      if( pos == 0 )
+        pos = repair_member( mbuffer, mpos, msize, header.size + 7,
+                             failure_pos, dictionary_size, terminator );
+      print_pending_newline( terminator );
+      }
+    if( pos < 0 )
+      { show_error( "Can't prepare master." ); cleanup_and_fail( 1 ); }
+    if( pos > 0 )
+      {
+      if( outfd < 0 )		// first damaged member repaired
+        {
+        // Free the member buffer before leaving through these error paths.
+        if( !safe_seek( infd, 0, filename ) )
+          { delete[] mbuffer; return 1; }
+        set_signal_handler();
+        if( !open_outstream( true, true, false, true, to_file ) )
+          { delete[] mbuffer; return 1; }
+        if( !copy_file( infd, outfd ) )		// copy whole file
+          cleanup_and_fail( 1 );
+        }
+      // Only the repaired byte is overwritten in the copy.
+      if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
+        { show_error( "Error writing output file", errno );
+          cleanup_and_fail( 1 ); }
+      }
+    delete[] mbuffer;
+    if( pos == 0 )
+      {
+      show_error( "Can't repair input file. Error is probably larger than 1 byte." );
+      cleanup_and_fail( 2 );
+      }
+    }
+
+  if( outfd < 0 )
+    {
+    if( verbosity >= 1 )
+      std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
+    return 0;
+    }
+  if( !close_outstream( &in_stats ) ) return 1;
+  if( verbosity >= 1 )
+    std::fputs( "Copy of input file repaired successfully.\n", stdout );
+  return 0;
+  }
+
+
+int debug_delay( const char * const input_filename,
+ const Cl_options & cl_opts, Block range,
+ const char terminator ) /* Debug aid: for every byte position of 'range'
+    inside a member, try each other value of that byte and measure the
+    delay, i.e. the distance from the altered position to the position
+    where test_member_rest reports the failure. Each new maximum delay is
+    printed when verbosity >= 2.
+    Return value: 0 = done or nothing to do, 1 = error, or the retval of
+    Lzip_index if the index can't be built. */
+ {
+ struct stat in_stats; // not used
+ const int infd = open_instream( input_filename, &in_stats, false, true );
+ if( infd < 0 ) return 1;
+
+ const Lzip_index lzip_index( infd, cl_opts );
+ if( lzip_index.retval() != 0 )
+ { show_file_error( input_filename, lzip_index.error().c_str() );
+ return lzip_index.retval(); }
+
+ if( range.end() > lzip_index.cdata_size() ) // clip range to cdata_size
+ range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
+ if( range.size() <= 0 )
+ { show_file_error( input_filename, "Nothing to do." ); return 0; }
+
+ for( long i = 0; i < lzip_index.members(); ++i )
+ {
+ const Block & mb = lzip_index.mblock( i );
+ if( !range.overlaps( mb ) ) continue;
+ const long long mpos = lzip_index.mblock( i ).pos();
+ const long long msize = lzip_index.mblock( i ).size();
+ const unsigned dictionary_size = lzip_index.dictionary_size( i );
+ if( verbosity >= 2 )
+ {
+ std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n",
+ i + 1, lzip_index.members(), mpos, msize );
+ std::fflush( stdout );
+ }
+ uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
+ if( !mbuffer ) return 1;
+ uint8_t * const buffer2 = new uint8_t[dictionary_size];
+ long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL ); // positions are relative to the member; never before header size + 1
+ const long end = std::min( range.end() - mpos, msize );
+ long max_delay = 0;
+ while( pos < end )
+ {
+ const unsigned long pos_limit = std::max( pos - 16, 0L ); // the master stops 16 bytes before the first position tested
+ const LZ_mtester * master =
+ prepare_master( mbuffer, msize, pos_limit, dictionary_size );
+ if( !master ) { show_error( "Can't prepare master." );
+ delete[] buffer2; delete[] mbuffer; return 1; }
+ const long partial_end = std::min( pos + 100, end ); // each master serves up to 100 positions
+ for( ; pos < partial_end; ++pos )
+ {
+ if( verbosity >= 2 )
+ {
+ std::printf( " Delays at position %llu %c", mpos + pos, terminator );
+ std::fflush( stdout ); pending_newline = true;
+ }
+ int value = -1; // byte value giving a new max delay at this position, if any
+ for( int j = 0; j < 256; ++j )
+ {
+ ++mbuffer[pos];
+ if( j == 255 ) break; // the 256th increment restores the original value
+ long failure_pos = 0;
+ if( test_member_rest( *master, buffer2, &failure_pos ) ) continue; // this value is not detected as an error
+ const long delay = failure_pos - pos;
+ if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
+ }
+ if( value >= 0 && verbosity >= 2 )
+ {
+ std::printf( " New max delay %lu at position %llu (0x%02X)\n",
+ max_delay, mpos + pos, value );
+ std::fflush( stdout ); pending_newline = false;
+ }
+ if( pos + max_delay >= msize ) { pos = end; break; } // stop scanning this member
+ }
+ delete master;
+ }
+ delete[] buffer2;
+ delete[] mbuffer;
+ print_pending_newline( terminator );
+ }
+
+ if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
+ return 0;
+ }
+
+
+/* Debug aid: set the byte at file position bad_byte.pos to a bad value
+   (in memory only) and check that the repair code is able to fix the
+   member containing it.
+   Return value: 0 = member repaired, bad value not detected, or position
+   outside of any member; 1 = error; 2 = member was already damaged; or
+   the retval of Lzip_index if the index can't be built. A failed repair
+   ends in internal_error. */
+int debug_byte_repair( const char * const input_filename,
+                       const Cl_options & cl_opts, const Bad_byte & bad_byte,
+                       const char terminator )
+  {
+  struct stat in_stats;				// not used
+  const int infd = open_instream( input_filename, &in_stats, false, true );
+  if( infd < 0 ) return 1;
+
+  const Lzip_index lzip_index( infd, cl_opts );
+  if( lzip_index.retval() != 0 )
+    { show_file_error( input_filename, lzip_index.error().c_str() );
+      return lzip_index.retval(); }
+
+  // Find the member containing the byte to be damaged.
+  long idx = 0;
+  for( ; idx < lzip_index.members(); ++idx )
+    if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
+  if( idx >= lzip_index.members() )
+    { show_file_error( input_filename, "Nothing to do." ); return 0; }
+
+  const long long mpos = lzip_index.mblock( idx ).pos();
+  const long long msize = lzip_index.mblock( idx ).size();
+  {
+  // The member must be intact before the damage is applied.
+  long long failure_pos = 0;
+  if( !safe_seek( infd, mpos, input_filename ) ) return 1;
+  if( test_member_from_file( infd, msize, &failure_pos ) != 0 )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "Member %ld of %ld already damaged (failure pos = %llu)\n",
+                    idx + 1, lzip_index.members(), mpos + failure_pos );
+    return 2;
+    }
+  }
+  uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
+  if( !mbuffer ) return 1;
+  const Lzip_header & header = *(const Lzip_header *)mbuffer;
+  const unsigned dictionary_size = header.dictionary_size();	// undamaged DS
+  const long long bad_pos = bad_byte.pos - mpos;	// position in the member
+  const uint8_t good_value = mbuffer[bad_pos];
+  const uint8_t bad_value = bad_byte( good_value );
+  mbuffer[bad_pos] = bad_value;
+  long failure_pos = 0;
+  /* Byte 5 of the member header holds the coded dictionary size. If that
+     is the damaged byte and the result is invalid, the member is not
+     tested with it. The comparison uses the position relative to the
+     member so that the guard works for every member, not just for a
+     member starting at file offset 0. */
+  if( bad_pos != 5 || isvalid_ds( header.dictionary_size() ) )
+    {
+    LZ_mtester mtester( mbuffer, msize, header.dictionary_size() );
+    if( mtester.test_member() == 0 && mtester.finished() )
+      {
+      if( verbosity >= 1 )
+        std::fputs( "Member decompressed with no errors.\n", stdout );
+      delete[] mbuffer;
+      return 0;
+      }
+    failure_pos = mtester.member_position();
+    }
+  if( verbosity >= 2 )
+    {
+    std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n"
+                 " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n",
+                 idx + 1, lzip_index.members(), mpos, msize,
+                 bad_byte.pos, good_value, bad_value, mpos + failure_pos,
+                 mpos + failure_pos - bad_byte.pos );
+    std::fflush( stdout );
+    }
+  if( failure_pos >= msize ) failure_pos = msize - 1;
+  // Same repair sequence as byte_repair.
+  long pos = repair_dictionary_size( mbuffer, msize );
+  if( pos == 0 )
+    pos = repair_member( mbuffer, mpos, msize, header.size + 1,
+                         header.size + 6, dictionary_size, terminator );
+  if( pos == 0 )
+    pos = repair_member( mbuffer, mpos, msize, header.size + 7,
+                         failure_pos, dictionary_size, terminator );
+  print_pending_newline( terminator );
+  delete[] mbuffer;
+  if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; }
+  if( pos == 0 ) internal_error( "can't repair input file." );
+  if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout );
+  return 0;
+  }
+
+
+/* If show_packets is true, print to stdout descriptions of the decoded LZMA
+ packets. Print also some global values; total number of packets in
+ member, max distance (rep0) and its file position, max LZMA packet size
+ in each member and the file position of these packets.
+ (Packet sizes are a fractionary number of bytes. The packet and marker
+ sizes shown by option -X are the number of extra bytes required to decode
+ the packet, not counting the data present in the range decoder before and
+ after the decoding. The max marker size of a 'Sync Flush marker' does not
+ include the 5 bytes read by rdec.load).
+ if bad_byte.pos >= cdata_size, bad_byte is ignored.
+ If show_packets is false, outfd is set to STDOUT_FILENO instead of -1.
+ Return value: 0 = OK, 1 = a member can't be read or close_outstream
+ fails, 2 = invalid dictionary size or decoding error, or the retval of
+ Lzip_index if the index can't be built.
+*/
+int debug_decompress( const char * const input_filename,
+ const Cl_options & cl_opts, const Bad_byte & bad_byte,
+ const bool show_packets )
+ {
+ struct stat in_stats;
+ const int infd = open_instream( input_filename, &in_stats, false, true );
+ if( infd < 0 ) return 1;
+
+ const Lzip_index lzip_index( infd, cl_opts );
+ if( lzip_index.retval() != 0 )
+ { show_file_error( input_filename, lzip_index.error().c_str() );
+ return lzip_index.retval(); }
+
+ outfd = show_packets ? -1 : STDOUT_FILENO; // no output descriptor while listing packets
+ int retval = 0;
+ for( long i = 0; i < lzip_index.members(); ++i )
+ {
+ const long long dpos = lzip_index.dblock( i ).pos();
+ const long long mpos = lzip_index.mblock( i ).pos();
+ const long long msize = lzip_index.mblock( i ).size();
+ const unsigned dictionary_size = lzip_index.dictionary_size( i );
+ if( verbosity >= 1 && show_packets )
+ std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n"
+ " mpos dpos\n",
+ i + 1, lzip_index.members(), mpos, msize );
+ if( !isvalid_ds( dictionary_size ) )
+ { show_error( bad_dict_msg ); retval = 2; break; }
+ uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
+ if( !mbuffer ) { retval = 1; break; }
+ if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) ) // damage the byte in memory only
+ {
+ const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
+ const uint8_t bad_value = bad_byte( good_value );
+ mbuffer[bad_byte.pos-mpos] = bad_value;
+ if( verbosity >= 1 && show_packets )
+ std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n",
+ bad_byte.pos, good_value, bad_value );
+ }
+ LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd );
+ const int result = mtester.debug_decode_member( dpos, mpos, show_packets );
+ delete[] mbuffer; // NOTE(review): mtester is still queried below; presumably those accessors do not read mbuffer, confirm in mtester.h
+ if( show_packets )
+ {
+ const std::vector< unsigned long long > & mppv = mtester.max_packet_posv();
+ const unsigned mpackets = mppv.size();
+ std::printf( "Total packets in member = %llu\n"
+ "Max distance in any match = %u at file position %llu\n"
+ "Max marker size found = %u\n"
+ "Max packet size found = %u (%u packets)%s",
+ mtester.total_packets(), mtester.max_distance(),
+ mtester.max_distance_pos(), mtester.max_marker_size(),
+ mtester.max_packet_size(), mpackets,
+ mpackets ? " at file positions" : "" );
+ for( unsigned i = 0; i < mpackets; ++i ) // this 'i' shadows the member index of the outer loop
+ std::printf( " %llu", mppv[i] );
+ std::fputc( '\n', stdout );
+ }
+ if( result != 0 )
+ {
+ if( verbosity >= 0 && result <= 2 && show_packets )
+ std::printf( "%s at pos %llu\n", ( result == 2 ) ?
+ "File ends unexpectedly" : "Decoder error",
+ mpos + mtester.member_position() );
+ retval = 2;
+ if( result != 3 || !mtester.finished() || mtester.data_position() != // go on to the next member only if result is 3, the tester is finished, and the data size matches the index
+ (unsigned long long)lzip_index.dblock( i ).size() ) break;
+ }
+ if( i + 1 < lzip_index.members() && show_packets )
+ std::fputc( '\n', stdout );
+ }
+
+ if( !close_outstream( &in_stats ) && retval == 0 ) retval = 1;
+ if( verbosity >= 1 && show_packets && retval == 0 )
+ std::fputs( "Done.\n", stdout );
+ return retval;
+ }
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..56f6298
--- /dev/null
+++ b/common.h
@@ -0,0 +1,48 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Description of a byte to be damaged: its position, the option text it
+   came from, and how the bad value is derived from the good one. */
+struct Bad_byte
+  {
+  enum Mode { literal, delta, flip };
+  long long pos;		// -1 until set
+  const char * argument;
+  const char * option_name;
+  Mode mode;
+  uint8_t value;
+
+  Bad_byte()
+    : pos( -1 ), argument( 0 ), option_name( 0 ), mode( literal ),
+      value( 0 ) {}
+
+  /* Return the bad value for 'old_value': 'value' added to it (delta),
+     xored with it (flip), or 'value' itself (literal). */
+  uint8_t operator()( const uint8_t old_value ) const
+    {
+    switch( mode )
+      {
+      case delta: return old_value + value;
+      case flip:  return old_value ^ value;
+      default:    return value;
+      }
+    }
+
+  void parse_bb( const char * const arg, const char * const pn );
+  };
+
+
+const char * const mem_msg = "Not enough memory.";
+
+// defined in main_common.cc
+void show_error( const char * const msg, const int errcode = 0,
+ const bool help = false );
+void show_file_error( const char * const filename, const char * const msg,
+ const int errcode = 0 );
+void internal_error( const char * const msg );
diff --git a/configure b/configure
new file mode 100755
index 0000000..b753b20
--- /dev/null
+++ b/configure
@@ -0,0 +1,198 @@
+#! /bin/sh
+# configure script for Lziprecover - Data recovery tool for the lzip format
+# Copyright (C) 2009-2024 Antonio Diaz Diaz.
+#
+# This configure script is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+pkgname=lziprecover
+pkgversion=1.24
+progname=lziprecover
+srctrigger=doc/${pkgname}.texi # file whose presence identifies the source directory
+
+# clear some things potentially inherited from environment.
+LC_ALL=C
+export LC_ALL
+srcdir=
+prefix=/usr/local
+exec_prefix='$(prefix)' # single-quoted: written literally to the Makefile for make to expand
+bindir='$(exec_prefix)/bin'
+datarootdir='$(prefix)/share'
+infodir='$(datarootdir)/info'
+mandir='$(datarootdir)/man'
+CXX=g++
+CPPFLAGS=
+CXXFLAGS='-Wall -W -O2'
+LDFLAGS=
+MAKEINFO=makeinfo
+
+# checking whether we are using GNU C++.
+/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } # if g++ can't be run, fall back to c++ with plain -O2
+
+# Loop over all args
+args= # quoted copy of the command line, reused in config.status
+no_create=
+while [ $# != 0 ] ; do
+
+ # Get the first arg, and shuffle
+ option=$1 ; arg2=no # arg2 becomes yes when the option consumes the next argument
+ shift
+
+ # Add the argument quoted to args
+ if [ -z "${args}" ] ; then args="\"${option}\""
+ else args="${args} \"${option}\"" ; fi
+
+ # Split out the argument for options that take them
+ case ${option} in
+ *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; # text after the first '=', minus one trailing slash
+ esac
+
+ # Process the options
+ case ${option} in
+ --help | -h)
+ echo "Usage: $0 [OPTION]... [VAR=VALUE]..."
+ echo
+ echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as"
+ echo "arguments to configure in the form VAR=VALUE."
+ echo
+ echo "Options and variables: [defaults in brackets]"
+ echo " -h, --help display this help and exit"
+ echo " -V, --version output version information and exit"
+ echo " --srcdir=DIR find the source code in DIR [. or ..]"
+ echo " --prefix=DIR install into DIR [${prefix}]"
+ echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]"
+ echo " --bindir=DIR user executables directory [${bindir}]"
+ echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]"
+ echo " --infodir=DIR info files directory [${infodir}]"
+ echo " --mandir=DIR man pages directory [${mandir}]"
+ echo " CXX=COMPILER C++ compiler to use [${CXX}]"
+ echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]"
+ echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]"
+ echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
+ echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]"
+ echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]"
+ echo
+ exit 0 ;;
+ --version | -V)
+ echo "Configure script for ${pkgname} version ${pkgversion}"
+ exit 0 ;;
+ --srcdir) srcdir=$1 ; arg2=yes ;; # forms taking the value as a separate argument
+ --prefix) prefix=$1 ; arg2=yes ;;
+ --exec-prefix) exec_prefix=$1 ; arg2=yes ;;
+ --bindir) bindir=$1 ; arg2=yes ;;
+ --datarootdir) datarootdir=$1 ; arg2=yes ;;
+ --infodir) infodir=$1 ; arg2=yes ;;
+ --mandir) mandir=$1 ; arg2=yes ;;
+
+ --srcdir=*) srcdir=${optarg} ;; # forms taking the value after '='
+ --prefix=*) prefix=${optarg} ;;
+ --exec-prefix=*) exec_prefix=${optarg} ;;
+ --bindir=*) bindir=${optarg} ;;
+ --datarootdir=*) datarootdir=${optarg} ;;
+ --infodir=*) infodir=${optarg} ;;
+ --mandir=*) mandir=${optarg} ;;
+ --no-create) no_create=yes ;; # passed by config.status so that it is not rewritten
+
+ CXX=*) CXX=${optarg} ;;
+ CPPFLAGS=*) CPPFLAGS=${optarg} ;;
+ CXXFLAGS=*) CXXFLAGS=${optarg} ;;
+ CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
+ LDFLAGS=*) LDFLAGS=${optarg} ;;
+ MAKEINFO=*) MAKEINFO=${optarg} ;;
+
+ --*)
+ echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
+ *=* | *-*-*) ;; # other VAR=VALUE assignments and words like a-b-c are silently ignored
+ *)
+ echo "configure: unrecognized option: '${option}'" 1>&2
+ echo "Try 'configure --help' for more information." 1>&2
+ exit 1 ;;
+ esac
+
+ # Check if the option took a separate argument
+ if [ "${arg2}" = yes ] ; then
+ if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
+ else echo "configure: Missing argument to '${option}'" 1>&2
+ exit 1
+ fi
+ fi
+done
+
+# Find the source code, if location was not specified.
+srcdirtext=
+if [ -z "${srcdir}" ] ; then
+ srcdirtext="or . or .." ; srcdir=. # try '.', then '..', then the directory of this script
+ if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
+ if [ ! -r "${srcdir}/${srctrigger}" ] ; then
+ ## the sed command below emulates the dirname command
+ srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+ fi
+fi
+
+if [ ! -r "${srcdir}/${srctrigger}" ] ; then
+ echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2
+ echo "configure: (At least ${srctrigger} is missing)." 1>&2
+ exit 1
+fi
+
+# Set srcdir to . if that's what it is.
+if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi
+
+echo
+if [ -z "${no_create}" ] ; then # skipped when configure is run from config.status
+ echo "creating config.status"
+ rm -f config.status
+ cat > config.status << EOF
+#! /bin/sh
+# This file was generated automatically by configure. Don't edit.
+# Run this file to recreate the current configuration.
+#
+# This script is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+exec /bin/sh "$0" ${args} --no-create
+EOF
+ chmod +x config.status
+fi
+
+echo "creating Makefile"
+echo "VPATH = ${srcdir}" # show the resulting configuration to the user
+echo "prefix = ${prefix}"
+echo "exec_prefix = ${exec_prefix}"
+echo "bindir = ${bindir}"
+echo "datarootdir = ${datarootdir}"
+echo "infodir = ${infodir}"
+echo "mandir = ${mandir}"
+echo "CXX = ${CXX}"
+echo "CPPFLAGS = ${CPPFLAGS}"
+echo "CXXFLAGS = ${CXXFLAGS}"
+echo "LDFLAGS = ${LDFLAGS}"
+echo "MAKEINFO = ${MAKEINFO}"
+rm -f Makefile
+cat > Makefile << EOF
+# Makefile for Lziprecover - Data recovery tool for the lzip format
+# Copyright (C) 2009-2024 Antonio Diaz Diaz.
+# This file was generated automatically by configure. Don't edit.
+#
+# This Makefile is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+pkgname = ${pkgname}
+pkgversion = ${pkgversion}
+progname = ${progname}
+VPATH = ${srcdir}
+prefix = ${prefix}
+exec_prefix = ${exec_prefix}
+bindir = ${bindir}
+datarootdir = ${datarootdir}
+infodir = ${infodir}
+mandir = ${mandir}
+CXX = ${CXX}
+CPPFLAGS = ${CPPFLAGS}
+CXXFLAGS = ${CXXFLAGS}
+LDFLAGS = ${LDFLAGS}
+MAKEINFO = ${MAKEINFO}
+EOF
+cat "${srcdir}/Makefile.in" >> Makefile # the variable header above is followed by the fixed rules
+
+echo "OK. Now you can run make."
diff --git a/decoder.cc b/decoder.cc
new file mode 100644
index 0000000..4c68355
--- /dev/null
+++ b/decoder.cc
@@ -0,0 +1,291 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "decoder.h"
+
+
+const CRC32 crc32;
+
+
+/* Read up to 'size' bytes from 'fd' into 'buf', retrying reads that were
+   interrupted (EINTR). Return the number of bytes really read.
+   If (value returned < size) and (errno == 0), means EOF was reached.
+*/
+long readblock( const int fd, uint8_t * const buf, const long size )
+  {
+  long done = 0;
+  errno = 0;
+  while( done < size )
+    {
+    const long got = read( fd, buf + done, size - done );
+    if( got == 0 ) break;				// EOF
+    if( got < 0 )
+      { if( errno != EINTR ) break; }		// error; errno is left set
+    else done += got;
+    errno = 0;
+    }
+  return done;
+  }
+
+
+/* Write 'size' bytes from 'buf' to 'fd', retrying writes that were
+   interrupted (EINTR). Return the number of bytes really written.
+   If (value returned < size), it is always an error.
+*/
+long writeblock( const int fd, const uint8_t * const buf, const long size )
+  {
+  long done = 0;
+  errno = 0;
+  for( ;; )
+    {
+    if( done >= size ) break;
+    const long put = write( fd, buf + done, size - done );
+    if( put < 0 && errno != EINTR ) break;	// error; errno is left set
+    if( put > 0 ) done += put;
+    errno = 0;
+    }
+  return done;
+  }
+
+
+/* Refill the input buffer from infd unless the end of the stream was
+   already reached. Throws Error if the read is short and errno is set.
+   Returns true if there are unread bytes in the buffer. */
+bool Range_decoder::read_block()
+  {
+  if( at_stream_end ) return pos < stream_pos;
+
+  stream_pos = readblock( infd, buffer, buffer_size );
+  const bool short_read = ( stream_pos != buffer_size );
+  if( short_read && errno ) throw Error( "Read error" );
+  at_stream_end = ( stream_pos < buffer_size );
+  partial_member_pos += pos;	// account for the bytes consumed so far
+  pos = 0;
+  show_dprogress();
+  return pos < stream_pos;
+  }
+
+
+/* Process the data decoded since the last flush (buffer positions from
+   stream_pos to pos): update the CRC, write to outfd (if outfd >= 0) the
+   part selected by outskip and outend, and restart at position 0 once pos
+   has reached dictionary_size. Throws Error if the write is short. */
+void LZ_decoder::flush_data()
+  {
+  if( pos <= stream_pos ) return;		// nothing new to flush
+
+  const int pending = pos - stream_pos;
+  crc32.update_buf( crc_, buffer + stream_pos, pending );
+  if( outfd >= 0 )
+    {
+    const unsigned long long sp = stream_position();
+    const long long skip = positive_diff( outskip, sp );
+    const long long wsize =
+      std::min( positive_diff( outend, sp ), (unsigned long long)pending ) - skip;
+    if( wsize > 0 &&
+        writeblock( outfd, buffer + stream_pos + skip, wsize ) != wsize )
+      throw Error( "Write error" );
+    }
+  if( pos >= dictionary_size )
+    { partial_data_pos += pos; pos = 0; pos_wrapped = true; }
+  stream_pos = pos;
+  }
+
+
+int LZ_decoder::check_trailer( const Pretty_print & pp,
+ const bool ignore_empty ) const /* Read the member trailer from rdec and
+    compare its CRC, data size, and member size with the values computed
+    while decoding. Every mismatch is reported when verbosity >= 0.
+    Return value: 0 = OK, 3 = trailer error (truncated trailer or any
+    mismatch), 5 = the member is empty and ignore_empty is false. */
+ {
+ Lzip_trailer trailer;
+ int size = rdec.read_data( trailer.data, trailer.size );
+ bool error = false;
+
+ if( size < trailer.size )
+ {
+ error = true;
+ if( verbosity >= 0 )
+ { pp();
+ std::fprintf( stderr, "Trailer truncated at trailer position %d;"
+ " some checks may fail.\n", size ); }
+ while( size < trailer.size ) trailer.data[size++] = 0; // fill the missing bytes with zeros
+ }
+
+ const unsigned td_crc = trailer.data_crc();
+ if( td_crc != crc() )
+ {
+ error = true;
+ if( verbosity >= 0 )
+ { pp();
+ std::fprintf( stderr, "CRC mismatch; stored %08X, computed %08X\n",
+ td_crc, crc() ); }
+ }
+ const unsigned long long data_size = data_position();
+ const unsigned long long td_size = trailer.data_size();
+ if( td_size != data_size )
+ {
+ error = true;
+ if( verbosity >= 0 )
+ { pp();
+ std::fprintf( stderr, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
+ td_size, td_size, data_size, data_size ); }
+ }
+ const unsigned long long member_size = rdec.member_position();
+ const unsigned long long tm_size = trailer.member_size();
+ if( tm_size != member_size )
+ {
+ error = true;
+ if( verbosity >= 0 )
+ { pp();
+ std::fprintf( stderr, "Member size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
+ tm_size, tm_size, member_size, member_size ); }
+ }
+ if( error ) return 3;
+ if( !ignore_empty && data_size == 0 ) return 5;
+ if( verbosity >= 2 ) // trailer is good; print statistics according to verbosity
+ {
+ if( verbosity >= 4 ) show_header( dictionary_size );
+ if( data_size == 0 || member_size == 0 ) // also avoids dividing by zero below
+ std::fputs( "no data compressed. ", stderr );
+ else
+ std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved. ",
+ (double)data_size / member_size,
+ ( 100.0 * member_size ) / data_size,
+ 100.0 - ( ( 100.0 * member_size ) / data_size ) );
+ if( verbosity >= 4 ) std::fprintf( stderr, "CRC %08X, ", td_crc );
+ if( verbosity >= 3 )
+ std::fprintf( stderr, "%9llu out, %8llu in. ", data_size, member_size );
+ }
+ if( rdec.get_code() != 0 && verbosity >= 1 ) // reported only; the return value is still 0
+ { // corruption in the last 4 bytes of the EOS marker
+ pp();
+ std::fprintf( stderr, "Range decoder final code is %08X\n", rdec.get_code() );
+ }
+ return 0;
+ }
+
+
+/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
+ 3 = trailer error, 4 = unknown marker found,
+ 5 = empty member found, 6 = marked member found.
+ Decodes the LZMA stream of one member from rdec until the End Of Stream
+ marker is found, then returns the result of check_trailer.
+ 6 is returned when rdec.load( cl_opts.ignore_marking ) fails. */
+int LZ_decoder::decode_member( const Cl_options & cl_opts,
+ const Pretty_print & pp )
+ {
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
+ Bit_model bm_match[State::states][pos_states];
+ Bit_model bm_rep[State::states];
+ Bit_model bm_rep0[State::states];
+ Bit_model bm_rep1[State::states];
+ Bit_model bm_rep2[State::states];
+ Bit_model bm_len[State::states][pos_states];
+ Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
+ Bit_model bm_dis[modeled_distances-end_dis_model+1];
+ Bit_model bm_align[dis_align_size];
+ Len_model match_len_model;
+ Len_model rep_len_model;
+ unsigned rep0 = 0; // rep[0-3] latest four distances
+ unsigned rep1 = 0; // used for efficient coding of
+ unsigned rep2 = 0; // repeated distances
+ unsigned rep3 = 0;
+ State state;
+
+ if( !rdec.load( cl_opts.ignore_marking ) ) return 6;
+ while( !rdec.finished() )
+ {
+ const int pos_state = data_position() & pos_state_mask;
+ if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
+ {
+ // literal byte
+ Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
+ if( state.is_char_set_char() )
+ put_byte( rdec.decode_tree8( bm ) );
+ else
+ put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
+ continue;
+ }
+ // match or repeated match
+ int len;
+ if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
+ {
+ if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
+ {
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+ { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+ }
+ else
+ {
+ unsigned distance; // one of rep1..rep3; it is moved to the front below
+ if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
+ distance = rep1;
+ else
+ {
+ if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
+ distance = rep2;
+ else
+ { distance = rep3; rep3 = rep2; }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state.set_rep();
+ len = rdec.decode_len( rep_len_model, pos_state );
+ }
+ else // match
+ {
+ len = rdec.decode_len( match_len_model, pos_state );
+ unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( distance >= start_dis_model ) // smaller slots are the distance itself
+ {
+ const unsigned dis_slot = distance;
+ const int direct_bits = ( dis_slot >> 1 ) - 1;
+ distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ if( dis_slot < end_dis_model )
+ distance += rdec.decode_tree_reversed(
+ bm_dis + ( distance - dis_slot ), direct_bits );
+ else
+ {
+ distance +=
+ rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ distance += rdec.decode_tree_reversed4( bm_align );
+ if( distance == 0xFFFFFFFFU ) // marker found
+ {
+ rdec.normalize();
+ flush_data();
+ if( len == min_match_len ) // End Of Stream marker
+ return check_trailer( pp, cl_opts.ignore_empty );
+ if( len == min_match_len + 1 ) // Sync Flush marker
+ { rdec.load(); continue; }
+ if( verbosity >= 0 )
+ {
+ pp();
+ std::fprintf( stderr, "Unsupported marker code '%d'\n", len );
+ }
+ return 4;
+ }
+ }
+ }
+ rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
+ state.set_match();
+ if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) ) // distance reaches before the data decoded so far
+ { flush_data(); return 1; }
+ }
+ copy_block( rep0, len );
+ }
+ flush_data(); // input ended before the End Of Stream marker
+ return 2;
+ }
diff --git a/decoder.h b/decoder.h
new file mode 100644
index 0000000..6efdfc2
--- /dev/null
+++ b/decoder.h
@@ -0,0 +1,387 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class Range_decoder // buffered reader of infd plus the range decoder state (code, range)
+ {
+ enum { buffer_size = 16384 }; // number of bytes allocated for 'buffer'
+ unsigned long long partial_member_pos; // member_position() is this plus pos
+ uint8_t * const buffer; // input buffer
+ int pos; // current pos in buffer
+ int stream_pos; // when reached, a new block must be read
+ uint32_t code; // reset by load(); updated by normalize() and the decode functions
+ uint32_t range; // current interval width; set to 0xFFFFFFFF by load()
+ const int infd; // input file descriptor
+ bool at_stream_end; // starts false; not modified in this header (presumably set by read_block)
+
+ bool read_block(); // not defined in this header; finished() calls it when the buffer is used up
+
+ Range_decoder( const Range_decoder & ); // declared as private
+ void operator=( const Range_decoder & ); // declared as private
+
+public:
+ explicit Range_decoder( const int ifd ) // allocates the input buffer; no input is read here
+ :
+ partial_member_pos( 0 ),
+ buffer( new uint8_t[buffer_size] ),
+ pos( 0 ),
+ stream_pos( 0 ),
+ code( 0 ),
+ range( 0xFFFFFFFFU ),
+ infd( ifd ),
+ at_stream_end( false )
+ {}
+
+ ~Range_decoder() { delete[] buffer; }
+
+ unsigned get_code() const { return code; }
+ bool finished() { return pos >= stream_pos && !read_block(); } // true if the buffer is used up and read_block() fails
+
+ unsigned long long member_position() const
+ { return partial_member_pos + pos; }
+
+ void reset_member_position() // makes member_position() return 0 at the current pos
+ { partial_member_pos = 0; partial_member_pos -= pos; } // relies on unsigned wrap-around
+
+ uint8_t get_byte() // returns the next input byte, or 0xFF once the input is finished
+ {
+ // 0xFF avoids decoder error if member is truncated at EOS marker
+ if( finished() ) return 0xFF;
+ return buffer[pos++];
+ }
+
+ int read_data( uint8_t * const outbuf, const int size ) // copies up to 'size' bytes to outbuf; returns the number copied
+ {
+ int sz = 0;
+ while( sz < size && !finished() )
+ {
+ const int rd = std::min( size - sz, stream_pos - pos ); // limited by what is left in the buffer
+ std::memcpy( outbuf + sz, buffer + pos, rd );
+ pos += rd;
+ sz += rd;
+ }
+ return sz;
+ }
+
+ /* if ignore_errors, stop reading before the first wrong byte, so that
+ unreading the header is not required to sync to next member */
+ int read_header_carefully( Lzip_header & header, const bool ignore_errors ) // returns the number of header bytes consumed
+ {
+ int sz = 0;
+ while( sz < header.size && !finished() )
+ {
+ header.data[sz] = buffer[pos]; // stored before checking; pos advances only if the byte is accepted
+ if( ignore_errors &&
+ ( ( sz < 4 && header.data[sz] != lzip_magic[sz] ) ||
+ ( sz == 4 && !header.check_version() ) ||
+ ( sz == 5 && !isvalid_ds( header.dictionary_size() ) ) ) ) break;
+ ++pos; ++sz;
+ }
+ return sz;
+ }
+
+ bool find_header( Lzip_header & header ) // scans forward for a complete header; false if the input ends first
+ {
+ while( !finished() )
+ {
+ if( buffer[pos] != lzip_magic[0] ) { ++pos; continue; } // skip bytes that can't start a header
+ reset_member_position(); // a candidate header starts at pos
+ Lzip_header h;
+ if( read_header_carefully( h, true ) == header.size )
+ { header = h; return true; } // 'header' is modified only on success
+ }
+ return false;
+ }
+
+ bool load( const bool ignore_marking = true ) // resets code and range, then reads 5 bytes; false only if the 1st byte is nonzero and !ignore_marking
+ {
+ code = 0;
+ range = 0xFFFFFFFFU;
+ // check and discard first byte of the LZMA stream
+ if( get_byte() != 0 && !ignore_marking ) return false;
+ for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte(); // first byte read ends up most significant
+ return true;
+ }
+
+ void normalize() // when range fits in 24 bits, shift one more input byte into code
+ {
+ if( range <= 0x00FFFFFFU )
+ { range <<= 8; code = ( code << 8 ) | get_byte(); }
+ }
+
+ unsigned decode( const int num_bits ) // decodes num_bits bits without a Bit_model, most significant bit first
+ {
+ unsigned symbol = 0;
+ for( int i = num_bits; i > 0; --i )
+ {
+ normalize();
+ range >>= 1;
+// symbol <<= 1;
+// if( code >= range ) { code -= range; symbol |= 1; }
+ const bool bit = ( code >= range );
+ symbol <<= 1; symbol += bit;
+ code -= range & ( 0U - bit ); // mask is all ones when bit is set, else 0
+ }
+ return symbol;
+ }
+
+ bool decode_bit( Bit_model & bm ) // decodes one bit using bm and adapts bm.probability
+ {
+ normalize();
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound ) // bit is 0
+ {
+ range = bound;
+ bm.probability +=
+ ( bit_model_total - bm.probability ) >> bit_model_move_bits; // move probability up after a 0
+ return 0;
+ }
+ else // bit is 1
+ {
+ code -= bound;
+ range -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits; // move probability down after a 1
+ return 1;
+ }
+ }
+
+ void decode_symbol_bit( Bit_model & bm, unsigned & symbol ) // same steps as decode_bit, but the bit is shifted into the low end of symbol
+ {
+ normalize();
+ symbol <<= 1;
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability +=
+ ( bit_model_total - bm.probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ code -= bound;
+ range -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ symbol |= 1;
+ }
+ }
+
+ void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model,
+ unsigned & symbol, const int i ) // the bit goes to the low end of model and to bit i of symbol
+ {
+ normalize();
+ model <<= 1;
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability +=
+ ( bit_model_total - bm.probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ code -= bound;
+ range -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ model |= 1;
+ symbol |= 1 << i;
+ }
+ }
+
+ unsigned decode_tree6( Bit_model bm[] ) // decodes 6 bits, most significant first; returns a value in 0..63
+ {
+ unsigned symbol = 1; // leading 1 makes symbol usable as the index into bm
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ return symbol & 0x3F; // drop the leading 1
+ }
+
+ unsigned decode_tree8( Bit_model bm[] ) // decodes 8 bits, most significant first; returns a value in 0..255
+ {
+ unsigned symbol = 1; // leading 1 makes symbol usable as the index into bm
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ return symbol & 0xFF; // drop the leading 1
+ }
+
+ unsigned decode_tree_reversed( Bit_model bm[], const int num_bits ) // decodes num_bits bits, least significant first
+ {
+ unsigned model = 1; // index into bm
+ unsigned symbol = 0;
+ for( int i = 0; i < num_bits; ++i )
+ decode_symbol_bit_reversed( bm[model], model, symbol, i );
+ return symbol;
+ }
+
+ unsigned decode_tree_reversed4( Bit_model bm[] ) // unrolled form of decode_tree_reversed( bm, 4 )
+ {
+ unsigned model = 1;
+ unsigned symbol = 0;
+ decode_symbol_bit_reversed( bm[model], model, symbol, 0 );
+ decode_symbol_bit_reversed( bm[model], model, symbol, 1 );
+ decode_symbol_bit_reversed( bm[model], model, symbol, 2 );
+ decode_symbol_bit_reversed( bm[model], model, symbol, 3 );
+ return symbol;
+ }
+
+ unsigned decode_matched( Bit_model bm[], unsigned match_byte ) // decodes a byte; bits of match_byte select the model until the first mismatch
+ {
+ Bit_model * const bm1 = bm + 0x100; // models used while the decoded bits still equal those of match_byte
+ unsigned symbol = 1;
+ while( symbol < 0x100 )
+ {
+ const unsigned match_bit = ( match_byte <<= 1 ) & 0x100; // next bit of match_byte (MSB first), as 0 or 0x100
+ const bool bit = decode_bit( bm1[symbol+match_bit] );
+ symbol <<= 1; symbol |= bit;
+ if( match_bit >> 8 != bit ) // mismatch: decode the remaining bits with the plain models
+ {
+ while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol );
+ break;
+ }
+ }
+ return symbol & 0xFF;
+ }
+
+ unsigned decode_len( Len_model & lm, const int pos_state ) // returns min_match_len plus the decoded length symbol
+ {
+ Bit_model * bm;
+ unsigned mask, offset, symbol = 1;
+
+ if( decode_bit( lm.choice1 ) == 0 ) // low range: 3 bits
+ { bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
+ if( decode_bit( lm.choice2 ) == 0 ) // mid range: 3 bits, placed after the low symbols
+ { bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
+ bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols; // high range: 8 bits (5 here + 3 below)
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+len3:
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ return ( symbol & mask ) + min_match_len + offset; // mask drops the leading 1 of symbol
+ }
+ };
+
+
+class LZ_decoder // decodes a member read through rdec into a buffer of dictionary_size bytes
+ {
+ const unsigned long long outskip; // from constructor argument oskip (default 0)
+ const unsigned long long outend; // from constructor argument oend (default -1ULL)
+ unsigned long long partial_data_pos; // data_position() is this plus pos
+ Range_decoder & rdec; // referenced, not owned
+ const unsigned dictionary_size; // size in bytes of 'buffer'
+ uint8_t * const buffer; // output buffer
+ unsigned pos; // current pos in buffer
+ unsigned stream_pos; // first byte not yet written to file
+ uint32_t crc_; // starts at 0xFFFFFFFF; crc() returns it inverted
+ const int outfd; // output file descriptor
+ bool pos_wrapped; // starts false; while false, decode_member rejects distances >= pos
+
+ unsigned long long stream_position() const
+ { return partial_data_pos + stream_pos; }
+ void flush_data(); // not defined in this header
+ int check_trailer( const Pretty_print & pp, const bool ignore_empty ) const; // not defined in this header
+
+ uint8_t peek_prev() const // byte just before pos; uses the last buffer byte when pos == 0
+ { return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
+
+ uint8_t peek( const unsigned distance ) const // byte 'distance' + 1 positions behind pos, wrapping around the buffer
+ {
+ const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) +
+ pos - distance - 1;
+ return buffer[i];
+ }
+
+ void put_byte( const uint8_t b ) // stores b at pos and advances
+ {
+ buffer[pos] = b;
+ if( ++pos >= dictionary_size ) flush_data(); // buffer full (presumably flush_data resets pos - TODO confirm)
+ }
+
+ void copy_block( const unsigned distance, unsigned len ) // copies len bytes starting 'distance' + 1 positions behind pos to pos
+ {
+ unsigned lpos = pos, i = lpos - distance - 1; // i: source index; wraps below zero when lpos <= distance
+ bool fast, fast2;
+ if( lpos > distance ) // source lies before the destination
+ {
+ fast = ( len < dictionary_size - lpos ); // destination stays inside the buffer
+ fast2 = ( fast && len <= lpos - i ); // source ends at or before the destination start
+ }
+ else // source lies at or after the destination
+ {
+ i += dictionary_size; // undo the unsigned wrap of i
+ fast = ( len < dictionary_size - i ); // (i == pos) may happen
+ fast2 = ( fast && len <= i - lpos ); // destination ends at or before the source start
+ }
+ if( fast ) // no wrap
+ {
+ pos += len;
+ if( fast2 ) // no wrap, no overlap
+ std::memcpy( buffer + lpos, buffer + i, len );
+ else
+ for( ; len > 0; --len ) buffer[lpos++] = buffer[i++]; // forward byte copy; may read bytes written earlier in this loop
+ }
+ else for( ; len > 0; --len ) // slow path: either index may reach the end of the buffer
+ {
+ buffer[pos] = buffer[i];
+ if( ++pos >= dictionary_size ) flush_data();
+ if( ++i >= dictionary_size ) i = 0;
+ }
+ }
+
+ LZ_decoder( const LZ_decoder & ); // declared as private
+ void operator=( const LZ_decoder & ); // declared as private
+
+public:
+ LZ_decoder( Range_decoder & rde, const unsigned dict_size, const int ofd,
+ const unsigned long long oskip = 0,
+ const unsigned long long oend = -1ULL ) // -1ULL is the largest unsigned long long value
+ :
+ outskip( oskip ),
+ outend( oend ),
+ partial_data_pos( 0 ),
+ rdec( rde ),
+ dictionary_size( dict_size ),
+ buffer( new uint8_t[dictionary_size] ),
+ pos( 0 ),
+ stream_pos( 0 ),
+ crc_( 0xFFFFFFFFU ),
+ outfd( ofd ),
+ pos_wrapped( false )
+ // prev_byte of first byte; also for peek( 0 ) on corrupt file
+ { buffer[dictionary_size-1] = 0; }
+
+ ~LZ_decoder() { delete[] buffer; }
+
+ unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } // crc_ with all bits inverted
+ unsigned long long data_position() const { return partial_data_pos + pos; }
+
+ int decode_member( const Cl_options & cl_opts, const Pretty_print & pp ); // not defined in this header
+ int decode_member() // overload using a default-constructed Cl_options and a Pretty_print built from ""
+ { return decode_member( Cl_options(), Pretty_print( "" ) ); }
+ };
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
new file mode 100644
index 0000000..f95e80f
--- /dev/null
+++ b/doc/lziprecover.1
@@ -0,0 +1,152 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
+.TH LZIPRECOVER "1" "January 2024" "lziprecover 1.24" "User Commands"
+.SH NAME
+lziprecover \- recovers data from damaged lzip files
+.SH SYNOPSIS
+.B lziprecover
+[\fI\,options\/\fR] [\fI\,files\/\fR]
+.SH DESCRIPTION
+Lziprecover is a data recovery tool and decompressor for files in the lzip
+compressed data format (.lz). Lziprecover is able to repair slightly damaged
+files (up to one single\-byte error per member), produce a correct file by
+merging the good parts of two or more damaged copies, reproduce a missing
+(zeroed) sector using a reference file, extract data from damaged files,
+decompress files, and test integrity of files.
+.PP
+With the help of lziprecover, losing an entire archive just because of a
+corrupt byte near the beginning is a thing of the past.
+.PP
+Lziprecover can remove the damaged members from multimember files, for
+example multimember tar.lz archives.
+.PP
+Lziprecover provides random access to the data in multimember files; it only
+decompresses the members containing the desired data.
+.PP
+Lziprecover facilitates the management of metadata stored as trailing data
+in lzip files.
+.PP
+Lziprecover is not a replacement for regular backups, but a last line of
+defense for the case where the backups are also damaged.
+.SH OPTIONS
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+output version information and exit
+.TP
+\fB\-a\fR, \fB\-\-trailing\-error\fR
+exit with error status if trailing data
+.TP
+\fB\-A\fR, \fB\-\-alone\-to\-lz\fR
+convert lzma\-alone files to lzip format
+.TP
+\fB\-c\fR, \fB\-\-stdout\fR
+write to standard output, keep input files
+.TP
+\fB\-d\fR, \fB\-\-decompress\fR
+decompress, test compressed file integrity
+.TP
+\fB\-D\fR, \fB\-\-range\-decompress=\fR<n\-m>
+decompress a range of bytes to stdout
+.TP
+\fB\-e\fR, \fB\-\-reproduce\fR
+try to reproduce a zeroed sector in file
+.TP
+\fB\-\-lzip\-level\fR=\fI\,N\/\fR|a|m[N]
+reproduce one level, all, or match length
+.TP
+\fB\-\-lzip\-name=\fR<name>
+name of lzip executable for \fB\-\-reproduce\fR
+.TP
+\fB\-\-reference\-file=\fR<file>
+reference file for \fB\-\-reproduce\fR
+.TP
+\fB\-f\fR, \fB\-\-force\fR
+overwrite existing output files
+.TP
+\fB\-i\fR, \fB\-\-ignore\-errors\fR
+ignore some errors in \fB\-d\fR, \fB\-D\fR, \fB\-l\fR, \fB\-t\fR, \fB\-\-dump\fR
+.TP
+\fB\-k\fR, \fB\-\-keep\fR
+keep (don't delete) input files
+.TP
+\fB\-l\fR, \fB\-\-list\fR
+print (un)compressed file sizes
+.TP
+\fB\-m\fR, \fB\-\-merge\fR
+repair errors in file using several copies
+.TP
+\fB\-o\fR, \fB\-\-output=\fR<file>
+place the output into <file>
+.TP
+\fB\-q\fR, \fB\-\-quiet\fR
+suppress all messages
+.TP
+\fB\-R\fR, \fB\-\-byte\-repair\fR
+try to repair a corrupt byte in file
+.TP
+\fB\-s\fR, \fB\-\-split\fR
+split multimember file in single\-member files
+.TP
+\fB\-t\fR, \fB\-\-test\fR
+test compressed file integrity
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+be verbose (a 2nd \fB\-v\fR gives more)
+.TP
+\fB\-\-dump=\fR<list>:d:e:t
+dump members, damaged/empty, tdata to stdout
+.TP
+\fB\-\-remove=\fR<list>:d:e:t
+remove members, tdata from files in place
+.TP
+\fB\-\-strip=\fR<list>:d:e:t
+copy files to stdout stripping members given
+.TP
+\fB\-\-empty\-error\fR
+exit with error status if empty member in file
+.TP
+\fB\-\-marking\-error\fR
+exit with error status if 1st LZMA byte not 0
+.TP
+\fB\-\-loose\-trailing\fR
+allow trailing data seeming corrupt header
+.TP
+\fB\-\-clear\-marking\fR
+reset the first LZMA byte of each member
+.PP
+If no file names are given, or if a file is '\-', lziprecover decompresses
+from standard input to standard output.
+Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,
+Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...
+.PP
+To extract all the files from archive 'foo.tar.lz', use the commands
+\&'tar \fB\-xf\fR foo.tar.lz' or 'lziprecover \fB\-cd\fR foo.tar.lz | tar \fB\-xf\fR \-'.
+.PP
+Exit status: 0 for a normal exit, 1 for environmental problems
+(file not found, invalid command\-line options, I/O errors, etc), 2 to
+indicate a corrupt or invalid input file, 3 for an internal consistency
+error (e.g., bug) which caused lziprecover to panic.
+.SH "REPORTING BUGS"
+Report bugs to lzip\-bug@nongnu.org
+.br
+Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
+.SH COPYRIGHT
+Copyright \(co 2024 Antonio Diaz Diaz.
+License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
+.br
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+The full documentation for
+.B lziprecover
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B lziprecover
+programs are properly installed at your site, the command
+.IP
+.B info lziprecover
+.PP
+should give you access to the complete manual.
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
new file mode 100644
index 0000000..b1f820f
--- /dev/null
+++ b/doc/lziprecover.info
@@ -0,0 +1,1536 @@
+This is lziprecover.info, produced by makeinfo version 4.13+ from
+lziprecover.texi.
+
+INFO-DIR-SECTION Compression
+START-INFO-DIR-ENTRY
+* Lziprecover: (lziprecover). Data recovery tool for the lzip format
+END-INFO-DIR-ENTRY
+
+
+File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
+
+Lziprecover Manual
+******************
+
+This manual is for Lziprecover (version 1.24, 20 January 2024).
+
+* Menu:
+
+* Introduction:: Purpose and features of lziprecover
+* Invoking lziprecover:: Command-line interface
+* Data safety:: Protecting data from accidental loss
+* Repairing one byte:: Fixing bit flips and similar errors
+* Merging files:: Fixing several damaged copies
+* Reproducing one sector:: Fixing a missing (zeroed) sector
+* Tarlz:: Options supporting the tar.lz format
+* File names:: Names of the files produced by lziprecover
+* File format:: Detailed format of the compressed file
+* Trailing data:: Extra data appended to the file
+* Examples:: A small tutorial with examples
+* Unzcrash:: Testing the robustness of decompressors
+* Problems:: Reporting bugs
+* Concept index:: Index of concepts
+
+
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This manual is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
+
+
+File: lziprecover.info, Node: Introduction, Next: Invoking lziprecover, Prev: Top, Up: Top
+
+1 Introduction
+**************
+
+Lziprecover is a data recovery tool and decompressor for files in the lzip
+compressed data format (.lz). Lziprecover is able to repair slightly damaged
+files (up to one single-byte error per member), produce a correct file by
+merging the good parts of two or more damaged copies, reproduce a missing
+(zeroed) sector using a reference file, extract data from damaged files,
+decompress files, and test integrity of files.
+
+ Lziprecover can remove the damaged members from multimember files, for
+example multimember tar.lz archives.
+
+ Lziprecover provides random access to the data in multimember files; it
+only decompresses the members containing the desired data.
+
+ Lziprecover facilitates the management of metadata stored as trailing
+data in lzip files.
+
+ Lziprecover is not a replacement for regular backups, but a last line of
+defense for the case where the backups are also damaged.
+
+ The lzip file format is designed for data sharing and long-term
+archiving, taking into account both data integrity and decoder availability:
+
+ * The lzip format provides very safe integrity checking and some data
+ recovery means. The program lziprecover can repair bit flip errors
+ (one of the most common forms of data corruption) in lzip files, and
+ provides data recovery capabilities, including error-checked merging
+ of damaged copies of a file. *Note Data safety::.
+
+ * The lzip format is as simple as possible (but not simpler). The lzip
+ manual provides the source code of a simple decompressor along with a
+ detailed explanation of how it works, so that with only the help of the
+ lzip manual it would be possible for a digital archaeologist to extract
+ the data from a lzip file long after quantum computers eventually
+ render LZMA obsolete.
+
+ * Additionally the lzip reference implementation is copylefted, which
+ guarantees that it will remain free forever.
+
+ A nice feature of the lzip format is that a corrupt byte is easier to
+repair the nearer it is to the beginning of the file. Therefore, with the
+help of lziprecover, losing an entire archive just because of a corrupt
+byte near the beginning is a thing of the past.
+
+ Compression may be good for long-term archiving. For compressible data,
+multiple compressed copies may provide redundancy in a more useful form and
+may have a better chance of surviving intact than one uncompressed copy
+using the same amount of storage space. This is especially true if the
+format provides recovery capabilities like those of lziprecover, which is
+able to find and combine the good parts of several damaged copies.
+
+ Lziprecover is able to recover or decompress files produced by any of the
+compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
+pdlzip.
+
+ If the cause of file corruption is a damaged medium, the combination
+GNU ddrescue + lziprecover is the recommended option for recovering data
+from damaged lzip files. *Note ddrescue-example::, and *note
+ddrescue-example2::, for examples.
+
+ If a file is too damaged for lziprecover to repair it, all the
+recoverable data in all members of the file can be extracted with the
+following command (the resulting file may contain errors and some garbage
+data may be produced at the end of each damaged member):
+
+ lziprecover -cd --ignore-errors file.lz > file
+
+ When recovering data, lziprecover takes as arguments the names of the
+damaged files and writes zero or more recovered files depending on the
+operation selected and whether the recovery succeeded or not. The damaged
+files themselves are kept unchanged.
+
+ When decompressing or testing file integrity, lziprecover behaves like
+lzip or lunzip.
+
+ LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never
+have been compressed. Decompressed is used to refer to data which have
+undergone the process of decompression.
+
+
+File: lziprecover.info, Node: Invoking lziprecover, Next: Data safety, Prev: Introduction, Up: Top
+
+2 Invoking lziprecover
+**********************
+
+The format for running lziprecover is:
+
+ lziprecover [OPTIONS] [FILES]
+
+When decompressing or testing, a hyphen '-' used as a FILE argument means
+standard input. It can be mixed with other FILES and is read just once, the
+first time it appears in the command line. If no file names are specified,
+lziprecover decompresses from standard input to standard output. Remember
+to prepend './' to any file name beginning with a hyphen, or use '--'.
+
+ lziprecover supports the following options: *Note Argument syntax:
+(arg_parser)Argument syntax.
+
+'-h'
+'--help'
+ Print an informative help message describing the options and exit.
+
+'-V'
+'--version'
+ Print the version number of lziprecover on the standard output and
+ exit. This version number should be included in all bug reports.
+
+'-a'
+'--trailing-error'
+ Exit with error status 2 if any remaining input is detected after
+ decompressing the last member. Such remaining input is usually trailing
+ garbage that can be safely ignored. *Note concat-example::.
+
+'-A'
+'--alone-to-lz'
+ Convert lzma-alone files to lzip format without recompressing, just
+ adding a lzip header and trailer. The conversion minimizes the
+ dictionary size of the resulting file (and therefore the amount of
+ memory required to decompress it). Only streamed files with default
+ LZMA properties can be converted; non-streamed lzma-alone files lack
+ the "End Of Stream" marker required in lzip files.
+
+ The name of the converted lzip file is derived from that of the
+ original lzma-alone file as follows:
+
+ filename.lzma becomes filename.lz
+ filename.tlz becomes filename.tar.lz
+ anyothername becomes anyothername.lz
+
+'-c'
+'--stdout'
+ Write decompressed data to standard output; keep input files
+ unchanged. This option (or '-o') is needed when reading from a named
+ pipe (fifo) or from a device. Use it also to recover as much of the
+ decompressed data as possible when decompressing a corrupt file. '-c'
+ overrides '-o'. '-c' has no effect when merging, removing members,
+ repairing, reproducing, splitting, testing or listing.
+
+'-d'
+'--decompress'
+ Decompress the files specified. The integrity of the files specified is
+ checked. If a file does not exist, can't be opened, or the destination
+ file already exists and '--force' has not been specified, lziprecover
+ continues decompressing the rest of the files and exits with error
+ status 1. If a file fails to decompress, or is a terminal, lziprecover
+ exits immediately with error status 2 without decompressing the rest
+ of the files. A terminal is considered an uncompressed file, and
+ therefore invalid.
+
+'-D RANGE'
+'--range-decompress=RANGE'
+ Decompress only a range of bytes starting at decompressed byte position
+ BEGIN and up to byte position END - 1. Byte positions start at 0. This
+ option provides random access to the data in multimember files; it
+ only decompresses the members containing the desired data. In order to
+ guarantee the correctness of the data produced, all members containing
+ any part of the desired data are decompressed and their integrity is
+ checked.
+
+ Four formats of RANGE are recognized, 'BEGIN', 'BEGIN-END',
+ 'BEGIN,SIZE', and ',SIZE'. If only BEGIN is specified, END is taken as
+ the end of the file. If only SIZE is specified, BEGIN is taken as the
+ beginning of the file. The bytes produced are sent to standard output
+ unless the option '--output' is used.
+
+'-e'
+'--reproduce'
+ Try to recover a missing (zeroed) sector in FILE using a reference
+ file and the same version of lzip that created FILE. If successful, a
+ repaired copy is written to the file FILE_fixed.lz. FILE is not
+ modified at all. The exit status is 0 if the member containing the
+ zeroed sector could be repaired, 2 otherwise. Note that FILE_fixed.lz
+ may still contain errors in the members following the one repaired.
+ *Note Reproducing one sector::, for a complete description of the
+ reproduce mode.
+
+'--lzip-level=DIGIT|a|m[LENGTH]'
+ Try only the given compression level or match length limit when
+ reproducing a zeroed sector. '--lzip-level=a' tries all the
+ compression levels (0 to 9), while '--lzip-level=m' tries all the
+ match length limits (5 to 273).
+
+'--lzip-name=NAME'
+ Set the name of the lzip executable used by '--reproduce'. If
+ '--lzip-name' is not specified, 'lzip' is used.
+
+'--reference-file=FILE'
+ Set the reference file used by '--reproduce'. It must contain the
+ uncompressed data corresponding to the missing compressed data of the
+ zeroed sector, plus some context data before and after them.
+
+'-f'
+'--force'
+ Force overwrite of output files.
+
+'-i'
+'--ignore-errors'
+ Make '--decompress', '--test', and '--range-decompress' ignore format
+ and data errors and continue decompressing the remaining members in
+ the file; keep input files unchanged. For example, the commands
+ 'lziprecover -cd -i file.lz > file' or
+ 'lziprecover -D0 -i file.lz > file' decompress all the recoverable
+ data in all members of 'file.lz' without having to split it first. The
+ '-cd -i' method resyncs to the next member header after each error,
+ and is immune to some format errors that make '-D0 -i' fail. The range
+ decompressed may be smaller than the range requested, because of the
+ errors. The exit status is set to 0 unless other errors are found (I/O
+ errors, for example).
+
+ Make '--list', '--dump', '--remove', and '--strip' ignore format
+ errors. The sizes of the members with errors (especially the last) may
+ be wrong.
+
+'-k'
+'--keep'
+ Keep (don't delete) input files during decompression.
+
+'-l'
+'--list'
+ Print the uncompressed size, compressed size, and percentage saved of
+ the files specified. Trailing data are ignored. The values produced
+ are correct even for multimember files. If more than one file is
+ given, a final line containing the cumulative sizes is printed. With
+ '-v', the dictionary size, the number of members in the file, and the
+ amount of trailing data (if any) are also printed. With '-vv', the
+ positions and sizes of each member in multimember files are also
+ printed. With '-i', format errors are ignored, and with '-ivv', gaps
+ between members are shown. The member numbers shown coincide with the
+ file numbers produced by '--split'.
+
+ If any file is damaged, does not exist, can't be opened, or is not
+ regular, the final exit status is > 0. '-lq' can be used to check
+ quickly (without decompressing) the structural integrity of the files
+ specified. (Use '--test' to check the data integrity). '-alq'
+ additionally checks that none of the files specified contain trailing
+ data.
+
+'-m'
+'--merge'
+ Try to produce a correct file by merging the good parts of two or more
+ damaged copies. If successful, a repaired copy is written to the file
+ FILE_fixed.lz. The exit status is 0 if a correct file could be
+ produced, 2 otherwise. *Note Merging files::, for a complete
+ description of the merge mode.
+
+'-o FILE'
+'--output=FILE'
+ Place the repaired output into FILE instead of into FILE_fixed.lz. If
+ splitting, the names of the files produced are in the form
+ 'rec01FILE', 'rec02FILE', etc.
+
+ If '-c' has not also been specified, write the (de)compressed output
+ to FILE, automatically creating any missing parent directories; keep
+ input files unchanged. This option (or '-c') is needed when reading
+ from a named pipe (fifo) or from a device. '-o -' is equivalent to
+ '-c'. '-o' has no effect when testing or listing.
+
+'-q'
+'--quiet'
+ Quiet operation. Suppress all messages.
+
+'-R'
+'--byte-repair'
+ Try to repair a FILE with small errors (up to one single-byte error
+ per member). If successful, a repaired copy is written to the file
+ FILE_fixed.lz. FILE is not modified at all. The exit status is 0 if
+ the file could be repaired, 2 otherwise. *Note Repairing one byte::,
+ for a complete description of the repair mode.
+
+'-s'
+'--split'
+ Search for members in FILE and write each member in its own file. Gaps
+ between members are detected and each gap is saved in its own file.
+ Trailing data (if any) are saved alone in the last file. You can then
+ use 'lziprecover -t' to test the integrity of the resulting files,
+ decompress those which are undamaged, and try to repair or partially
+ decompress those which are damaged. Gaps may contain garbage or may be
+ members with corrupt headers or trailers. If other lziprecover
+ functions fail to work on a multimember FILE because of damage in
+ headers or trailers, try to split FILE and then work on each member
+ individually.
+
+ The names of the files produced are in the form 'rec01FILE',
+ 'rec02FILE', etc, and are designed so that the use of wildcards in
+ subsequent processing, for example,
+ 'lziprecover -cd rec*FILE > recovered_data', processes the files in
+ the correct order. The number of digits used in the names varies
+ depending on the number of members in FILE.
+
+'-t'
+'--test'
+ Check integrity of the files specified, but don't decompress them. This
+ really performs a trial decompression and throws away the result. Use
+ it together with '-v' to see information about the files. If a file
+ fails the test, does not exist, can't be opened, or is a terminal,
+ lziprecover continues testing the rest of the files. A final
+ diagnostic is shown at verbosity level 1 or higher if any file fails
+ the test when testing multiple files.
+
+'-v'
+'--verbose'
+ Verbose mode.
+ When decompressing or testing, further -v's (up to 4) increase the
+ verbosity level, showing status, compression ratio, dictionary size,
+ trailer contents (CRC, data size, member size), and up to 6 bytes of
+ trailing data (if any) both in hexadecimal and as a string of printable
+ ASCII characters.
+ Two or more '-v' options show the progress of decompression.
+ In other modes, increasing verbosity levels show final status, progress
+ of operations, and extra information (for example, the failed areas).
+
+'--dump=[MEMBER_LIST][:damaged][:empty][:tdata]'
+ Dump the members listed, the damaged members (if any), the empty
+ members (if any), or the trailing data (if any) of one or more regular
+ multimember files to standard output, or to a file if the option
+ '--output' is used. If more than one file is given, the elements
+ dumped from all the files are concatenated. If a file does not exist,
+ can't be opened, or is not regular, lziprecover continues processing
+ the rest of the files. If the dump fails in one file, lziprecover
+ exits immediately without processing the rest of the files. Only
+ '--dump=tdata' can write to a terminal. '--dump=damaged' implies
+ '--ignore-errors'.
+
+ The argument to '--dump' is a colon-separated list of the following
+ element specifiers; a member list (1,3-6), a reverse member list
+ (r1,3-6), and the strings "damaged", "empty", and "tdata" (which may
+ be shortened to 'd', 'e', and 't' respectively). A member list selects
+ the members (or gaps) listed, whose numbers coincide with those shown
+ by '--list'. A reverse member list selects the members listed counting
+ from the last member in the file (r1). Negated versions of both kinds
+ of lists exist (^1,3-6:r^1,3-6) which select all the members except
+ those in the list. The strings "damaged", "empty", and "tdata" select
+ the damaged members, the empty members (those with a data size = 0),
+ and the trailing data respectively. If the same member is selected
+ more than once, for example by '1:r1' in a single-member file, it is
+ dumped just once. See the following examples:
+
+ '--dump' argument Elements dumped
+ ---------------------------------------------------------------------
+ '1,3-6' members 1, 3, 4, 5, 6
+ 'r1-3' last 3 members in file
+ '^13,15' all but 13th and 15th members in file
+ 'r^1' all but last member in file
+ 'damaged' all damaged members in file
+ 'empty' all empty members in file
+ 'tdata' trailing data
+ '1-5:r1:tdata' members 1 to 5, last member, trailing data
+ 'damaged:tdata' damaged members, trailing data
+ '3,12:damaged:tdata' members 3, 12, damaged members, trailing data
+
+'--remove=[MEMBER_LIST][:damaged][:empty][:tdata]'
+ Remove the members listed, the damaged members (if any), the empty
+ members (if any), or the trailing data (if any) from regular
+ multimember files in place. The date of each file modified is
+ preserved if possible. If all members in a file are selected to be
+ removed, the file is left unchanged and the exit status is set to 2.
+ If a file does not exist, can't be opened, is not regular, or is left
+ unchanged, lziprecover continues processing the rest of the files. In
+ case of I/O error, lziprecover exits immediately without processing
+ the rest of the files. See '--dump' above for a description of the
+ argument.
+
+ This option may be dangerous even if only the trailing data are being
+ removed because the file may be corrupt or the trailing data may
+ contain a forbidden combination of characters. *Note Trailing data::.
+ It is safer to send the output of '--strip' to a temporary file, check
+ it, and then copy it over the original file. But if you prefer
+ '--remove' because of its more efficient in-place removal, it is
+ advisable to make a backup before attempting the removal. At least
+ check that 'lzip -cd file.lz | wc -c' and the uncompressed size shown
+ by 'lzip -l file.lz' match before attempting the removal of trailing
+ data.
+
+'--strip=[MEMBER_LIST][:damaged][:empty][:tdata]'
+ Copy one or more regular multimember files to standard output (or to a
+ file if the option '--output' is used), stripping the members listed,
+ the damaged members (if any), the empty members (if any), or the
+ trailing data (if any) from each file. If all members in a file are
+ selected to be stripped, the trailing data (if any) are also stripped
+ even if 'tdata' is not specified. If more than one file is given, the
+ files are concatenated. In this case the trailing data are also
+ stripped from all but the last file even if 'tdata' is not specified.
+ If a file does not exist, can't be opened, or is not regular,
+ lziprecover continues processing the rest of the files. If a file
+ fails to copy, lziprecover exits immediately without processing the
+ rest of the files. See '--dump' above for a description of the
+ argument.
+
+'--empty-error'
+ Exit with error status 2 if any empty member is found in the input
+ files.
+
+'--marking-error'
+ Exit with error status 2 if the first LZMA byte is non-zero in any
+ member of the input files. This may be caused by data corruption or by
+ deliberate insertion of tracking information in the file. Use
+ 'lziprecover --clear-marking' to clear any such non-zero bytes.
+
+'--loose-trailing'
+ When decompressing, testing, or listing, allow trailing data whose
+ first bytes are so similar to the magic bytes of a lzip header that
+ they can be confused with a corrupt header. Use this option if a file
+ triggers a "corrupt header" error and the cause is not indeed a
+ corrupt header.
+
+'--clear-marking'
+ Set to zero the first LZMA byte of each member in the files specified.
+ At verbosity level 1 (-v), print the number of members cleared. The
+ date of each file modified is preserved if possible. This option
+ exists because the first byte of the LZMA stream is ignored by the
+ range decoder, and can therefore be (mis)used to store any value which
+ can then be used as a watermark to track the path of the compressed
+ payload.
+
+
+ Lziprecover also supports the following debug options (for experts):
+
+'-E RANGE[,SECTOR_SIZE]'
+'--debug-reproduce=RANGE[,SECTOR_SIZE]'
+ Load the compressed FILE into memory, set all bytes in the positions
+ specified by RANGE to 0, and try to reproduce a correct compressed
+ file. *Note --reproduce::. *Note range-format::, for a description of
+ RANGE. If a SECTOR_SIZE is specified, set each sector to 0 in sequence
+ and try to reproduce the file, printing to standard output final
+ statistics of the number of sectors reproduced successfully. Exit with
+ nonzero status only in case of fatal error.
+
+'-M'
+'--md5sum'
+ Print to standard output the MD5 digests of the input FILES one per
+ line in the same format produced by the 'md5sum' tool. Lziprecover
+ uses MD5 digests to check the result of some operations. This option
+ can be used to test the correctness of lziprecover's implementation of
+ the MD5 algorithm.
+
+'-S[VALUE]'
+'--nrep-stats[=VALUE]'
+ Compare the frequency of sequences of N repeated bytes of a given
+ VALUE in the compressed LZMA streams of the input FILES with the
+ frequency expected for random data (1 / 2^(8N)). If VALUE is not
+ specified, print the frequency of repeated sequences of all possible
+ byte values. Print cumulative data for all the files, followed by the
+ name of the first file with the longest sequence.
+
+'-U 1|BSIZE'
+'--unzcrash=1|BSIZE'
+ With argument '1', test 1-bit errors in the LZMA stream of the
+ compressed input FILE like the command
+ 'unzcrash -b1 -p7 -s-20 'lzip -t' FILE' but in memory, and therefore
+ much faster (30 to 50 times faster). *Note Unzcrash::. This option
+ tests all the members independently in a multimember file, skipping
+ headers and trailers. If a decompression succeeds, the decompressed
+ output is compared with the decompressed output of the original FILE
+ using MD5 digests. FILE must not contain errors and must decompress
+ correctly for the comparisons to work.
+
+ With argument 'BSIZE', test zeroed sectors (blocks of bytes) in the LZMA
+ stream of the compressed input FILE like the command
+ 'unzcrash --block=SIZE -d1 -p7 -s-(SIZE+20) 'lzip -t' FILE' but in
+ memory, and therefore much faster. Testing and comparisons work just
+ like with the argument '1' explained above.
+
+ By default '--unzcrash' only prints the interesting cases: CRC
+ mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
+ apparently successful decompressions, and decoder errors detected
+ 50_000 or more bytes beyond the byte (or the start of the block) being
+ tested. At verbosity level 1 (-v) it also prints decoder errors
+ detected 10_000 or more bytes beyond the byte being tested. At
+ verbosity level 2 (-vv) it prints all cases for 1-bit errors or the
+ decoder errors detected beyond the end of the block for zeroed blocks.
+
+'-W POSITION,VALUE'
+'--debug-decompress=POSITION,VALUE'
+ Load the compressed FILE into memory, set the byte at POSITION to
+ VALUE, and decompress the modified compressed data to standard output.
+ If the damaged member can be decompressed to the end (just fails with
+ a CRC mismatch), the members following it are also decompressed.
+
+'-X[POSITION,VALUE]'
+'--show-packets[=POSITION,VALUE]'
+ Load the compressed FILE into memory, optionally set the byte at
+ POSITION to VALUE, decompress the modified compressed data (discarding
+ the output), and print to standard output descriptions of the LZMA
+ packets being decoded.
+
+'-Y RANGE'
+'--debug-delay=RANGE'
+ Load the compressed FILE into memory and then repeatedly decompress
+ it, increasing 256 times each byte of the subset of the compressed data
+ positions specified by RANGE, so as to test all possible one-byte
+ errors. For each decompression error find the error detection delay and
+ print to standard output the maximum delay. The error detection delay
+ is the difference between the position of the error and the position
+ where the decoder realized that the data contains an error. *Note
+ range-format::, for a description of RANGE.
+
+'-Z POSITION,VALUE'
+'--debug-byte-repair=POSITION,VALUE'
+ Load the compressed FILE into memory, set the byte at POSITION to
+ VALUE, and then try to repair the byte error. *Note --byte-repair::.
+
+
+ Numbers given as arguments to options may be expressed in decimal,
+hexadecimal, or octal (using the same syntax as integer constants in C++),
+and may be followed by a multiplier and an optional 'B' for "byte".
+
+ Table of SI and binary prefixes (unit multipliers):
+
+Prefix Value | Prefix Value
+k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024)
+M megabyte (10^6) | Mi mebibyte (2^20)
+G gigabyte (10^9) | Gi gibibyte (2^30)
+T terabyte (10^12) | Ti tebibyte (2^40)
+P petabyte (10^15) | Pi pebibyte (2^50)
+E exabyte (10^18) | Ei exbibyte (2^60)
+Z zettabyte (10^21) | Zi zebibyte (2^70)
+Y yottabyte (10^24) | Yi yobibyte (2^80)
+R ronnabyte (10^27) | Ri robibyte (2^90)
+Q quettabyte (10^30) | Qi quebibyte (2^100)
+
+
+ Exit status: 0 for a normal exit, 1 for environmental problems (file not
+found, invalid command-line options, I/O errors, etc), 2 to indicate a
+corrupt or invalid input file, 3 for an internal consistency error (e.g.,
+bug) which caused lziprecover to panic.
+
+
+File: lziprecover.info, Node: Data safety, Next: Repairing one byte, Prev: Invoking lziprecover, Up: Top
+
+3 Protecting data from accidental loss
+**************************************
+
+It is a fact of life that sometimes data becomes corrupt. Software has
+errors. Hardware may misbehave or fail. RAM may be struck by a cosmic ray.
+This is why a safe enough integrity checking is needed in compressed
+formats, and the reason why a data recovery tool is sometimes needed.
+
+ There are 3 main types of data corruption that may cause data loss:
+single-byte errors, multibyte errors (generally affecting a whole sector in
+a block device), and total device failure.
+
+ Lziprecover protects natively against single-byte errors as long as file
+integrity is checked frequently enough that a second single-byte error does
+not develop in the same member before the first one is repaired. *Note
+Repairing one byte::.
+
+ Lziprecover also protects against multibyte errors if at least one backup
+copy of the file is made (*note Merging files::), or if the error is a
+zeroed sector and the uncompressed data corresponding to the zeroed sector
+are available (*note Reproducing one sector::). If you can choose between
+merging and reproducing, try merging first because it is usually faster,
+easier to use, and has a high probability of success.
+
+ Lziprecover can't help in case of device failure. The only remedy for
+total device failure is storing backup copies in separate media.
+
+ The extraordinary safety of the lzip format allows lziprecover to exploit
+the redundance that occurs naturally when making compressed backups.
+Lziprecover can recover data that would not be recoverable from files
+compressed in other formats. Let's see two examples of how much better lzip is
+lzip compared with gzip and bzip2 with respect to data safety:
+
+* Menu:
+
+* Merging with a backup:: Recovering a file using a damaged backup
+* Reproducing a mailbox:: Recovering new messages using an old backup
+
+
+File: lziprecover.info, Node: Merging with a backup, Next: Reproducing a mailbox, Up: Data safety
+
+3.1 Recovering a file using a damaged backup
+============================================
+
+Let's suppose that you made a compressed backup of your valuable scientific
+data and stored two copies on separate media. Years later you notice that
+both copies are corrupt.
+
+ If you compressed the data with gzip and both copies suffer any damage in
+the data stream, even if it is just one altered bit, the original data can
+only be recovered by an expert, if at all.
+
+ If you used bzip2, and if the file is large enough to contain more than
+one compressed data block (usually larger than 900 kB uncompressed), and if
+no block is damaged in both files, then the data can be manually recovered
+by splitting the files with bzip2recover, checking every block, and then
+copying the right blocks in the right order into another file.
+
+ But if you used lzip, the data can be automatically recovered with
+'lziprecover --merge' as long as the damaged areas don't overlap.
+
+ Note that each error in a bzip2 file makes a whole block unusable, but
+each error in a lzip file only affects the damaged bytes, making it
+possible to recover a file with thousands of errors.
+
+
+File: lziprecover.info, Node: Reproducing a mailbox, Prev: Merging with a backup, Up: Data safety
+
+3.2 Recovering new messages using an old backup
+===============================================
+
+Let's suppose that you make periodic backups of your email messages stored
+in one or more mailboxes. (A mailbox is a file containing a possibly large
+number of email messages). New messages are appended to the end of each
+mailbox, therefore the initial part of two consecutive backups is identical
+unless some messages have been changed or deleted in the meantime. The new
+messages added to each backup are usually a small part of the whole mailbox.
+
++============================================+
+| Older backup containing some messages |
++============================================+
++============================================+========================+
+| Newer backup containing the messages above | plus some new messages |
++============================================+========================+
+
+ One day you discover that your mailbox has disappeared because you
+deleted it inadvertently or because of a bug in your email reader. Not only
+that. You need to recover a recent message, but the last backup you made of
+the mailbox (the newer backup above) has lost the data corresponding to a
+whole sector because of an I/O error in the part containing the old
+messages.
+
+ If you compressed the mailbox with gzip, usually none of the new messages
+can be recovered even if they are intact because all the data beyond the
+missing sector can't be decoded.
+
+ If you used bzip2, and if the newer backup is large enough that the new
+messages are in a different compressed data block than the one damaged
+(usually larger than 900 kB uncompressed), then you can recover the new
+messages manually with bzip2recover. If the backups are identical except for
+the new messages appended, you may even recover the whole newer backup by
+combining the good blocks from both backups.
+
+ But if you used lzip, the whole newer backup can be automatically
+recovered with 'lziprecover --reproduce' as long as the missing bytes can be
+recovered from the older backup, even if other messages in the common part
+have been changed or deleted. Mailboxes seem to be especially easy to
+reproduce. The probability of reproducing a mailbox (*note
+performance-of-reproduce::) is almost as high as that of merging two
+identical backups (*note performance-of-merge::).
+
+
+File: lziprecover.info, Node: Repairing one byte, Next: Merging files, Prev: Data safety, Up: Top
+
+4 Repairing one byte
+********************
+
+Lziprecover can repair perfectly most files with small errors (up to one
+single-byte error per member), without the need of any extra redundance at
+all. If the reparation is successful, the repaired file is identical bit for
+bit to the original. This makes lzip files resistant to bit flip, one of the
+most common forms of data corruption.
+
+ The file is repaired in memory. Therefore, enough virtual memory
+(RAM + swap) to contain the largest damaged member is required.
+
+ The error may be located anywhere in the file except in the first 5
+bytes of each member header or in the 'Member size' field of the trailer
+(last 8 bytes of each member). If the error is in the header it can be
+easily repaired with a text editor like GNU Moe (*note File format::). If
+the error is in the member size, it is enough to ignore the message about
+'bad member size' when decompressing.
+
+ Bit flip happens when one bit in the file is changed from 0 to 1 or vice
+versa. It may be caused by bad RAM or even by natural radiation. I have
+seen a case of bit flip in a file stored on a USB flash drive.
+
+ One byte may seem small, but most file corruptions not produced by
+transmission errors or I/O errors just affect one byte, or even one bit, of
+the file. Also, unlike magnetic media, where errors usually affect a whole
+sector, solid-state storage devices tend to produce single-byte errors,
+making lzip the perfect format for data stored on such devices.
+
+ Repairing a file can take some time. Small files or files with the error
+located near the beginning can be repaired in a few seconds. But repairing
+a large file compressed with a large dictionary size and with the error
+located far from the beginning, may take hours.
+
+ On the other hand, errors located near the beginning of the file cause
+much more loss of data than errors located near the end. So lziprecover
+repairs more efficiently the worst errors.
+
+
+File: lziprecover.info, Node: Merging files, Next: Reproducing one sector, Prev: Repairing one byte, Up: Top
+
+5 Merging files
+***************
+
+If you have several copies of a file but all of them are too damaged to
+repair them individually (*note Repairing one byte::), lziprecover can try
+to produce a correct file by merging the good parts of the damaged copies.
+
+ The merge may succeed even if some copies of the file have all the
+headers and trailers damaged, as long as there is at least one copy of
+every header and trailer intact, even if they are in different copies of
+the file.
+
+ The merge fails if the damaged areas overlap (at least one byte is
+damaged in all copies), or are adjacent and the boundary can't be
+determined, or if the copies have too many damaged areas.
+
+ All the copies to be merged must have the same size. If any of them is
+larger or smaller than it should be, either because it has been truncated or
+because it got some garbage data appended at the end, it can be brought to
+the correct size with the following command before merging it with the other
+copies:
+
+ ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz
+
+ To give you an idea of its possibilities, when merging two copies, each
+of them with one damaged area affecting 1 percent of the copy, the
+probability of obtaining a correct file is about 98 percent. With three
+such copies the probability rises to 99.97 percent. For large files (a few
+MB) with small errors (one sector damaged per copy), the probability
+approaches 100 percent even with only two copies. (Supposing that the
+errors are randomly located inside each copy).
+
+ Some types of solid-state device (NAND flash, for example) can produce
+bursts of scattered single-bit errors. Lziprecover is able to merge files
+with thousands of such scattered errors by grouping the errors into
+clusters and then merging the files as if each cluster were a single error.
+
+ Here is a real case of successful merging. Two copies of the file
+'icecat-3.5.3-x86.tar.lz' (compressed size 9 MB) became corrupt while
+stored on the same NAND flash device. One of the copies had 76 single-bit
+errors scattered in an area of 1020 bytes, and the other had 3028 such
+errors in an area of 31729 bytes. Lziprecover produced a correct file,
+identical to the original, in just 5 seconds:
+
+ lziprecover -vvm a/icecat-3.5.3-x86.tar.lz b/icecat-3.5.3-x86.tar.lz
+ Merging member 1 of 1 (2552 errors)
+ 2552 errors have been grouped in 16 clusters.
+ Trying variation 2 of 2, block 2
+ Input files merged successfully.
+
+ Note that the number of errors reported by lziprecover (2552) is lower
+than the number of corrupt bytes (3104) because contiguous corrupt bytes
+are counted as a single multibyte error.
+
+
+Example 1: Recover a compressed backup from two copies on CD-ROM with
+error-checked merging of copies. *Note GNU ddrescue manual: (ddrescue)Top,
+for details about ddrescue.
+
+ ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
+ mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage
+ cp /mnt/cdimage/backup.tar.lz rescued1.tar.lz
+ umount /mnt/cdimage
+ (insert second copy in the CD drive)
+ ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 mapfile2
+ mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage
+ cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz
+ umount /mnt/cdimage
+ lziprecover -m -v -o backup.tar.lz rescued1.tar.lz rescued2.tar.lz
+ Input files merged successfully.
+ lziprecover -tv backup.tar.lz
+ backup.tar.lz: ok
+
+
+Example 2: Recover the first volume of those created with the command
+'lzip -b 32MiB -S 650MB big_db' from two copies, 'big_db1_00001.lz' and
+'big_db2_00001.lz', with member 07 damaged in the first copy, member 18
+damaged in the second copy, and member 12 damaged in both copies. The
+correct file produced is saved in 'big_db_00001.lz'.
+
+ lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
+ Input files merged successfully.
+ lziprecover -tv big_db_00001.lz
+ big_db_00001.lz: ok
+
+
+File: lziprecover.info, Node: Reproducing one sector, Next: Tarlz, Prev: Merging files, Up: Top
+
+6 Reproducing one sector
+************************
+
+Lziprecover can recover a zeroed sector in a lzip file by concatenating the
+decompressed contents of the file up to the beginning of the zeroed sector
+and the uncompressed data corresponding to the zeroed sector, and then
+feeding the concatenated data to the same version of lzip that created the
+file. For this to work, a reference file is required containing the
+uncompressed data corresponding to the missing compressed data of the zeroed
+sector, plus some context data before and after them. It is possible to
+recover a large file using just a few kB of reference data.
+
+ The difficult part is finding a suitable reference file. It must contain
+the exact data required (possibly mixed with other data). Containing similar
+data is not enough.
+
+ A zeroed sector may be caused by the incomplete recovery of a damaged
+storage device (with I/O errors) using, for example, ddrescue. The
+reproduction can't be done if the zeroed sector overlaps with the first 15
+bytes of a member, or if the zeroed sector is smaller than 8 bytes.
+
+ The file is reproduced in memory. Therefore, enough virtual memory
+(RAM + swap) to contain the damaged member is required.
+
+ To understand how it works, take any lzipped file, say 'foo.lz',
+decompress it (keeping the original), and try to reproduce an artificially
+zeroed sector in it by running the following commands:
+
+ lzip -kd foo.lz
+ lziprecover -vv --debug-reproduce=65536,512 --reference-file=foo foo.lz
+
+which should produce an output like the following:
+
+ Reproducing: foo.lz
+ Reference file: foo
+ Testing sectors of size 512 at file positions 65536 to 66047
+ (master mpos = 65536, dpos = 296892)
+ foo: Match found at offset 296892
+ Reproduction succeeded at pos 65536
+
+ 1 sectors tested
+ 1 reproductions returned with zero status
+ all comparisons passed
+
+ Using 'foo' as reference file guarantees that any zeroed sector in
+'foo.lz' can be reproduced because both files contain the same data. In
+real use, the reference file needs to contain the data corresponding to the
+zeroed sector, but the rest of the data (if any) may differ between both
+files. The reference data may be obtained from the partial decompression of
+the damaged file itself if it contains repeated data. For example if the
+damaged file is a compressed tarball containing several partially modified
+versions of the same file.
+
+ The offset reported by lziprecover is the position in the reference file
+of the first byte that could not be decompressed. This is the first byte
+that will be compressed to reproduce the zeroed sector.
+
+ The reproduce mode tries to reproduce the missing compressed data
+originally present in the zeroed sector. It is based on the perfect
+reproducibility of lzip files (lzip produces identical compressed output
+from identical input). Therefore, the same version of lzip that created the
+file to be reproduced should be used to reproduce the zeroed sector. Near
+versions may also work because the output of lzip changes infrequently. If
+reproducing a tar.lz archive created with tarlz, the version of lzip,
+clzip, or minilzip corresponding to the version of the lzlib library used
+by tarlz to create the archive should be used.
+
+ When recovering a tar.lz archive and using as reference a file from the
+filesystem, if the zeroed sector encodes (part of) a tar header, the archive
+can't be reproduced. Therefore, the less overhead (smaller headers) a tar
+archive has, the more probable it is that the zeroed sector does not include a
+header, and that the archive can be reproduced. The tarlz format has minimum
+overhead. It uses basic ustar headers, and only adds extended pax headers
+when they are required.
+
+6.1 Performance of '--reproduce'
+================================
+
+Reproduce mode is especially useful when recovering a corrupt backup (or a
+corrupt source tarball) that is part of a series. Usually only a small
+fraction of the data changes from one backup to the next or from one version
+of a source tarball to the next. This sometimes makes it possible to reproduce
+a given corrupted version using reference data from a near version. The
+following two tables show the fraction of reproducible sectors (reproducible
+sectors divided by total sectors in archive) for some archives, using sector
+sizes of 512 and 4096 bytes. 'mailbox-aug.tar.lz' is a backup of some of my
+mailboxes. 'backup-feb.tar.lz' and 'backup-apr.tar.lz' are real backups of
+my own working directory:
+
+Reference file File Reproducible (512)
+---------------------------------------------------------
+backup-feb.tar backup-apr.tar.lz 3273 / 4342 = 75.38%
+backup-apr.tar backup-feb.tar.lz 3259 / 4161 = 78.32%
+gawk-5.0.0.tar gawk-5.0.1.tar.lz 4369 / 5844 = 74.76%
+gawk-5.0.1.tar gawk-5.0.0.tar.lz 4379 / 5603 = 78.15%
+gmp-6.1.1.tar gmp-6.1.2.tar.lz 2454 / 3787 = 64.8%
+gmp-6.1.2.tar gmp-6.1.1.tar.lz 2461 / 3782 = 65.07%
+
+Reference file File Reproducible (4096)
+-----------------------------------------------------------
+mailbox-mar.tar mailbox-aug.tar.lz 4036 / 4252 = 94.92%
+backup-feb.tar backup-apr.tar.lz 264 / 542 = 48.71%
+backup-apr.tar backup-feb.tar.lz 264 / 520 = 50.77%
+gawk-5.0.0.tar gawk-5.0.1.tar.lz 327 / 730 = 44.79%
+gawk-5.0.1.tar gawk-5.0.0.tar.lz 326 / 700 = 46.57%
+gmp-6.1.1.tar gmp-6.1.2.tar.lz 175 / 473 = 37%
+gmp-6.1.2.tar gmp-6.1.1.tar.lz 181 / 472 = 38.35%
+
+ Note that the "performance of reproduce" is a probability, not a partial
+recovery. The data are either recovered fully (with the probability X shown
+in the last column of the tables above) or not recovered at all (with
+probability 1 - X).
+
+Example 1: Recover a damaged source tarball with a zeroed sector of 512
+bytes at file position 1019904, using as reference another source tarball
+for a different version of the software.
+
+ lziprecover -vv -e --reference-file=gmp-6.1.1.tar gmp-6.1.2.tar.lz
+ Reproducing bad area in member 1 of 1
+ (begin = 1019904, size = 512, value = 0x00)
+ (master mpos = 1019904, dpos = 6292134)
+ warning: gmp-6.1.1.tar: Partial match found at offset 6277798, len 8716.
+ Reference data may be mixed with other data.
+ Trying level -9
+ Reproducing position 1015808
+ Member reproduced successfully.
+ Copy of input file reproduced successfully.
+
+
+Example 2: Recover a damaged backup with a zeroed sector of 4096 bytes at
+file position 1019904, using as reference a previous backup. The damaged
+backup comes from a damaged partition copied with ddrescue.
+
+ ddrescue -b4096 -r10 /dev/sdc1 hdimage mapfile
+ mount -o loop,ro hdimage /mnt/hdimage
+ cp /mnt/hdimage/backup.tar.lz backup.tar.lz
+ umount /mnt/hdimage
+ lzip -t backup.tar.lz
+ backup.tar.lz: Decoder error at pos 1020530
+ lziprecover -vv -e --reference-file=old_backup.tar backup.tar.lz
+ Reproducing bad area in member 1 of 1
+ (begin = 1019904, size = 4096, value = 0x00)
+ (master mpos = 1019903, dpos = 5857954)
+ warning: old_backup.tar: Partial match found at offset 5743778, len 9546.
+ Reference data may be mixed with other data.
+ Trying level -9
+ Reproducing position 1015808
+ Member reproduced successfully.
+ Copy of input file reproduced successfully.
+
+
+Example 3: Recover a damaged backup with a zeroed sector of 4096 bytes at
+file position 1019904, using as reference a file from the filesystem. (If
+the zeroed sector encodes (part of) a tar header, the tarball can't be
+reproduced).
+
+ # List the contents of the backup tarball to locate the damaged member.
+ tarlz -n0 -tvf backup.tar.lz
+ [...]
+ example.txt
+ tarlz: Skipping to next header.
+ tarlz: backup.tar.lz: Archive ends unexpectedly.
+ # Find in the filesystem the last file listed and use it as reference.
+ lziprecover -vv -e --reference-file=/somedir/example.txt backup.tar.lz
+ Reproducing bad area in member 1 of 1
+ (begin = 1019904, size = 4096, value = 0x00)
+ (master mpos = 1019903, dpos = 5857954)
+ /somedir/example.txt: Match found at offset 9378
+ Trying level -9
+ Reproducing position 1015808
+ Member reproduced successfully.
+ Copy of input file reproduced successfully.
+
+ If 'backup.tar.lz' is a multimember file with more than one member
+damaged and lziprecover shows the message 'One member reproduced. Copy of
+input file still contains errors.', the procedure shown in the example
+above can be repeated until all the members have been reproduced.
+
+ 'tarlz --keep-damaged -n0 -xf backup.tar.lz example.txt' produces a
+partial copy of the reference file 'example.txt' that may help locate a
+complete copy in the filesystem or in another backup, even if 'example.txt'
+has been renamed.
+
+
+File: lziprecover.info, Node: Tarlz, Next: File names, Prev: Reproducing one sector, Up: Top
+
+7 Options supporting the tar.lz format
+**************************************
+
+Tarlz is a massively parallel (multi-threaded) combined implementation of
+the tar archiver and the lzip compressor.
+
+ Tarlz creates tar archives using a simplified and safer variant of the
+POSIX pax format compressed in lzip format, keeping the alignment between
+tar members and lzip members. The resulting multimember tar.lz archive is
+backward compatible with standard tar tools like GNU tar, which treat it
+like any other tar.lz archive. *Note tarlz manual: (tarlz)Top, and *note
+lzip manual: (lzip)Top.
+
+ Multimember tar.lz archives have some safety advantages over solidly
+compressed tar.lz archives. For example, in case of corruption, tarlz can
+extract all the undamaged members from the tar.lz archive, skipping over the
+damaged members, just like the standard (uncompressed) tar. Keeping the
+alignment between tar members and lzip members minimizes the amount of data
+lost in case of corruption. In this chapter we'll explain the ways in which
+lziprecover can recover and process multimember tar.lz archives.
+
+
+7.1 Recovering damaged multimember tar.lz archives
+==================================================
+
+If you have several copies of the damaged archive, try merging them first
+because merging has a high probability of success. *Note Merging files::. If
+the command below prints something like 'Input files merged successfully.'
+you are done and 'archive.tar.lz' now contains the recovered archive:
+
+ lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz
+
+ If you only have one copy of the damaged archive with a zeroed block of
+data caused by an I/O error, you may try to reproduce the archive. *Note
+Reproducing one sector::. If the command below prints something like
+'Copy of input file reproduced successfully.' you are done and
+'archive_fixed.tar.lz' now contains the recovered archive:
+
+ lziprecover -vv -e --reference-file=old_archive.tar archive.tar.lz
+
+ If you only have one copy of the damaged archive, you may try to repair
+the archive, but this has a lower probability of success. *Note Repairing
+one byte::. If the command below prints something like
+'Copy of input file repaired successfully.' you are done and
+'archive_fixed.tar.lz' now contains the recovered archive:
+
+ lziprecover -v -R archive.tar.lz
+
+ If all the above fails, and the archive was created with tarlz, you may
+save the damaged members for later and then copy the good members to another
+archive. If the two commands below succeed, 'bad_members.tar.lz' will
+contain all the damaged members and 'archive_cleaned.tar.lz' will contain a
+good archive with the damaged members removed:
+
+ lziprecover -v --dump=damaged -o bad_members.tar.lz archive.tar.lz
+ lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz
+
+ You can then use 'tarlz --keep-damaged' to recover as much data as
+possible from each damaged member in 'bad_members.tar.lz':
+
+ mkdir tmp
+ cd tmp
+ tarlz --keep-damaged -xvf ../bad_members.tar.lz
+
+
+7.2 Processing multimember tar.lz archives
+==========================================
+
+Lziprecover is able to copy a list of members from one file to another. For
+example the command
+'lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz' creates
+a subset archive containing the first ten members, the end-of-file blocks,
+and the trailing data (if any) of 'archive.tar.lz'. The 'r1' part selects
+the last member, which in an appendable tar.lz archive contains the
+end-of-file blocks.
+
+
+File: lziprecover.info, Node: File names, Next: File format, Prev: Tarlz, Up: Top
+
+8 Names of the files produced by lziprecover
+********************************************
+
+The name of the fixed file produced by '--byte-repair' and '--merge' is
+made by appending the string '_fixed.lz' to the original file name. If the
+original file name ends with one of the extensions '.tar.lz', '.lz', or
+'.tlz', the string '_fixed' is inserted before the extension.
+
+
+File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top
+
+9 File format
+*************
+
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.
+-- Antoine de Saint-Exupery
+
+
+ In the diagram below, a box like this:
+
++---+
+| | <-- the vertical bars might be missing
++---+
+
+ represents one byte; a box like this:
+
++==============+
+| |
++==============+
+
+ represents a variable number of bytes.
+
+
+ A lzip file consists of one or more independent "members" (compressed
+data sets). The members simply appear one after another in the file, with no
+additional information before, between, or after them. Each member can
+encode in compressed form up to 16 EiB - 1 byte of uncompressed data. The
+size of a multimember file is unlimited.
+
+ Each member has the following structure:
+
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ All multibyte values are stored in little endian order.
+
+'ID string (the "magic" bytes)'
+ A four byte string, identifying the lzip format, with the value "LZIP"
+ (0x4C, 0x5A, 0x49, 0x50).
+
+'VN (version number, 1 byte)'
+ Just in case something needs to be modified in the future. 1 for now.
+
+'DS (coded dictionary size, 1 byte)'
+ The dictionary size is calculated by taking a power of 2 (the base
+ size) and subtracting from it a fraction between 0/16 and 7/16 of the
+ base size.
+ Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
+ Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
+ from the base size to obtain the dictionary size.
+ Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB
+ Valid values for dictionary size range from 4 KiB to 512 MiB.
+
+'LZMA stream'
+ The LZMA stream, finished by an "End Of Stream" marker. Uses default
+ values for encoder properties. *Note Stream format: (lzip)Stream
+ format, for a complete description.
+
+'CRC32 (4 bytes)'
+ Cyclic Redundancy Check (CRC) of the original uncompressed data.
+
+'Data size (8 bytes)'
+ Size of the original uncompressed data.
+
+'Member size (8 bytes)'
+ Total size of the member, including header and trailer. This field acts
+ as a distributed index, improves the checking of stream integrity, and
+ facilitates the safe recovery of undamaged members from multimember
+ files. Lzip limits the member size to 2 PiB to prevent the data size
+ field from overflowing.
+
+
+
+File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top
+
+10 Extra data appended to the file
+**********************************
+
+Sometimes extra data are found appended to a lzip file after the last
+member. Such trailing data may be:
+
+ * Padding added to make the file size a multiple of some block size, for
+ example when writing to a tape. It is safe to append any amount of
+ padding zero bytes to a lzip file.
+
+ * Useful data added by the user; an "End Of File" string (to check that
+ the file has not been truncated), a cryptographically secure hash, a
+ description of file contents, etc. It is safe to append any amount of
+ text to a lzip file as long as none of the first four bytes of the
+ text matches the corresponding byte in the string "LZIP", and the text
+ does not contain any zero bytes (null characters). Nonzero bytes and
+ zero bytes can't be safely mixed in trailing data.
+
+ * Garbage added by some not totally successful copy operation.
+
+ * Malicious data added to the file in order to make its total size and
+ hash value (for a chosen hash) coincide with those of another file.
+
+ * In rare cases, trailing data could be the corrupt header of another
+ member. In multimember or concatenated files the probability of
+ corruption happening in the magic bytes is 5 times smaller than the
+ probability of getting a false positive caused by the corruption of the
+ integrity information itself. Therefore it can be considered to be
+ below the noise level. Additionally, the test used by lziprecover to
+ discriminate trailing data from a corrupt header has a Hamming
+ distance (HD) of 3, and the 3 bit flips must happen in different magic
+ bytes for the test to fail. In any case, the option '--trailing-error'
+ guarantees that any corrupt header is detected.
+
+ Trailing data are in no way part of the lzip file format, but tools
+reading lzip files are expected to behave as correctly and usefully as
+possible in the presence of trailing data.
+
+ Trailing data can be safely ignored in most cases. In some cases, like
+that of user-added data, they are expected to be ignored. In those cases
+where a file containing trailing data must be rejected, the option
+'--trailing-error' can be used. *Note --trailing-error::.
+
+ Lziprecover facilitates the management of metadata stored as trailing
+data in lzip files. See the following examples:
+
+Example 1: Add a comment or description to a compressed file.
+
+ # First append the comment as trailing data to a lzip file
+ echo 'This file contains this and that' >> file.lz
+ # This command prints the comment to standard output
+ lziprecover --dump=tdata file.lz
+ # This command outputs file.lz without the comment
+ lziprecover --strip=tdata file.lz > stripped_file.lz
+ # This command removes the comment from file.lz
+ lziprecover --remove=tdata file.lz
+
+
+Example 2: Add and check a cryptographically secure hash. (This may be
+convenient, but a separate copy of the hash must be kept in a safe place to
+guarantee that both file and hash have not been maliciously replaced).
+
+ sha256sum < file.lz >> file.lz
+ lziprecover --strip=tdata file.lz | sha256sum -c \
+ <(lziprecover --dump=tdata file.lz)
+
+
+File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top
+
+11 A small tutorial with examples
+*********************************
+
+Example 1: Extract all the files from archive 'foo.tar.lz'.
+
+ tar -xf foo.tar.lz
+ or
+ lziprecover -cd foo.tar.lz | tar -xf -
+
+
+Example 2: Restore a regular file from its compressed version 'file.lz'. If
+the operation is successful, 'file.lz' is removed.
+
+ lziprecover -d file.lz
+
+
+Example 3: Check the integrity of the compressed file 'file.lz' and show
+status.
+
+ lziprecover -tv file.lz
+
+
+Example 4: The right way of concatenating the decompressed output of two or
+more compressed files. *Note Trailing data::.
+
+ Don't do this
+ cat file1.lz file2.lz file3.lz | lziprecover -d -
+ Do this instead
+ lziprecover -cd file1.lz file2.lz file3.lz
+ You may also concatenate the compressed files like this
+ lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz
+ Or keeping the trailing data of the last file like this
+ lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz
+
+
+Example 5: Decompress 'file.lz' partially until 10 KiB of decompressed data
+are produced.
+
+ lziprecover -D 0,10KiB file.lz
+
+
+Example 6: Decompress 'file.lz' partially from decompressed byte at offset
+10000 to decompressed byte at offset 14999 (5000 bytes are produced).
+
+ lziprecover -D 10000-15000 file.lz
+
+
+Example 7: Repair a corrupt byte in the file 'file.lz'. (Indented lines are
+abridged diagnostic messages from lziprecover).
+
+ lziprecover -v -R file.lz
+ Copy of input file repaired successfully.
+ lziprecover -tv file_fixed.lz
+ file_fixed.lz: ok
+ mv file_fixed.lz file.lz
+
+
+Example 8: Split the multimember file 'file.lz' and write each member in
+its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the
+integrity of the resulting files.
+
+ lziprecover -s file.lz
+ lziprecover -tv rec*file.lz
+
+
+File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top
+
+12 Testing the robustness of decompressors
+******************************************
+
+*Note --unzcrash::, for a faster way of testing the robustness of lzip.
+
+ The lziprecover package also includes unzcrash, a program written to test
+robustness to decompression of corrupted data, inspired by unzcrash.c from
+Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source
+directory to build it.
+
+ By default, unzcrash reads the file specified and then repeatedly
+decompresses it, increasing each byte of the compressed data 256 times, so
+as to test all possible one-byte errors. Note that it may take years or even
+centuries to test all possible one-byte errors in a large file (tens of MB).
+
+ If the option '--block' is given, unzcrash reads the file specified and
+then repeatedly decompresses it, setting all bytes in each successive block
+to the value given, so as to test all possible full sector errors.
+
+ If the option '--truncate' is given, unzcrash reads the file specified
+and then repeatedly decompresses it, truncating the file to increasing
+lengths, so as to test all possible truncation points.
+
+ None of the three test modes described above should cause any invalid
+memory accesses. If any of them does, please, report it as a bug to the
+maintainers of the decompressor being tested.
+
+ Unzcrash really executes as a subprocess the shell command specified in
+the first non-option argument, and then writes the file specified in the
+second non-option argument to the standard input of the subprocess,
+modifying the corresponding byte each time. Therefore unzcrash can be used
+to test any decompressor (not only lzip), or even other decoder programs
+having a suitable command-line syntax.
+
+ If the decompressor returns with zero status, unzcrash compares the
+output of the decompressor for the original and corrupt files. If the
+outputs differ, it means that the decompressor returned a false negative;
+it failed to recognize the corruption and produced garbage output. The only
+exception is when a multimember file is truncated just after the last byte
+of a member, producing a shorter but valid compressed file. Except in this
+latter case, please, report any false negative as a bug.
+
+ In order to compare the outputs, unzcrash needs a 'zcmp' program able to
+understand the format being tested. For example the 'zcmp' provided by
+zutils. If the 'zcmp' program used does not understand the format being
+tested, all the comparisons fail because the compressed files are compared
+without being decompressed first. Use '--zcmp=false' to disable comparisons.
+*Note Zcmp: (zutils)Zcmp.
+
+ The format for running unzcrash is:
+
+ unzcrash [OPTIONS] 'lzip -t' FILE
+
+The compressed FILE must not contain errors and the decompressor being
+tested must decompress it correctly for the comparisons to work.
+
+ unzcrash supports the following options:
+
+'-h'
+'--help'
+ Print an informative help message describing the options and exit.
+
+'-V'
+'--version'
+ Print the version number of unzcrash on the standard output and exit.
+ This version number should be included in all bug reports.
+
+'-b RANGE'
+'--bits=RANGE'
+ Test N-bit errors only, instead of testing all the 255 wrong values for
+ each byte. 'N-bit error' means any value differing from the original
+ value in N bit positions, not a value differing from the original
+ value in the bit position N.
+ The number of N-bit errors per byte (N = 1 to 8) is:
+ 8 28 56 70 56 28 8 1
+
+ Examples of RANGE Tests errors of N-bits
+ 1 1
+ 1,2,3 1, 2, 3
+ 2-4 2, 3, 4
+ 1,3-5,8 1, 3, 4, 5, 8
+ 1-3,5-8 1, 2, 3, 5, 6, 7, 8
+
+'-B[SIZE][,VALUE]'
+'--block[=SIZE][,VALUE]'
+ Test block errors of given SIZE, simulating a whole sector I/O error.
+ SIZE defaults to 512 bytes. VALUE defaults to 0. By default, only
+ contiguous, non-overlapping blocks are tested, but this may be changed
+ with the option '--delta'.
+
+'-d N'
+'--delta=N'
+ Test one byte, block, or truncation size every N bytes. If '--delta'
+ is not specified, unzcrash tests all the bytes, non-overlapping
+ blocks, or truncation sizes. Values of N smaller than the block size
+ result in overlapping blocks. (Which is convenient for testing because
+ there are usually too few non-overlapping blocks in a file).
+
+'-e POSITION,VALUE'
+'--set-byte=POSITION,VALUE'
+ Set byte at POSITION to VALUE in the internal buffer after reading and
+ testing FILE but before the first test call to the decompressor. Byte
+ positions start at 0. If VALUE is preceded by '+', it is added to the
+ original value of the byte at POSITION. If VALUE is preceded by 'f'
+ (flip), it is XORed with the original value of the byte at POSITION.
+ This option can be used to run tests with a changed dictionary size,
+ for example.
+
+'-n'
+'--no-check'
+ Skip initial test of FILE and 'zcmp'. May speed up things a lot when
+ testing many (or large) known good files.
+
+'-p BYTES'
+'--position=BYTES'
+ First byte position to test in the file. Defaults to 0. Negative values
+ are relative to the end of the file.
+
+'-q'
+'--quiet'
+ Quiet operation. Suppress all messages.
+
+'-s BYTES'
+'--size=BYTES'
+ Number of byte positions to test. If not specified, the rest of the
+ file is tested (from '--position' to end of file). Negative values are
+ relative to the rest of the file.
+
+'-t'
+'--truncate'
+ Test all possible truncation points in the range specified by
+ '--position' and '--size'.
+
+'-v'
+'--verbose'
+ Verbose mode.
+
+'-z'
+'--zcmp=<command>'
+ Set zcmp command name and options. Defaults to 'zcmp'. Use
+ '--zcmp=false' to disable comparisons. If testing a decompressor
+ different from the one used by default by zcmp, it is necessary to force
+ unzcrash and zcmp to use the same decompressor with a command like
+ 'unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' FILE'
+
+
+ Exit status: 0 for a normal exit, 1 for environmental problems (file not
+found, invalid command-line options, I/O errors, etc), 2 to indicate a
+corrupt or invalid input file, 3 for an internal consistency error (e.g.,
+bug) which caused unzcrash to panic.
+
+
+File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top
+
+13 Reporting bugs
+*****************
+
+There are probably bugs in lziprecover. There are certainly errors and
+omissions in this manual. If you report them, they will get fixed. If you
+don't, no one will ever know about them and they will remain unfixed for
+all eternity, if not longer.
+
+ If you find a bug in lziprecover, please send electronic mail to
+<lzip-bug@nongnu.org>. Include the version number, which you can find by
+running 'lziprecover --version'.
+
+
+File: lziprecover.info, Node: Concept index, Prev: Problems, Up: Top
+
+Concept index
+*************
+
+
+* Menu:
+
+* bugs: Problems. (line 6)
+* data safety: Data safety. (line 6)
+* examples: Examples. (line 6)
+* file format: File format. (line 6)
+* file names: File names. (line 6)
+* getting help: Problems. (line 6)
+* introduction: Introduction. (line 6)
+* invoking: Invoking lziprecover. (line 6)
+* merging files: Merging files. (line 6)
+* merging with a backup: Merging with a backup. (line 6)
+* options: Invoking lziprecover. (line 6)
+* repairing one byte: Repairing one byte. (line 6)
+* reproducing a mailbox: Reproducing a mailbox. (line 6)
+* reproducing one sector: Reproducing one sector. (line 6)
+* tarlz: Tarlz. (line 6)
+* trailing data: Trailing data. (line 6)
+* unzcrash: Unzcrash. (line 6)
+* usage: Invoking lziprecover. (line 6)
+* version: Invoking lziprecover. (line 6)
+
+
+
+Tag Table:
+Node: Top226
+Node: Introduction1406
+Node: Invoking lziprecover5412
+Ref: --trailing-error6359
+Ref: range-format8791
+Ref: --reproduce9126
+Ref: --byte-repair13411
+Ref: --unzcrash23209
+Node: Data safety27459
+Node: Merging with a backup29443
+Node: Reproducing a mailbox30706
+Node: Repairing one byte33160
+Node: Merging files35220
+Ref: performance-of-merge36399
+Ref: ddrescue-example38008
+Node: Reproducing one sector39295
+Ref: performance-of-reproduce43181
+Ref: ddrescue-example245855
+Node: Tarlz48275
+Node: File names51933
+Node: File format52395
+Node: Trailing data55082
+Node: Examples58397
+Ref: concat-example58972
+Node: Unzcrash60364
+Node: Problems66704
+Node: Concept index67256
+
+End Tag Table
+
+
+Local Variables:
+coding: iso-8859-15
+End:
diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi
new file mode 100644
index 0000000..0d32d9d
--- /dev/null
+++ b/doc/lziprecover.texi
@@ -0,0 +1,1617 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header
+@setfilename lziprecover.info
+@documentencoding ISO-8859-15
+@settitle Lziprecover Manual
+@finalout
+@c %**end of header
+
+@set UPDATED 20 January 2024
+@set VERSION 1.24
+
+@dircategory Compression
+@direntry
+* Lziprecover: (lziprecover). Data recovery tool for the lzip format
+@end direntry
+
+
+@ifnothtml
+@titlepage
+@title Lziprecover
+@subtitle Data recovery tool for the lzip format
+@subtitle for Lziprecover version @value{VERSION}, @value{UPDATED}
+@author by Antonio Diaz Diaz
+
+@page
+@vskip 0pt plus 1filll
+@end titlepage
+
+@contents
+@end ifnothtml
+
+@ifnottex
+@node Top
+@top
+
+This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
+
+@menu
+* Introduction:: Purpose and features of lziprecover
+* Invoking lziprecover:: Command-line interface
+* Data safety:: Protecting data from accidental loss
+* Repairing one byte:: Fixing bit flips and similar errors
+* Merging files:: Fixing several damaged copies
+* Reproducing one sector:: Fixing a missing (zeroed) sector
+* Tarlz:: Options supporting the tar.lz format
+* File names:: Names of the files produced by lziprecover
+* File format:: Detailed format of the compressed file
+* Trailing data:: Extra data appended to the file
+* Examples:: A small tutorial with examples
+* Unzcrash:: Testing the robustness of decompressors
+* Problems:: Reporting bugs
+* Concept index:: Index of concepts
+@end menu
+
+@sp 1
+Copyright @copyright{} 2009-2024 Antonio Diaz Diaz.
+
+This manual is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
+@end ifnottex
+
+
+@node Introduction
+@chapter Introduction
+@cindex introduction
+
+@uref{http://www.nongnu.org/lzip/lziprecover.html,,Lziprecover}
+is a data recovery tool and decompressor for files in the lzip
+compressed data format (.lz). Lziprecover is able to repair slightly damaged
+files (up to one single-byte error per member), produce a correct file by
+merging the good parts of two or more damaged copies, reproduce a missing
+(zeroed) sector using a reference file, extract data from damaged files,
+decompress files, and test integrity of files.
+
+Lziprecover can remove the damaged members from multimember files, for
+example multimember tar.lz archives.
+
+Lziprecover provides random access to the data in multimember files; it only
+decompresses the members containing the desired data.
+
+Lziprecover facilitates the management of metadata stored as trailing data
+in lzip files.
+
+Lziprecover is not a replacement for regular backups, but a last line of
+defense for the case where the backups are also damaged.
+
+The lzip file format is designed for data sharing and long-term archiving,
+taking into account both data integrity and decoder availability:
+
+@itemize @bullet
+@item
+The lzip format provides very safe integrity checking and some data
+recovery means. The program lziprecover can repair bit flip errors
+(one of the most common forms of data corruption) in lzip files, and
+provides data recovery capabilities, including error-checked merging
+of damaged copies of a file. @xref{Data safety}.
+
+@item
+The lzip format is as simple as possible (but not simpler). The lzip
+manual provides the source code of a simple decompressor along with a
+detailed explanation of how it works, so that with only the help of the
+lzip manual it would be possible for a digital archaeologist to extract
+the data from a lzip file long after quantum computers eventually
+render LZMA obsolete.
+
+@item
+Additionally the lzip reference implementation is copylefted, which
+guarantees that it will remain free forever.
+@end itemize
+
+A nice feature of the lzip format is that a corrupt byte is easier to repair
+the nearer it is to the beginning of the file. Therefore, with the help of
+lziprecover, losing an entire archive just because of a corrupt byte near
+the beginning is a thing of the past.
+
+Compression may be good for long-term archiving. For compressible data,
+multiple compressed copies may provide redundancy in a more useful form and
+may have a better chance of surviving intact than one uncompressed copy
+using the same amount of storage space. This is especially true if the
+format provides recovery capabilities like those of lziprecover, which is
+able to find and combine the good parts of several damaged copies.
+
+Lziprecover is able to recover or decompress files produced by any of the
+compressors in the lzip family: lzip, plzip, minilzip/lzlib, clzip, and
+pdlzip.
+
+If the cause of file corruption is a damaged medium, the combination
+@w{GNU ddrescue + lziprecover} is the recommended option for recovering data
+from damaged lzip files. @xref{ddrescue-example}, and
+@ref{ddrescue-example2}, for examples.
+
+If a file is too damaged for lziprecover to repair it, all the recoverable
+data in all members of the file can be extracted with the following command
+(the resulting file may contain errors and some garbage data may be produced
+at the end of each damaged member):
+
+@example
+lziprecover -cd --ignore-errors file.lz > file
+@end example
+
+When recovering data, lziprecover takes as arguments the names of the
+damaged files and writes zero or more recovered files depending on the
+operation selected and whether the recovery succeeded or not. The damaged
+files themselves are kept unchanged.
+
+When decompressing or testing file integrity, lziprecover behaves like lzip
+or lunzip.
+
+LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have
+been compressed. Decompressed is used to refer to data which have undergone
+the process of decompression.
+
+
+@node Invoking lziprecover
+@chapter Invoking lziprecover
+@cindex invoking
+@cindex options
+@cindex usage
+@cindex version
+
+The format for running lziprecover is:
+
+@example
+lziprecover [@var{options}] [@var{files}]
+@end example
+
+@noindent
+When decompressing or testing, a hyphen @samp{-} used as a @var{file}
+argument means standard input. It can be mixed with other @var{files} and is
+read just once, the first time it appears in the command line. If no file
+names are specified, lziprecover decompresses from standard input to
+standard output. Remember to prepend @file{./} to any file name beginning
+with a hyphen, or use @samp{--}.
+
+lziprecover supports the following
+@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
+@ifnothtml
+@xref{Argument syntax,,,arg_parser}.
+@end ifnothtml
+
+@table @code
+@item -h
+@itemx --help
+Print an informative help message describing the options and exit.
+
+@item -V
+@itemx --version
+Print the version number of lziprecover on the standard output and exit.
+This version number should be included in all bug reports.
+
+@anchor{--trailing-error}
+@item -a
+@itemx --trailing-error
+Exit with error status 2 if any remaining input is detected after
+decompressing the last member. Such remaining input is usually trailing
+garbage that can be safely ignored. @xref{concat-example}.
+
+@item -A
+@itemx --alone-to-lz
+Convert lzma-alone files to lzip format without recompressing, just
+adding a lzip header and trailer. The conversion minimizes the
+dictionary size of the resulting file (and therefore the amount of
+memory required to decompress it). Only streamed files with default LZMA
+properties can be converted; non-streamed lzma-alone files lack the "End
+Of Stream" marker required in lzip files.
+
+The name of the converted lzip file is derived from that of the original
+lzma-alone file as follows:
+
+@multitable {filename.lzma} {becomes} {anyothername.lz}
+@item filename.lzma @tab becomes @tab filename.lz
+@item filename.tlz @tab becomes @tab filename.tar.lz
+@item anyothername @tab becomes @tab anyothername.lz
+@end multitable
+
+@item -c
+@itemx --stdout
+Write decompressed data to standard output; keep input files unchanged. This
+option (or @option{-o}) is needed when reading from a named pipe (fifo) or
+from a device. Use it also to recover as much of the decompressed data as
+possible when decompressing a corrupt file. @option{-c} overrides @option{-o}.
+@option{-c} has no effect when merging, removing members, repairing,
+reproducing, splitting, testing or listing.
+
+@item -d
+@itemx --decompress
+Decompress the files specified. The integrity of the files specified is
+checked. If a file does not exist, can't be opened, or the destination file
+already exists and @option{--force} has not been specified, lziprecover
+continues decompressing the rest of the files and exits with error status 1.
+If a file fails to decompress, or is a terminal, lziprecover exits
+immediately with error status 2 without decompressing the rest of the files.
+A terminal is considered an uncompressed file, and therefore invalid.
+
+@item -D @var{range}
+@itemx --range-decompress=@var{range}
+Decompress only a range of bytes starting at decompressed byte position
+@var{begin} and up to byte position @w{@var{end} - 1}. Byte positions start
+at 0. This option provides random access to the data in multimember files;
+it only decompresses the members containing the desired data. In order to
+guarantee the correctness of the data produced, all members containing any
+part of the desired data are decompressed and their integrity is checked.
+
+@anchor{range-format}
+Four formats of @var{range} are recognized, @samp{@var{begin}},
+@samp{@var{begin}-@var{end}}, @samp{@var{begin},@var{size}}, and
+@samp{,@var{size}}. If only @var{begin} is specified, @var{end} is taken as
+the end of the file. If only @var{size} is specified, @var{begin} is taken
+as the beginning of the file. The bytes produced are sent to standard output
+unless the option @option{--output} is used.
+
+@anchor{--reproduce}
+@item -e
+@itemx --reproduce
+Try to recover a missing (zeroed) sector in @var{file} using a reference
+file and the same version of lzip that created @var{file}. If successful, a
+repaired copy is written to the file @var{file}_fixed.lz. @var{file} is not
+modified at all. The exit status is 0 if the member containing the zeroed
+sector could be repaired, 2 otherwise. Note that @var{file}_fixed.lz may
+still contain errors in the members following the one repaired.
+@xref{Reproducing one sector}, for a complete description of the reproduce
+mode.
+
+@item --lzip-level=@var{digit}|a|m[@var{length}]
+Try only the given compression level or match length limit when reproducing
+a zeroed sector. @option{--lzip-level=a} tries all the compression levels
+@w{(0 to 9)}, while @option{--lzip-level=m} tries all the match length limits
+@w{(5 to 273)}.
+
+@item --lzip-name=@var{name}
+Set the name of the lzip executable used by @option{--reproduce}. If
+@option{--lzip-name} is not specified, @samp{lzip} is used.
+
+@item --reference-file=@var{file}
+Set the reference file used by @option{--reproduce}. It must contain the
+uncompressed data corresponding to the missing compressed data of the zeroed
+sector, plus some context data before and after them.
+
+@item -f
+@itemx --force
+Force overwrite of output files.
+
+@item -i
+@itemx --ignore-errors
+Make @option{--decompress}, @option{--test}, and @option{--range-decompress}
+ignore format and data errors and continue decompressing the remaining
+members in the file; keep input files unchanged. For example, the commands
+@w{@samp{lziprecover -cd -i file.lz > file}} or
+@w{@samp{lziprecover -D0 -i file.lz > file}} decompress all the recoverable
+data in all members of @samp{file.lz} without having to split it first. The
+@w{@samp{-cd -i}} method resyncs to the next member header after each error,
+and is immune to some format errors that make @w{@samp{-D0 -i}} fail. The
+range decompressed may be smaller than the range requested, because of the
+errors. The exit status is set to 0 unless other errors are found (I/O
+errors, for example).
+
+Make @option{--list}, @option{--dump}, @option{--remove}, and @option{--strip}
+ignore format errors. The sizes of the members with errors (especially the
+last) may be wrong.
+
+@item -k
+@itemx --keep
+Keep (don't delete) input files during decompression.
+
+@item -l
+@itemx --list
+Print the uncompressed size, compressed size, and percentage saved of the
+files specified. Trailing data are ignored. The values produced are correct
+even for multimember files. If more than one file is given, a final line
+containing the cumulative sizes is printed. With @option{-v}, the dictionary
+size, the number of members in the file, and the amount of trailing data (if
+any) are also printed. With @option{-vv}, the positions and sizes of each
+member in multimember files are also printed. With @option{-i}, format errors
+are ignored, and with @option{-ivv}, gaps between members are shown. The
+member numbers shown coincide with the file numbers produced by @option{--split}.
+
+If any file is damaged, does not exist, can't be opened, or is not regular,
+the final exit status is @w{> 0}. @option{-lq} can be used to check quickly
+(without decompressing) the structural integrity of the files specified.
+(Use @option{--test} to check the data integrity). @option{-alq}
+additionally checks that none of the files specified contain trailing data.
+
+@item -m
+@itemx --merge
+Try to produce a correct file by merging the good parts of two or more
+damaged copies. If successful, a repaired copy is written to the file
+@var{file}_fixed.lz. The exit status is 0 if a correct file could be
+produced, 2 otherwise. @xref{Merging files}, for a complete description of
+the merge mode.
+
+@item -o @var{file}
+@itemx --output=@var{file}
+Place the repaired output into @var{file} instead of into
+@var{file}_fixed.lz. If splitting, the names of the files produced are in
+the form @samp{rec01@var{file}}, @samp{rec02@var{file}}, etc.
+
+If @option{-c} has not been also specified, write the (de)compressed output
+to @var{file}, automatically creating any missing parent directories; keep
+input files unchanged. This option (or @option{-c}) is needed when reading
+from a named pipe (fifo) or from a device. @w{@option{-o -}} is equivalent
+to @option{-c}. @option{-o} has no effect when testing or listing.
+
+@item -q
+@itemx --quiet
+Quiet operation. Suppress all messages.
+
+@anchor{--byte-repair}
+@item -R
+@itemx --byte-repair
+Try to repair a @var{file} with small errors (up to one single-byte error
+per member). If successful, a repaired copy is written to the file
+@var{file}_fixed.lz. @var{file} is not modified at all. The exit status is 0
+if the file could be repaired, 2 otherwise. @xref{Repairing one byte}, for a
+complete description of the repair mode.
+
+@item -s
+@itemx --split
+Search for members in @var{file} and write each member in its own file. Gaps
+between members are detected and each gap is saved in its own file. Trailing
+data (if any) are saved alone in the last file. You can then use
+@w{@samp{lziprecover -t}} to test the integrity of the resulting files,
+decompress those which are undamaged, and try to repair or partially
+decompress those which are damaged. Gaps may contain garbage or may be
+members with corrupt headers or trailers. If other lziprecover functions
+fail to work on a multimember @var{file} because of damage in headers or
+trailers, try to split @var{file} and then work on each member individually.
+
+The names of the files produced are in the form @samp{rec01@var{file}},
+@samp{rec02@var{file}}, etc, and are designed so that the use of wildcards
+in subsequent processing, for example,
+@w{@samp{lziprecover -cd rec*@var{file} > recovered_data}}, processes the
+files in the correct order. The number of digits used in the names varies
+depending on the number of members in @var{file}.
+
+@item -t
+@itemx --test
+Check integrity of the files specified, but don't decompress them. This
+really performs a trial decompression and throws away the result. Use it
+together with @option{-v} to see information about the files. If a file
+fails the test, does not exist, can't be opened, or is a terminal, lziprecover
+continues testing the rest of the files. A final diagnostic is shown at
+verbosity level 1 or higher if any file fails the test when testing multiple
+files.
+
+@item -v
+@itemx --verbose
+Verbose mode.@*
+When decompressing or testing, further -v's (up to 4) increase the
+verbosity level, showing status, compression ratio, dictionary size,
+trailer contents (CRC, data size, member size), and up to 6 bytes of
+trailing data (if any) both in hexadecimal and as a string of printable
+ASCII characters.@*
+Two or more @option{-v} options show the progress of decompression.@*
+In other modes, increasing verbosity levels show final status, progress
+of operations, and extra information (for example, the failed areas).
+
+@item --dump=[@var{member_list}][:damaged][:empty][:tdata]
+Dump the members listed, the damaged members (if any), the empty members (if
+any), or the trailing data (if any) of one or more regular multimember files
+to standard output, or to a file if the option @option{--output} is used. If
+more than one file is given, the elements dumped from all the files are
+concatenated. If a file does not exist, can't be opened, or is not regular,
+lziprecover continues processing the rest of the files. If the dump fails in
+one file, lziprecover exits immediately without processing the rest of the
+files. Only @option{--dump=tdata} can write to a terminal.
+@option{--dump=damaged} implies @option{--ignore-errors}.
+
+The argument to @option{--dump} is a colon-separated list of the following
+element specifiers: a member list (1,3-6), a reverse member list (r1,3-6),
+and the strings "damaged", "empty", and "tdata" (which may be shortened to
+'d', 'e', and 't' respectively). A member list selects the members (or gaps)
+listed, whose numbers coincide with those shown by @option{--list}. A reverse
+member list selects the members listed counting from the last member in the
+file (r1). Negated versions of both kinds of lists exist (^1,3-6:r^1,3-6)
+which select all the members except those in the list. The strings
+"damaged", "empty", and "tdata" select the damaged members, the empty
+members (those with a data size = 0), and the trailing data respectively. If
+the same member is selected more than once, for example by @samp{1:r1} in a
+single-member file, it is dumped just once. See the following examples:
+
+@multitable {@code{3,12:damaged:tdata}} {members 3, 12, damaged members, trailing data}
+@headitem @code{--dump} argument @tab Elements dumped
+@item @code{1,3-6} @tab members 1, 3, 4, 5, 6
+@item @code{r1-3} @tab last 3 members in file
+@item @code{^13,15} @tab all but 13th and 15th members in file
+@item @code{r^1} @tab all but last member in file
+@item @code{damaged} @tab all damaged members in file
+@item @code{empty} @tab all empty members in file
+@item @code{tdata} @tab trailing data
+@item @code{1-5:r1:tdata} @tab members 1 to 5, last member, trailing data
+@item @code{damaged:tdata} @tab damaged members, trailing data
+@item @code{3,12:damaged:tdata} @tab members 3, 12, damaged members, trailing data
+@end multitable
+
+@item --remove=[@var{member_list}][:damaged][:empty][:tdata]
+Remove the members listed, the damaged members (if any), the empty members
+(if any), or the trailing data (if any) from regular multimember files in
+place. The date of each file modified is preserved if possible. If all
+members in a file are selected to be removed, the file is left unchanged and
+the exit status is set to 2. If a file does not exist, can't be opened, is
+not regular, or is left unchanged, lziprecover continues processing the rest
+of the files. In case of I/O error, lziprecover exits immediately without
+processing the rest of the files. See @option{--dump} above for a description
+of the argument.
+
+This option may be dangerous even if only the trailing data are being
+removed because the file may be corrupt or the trailing data may contain a
+forbidden combination of characters. @xref{Trailing data}. It is safer to
+send the output of @option{--strip} to a temporary file, check it, and then
+copy it over the original file. But if you prefer @option{--remove} because of
+its more efficient in-place removal, it is advisable to make a backup before
+attempting the removal. At least check that @w{@samp{lzip -cd file.lz | wc -c}}
+and the uncompressed size shown by @w{@samp{lzip -l file.lz}} match before
+attempting the removal of trailing data.
+
+@item --strip=[@var{member_list}][:damaged][:empty][:tdata]
+Copy one or more regular multimember files to standard output (or to a file
+if the option @option{--output} is used), stripping the members listed, the
+damaged members (if any), the empty members (if any), or the trailing data
+(if any) from each file. If all members in a file are selected to be
+stripped, the trailing data (if any) are also stripped even if @samp{tdata}
+is not specified. If more than one file is given, the files are
+concatenated. In this case the trailing data are also stripped from all but
+the last file even if @samp{tdata} is not specified. If a file does not
+exist, can't be opened, or is not regular, lziprecover continues processing
+the rest of the files. If a file fails to copy, lziprecover exits
+immediately without processing the rest of the files. See @option{--dump}
+above for a description of the argument.
+
+@item --empty-error
+Exit with error status 2 if any empty member is found in the input files.
+
+@item --marking-error
+Exit with error status 2 if the first LZMA byte is non-zero in any member of
+the input files. This may be caused by data corruption or by deliberate
+insertion of tracking information in the file. Use
+@w{@samp{lziprecover --clear-marking}} to clear any such non-zero bytes.
+
+@item --loose-trailing
+When decompressing, testing, or listing, allow trailing data whose first
+bytes are so similar to the magic bytes of a lzip header that they can
+be confused with a corrupt header. Use this option if a file triggers a
+"corrupt header" error and the cause is not indeed a corrupt header.
+
+@item --clear-marking
+Set to zero the first LZMA byte of each member in the files specified. At
+verbosity level 1 (-v), print the number of members cleared. The date of
+each file modified is preserved if possible. This option exists because the
+first byte of the LZMA stream is ignored by the range decoder, and can
+therefore be (mis)used to store any value which can then be used as a
+watermark to track the path of the compressed payload.
+
+@end table
+
+Lziprecover also supports the following debug options (for experts):
+
+@table @code
+@item -E @var{range}[,@var{sector_size}]
+@itemx --debug-reproduce=@var{range}[,@var{sector_size}]
+Load the compressed @var{file} into memory, set all bytes in the positions
+specified by @var{range} to 0, and try to reproduce a correct compressed
+file. @xref{--reproduce}. @xref{range-format}, for a description of
+@var{range}. If a @var{sector_size} is specified, set each sector to 0 in
+sequence and try to reproduce the file, printing to standard output final
+statistics of the number of sectors reproduced successfully. Exit with
+nonzero status only in case of fatal error.
+
+@item -M
+@itemx --md5sum
+Print to standard output the MD5 digests of the input @var{files} one per
+line in the same format produced by the @command{md5sum} tool. Lziprecover
+uses MD5 digests to check the result of some operations. This option can be
+used to test the correctness of lziprecover's implementation of the MD5
+algorithm.
+
+@item -S[@var{value}]
+@itemx --nrep-stats[=@var{value}]
+Compare the frequency of sequences of N repeated bytes of a given
+@var{value} in the compressed LZMA streams of the input @var{files} with the
+frequency expected for random data (1 / 2^(8N)). If @var{value} is not
+specified, print the frequency of repeated sequences of all possible byte
+values. Print cumulative data for all the files, followed by the name of the
+first file with the longest sequence.
+
+@anchor{--unzcrash}
+@item -U 1|B@var{size}
+@itemx --unzcrash=1|B@var{size}
+With argument @samp{1}, test 1-bit errors in the LZMA stream of the
+compressed input @var{file} like the command
+@w{@samp{unzcrash -b1 -p7 -s-20 'lzip -t' @var{file}}} but in memory, and
+therefore much faster (30 to 50 times faster). @xref{Unzcrash}. This option
+tests all the members independently in a multimember file, skipping headers
+and trailers. If a decompression succeeds, the decompressed output is
+compared with the decompressed output of the original @var{file} using MD5
+digests. @var{file} must not contain errors and must decompress correctly
+for the comparisons to work.
+
+With argument @samp{B}, test zeroed sectors (blocks of bytes) in the LZMA
+stream of the compressed input @var{file} like the command
+@w{@samp{unzcrash --block=@var{size} -d1 -p7 -s-(@var{size}+20) 'lzip -t' @var{file}}}
+but in memory, and therefore much faster. Testing and comparisons work just
+like with the argument @samp{1} explained above.
+
+By default @option{--unzcrash} only prints the interesting cases: CRC
+mismatches, size mismatches, unsupported marker codes, unexpected EOFs,
+apparently successful decompressions, and decoder errors detected 50_000 or
+more bytes beyond the byte (or the start of the block) being tested. At
+verbosity level 1 (-v) it also prints decoder errors detected 10_000 or more
+bytes beyond the byte being tested. At verbosity level 2 (-vv) it prints all
+cases for 1-bit errors or the decoder errors detected beyond the end of the
+block for zeroed blocks.
+
+@item -W @var{position},@var{value}
+@itemx --debug-decompress=@var{position},@var{value}
+Load the compressed @var{file} into memory, set the byte at @var{position}
+to @var{value}, and decompress the modified compressed data to standard
+output. If the damaged member can be decompressed to the end (just fails
+with a CRC mismatch), the members following it are also decompressed.
+
+@item -X[@var{position},@var{value}]
+@itemx --show-packets[=@var{position},@var{value}]
+Load the compressed @var{file} into memory, optionally set the byte at
+@var{position} to @var{value}, decompress the modified compressed data
+(discarding the output), and print to standard output descriptions of the
+LZMA packets being decoded.
+
+@item -Y @var{range}
+@itemx --debug-delay=@var{range}
+Load the compressed @var{file} into memory and then repeatedly decompress
+it, increasing 256 times each byte of the subset of the compressed data
+positions specified by @var{range}, so as to test all possible one-byte
+errors. For each decompression error find the error detection delay and
+print to standard output the maximum delay. The error detection delay is the
+difference between the position of the error and the position where the
+decoder realized that the data contains an error. @xref{range-format}, for a
+description of @var{range}.
+
+@item -Z @var{position},@var{value}
+@itemx --debug-byte-repair=@var{position},@var{value}
+Load the compressed @var{file} into memory, set the byte at @var{position}
+to @var{value}, and then try to repair the byte error. @xref{--byte-repair}.
+
+@end table
+
+Numbers given as arguments to options may be expressed in decimal,
+hexadecimal, or octal (using the same syntax as integer constants in C++),
+and may be followed by a multiplier and an optional @samp{B} for "byte".
+
+Table of SI and binary prefixes (unit multipliers):
+
+@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)}
+@item Prefix @tab Value @tab | @tab Prefix @tab Value
+@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024)
+@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20)
+@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30)
+@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40)
+@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50)
+@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60)
+@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70)
+@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80)
+@item R @tab ronnabyte (10^27) @tab | @tab Ri @tab robibyte (2^90)
+@item Q @tab quettabyte (10^30) @tab | @tab Qi @tab quebibyte (2^100)
+@end multitable
+
+@sp 1
+Exit status: 0 for a normal exit, 1 for environmental problems
+(file not found, invalid command-line options, I/O errors, etc), 2 to
+indicate a corrupt or invalid input file, 3 for an internal consistency
+error (e.g., bug) which caused lziprecover to panic.
+
+
+@node Data safety
+@chapter Protecting data from accidental loss
+@cindex data safety
+
+It is a fact of life that sometimes data becomes corrupt. Software has
+errors. Hardware may misbehave or fail. RAM may be struck by a cosmic ray.
+This is why a safe enough integrity checking is needed in compressed
+formats, and the reason why a data recovery tool is sometimes needed.
+
+There are 3 main types of data corruption that may cause data loss:
+single-byte errors, multibyte errors (generally affecting a whole sector
+in a block device), and total device failure.
+
+Lziprecover protects natively against single-byte errors as long as file
+integrity is checked frequently enough that a second single-byte error does
+not develop in the same member before the first one is repaired.
+@xref{Repairing one byte}.
+
+Lziprecover also protects against multibyte errors if at least one backup
+copy of the file is made (@pxref{Merging files}), or if the error is a
+zeroed sector and the uncompressed data corresponding to the zeroed sector
+are available (@pxref{Reproducing one sector}). If you can choose between
+merging and reproducing, try merging first because it is usually faster,
+easier to use, and has a high probability of success.
+
+Lziprecover can't help in case of device failure. The only remedy for total
+device failure is storing backup copies in separate media.
+
+The extraordinary safety of the lzip format allows lziprecover to exploit
+the redundance that occurs naturally when making compressed backups.
+Lziprecover can recover data that would not be recoverable from files
+compressed in other formats. Let's see two examples of how much better
+lzip is than gzip and bzip2 with respect to data safety:
+
+@menu
+* Merging with a backup:: Recovering a file using a damaged backup
+* Reproducing a mailbox:: Recovering new messages using an old backup
+@end menu
+
+
+@node Merging with a backup
+@section Recovering a file using a damaged backup
+@cindex merging with a backup
+
+Let's suppose that you made a compressed backup of your valuable scientific
+data and stored two copies on separate media. Years later you notice that
+both copies are corrupt.
+
+If you compressed the data with gzip and both copies suffer any damage in
+the data stream, even if it is just one altered bit, the original data can
+only be recovered by an expert, if at all.
+
+If you used bzip2, and if the file is large enough to contain more than one
+compressed data block (usually larger than @w{900 kB} uncompressed), and if
+no block is damaged in both files, then the data can be manually recovered
+by splitting the files with bzip2recover, checking every block, and then
+copying the right blocks in the right order into another file.
+
+But if you used lzip, the data can be automatically recovered with
+@w{@samp{lziprecover --merge}} as long as the damaged areas don't overlap.
+
+Note that each error in a bzip2 file makes a whole block unusable, but each
+error in a lzip file only affects the damaged bytes, making it possible to
+recover a file with thousands of errors.
+
+
+@node Reproducing a mailbox
+@section Recovering new messages using an old backup
+@cindex reproducing a mailbox
+
+Let's suppose that you make periodic backups of your email messages stored
+in one or more mailboxes. (A mailbox is a file containing a possibly large
+number of email messages). New messages are appended to the end of each
+mailbox, therefore the initial part of two consecutive backups is identical
+unless some messages have been changed or deleted in the meantime. The new
+messages added to each backup are usually a small part of the whole mailbox.
+
+@verbatim
++============================================+
+| Older backup containing some messages |
++============================================+
++============================================+========================+
+| Newer backup containing the messages above | plus some new messages |
++============================================+========================+
+@end verbatim
+
+One day you discover that your mailbox has disappeared because you deleted
+it inadvertently or because of a bug in your email reader. Not only that.
+You need to recover a recent message, but the last backup you made of the
+mailbox (the newer backup above) has lost the data corresponding to a whole
+sector because of an I/O error in the part containing the old messages.
+
+If you compressed the mailbox with gzip, usually none of the new messages
+can be recovered even if they are intact because all the data beyond the
+missing sector can't be decoded.
+
+If you used bzip2, and if the newer backup is large enough that the new
+messages are in a different compressed data block than the one damaged
+(usually larger than @w{900 kB} uncompressed), then you can recover the new
+messages manually with bzip2recover. If the backups are identical except for
+the new messages appended, you may even recover the whole newer backup by
+combining the good blocks from both backups.
+
+But if you used lzip, the whole newer backup can be automatically recovered
+with @w{@samp{lziprecover --reproduce}} as long as the missing bytes can be
+recovered from the older backup, even if other messages in the common part
+have been changed or deleted. Mailboxes seem to be especially easy to
+reproduce. The probability of reproducing a mailbox
+(@pxref{performance-of-reproduce}) is almost as high as that of merging two
+identical backups (@pxref{performance-of-merge}).
+
+
+@node Repairing one byte
+@chapter Repairing one byte
+@cindex repairing one byte
+
+Lziprecover can perfectly repair most files with small errors (up to one
+single-byte error per member), without the need of any extra redundance at
+all. If the repair is successful, the repaired file is identical bit for
+bit to the original. This makes lzip files resistant to bit flip, one of the
+most common forms of data corruption.
+
+The file is repaired in memory. Therefore, enough virtual memory
+@w{(RAM + swap)} to contain the largest damaged member is required.
+
+The error may be located anywhere in the file except in the first 5
+bytes of each member header or in the @samp{Member size} field of the
+trailer (last 8 bytes of each member). If the error is in the header it
+can be easily repaired with a text editor like GNU Moe (@pxref{File
+format}). If the error is in the member size, it is enough to ignore the
+message about @samp{bad member size} when decompressing.
+
+Bit flip happens when one bit in the file is changed from 0 to 1 or vice
+versa. It may be caused by bad RAM or even by natural radiation. I have
+seen a case of bit flip in a file stored on a USB flash drive.
+
+One byte may seem small, but most file corruptions not produced by
+transmission errors or I/O errors just affect one byte, or even one bit,
+of the file. Also, unlike magnetic media, where errors usually affect a
+whole sector, solid-state storage devices tend to produce single-byte
+errors, making lzip the perfect format for data stored on such devices.
+
+Repairing a file can take some time. Small files or files with the error
+located near the beginning can be repaired in a few seconds. But
+repairing a large file compressed with a large dictionary size and with
+the error located far from the beginning, may take hours.
+
+On the other hand, errors located near the beginning of the file cause
+much more loss of data than errors located near the end. So lziprecover
+repairs the worst errors more efficiently.
+
+
+@node Merging files
+@chapter Merging files
+@cindex merging files
+
+If you have several copies of a file but all of them are too damaged to
+repair them individually (@pxref{Repairing one byte}), lziprecover can try
+to produce a correct file by merging the good parts of the damaged copies.
+
+The merge may succeed even if some copies of the file have all the headers
+and trailers damaged, as long as there is at least one copy of every header
+and trailer intact, even if they are in different copies of the file.
+
+The merge fails if the damaged areas overlap (at least one byte is damaged
+in all copies), or are adjacent and the boundary can't be determined, or if
+the copies have too many damaged areas.
+
+All the copies to be merged must have the same size. If any of them is
+larger or smaller than it should be, either because it has been truncated or
+because it got some garbage data appended at the end, it can be brought to
+the correct size with the following command before merging it with the other
+copies:
+
+@example
+ddrescue -s<correct_size> -x<correct_size> file.lz correct_size_file.lz
+@end example
+
+@anchor{performance-of-merge}
+To give you an idea of its possibilities, when merging two copies, each of
+them with one damaged area affecting 1 percent of the copy, the probability
+of obtaining a correct file is about 98 percent. With three such copies the
+probability rises to 99.97 percent. For large files (a few MB) with small
+errors (one sector damaged per copy), the probability approaches 100 percent
+even with only two copies. (Supposing that the errors are randomly located
+inside each copy).
+
+Some types of solid-state device (NAND flash, for example) can produce
+bursts of scattered single-bit errors. Lziprecover is able to merge
+files with thousands of such scattered errors by grouping the errors
+into clusters and then merging the files as if each cluster were a
+single error.
+
+Here is a real case of successful merging. Two copies of the file
+@samp{icecat-3.5.3-x86.tar.lz} (compressed size @w{9 MB}) became corrupt
+while stored on the same NAND flash device. One of the copies had 76
+single-bit errors scattered in an area of 1020 bytes, and the other had
+3028 such errors in an area of 31729 bytes. Lziprecover produced a
+correct file, identical to the original, in just 5 seconds:
+
+@example
+lziprecover -vvm a/icecat-3.5.3-x86.tar.lz b/icecat-3.5.3-x86.tar.lz
+Merging member 1 of 1 (2552 errors)
+ 2552 errors have been grouped in 16 clusters.
+ Trying variation 2 of 2, block 2
+Input files merged successfully.
+@end example
+
+Note that the number of errors reported by lziprecover (2552) is lower
+than the number of corrupt bytes (3104) because contiguous corrupt bytes
+are counted as a single multibyte error.
+
+@sp 1
+@anchor{ddrescue-example}
+@noindent
+Example 1: Recover a compressed backup from two copies on CD-ROM with
+error-checked merging of copies.
+@ifnothtml
+@xref{Top,GNU ddrescue manual,,ddrescue},
+@end ifnothtml
+@ifhtml
+See the
+@uref{http://www.gnu.org/software/ddrescue/manual/ddrescue_manual.html,,ddrescue manual}
+@end ifhtml
+for details about ddrescue.
+
+@example
+ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
+mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage
+cp /mnt/cdimage/backup.tar.lz rescued1.tar.lz
+umount /mnt/cdimage
+ (insert second copy in the CD drive)
+ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 mapfile2
+mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage
+cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz
+umount /mnt/cdimage
+lziprecover -m -v -o backup.tar.lz rescued1.tar.lz rescued2.tar.lz
+ Input files merged successfully.
+lziprecover -tv backup.tar.lz
+ backup.tar.lz: ok
+@end example
+
+@sp 1
+@noindent
+Example 2: Recover the first volume of those created with the command
+@w{@samp{lzip -b 32MiB -S 650MB big_db}} from two copies,
+@samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07
+damaged in the first copy, member 18 damaged in the second copy, and
+member 12 damaged in both copies. The correct file produced is saved in
+@samp{big_db_00001.lz}.
+
+@example
+lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
+ Input files merged successfully.
+lziprecover -tv big_db_00001.lz
+ big_db_00001.lz: ok
+@end example
+
+
+@node Reproducing one sector
+@chapter Reproducing one sector
+@cindex reproducing one sector
+
+Lziprecover can recover a zeroed sector in a lzip file by concatenating the
+decompressed contents of the file up to the beginning of the zeroed sector
+and the uncompressed data corresponding to the zeroed sector, and then
+feeding the concatenated data to the same version of lzip that created the
+file. For this to work, a reference file is required containing the
+uncompressed data corresponding to the missing compressed data of the zeroed
+sector, plus some context data before and after them. It is possible to
+recover a large file using just a few kB of reference data.
+
+The difficult part is finding a suitable reference file. It must contain the
+exact data required (possibly mixed with other data). Containing similar
+data is not enough.
+
+A zeroed sector may be caused by the incomplete recovery of a damaged
+storage device (with I/O errors) using, for example, ddrescue. The
+reproduction can't be done if the zeroed sector overlaps with the first 15
+bytes of a member, or if the zeroed sector is smaller than 8 bytes.
+
+The file is reproduced in memory. Therefore, enough virtual memory
+@w{(RAM + swap)} to contain the damaged member is required.
+
+To understand how it works, take any lzipped file, say @samp{foo.lz},
+decompress it (keeping the original), and try to reproduce an artificially
+zeroed sector in it by running the following commands:
+
+@example
+lzip -kd foo.lz
+lziprecover -vv --debug-reproduce=65536,512 --reference-file=foo foo.lz
+@end example
+
+@noindent
+which should produce an output like the following:
+
+@example
+Reproducing: foo.lz
+Reference file: foo
+Testing sectors of size 512 at file positions 65536 to 66047
+ (master mpos = 65536, dpos = 296892)
+foo: Match found at offset 296892
+Reproduction succeeded at pos 65536
+
+ 1 sectors tested
+ 1 reproductions returned with zero status
+ all comparisons passed
+@end example
+
+Using @samp{foo} as reference file guarantees that any zeroed sector in
+@samp{foo.lz} can be reproduced because both files contain the same data. In
+real use, the reference file needs to contain the data corresponding to the
+zeroed sector, but the rest of the data (if any) may differ between both
+files. The reference data may be obtained from the partial decompression of
+the damaged file itself if it contains repeated data. For example if the
+damaged file is a compressed tarball containing several partially modified
+versions of the same file.
+
+The offset reported by lziprecover is the position in the reference file of
+the first byte that could not be decompressed. This is the first byte that
+will be compressed to reproduce the zeroed sector.
+
+The reproduce mode tries to reproduce the missing compressed data originally
+present in the zeroed sector. It is based on the perfect reproducibility of
+lzip files (lzip produces identical compressed output from identical input).
+Therefore, the same version of lzip that created the file to be reproduced
+should be used to reproduce the zeroed sector. Near versions may also work
+because the output of lzip changes infrequently. If reproducing a tar.lz
+archive created with tarlz, the version of lzip, clzip, or minilzip
+corresponding to the version of the lzlib library used by tarlz to create
+the archive should be used.
+
+When recovering a tar.lz archive and using as reference a file from the
+filesystem, if the zeroed sector encodes (part of) a tar header, the archive
+can't be reproduced. Therefore, the less overhead (smaller headers) a tar
+archive has, the more probable it is that the zeroed sector does not include a
+header, and that the archive can be reproduced. The tarlz format has minimum
+overhead. It uses basic ustar headers, and only adds extended pax headers
+when they are required.
+
+@anchor{performance-of-reproduce}
+@section Performance of @option{--reproduce}
+Reproduce mode is especially useful when recovering a corrupt backup (or a
+corrupt source tarball) that is part of a series. Usually only a small
+fraction of the data changes from one backup to the next or from one version
+of a source tarball to the next. This sometimes makes it possible to reproduce
+a given corrupted version using reference data from a near version. The
+following two tables show the fraction of reproducible sectors (reproducible
+sectors divided by total sectors in archive) for some archives, using sector
+sizes of 512 and 4096 bytes. @samp{mailbox-aug.tar.lz} is a backup of some
+of my mailboxes. @samp{backup-feb.tar.lz} and @samp{backup-apr.tar.lz} are
+real backups of my own working directory:
+
+@multitable {Reference file} {gawk-5.0.1.tar.lz} {4369 / 5844 = 74.76%}
+@headitem Reference file @tab File @tab Reproducible (512)
+@item backup-feb.tar @tab backup-apr.tar.lz @tab 3273 / 4342 = 75.38%
+@item backup-apr.tar @tab backup-feb.tar.lz @tab 3259 / 4161 = 78.32%
+@item gawk-5.0.0.tar @tab gawk-5.0.1.tar.lz @tab 4369 / 5844 = 74.76%
+@item gawk-5.0.1.tar @tab gawk-5.0.0.tar.lz @tab 4379 / 5603 = 78.15%
+@item gmp-6.1.1.tar @tab gmp-6.1.2.tar.lz @tab 2454 / 3787 = 64.8%
+@item gmp-6.1.2.tar @tab gmp-6.1.1.tar.lz @tab 2461 / 3782 = 65.07%
+@end multitable
+
+@multitable {mailbox-mar.tar} {mailbox-aug.tar.lz} {4036 / 4252 = 94.92%}
+@headitem Reference file @tab File @tab Reproducible (4096)
+@item mailbox-mar.tar @tab mailbox-aug.tar.lz @tab 4036 / 4252 = 94.92%
+@item backup-feb.tar @tab backup-apr.tar.lz @tab 264 / 542 = 48.71%
+@item backup-apr.tar @tab backup-feb.tar.lz @tab 264 / 520 = 50.77%
+@item gawk-5.0.0.tar @tab gawk-5.0.1.tar.lz @tab 327 / 730 = 44.79%
+@item gawk-5.0.1.tar @tab gawk-5.0.0.tar.lz @tab 326 / 700 = 46.57%
+@item gmp-6.1.1.tar @tab gmp-6.1.2.tar.lz @tab 175 / 473 = 37%
+@item gmp-6.1.2.tar @tab gmp-6.1.1.tar.lz @tab 181 / 472 = 38.35%
+@end multitable
+
+Note that the "performance of reproduce" is a probability, not a partial
+recovery. The data are either recovered fully (with the probability X shown
+in the last column of the tables above) or not recovered at all (with
+probability @w{1 - X}).
+
+@noindent
+Example 1: Recover a damaged source tarball with a zeroed sector of 512
+bytes at file position 1019904, using as reference another source tarball
+for a different version of the software.
+
+@example
+lziprecover -vv -e --reference-file=gmp-6.1.1.tar gmp-6.1.2.tar.lz
+Reproducing bad area in member 1 of 1
+ (begin = 1019904, size = 512, value = 0x00)
+ (master mpos = 1019904, dpos = 6292134)
+warning: gmp-6.1.1.tar: Partial match found at offset 6277798, len 8716.
+Reference data may be mixed with other data.
+Trying level -9
+ Reproducing position 1015808
+Member reproduced successfully.
+Copy of input file reproduced successfully.
+@end example
+
+@sp 1
+@anchor{ddrescue-example2}
+@noindent
+Example 2: Recover a damaged backup with a zeroed sector of 4096 bytes at
+file position 1019904, using as reference a previous backup. The damaged
+backup comes from a damaged partition copied with ddrescue.
+
+@example
+ddrescue -b4096 -r10 /dev/sdc1 hdimage mapfile
+mount -o loop,ro hdimage /mnt/hdimage
+cp /mnt/hdimage/backup.tar.lz backup.tar.lz
+umount /mnt/hdimage
+lzip -t backup.tar.lz
+ backup.tar.lz: Decoder error at pos 1020530
+lziprecover -vv -e --reference-file=old_backup.tar backup.tar.lz
+Reproducing bad area in member 1 of 1
+ (begin = 1019904, size = 4096, value = 0x00)
+ (master mpos = 1019903, dpos = 5857954)
+warning: old_backup.tar: Partial match found at offset 5743778, len 9546.
+Reference data may be mixed with other data.
+Trying level -9
+ Reproducing position 1015808
+Member reproduced successfully.
+Copy of input file reproduced successfully.
+@end example
+
+@sp 1
+@noindent
+Example 3: Recover a damaged backup with a zeroed sector of 4096 bytes at
+file position 1019904, using as reference a file from the filesystem. (If
+the zeroed sector encodes (part of) a tar header, the tarball can't be
+reproduced).
+
+@example
+# List the contents of the backup tarball to locate the damaged member.
+tarlz -n0 -tvf backup.tar.lz
+ [...]
+ example.txt
+tarlz: Skipping to next header.
+tarlz: backup.tar.lz: Archive ends unexpectedly.
+# Find in the filesystem the last file listed and use it as reference.
+lziprecover -vv -e --reference-file=/somedir/example.txt backup.tar.lz
+Reproducing bad area in member 1 of 1
+ (begin = 1019904, size = 4096, value = 0x00)
+ (master mpos = 1019903, dpos = 5857954)
+/somedir/example.txt: Match found at offset 9378
+Trying level -9
+ Reproducing position 1015808
+Member reproduced successfully.
+Copy of input file reproduced successfully.
+@end example
+
+If @samp{backup.tar.lz} is a multimember file with more than one member
+damaged and lziprecover shows the message @samp{One member reproduced. Copy
+of input file still contains errors.}, the procedure shown in the example
+above can be repeated until all the members have been reproduced.
+
+@samp{tarlz --keep-damaged -n0 -xf backup.tar.lz example.txt} produces a
+partial copy of the reference file @samp{example.txt} that may help locate a
+complete copy in the filesystem or in another backup, even if
+@samp{example.txt} has been renamed.
+
+
+@node Tarlz
+@chapter Options supporting the tar.lz format
+@cindex tarlz
+
+@uref{http://www.nongnu.org/lzip/manual/tarlz_manual.html,,Tarlz} is a
+massively parallel (multi-threaded) combined implementation of the tar
+archiver and the
+@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html,,lzip} compressor.
+
+Tarlz creates tar archives using a simplified and safer variant of the POSIX
+pax format compressed in lzip format, keeping the alignment between tar
+members and lzip members. The resulting multimember tar.lz archive is
+backward compatible with standard tar tools like GNU tar, which treat it
+like any other tar.lz archive.
+@ifnothtml
+@xref{Top,tarlz manual,,tarlz}, and @ref{Top,lzip manual,,lzip}.
+@end ifnothtml
+
+Multimember tar.lz archives have some safety advantages over solidly
+compressed tar.lz archives. For example, in case of corruption, tarlz can
+extract all the undamaged members from the tar.lz archive, skipping over the
+damaged members, just like the standard (uncompressed) tar. Keeping the
+alignment between tar members and lzip members minimizes the amount of data
+lost in case of corruption. In this chapter we'll explain the ways in which
+lziprecover can recover and process multimember tar.lz archives.
+
+@sp 1
+@section Recovering damaged multimember tar.lz archives
+
+If you have several copies of the damaged archive, try merging them first
+because merging has a high probability of success. @xref{Merging files}. If
+the command below prints something like
+@w{@samp{Input files merged successfully.}} you are done and
+@samp{archive.tar.lz} now contains the recovered archive:
+
+@example
+lziprecover -m -v -o archive.tar.lz a/archive.tar.lz b/archive.tar.lz
+@end example
+
+If you only have one copy of the damaged archive with a zeroed block of data
+caused by an I/O error, you may try to reproduce the archive.
+@xref{Reproducing one sector}. If the command below prints something like
+@w{@samp{Copy of input file reproduced successfully.}} you are done and
+@samp{archive_fixed.tar.lz} now contains the recovered archive:
+
+@example
+lziprecover -vv -e --reference-file=old_archive.tar archive.tar.lz
+@end example
+
+If you only have one copy of the damaged archive, you may try to repair the
+archive, but this has a lower probability of success. @xref{Repairing one
+byte}. If the command below prints something like
+@w{@samp{Copy of input file repaired successfully.}} you are done and
+@samp{archive_fixed.tar.lz} now contains the recovered archive:
+
+@example
+lziprecover -v -R archive.tar.lz
+@end example
+
+If all the above fails, and the archive was created with tarlz, you may save
+the damaged members for later and then copy the good members to another
+archive. If the two commands below succeed, @samp{bad_members.tar.lz} will
+contain all the damaged members and @samp{archive_cleaned.tar.lz} will
+contain a good archive with the damaged members removed:
+
+@example
+lziprecover -v --dump=damaged -o bad_members.tar.lz archive.tar.lz
+lziprecover -v --strip=damaged -o archive_cleaned.tar.lz archive.tar.lz
+@end example
+
+You can then use @samp{tarlz --keep-damaged} to recover as much data as
+possible from each damaged member in @samp{bad_members.tar.lz}:
+
+@example
+mkdir tmp
+cd tmp
+tarlz --keep-damaged -xvf ../bad_members.tar.lz
+@end example
+
+@sp 1
+@section Processing multimember tar.lz archives
+
+Lziprecover is able to copy a list of members from one file to another.
+For example the command
+@w{@samp{lziprecover --dump=1-10:r1:tdata archive.tar.lz > subarch.tar.lz}}
+creates a subset archive containing the first ten members, the end-of-file
+blocks, and the trailing data (if any) of @samp{archive.tar.lz}. The
+@samp{r1} part selects the last member, which in an appendable tar.lz
+archive contains the end-of-file blocks.
+
+
+@node File names
+@chapter Names of the files produced by lziprecover
+@cindex file names
+
+The name of the fixed file produced by @option{--byte-repair} and
+@option{--merge} is made by appending the string @samp{_fixed.lz} to the
+original file name. If the original file name ends with one of the
+extensions @samp{.tar.lz}, @samp{.lz}, or @samp{.tlz}, the string
+@samp{_fixed} is inserted before the extension.
+
+
+@node File format
+@chapter File format
+@cindex file format
+
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.@*
+--- Antoine de Saint-Exupery
+
+@sp 1
+In the diagram below, a box like this:
+
+@verbatim
++---+
+| | <-- the vertical bars might be missing
++---+
+@end verbatim
+
+represents one byte; a box like this:
+
+@verbatim
++==============+
+| |
++==============+
+@end verbatim
+
+represents a variable number of bytes.
+
+@sp 1
+A lzip file consists of one or more independent "members" (compressed data
+sets). The members simply appear one after another in the file, with no
+additional information before, between, or after them. Each member can
+encode in compressed form up to @w{16 EiB - 1 byte} of uncompressed data.
+The size of a multimember file is unlimited.
+
+Each member has the following structure:
+
+@verbatim
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+| ID string | VN | DS | LZMA stream | CRC32 | Data size | Member size |
++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+@end verbatim
+
+All multibyte values are stored in little endian order.
+
+@table @samp
+@item ID string (the "magic" bytes)
+A four byte string, identifying the lzip format, with the value "LZIP"
+(0x4C, 0x5A, 0x49, 0x50).
+
+@item VN (version number, 1 byte)
+Just in case something needs to be modified in the future. 1 for now.
+
+@item DS (coded dictionary size, 1 byte)
+The dictionary size is calculated by taking a power of 2 (the base size)
+and subtracting from it a fraction between 0/16 and 7/16 of the base size.@*
+Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
+Bits 7-5 contain the numerator of the fraction (0 to 7) to subtract
+from the base size to obtain the dictionary size.@*
+Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@*
+Valid values for dictionary size range from 4 KiB to 512 MiB.
+
+@item LZMA stream
+The LZMA stream, finished by an "End Of Stream" marker. Uses default values
+for encoder properties.
+@ifnothtml
+@xref{Stream format,,,lzip},
+@end ifnothtml
+@ifhtml
+See
+@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#Stream-format,,Stream format}
+@end ifhtml
+for a complete description.
+
+@item CRC32 (4 bytes)
+Cyclic Redundancy Check (CRC) of the original uncompressed data.
+
+@item Data size (8 bytes)
+Size of the original uncompressed data.
+
+@item Member size (8 bytes)
+Total size of the member, including header and trailer. This field acts
+as a distributed index, improves the checking of stream integrity, and
+facilitates the safe recovery of undamaged members from multimember files.
+Lzip limits the member size to @w{2 PiB} to prevent the data size field from
+overflowing.
+
+@end table
+
+
+@node Trailing data
+@chapter Extra data appended to the file
+@cindex trailing data
+
+Sometimes extra data are found appended to a lzip file after the last
+member. Such trailing data may be:
+
+@itemize @bullet
+@item
+Padding added to make the file size a multiple of some block size, for
+example when writing to a tape. It is safe to append any amount of
+padding zero bytes to a lzip file.
+
+@item
+Useful data added by the user; an "End Of File" string (to check that the
+file has not been truncated), a cryptographically secure hash, a description
+of file contents, etc. It is safe to append any amount of text to a lzip
+file as long as none of the first four bytes of the text matches the
+corresponding byte in the string "LZIP", and the text does not contain any
+zero bytes (null characters). Nonzero bytes and zero bytes can't be safely
+mixed in trailing data.
+
+@item
+Garbage added by some not totally successful copy operation.
+
+@item
+Malicious data added to the file in order to make its total size and
+hash value (for a chosen hash) coincide with those of another file.
+
+@item
+In rare cases, trailing data could be the corrupt header of another
+member. In multimember or concatenated files the probability of
+corruption happening in the magic bytes is 5 times smaller than the
+probability of getting a false positive caused by the corruption of the
+integrity information itself. Therefore it can be considered to be below
+the noise level. Additionally, the test used by lziprecover to discriminate
+trailing data from a corrupt header has a Hamming distance (HD) of 3,
+and the 3 bit flips must happen in different magic bytes for the test to
+fail. In any case, the option @option{--trailing-error} guarantees that
+any corrupt header is detected.
+@end itemize
+
+Trailing data are in no way part of the lzip file format, but tools
+reading lzip files are expected to behave as correctly and usefully as
+possible in the presence of trailing data.
+
+Trailing data can be safely ignored in most cases. In some cases, like
+that of user-added data, they are expected to be ignored. In those cases
+where a file containing trailing data must be rejected, the option
+@option{--trailing-error} can be used. @xref{--trailing-error}.
+
+Lziprecover facilitates the management of metadata stored as trailing
+data in lzip files. See the following examples:
+
+@noindent
+Example 1: Add a comment or description to a compressed file.
+
+@example
+# First append the comment as trailing data to a lzip file
+echo 'This file contains this and that' >> file.lz
+# This command prints the comment to standard output
+lziprecover --dump=tdata file.lz
+# This command outputs file.lz without the comment
+lziprecover --strip=tdata file.lz > stripped_file.lz
+# This command removes the comment from file.lz
+lziprecover --remove=tdata file.lz
+@end example
+
+@sp 1
+@noindent
+Example 2: Add and check a cryptographically secure hash. (This may be
+convenient, but a separate copy of the hash must be kept in a safe place
+to guarantee that both file and hash have not been maliciously replaced).
+
+@example
+sha256sum < file.lz >> file.lz
+lziprecover --strip=tdata file.lz | sha256sum -c \
+ <(lziprecover --dump=tdata file.lz)
+@end example
+
+
+@node Examples
+@chapter A small tutorial with examples
+@cindex examples
+
+Example 1: Extract all the files from archive @samp{foo.tar.lz}.
+
+@example
+ tar -xf foo.tar.lz
+or
+ lziprecover -cd foo.tar.lz | tar -xf -
+@end example
+
+@sp 1
+@noindent
+Example 2: Restore a regular file from its compressed version
+@samp{file.lz}. If the operation is successful, @samp{file.lz} is removed.
+
+@example
+lziprecover -d file.lz
+@end example
+
+@sp 1
+@noindent
+Example 3: Check the integrity of the compressed file @samp{file.lz} and
+show status.
+
+@example
+lziprecover -tv file.lz
+@end example
+
+@sp 1
+@anchor{concat-example}
+@noindent
+Example 4: The right way of concatenating the decompressed output of two or
+more compressed files. @xref{Trailing data}.
+
+@example
+Don't do this
+ cat file1.lz file2.lz file3.lz | lziprecover -d -
+Do this instead
+ lziprecover -cd file1.lz file2.lz file3.lz
+You may also concatenate the compressed files like this
+ lziprecover --strip=tdata file1.lz file2.lz file3.lz > file123.lz
+Or keeping the trailing data of the last file like this
+ lziprecover --strip=empty file1.lz file2.lz file3.lz > file123.lz
+@end example
+
+@sp 1
+@noindent
+Example 5: Decompress @samp{file.lz} partially until @w{10 KiB} of
+decompressed data are produced.
+
+@example
+lziprecover -D 0,10KiB file.lz
+@end example
+
+@sp 1
+@noindent
+Example 6: Decompress @samp{file.lz} partially from decompressed byte at
+offset 10000 to decompressed byte at offset 14999 (5000 bytes are produced).
+
+@example
+lziprecover -D 10000-15000 file.lz
+@end example
+
+@sp 1
+@noindent
+Example 7: Repair a corrupt byte in the file @samp{file.lz}. (Indented lines
+are abridged diagnostic messages from lziprecover).
+
+@example
+lziprecover -v -R file.lz
+ Copy of input file repaired successfully.
+lziprecover -tv file_fixed.lz
+ file_fixed.lz: ok
+mv file_fixed.lz file.lz
+@end example
+
+@sp 1
+@noindent
+Example 8: Split the multimember file @samp{file.lz} and write each member
+in its own @samp{recXXXfile.lz} file. Then use @w{@samp{lziprecover -t}} to
+test the integrity of the resulting files.
+
+@example
+lziprecover -s file.lz
+lziprecover -tv rec*file.lz
+@end example
+
+
+@node Unzcrash
+@chapter Testing the robustness of decompressors
+@cindex unzcrash
+
+@xref{--unzcrash}, for a faster way of testing the robustness of lzip.
+
+The lziprecover package also includes unzcrash, a program written to test
+robustness to decompression of corrupted data, inspired by unzcrash.c from
+Julian Seward's bzip2. Type @samp{make unzcrash} in the lziprecover source
+directory to build it.
+
+By default, unzcrash reads the file specified and then repeatedly
+decompresses it, increasing 256 times each byte of the compressed data, so
+as to test all possible one-byte errors. Note that it may take years or even
+centuries to test all possible one-byte errors in a large file (tens of MB).
+
+If the option @option{--block} is given, unzcrash reads the file specified and
+then repeatedly decompresses it, setting all bytes in each successive block
+to the value given, so as to test all possible full sector errors.
+
+If the option @option{--truncate} is given, unzcrash reads the file specified
+and then repeatedly decompresses it, truncating the file to increasing
+lengths, so as to test all possible truncation points.
+
+None of the three test modes described above should cause any invalid memory
+accesses. If any of them does, please, report it as a bug to the maintainers
+of the decompressor being tested.
+
+Unzcrash really executes as a subprocess the shell command specified in the
+first non-option argument, and then writes the file specified in the second
+non-option argument to the standard input of the subprocess, modifying the
+corresponding byte each time. Therefore unzcrash can be used to test any
+decompressor (not only lzip), or even other decoder programs having a
+suitable command-line syntax.
+
+If the decompressor returns with zero status, unzcrash compares the output
+of the decompressor for the original and corrupt files. If the outputs
+differ, it means that the decompressor returned a false negative; it failed
+to recognize the corruption and produced garbage output. The only exception
+is when a multimember file is truncated just after the last byte of a
+member, producing a shorter but valid compressed file. Except in this latter
+case, please, report any false negative as a bug.
+
+In order to compare the outputs, unzcrash needs a @samp{zcmp} program able
+to understand the format being tested. For example the @samp{zcmp} provided
+by @uref{http://www.nongnu.org/zutils/manual/zutils_manual.html#Zcmp,,zutils}.
+If the @samp{zcmp} program used does not understand the format being tested,
+all the comparisons fail because the compressed files are compared without
+being decompressed first. Use @option{--zcmp=false} to disable comparisons.
+@ifnothtml
+@xref{Zcmp,,,zutils}.
+@end ifnothtml
+
+The format for running unzcrash is:
+
+@example
+unzcrash [@var{options}] 'lzip -t' @var{file}
+@end example
+
+@noindent
+The compressed @var{file} must not contain errors and the decompressor being
+tested must decompress it correctly for the comparisons to work.
+
+unzcrash supports the following options:
+
+@table @code
+@item -h
+@itemx --help
+Print an informative help message describing the options and exit.
+
+@item -V
+@itemx --version
+Print the version number of unzcrash on the standard output and exit.
+This version number should be included in all bug reports.
+
+@item -b @var{range}
+@itemx --bits=@var{range}
+Test N-bit errors only, instead of testing all the 255 wrong values for
+each byte. @samp{N-bit error} means any value differing from the
+original value in N bit positions, not a value differing from the
+original value in the bit position N.@*
+The number of N-bit errors per byte (N = 1 to 8) is:
+@w{8 28 56 70 56 28 8 1}
+
+@multitable {Examples of @var{range}} {Tests errors of N-bits}
+@item Examples of @var{range} @tab Tests errors of N-bits
+@item 1 @tab 1
+@item 1,2,3 @tab 1, 2, 3
+@item 2-4 @tab 2, 3, 4
+@item 1,3-5,8 @tab 1, 3, 4, 5, 8
+@item 1-3,5-8 @tab 1, 2, 3, 5, 6, 7, 8
+@end multitable
+
+@item -B[@var{size}][,@var{value}]
+@itemx --block[=@var{size}][,@var{value}]
+Test block errors of given @var{size}, simulating a whole sector I/O error.
+@var{size} defaults to 512 bytes. @var{value} defaults to 0. By default,
+only contiguous, non-overlapping blocks are tested, but this may be changed
+with the option @option{--delta}.
+
+@item -d @var{n}
+@itemx --delta=@var{n}
+Test one byte, block, or truncation size every @var{n} bytes. If
+@option{--delta} is not specified, unzcrash tests all the bytes,
+non-overlapping blocks, or truncation sizes. Values of @var{n} smaller than
+the block size result in overlapping blocks. (Which is convenient for
+testing because there are usually too few non-overlapping blocks in a file).
+
+@item -e @var{position},@var{value}
+@itemx --set-byte=@var{position},@var{value}
+Set byte at @var{position} to @var{value} in the internal buffer after
+reading and testing @var{file} but before the first test call to the
+decompressor. Byte positions start at 0. If @var{value} is preceded by
+@samp{+}, it is added to the original value of the byte at @var{position}.
+If @var{value} is preceded by @samp{f} (flip), it is XORed with the original
+value of the byte at @var{position}. This option can be used to run tests
+with a changed dictionary size, for example.
+
+@item -n
+@itemx --no-check
+Skip initial test of @var{file} and @samp{zcmp}. May speed up things a lot
+when testing many (or large) known good files.
+
+@item -p @var{bytes}
+@itemx --position=@var{bytes}
+First byte position to test in the file. Defaults to 0. Negative values
+are relative to the end of the file.
+
+@item -q
+@itemx --quiet
+Quiet operation. Suppress all messages.
+
+@item -s @var{bytes}
+@itemx --size=@var{bytes}
+Number of byte positions to test. If not specified, the rest of the file
+is tested (from @option{--position} to end of file). Negative values are
+relative to the rest of the file.
+
+@item -t
+@itemx --truncate
+Test all possible truncation points in the range specified by
+@option{--position} and @option{--size}.
+
+@item -v
+@itemx --verbose
+Verbose mode.
+
+@item -z @var{command}
+@itemx --zcmp=@var{command}
+Set zcmp command name and options. Defaults to @samp{zcmp}. Use
+@option{--zcmp=false} to disable comparisons. If testing a decompressor
+different from the one used by default by zcmp, you need to force
+unzcrash and zcmp to use the same decompressor with a command like
+@w{@samp{unzcrash --zcmp='zcmp --lz=plzip' 'plzip -t' @var{file}}}.
+
+@end table
+
+Exit status: 0 for a normal exit, 1 for environmental problems
+(file not found, invalid command-line options, I/O errors, etc), 2 to
+indicate a corrupt or invalid input file, 3 for an internal consistency
+error (e.g., bug) which caused unzcrash to panic.
+
+
+@node Problems
+@chapter Reporting bugs
+@cindex bugs
+@cindex getting help
+
+There are probably bugs in lziprecover. There are certainly errors and
+omissions in this manual. If you report them, they will get fixed. If
+you don't, no one will ever know about them and they will remain unfixed
+for all eternity, if not longer.
+
+If you find a bug in lziprecover, please send electronic mail to
+@email{lzip-bug@@nongnu.org}. Include the version number, which you can
+find by running @w{@samp{lziprecover --version}}.
+
+
+@node Concept index
+@unnumbered Concept index
+
+@printindex cp
+
+@bye
diff --git a/dump_remove.cc b/dump_remove.cc
new file mode 100644
index 0000000..3273303
--- /dev/null
+++ b/dump_remove.cc
@@ -0,0 +1,365 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <utime.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "lzip_index.h"
+
+
+const char * const pdate_msg = "warning: can't preserve file date"; // warning text; not referenced by dump_members, presumably used by the in-place removal code further down (verify)
+
+
+/* If strip is false, dump to outfd members/gaps/tdata in member_list.
+ If strip is true, dump to outfd members/gaps/tdata not in member_list.
+
+ Output goes to standard output when to_stdout is set or
+ default_output_filename is empty; otherwise the file named by
+ default_output_filename is opened with open_outstream (passing force).
+
+ Each name in filenames is processed in order; "-" means standard input
+ and is honored only the first time it appears. For every input a
+ Lzip_index is built, then the gaps and members are visited in file
+ order and each one is either copied to outfd or counted as stripped.
+ A member is selected if member_list includes its block index, or if
+ member_list.empty is set and its data block has size 0, or if
+ member_list.damaged is set and test_member_from_file returns nonzero.
+ A gap is selected if member_list.damaged is set or member_list
+ includes its block index. Trailing data are handled after the members.
+
+ At verbosity >= 1 a summary of the bytes dumped or stripped is printed
+ to stderr.
+
+ Returns 1 immediately if the output can't be opened or the tty check
+ fails. Otherwise returns retval, which starts at 0 and is updated
+ through set_retval when an input can't be opened, when its index is
+ invalid, or when closing the output fails. Seek and copy failures call
+ cleanup_and_fail( 1 ) (presumably does not return; verify). */
+int dump_members( const std::vector< std::string > & filenames,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts, const Member_list & member_list,
+ const bool force, const bool strip, const bool to_stdout )
+ {
+ if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
+ else
+ {
+ output_filename = default_output_filename;
+ set_signal_handler();
+ if( !open_outstream( force, false, false, false ) ) return 1;
+ }
+ if( ( strip || !member_list.tdata || member_list.damaged ||
+ member_list.empty || member_list.range() ) &&
+ !check_tty_out() ) return 1; // check tty except for --dump=tdata
+ unsigned long long copied_size = 0, stripped_size = 0; // bytes of members and gaps
+ unsigned long long copied_tsize = 0, stripped_tsize = 0; // bytes of trailing data
+ long members = 0, smembers = 0; // copied / stripped members (gaps are counted too)
+ int files = 0, tfiles = 0, retval = 0; // files reported in the summaries; return value
+ bool stdin_used = false;
+ for( unsigned i = 0; i < filenames.size(); ++i )
+ {
+ const bool from_stdin = ( filenames[i] == "-" );
+ if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; } // read stdin only once
+ const char * const input_filename =
+ from_stdin ? "(stdin)" : filenames[i].c_str();
+ struct stat in_stats; // not used
+ const int infd = from_stdin ? STDIN_FILENO :
+ open_instream( input_filename, &in_stats, false, true );
+ if( infd < 0 ) { set_retval( retval, 1 ); continue; } // can't open: go on with next file
+
+ const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
+ cl_opts.ignore_errors );
+ if( lzip_index.retval() != 0 ) // index could not be built: report and skip this file
+ {
+ show_file_error( input_filename, lzip_index.error().c_str() );
+ set_retval( retval, lzip_index.retval() );
+ close( infd );
+ continue;
+ }
+ if( !safe_seek( infd, 0, input_filename ) ) cleanup_and_fail( 1 );
+ const long blocks = lzip_index.blocks( false ); // not counting tdata
+ long long stream_pos = 0; // first pos not yet read from file
+ long gaps = 0; // gaps found so far; added to j to form the block index
+ const long prev_members = members, prev_smembers = smembers; // counters before this file
+ const unsigned long long prev_stripped_size = stripped_size; // saved for the roll-back below
+ for( long j = 0; j < lzip_index.members(); ++j ) // copy members and gaps
+ {
+ const Block & mb = lzip_index.mblock( j );
+ if( mb.pos() > stream_pos ) // gap
+ {
+ const bool in = member_list.damaged || // gaps are always selected by 'damaged'
+ member_list.includes( j + gaps, blocks );
+ if( in == !strip ) // dump mode copies selected blocks; strip mode copies the rest
+ {
+ if( !safe_seek( infd, stream_pos, input_filename ) ||
+ !copy_file( infd, outfd, mb.pos() - stream_pos ) )
+ cleanup_and_fail( 1 );
+ copied_size += mb.pos() - stream_pos; ++members;
+ }
+ else { stripped_size += mb.pos() - stream_pos; ++smembers; }
+ ++gaps;
+ }
+ bool in = member_list.includes( j + gaps, blocks ); // member
+ if( !in && member_list.empty && lzip_index.dblock( j ).size() == 0 )
+ in = true; // member with a zero-size data block selected by 'empty'
+ if( !in && member_list.damaged )
+ {
+ if( !safe_seek( infd, mb.pos(), input_filename ) ) cleanup_and_fail( 1 );
+ in = ( test_member_from_file( infd, mb.size() ) != 0 ); // damaged
+ }
+ if( in == !strip ) // same copy rule as for gaps
+ {
+ if( !safe_seek( infd, mb.pos(), input_filename ) ||
+ !copy_file( infd, outfd, mb.size() ) ) cleanup_and_fail( 1 );
+ copied_size += mb.size(); ++members;
+ }
+ else { stripped_size += mb.size(); ++smembers; }
+ stream_pos = mb.end();
+ }
+ if( strip && members == prev_members ) // all members were stripped
+ { if( verbosity >= 1 )
+ show_file_error( input_filename, "All members stripped, skipping." );
+ stripped_size = prev_stripped_size; smembers = prev_smembers; // undo this file's strip counts
+ close( infd ); continue; } // the trailing data of this file are not processed
+ if( ( !strip && members > prev_members ) ||
+ ( strip && smembers > prev_smembers ) ) ++files; // file contributed to the summary count
+ // copy trailing data
+ const unsigned long long cdata_size = lzip_index.cdata_size();
+ const long long trailing_size = lzip_index.file_size() - cdata_size; // bytes past cdata_size
+ if( member_list.tdata == !strip && trailing_size > 0 &&
+ ( !strip || i + 1 >= filenames.size() ) ) // strip all but last
+ {
+ if( !safe_seek( infd, cdata_size, input_filename ) ||
+ !copy_file( infd, outfd, trailing_size ) ) cleanup_and_fail( 1 );
+ copied_tsize += trailing_size;
+ }
+ else if( trailing_size > 0 ) { stripped_tsize += trailing_size; ++tfiles; }
+ close( infd );
+ }
+ if( !close_outstream( 0 ) ) set_retval( retval, 1 );
+ if( verbosity >= 1 ) // print a summary of what was dumped or stripped
+ {
+ if( !strip )
+ {
+ if( member_list.damaged || member_list.empty || member_list.range() )
+ std::fprintf( stderr, "%llu bytes dumped from %ld %s from %d %s.\n",
+ copied_size,
+ members, ( members == 1 ) ? "member" : "members",
+ files, ( files == 1 ) ? "file" : "files" );
+ if( member_list.tdata )
+ std::fprintf( stderr, "%llu trailing bytes dumped.\n", copied_tsize );
+ }
+ else
+ {
+ if( member_list.damaged || member_list.empty || member_list.range() )
+ std::fprintf( stderr, "%llu bytes stripped from %ld %s from %d %s.\n",
+ stripped_size,
+ smembers, ( smembers == 1 ) ? "member" : "members",
+ files, ( files == 1 ) ? "file" : "files" );
+ if( member_list.tdata )
+ std::fprintf( stderr, "%llu trailing bytes stripped from %d %s.\n",
+ stripped_tsize, tfiles, ( tfiles == 1 ) ? "file" : "files" );
+ }
+ }
+ return retval;
+ }
+
+
+/* Remove members, tdata from files in place by opening two descriptors for
+   each file.
+   'infd' is used to read and a second descriptor 'fd' (obtained from
+   open_truncable_stream) is used to write. The gaps and members not
+   selected by 'member_list' are copied towards the beginning of the file,
+   trailing data is copied too unless 'member_list.tdata' is set, the file
+   is truncated at the new end, and the access and modification times saved
+   in 'in_stats' are restored with utime.
+   A file from which all members would be removed is skipped (retval 2).
+   Returns the value accumulated in 'retval' through set_retval, or 1 at
+   once if the input descriptor can't be rewound. */
+int remove_members( const std::vector< std::string > & filenames,
+                    const Cl_options & cl_opts, const Member_list & member_list )
+  {
+  unsigned long long removed_size = 0, removed_tsize = 0;
+  long members = 0;			// members and gaps selected for removal
+  int files = 0, tfiles = 0, retval = 0;
+  for( unsigned i = 0; i < filenames.size(); ++i )
+    {
+    const char * const filename = filenames[i].c_str();
+    struct stat in_stats, dummy_stats;
+    const int infd = open_instream( filename, &in_stats, false, true );
+    if( infd < 0 ) { set_retval( retval, 1 ); continue; }
+
+    const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
+                                 cl_opts.ignore_errors );
+    if( lzip_index.retval() != 0 )
+      {
+      show_file_error( filename, lzip_index.error().c_str() );
+      set_retval( retval, lzip_index.retval() );
+      close( infd );
+      continue;
+      }
+    const int fd = open_truncable_stream( filename, &dummy_stats );
+    if( fd < 0 ) { close( infd ); set_retval( retval, 1 ); continue; }
+
+    // close both descriptors before the early return so that they don't leak
+    if( !safe_seek( infd, 0, filename ) )
+      { close( fd ); close( infd ); return 1; }
+    const long blocks = lzip_index.blocks( false );	// not counting tdata
+    long long stream_pos = 0;		// first pos not yet written to file
+    long gaps = 0;
+    bool error = false;
+    const long prev_members = members;	// to undo the count if file is skipped
+    for( long j = 0; j < lzip_index.members(); ++j )	// copy members and gaps
+      {
+      const Block & mb = lzip_index.mblock( j );
+      const long long prev_end = (j > 0) ? lzip_index.mblock(j - 1).end() : 0;
+      if( mb.pos() > prev_end )					// gap
+        {
+        if( !member_list.damaged && !member_list.includes( j + gaps, blocks ) )
+          {
+          // nothing to copy while the data is already at its final position
+          if( stream_pos != prev_end &&
+              ( !safe_seek( infd, prev_end, filename ) ||
+                !safe_seek( fd, stream_pos, filename ) ||
+                !copy_file( infd, fd, mb.pos() - prev_end ) ) )
+            { error = true; set_retval( retval, 1 ); break; }
+          stream_pos += mb.pos() - prev_end;
+          }
+        else ++members;
+        ++gaps;
+        }
+      bool in = member_list.includes( j + gaps, blocks );	// member
+      if( !in && member_list.empty && lzip_index.dblock( j ).size() == 0 )
+        in = true;
+      if( !in && member_list.damaged )
+        {
+        if( !safe_seek( infd, mb.pos(), filename ) )
+          { error = true; set_retval( retval, 1 ); break; }
+        in = ( test_member_from_file( infd, mb.size() ) != 0 );	// damaged
+        }
+      if( !in )							// keep member
+        {
+        if( stream_pos != mb.pos() &&
+            ( !safe_seek( infd, mb.pos(), filename ) ||
+              !safe_seek( fd, stream_pos, filename ) ||
+              !copy_file( infd, fd, mb.size() ) ) )
+          { error = true; set_retval( retval, 1 ); break; }
+        stream_pos += mb.size();
+        }
+      else ++members;
+      }
+    if( error ) { close( fd ); close( infd ); break; }
+    if( stream_pos == 0 )			// all members were removed
+      { show_file_error( filename, "All members would be removed, skipping." );
+        close( fd ); close( infd ); set_retval( retval, 2 );
+        members = prev_members; continue; }
+    const long long cdata_size = lzip_index.cdata_size();
+    if( cdata_size > stream_pos )
+      { removed_size += cdata_size - stream_pos; ++files; }
+    const long long file_size = lzip_index.file_size();
+    const long long trailing_size = file_size - cdata_size;
+    if( trailing_size > 0 )
+      {
+      if( !member_list.tdata )			// copy trailing data
+        {
+        if( stream_pos != cdata_size &&
+            ( !safe_seek( infd, cdata_size, filename ) ||
+              !safe_seek( fd, stream_pos, filename ) ||
+              !copy_file( infd, fd, trailing_size ) ) )
+          { close( fd ); close( infd ); set_retval( retval, 1 ); break; }
+        stream_pos += trailing_size;
+        }
+      else { removed_tsize += trailing_size; ++tfiles; }
+      }
+    if( stream_pos >= file_size )		// no members were removed
+      { close( fd ); close( infd ); continue; }
+    int result;
+    do result = ftruncate( fd, stream_pos );	// retry if interrupted
+      while( result != 0 && errno == EINTR );
+    if( result != 0 )
+      {
+      show_file_error( filename, "Can't truncate file", errno );
+      close( fd ); close( infd ); set_retval( retval, 1 ); break;
+      }
+    if( close( fd ) != 0 || close( infd ) != 0 )
+      {
+      show_file_error( filename, "Error closing file", errno );
+      set_retval( retval, 1 ); break;
+      }
+    struct utimbuf t;				// restore the original dates
+    t.actime = in_stats.st_atime;
+    t.modtime = in_stats.st_mtime;
+    if( utime( filename, &t ) != 0 && verbosity >= 1 )
+      show_file_error( filename, pdate_msg, errno );
+    }
+  if( verbosity >= 1 )
+    {
+    if( member_list.damaged || member_list.empty || member_list.range() )
+      std::fprintf( stderr, "%llu bytes removed from %ld %s from %d %s.\n",
+                    removed_size,
+                    members, ( members == 1 ) ? "member" : "members",
+                    files, ( files == 1 ) ? "file" : "files" );
+    if( member_list.tdata )
+      std::fprintf( stderr, "%llu trailing bytes removed from %d %s.\n",
+                    removed_tsize, tfiles, ( tfiles == 1 ) ? "file" : "files" );
+    }
+  return retval;
+  }
+
+
+/* Set to zero in place the first LZMA byte of each member in each file by
+   opening one rw descriptor for each file.
+   For every member, Lzip_header::size + 1 bytes are read at the member
+   position; the header is checked and, if the byte following it is not
+   zero, a zero byte is written over it. If a write was attempted on a file,
+   its access and modification times are restored with utime and the file
+   is counted in the summary.
+   Returns the value accumulated in 'retval' through set_retval. */
+int clear_marking( const std::vector< std::string > & filenames,
+                   const Cl_options & cl_opts )
+  {
+  long cleared_members = 0;
+  int files = 0, retval = 0;
+  for( unsigned i = 0; i < filenames.size(); ++i )
+    {
+    const char * const filename = filenames[i].c_str();
+    struct stat in_stats;
+    const int fd = open_truncable_stream( filename, &in_stats );
+    if( fd < 0 ) { set_retval( retval, 1 ); continue; }
+
+    const Lzip_index lzip_index( fd, cl_opts, cl_opts.ignore_errors,
+                                 cl_opts.ignore_errors );
+    if( lzip_index.retval() != 0 )
+      {
+      show_file_error( filename, lzip_index.error().c_str() );
+      set_retval( retval, lzip_index.retval() );
+      close( fd );
+      continue;
+      }
+
+    enum { bufsize = Lzip_header::size + 1 };	// header + first LZMA byte
+    uint8_t header_buf[bufsize];
+    const uint8_t * const p = header_buf;	// keep gcc 6.1.0 quiet
+    const Lzip_header & header = *(const Lzip_header *)p;
+    uint8_t * const mark = header_buf + header.size;	// byte after the header
+    bool write_attempted = false;
+    for( long j = 0; j < lzip_index.members(); ++j )	// clear the members
+      {
+      const Block & mb = lzip_index.mblock( j );
+      if( seek_read( fd, header_buf, bufsize, mb.pos() ) != bufsize )
+        { show_file_error( filename, "Error reading member header", errno );
+          set_retval( retval, 1 ); break; }
+      if( !header.check( cl_opts.ignore_errors ) )
+        { show_file_error( filename, "Member header became corrupt as we read it." );
+          set_retval( retval, 2 ); break; }
+      if( *mark == 0 ) continue;		// already clear
+      *mark = 0; write_attempted = true;
+      if( seek_write( fd, mark, 1, mb.pos() + header.size ) != 1 )
+        { show_file_error( filename, "Error writing to file", errno );
+          set_retval( retval, 1 ); break; }
+      ++cleared_members;
+      }
+    if( close( fd ) != 0 )
+      {
+      show_file_error( filename, "Error closing file", errno );
+      set_retval( retval, 1 ); break;
+      }
+    if( write_attempted )
+      {
+      struct utimbuf t;				// restore the original dates
+      t.actime = in_stats.st_atime;
+      t.modtime = in_stats.st_mtime;
+      if( utime( filename, &t ) != 0 && verbosity >= 1 )
+        show_file_error( filename, pdate_msg, errno );
+      ++files;
+      }
+    }
+  if( verbosity >= 1 )		// 'cleared_members' is a signed long: use %ld
+    std::fprintf( stderr, "%ld %s cleared in %d %s.\n", cleared_members,
+                  ( cleared_members == 1 ) ? "member" : "members",
+                  files, ( files == 1 ) ? "file" : "files" );
+  return retval;
+  }
diff --git a/list.cc b/list.cc
new file mode 100644
index 0000000..3c5ceb1
--- /dev/null
+++ b/list.cc
@@ -0,0 +1,124 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+/* Print to stdout one line with the uncompressed size, the compressed size,
+   the percentage saved, and the name. The percentage is shown as "-INF"
+   when the uncompressed size is 0. */
+void list_line( const unsigned long long uncomp_size,
+                const unsigned long long comp_size,
+                const char * const input_filename )
+  {
+  if( uncomp_size == 0 )		// ratio can't be computed
+    {
+    std::printf( "%14llu %14llu -INF%% %s\n", uncomp_size, comp_size,
+                 input_filename );
+    return;
+    }
+  const double saved = 100.0 - ( ( 100.0 * comp_size ) / uncomp_size );
+  std::printf( "%14llu %14llu %6.2f%% %s\n", uncomp_size, comp_size,
+               saved, input_filename );
+  }
+
+} // end namespace
+
+
+/* Print to stdout, for each file, a line with the uncompressed size, the
+   compressed size, the percentage saved, and the name. If verbosity >= 1,
+   the line is preceded by the dictionary size, the number of members, and
+   the size of the trailing data. If verbosity >= 2, the position and size
+   of each member and gap are also listed for files with more than one
+   member or whose first member does not start at position 0. A line of
+   totals is printed if more than one file has been listed.
+   The name "-" stands for standard input, which is read at most once.
+   Nothing is printed if verbosity < 0.
+   Returns the value accumulated in 'retval' through set_retval. */
+int list_files( const std::vector< std::string > & filenames,
+                const Cl_options & cl_opts )
+  {
+  unsigned long long total_comp = 0, total_uncomp = 0;
+  int files = 0, retval = 0;
+  bool first_post = true;		// heading must be printed
+  bool stdin_used = false;
+
+  for( unsigned i = 0; i < filenames.size(); ++i )
+    {
+    const bool from_stdin = ( filenames[i] == "-" );
+    if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
+    const char * const input_filename =
+      from_stdin ? "(stdin)" : filenames[i].c_str();
+    struct stat in_stats;				// not used
+    const int infd = from_stdin ? STDIN_FILENO :
+      open_instream( input_filename, &in_stats, false, true );
+    if( infd < 0 ) { set_retval( retval, 1 ); continue; }
+
+    const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
+                                 cl_opts.ignore_errors );
+    close( infd );
+    if( lzip_index.retval() != 0 )
+      {
+      show_file_error( input_filename, lzip_index.error().c_str() );
+      set_retval( retval, lzip_index.retval() );
+      continue;
+      }
+    if( verbosity < 0 ) continue;
+    const unsigned long long udata_size = lzip_index.udata_size();
+    const unsigned long long cdata_size = lzip_index.cdata_size();
+    total_comp += cdata_size; total_uncomp += udata_size; ++files;
+    const long members = lzip_index.members();
+    if( first_post )
+      {
+      first_post = false;
+      if( verbosity >= 1 ) std::fputs( " dict memb trail ", stdout );
+      std::fputs( " uncompressed compressed saved name\n", stdout );
+      }
+    if( verbosity >= 1 )
+      std::printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size() ),
+                   members, lzip_index.file_size() - cdata_size );
+    list_line( udata_size, cdata_size, input_filename );
+
+    if( verbosity >= 2 && ( members > 1 ||
+        ( members == 1 && lzip_index.mblock( 0 ).pos() > 0 ) ) )
+      {
+      std::fputs( " member data_pos data_size member_pos member_size\n", stdout );
+      long long prev_end = 0;
+      // 'j' indexes members; it must not shadow the file index 'i'
+      for( long j = 0, gaps = 0; j < members; ++j )
+        {
+        const Block & db = lzip_index.dblock( j );
+        const Block & mb = lzip_index.mblock( j );
+        if( mb.pos() > prev_end )	// gap before this member
+          {
+          std::printf( " gap - - %14lld %14lld\n",
+                       prev_end, mb.pos() - prev_end );
+          ++gaps;
+          }
+        // positions and sizes are signed long long: print them with %lld
+        std::printf( "%6ld %14lld %14lld %14lld %14lld\n",
+                     j + gaps + 1, db.pos(), db.size(), mb.pos(), mb.size() );
+        prev_end = mb.end();
+        }
+      first_post = true;	// reprint heading after list of members
+      }
+    std::fflush( stdout );
+    }
+  if( verbosity >= 0 && files > 1 )
+    {
+    if( verbosity >= 1 ) std::fputs( " ", stdout );
+    list_line( total_uncomp, total_comp, "(totals)" );
+    std::fflush( stdout );
+    }
+  return retval;
+  }
diff --git a/lunzcrash.cc b/lunzcrash.cc
new file mode 100644
index 0000000..ad05697
--- /dev/null
+++ b/lunzcrash.cc
@@ -0,0 +1,374 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "mtester.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+/* Test the member in 'mbuffer' while feeding 'md5sum' through the tester.
+   On success store the digest in 'digest' and return true. Else show an
+   error for 'name' and return false. */
+bool check_member( const uint8_t * const mbuffer, const long msize,
+                   const unsigned dictionary_size, const char * const name,
+                   md5_type & digest )
+  {
+  MD5SUM md5sum;
+  LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
+  const bool good = ( mtester.test_member() == 0 && mtester.finished() );
+  if( good ) md5sum.md5_finish( digest );
+  else show_file_error( name, "Error checking input file." );
+  return good;
+  }
+
+
+/* Test the member in 'mbuffer' and compare the digest obtained with
+   'digest'. Return true if the test passes and the digests match. Else
+   report the failure at 'byte_pos' (if verbosity >= 0) and return false. */
+bool compare_member( const uint8_t * const mbuffer, const long msize,
+                     const unsigned dictionary_size,
+                     const long long byte_pos, const md5_type & digest )
+  {
+  MD5SUM md5sum;
+  LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
+  bool same = false;
+  if( mtester.test_member() == 0 && mtester.finished() )
+    {
+    md5_type new_digest;
+    md5sum.md5_finish( new_digest );
+    same = !( digest != new_digest );
+    }
+  if( same ) return true;
+  if( verbosity >= 0 )
+    std::printf( "byte %llu comparison failed\n", byte_pos );
+  return false;
+  }
+
+
+/* Test the rest of the member with a copy of 'master' that uses 'buffer2'
+   as its buffer. Return the result of test_member, or -1 if it returned 0
+   but the tester did not finish. If the value returned is not 0, store the
+   member position reached in '*failure_posp'. */
+int test_member_rest( const LZ_mtester & master, uint8_t * const buffer2,
+                      long * const failure_posp,
+                      const unsigned long long byte_pos )
+  {
+  LZ_mtester mtester( master );		// tester with external buffer
+  mtester.duplicate_buffer( buffer2 );
+  const int status = mtester.test_member( LONG_MAX, LLONG_MAX, stdout, byte_pos );
+  if( status != 0 )
+    { *failure_posp = mtester.member_position(); return status; }
+  if( mtester.finished() ) return 0;
+  *failure_posp = mtester.member_position();
+  return -1;				// false negative
+  }
+
+
+/* Return the position, relative to the start of member 'i', at which 'pct'
+   percent of the compressed data (less 'sector_size') is reached. The
+   result is 0 if that point is at or before the start of the member, and
+   the member size (less 'sector_size') if it is at or past its end, except
+   that 100% yields that size minus 21. */
+long next_pct_pos( const Lzip_index & lzip_index, const long i, const int pct,
+                   const int sector_size = 0 )
+  {
+  if( pct <= 0 ) return 0;
+  const Block & mb = lzip_index.mblock( i );
+  const long long total = lzip_index.cdata_size() - sector_size;
+  const long long start = mb.pos();
+  const long long length = mb.size() - sector_size;
+  const long long target = (long long)( total / ( 100.0 / pct ) );
+
+  if( target <= start ) return 0;
+  if( target == total ) return length - 21;		// 100%
+  if( target >= start + length ) return length;
+  return target - start;
+  }
+
+} // end namespace
+
+
+/* Test 1-bit errors in LZMA streams in file.
+ Unless verbosity >= 1, print only the bytes with interesting results.
+ For each member, every byte from offset Lzip_header::size + 1 up to (not
+ including) msize - 20 is taken in turn; each of its 8 bits is flipped, the
+ rest of the member is tested with a copy of the master tester, and the bit
+ is flipped back. Results that pass the test are then compared against the
+ MD5 digest of the original data.
+ Returns 0 after testing all the members, 1 if the file can't be opened,
+ read_member fails, or the master tester can't be advanced, 2 if a member
+ fails the initial check, or lzip_index.retval() if that is not 0. */
+int lunzcrash_bit( const char * const input_filename,
+ const Cl_options & cl_opts )
+ {
+ struct stat in_stats; // not used
+ const int infd = open_instream( input_filename, &in_stats, false, true );
+ if( infd < 0 ) return 1;
+
+ const Lzip_index lzip_index( infd, cl_opts );
+ if( lzip_index.retval() != 0 )
+ { show_file_error( input_filename, lzip_index.error().c_str() );
+ return lzip_index.retval(); }
+ if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
+
+ const long long cdata_size = lzip_index.cdata_size();
+ long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
+ int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100; // next percentage to show
+ for( long i = 0; i < lzip_index.members(); ++i )
+ {
+ const long long mpos = lzip_index.mblock( i ).pos();
+ const long long msize = lzip_index.mblock( i ).size();
+ uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
+ if( !mbuffer ) return 1;
+ const unsigned dictionary_size = lzip_index.dictionary_size( i );
+ md5_type md5_orig; // digest of the unmodified member data
+ if( !check_member( mbuffer, msize, dictionary_size, input_filename,
+ md5_orig ) ) return 2;
+ long pct_pos = next_pct_pos( lzip_index, i, pct );
+ long pos = Lzip_header::size + 1, printed = 0; // last pos printed
+ const long end = msize - 20; // first position not tested
+ if( verbosity == 0 ) // give a clue of the range being tested
+ std::printf( "Testing bytes %llu to %llu\n", mpos + pos, mpos + end - 1 );
+ LZ_mtester master( mbuffer, msize, dictionary_size );
+ uint8_t * const buffer2 = new uint8_t[dictionary_size]; // buffer for the copies of master
+ for( ; pos < end; ++pos )
+ {
+ const long pos_limit = pos - 16; // master stays 16 bytes behind pos
+ if( pos_limit > 0 && master.test_member( pos_limit ) != -1 )
+ { show_error( "Can't advance master." ); return 1; }
+ if( verbosity >= 0 && pos >= pct_pos )
+ { std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct;
+ pct_pos = next_pct_pos( lzip_index, i, pct ); }
+ if( verbosity >= 1 )
+ { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
+ ++positions;
+ const uint8_t byte = mbuffer[pos]; // original value, used only in messages
+ for( uint8_t mask = 1; mask != 0; mask <<= 1 ) // one pass per bit
+ {
+ ++decompressions;
+ mbuffer[pos] ^= mask; // flip the bit
+ long failure_pos = 0;
+ const int result = test_member_rest( master, buffer2, &failure_pos,
+ ( printed < pos ) ? mpos + pos : 0 );
+ if( result <= 0 ) // 0 = passed, -1 = passed but did not finish
+ {
+ ++successes;
+ if( verbosity >= 0 )
+ {
+ if( printed < pos )
+ { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
+ std::printf( "0x%02X (0x%02X^0x%02X) passed the test%s",
+ mbuffer[pos], byte, mask, ( result < 0 ) ? "" : "\n" );
+ if( result < 0 )
+ std::printf( ", but only consumed %lu bytes of %llu\n",
+ failure_pos, msize );
+ }
+ if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
+ md5_orig ) ) ++failed_comparisons;
+ }
+ else if( result == 1 )
+ {
+ if( verbosity >= 2 || // the lower the verbosity, the later the error must be to be shown
+ ( verbosity >= 1 && failure_pos - pos >= 10000 ) ||
+ ( verbosity >= 0 && failure_pos - pos >= 50000 ) )
+ {
+ if( printed < pos )
+ { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
+ std::printf( "Decoder error at pos %llu\n", mpos + failure_pos );
+ }
+ }
+ else if( result == 3 || result == 4 ) // test_member printed the error
+ { if( verbosity >= 0 && printed < pos ) printed = pos; }
+ else if( verbosity >= 0 )
+ {
+ if( printed < pos )
+ { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
+ if( result == 2 )
+ std::printf( "File ends unexpectedly at pos %llu\n",
+ mpos + failure_pos );
+ else
+ std::printf( "Unknown error code '%d'\n", result );
+ }
+ mbuffer[pos] ^= mask; // flip the bit back
+ }
+ }
+ delete[] buffer2;
+ if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) ) // the member must be intact again
+ internal_error( "Some byte was not properly restored." );
+ delete[] mbuffer;
+ }
+
+ if( verbosity >= 0 ) // summary
+ {
+ std::printf( "\n%9ld bytes tested\n%9ld total decompressions"
+ "\n%9ld decompressions returned with zero status",
+ positions, decompressions, successes );
+ if( successes > 0 )
+ {
+ if( failed_comparisons > 0 )
+ std::printf( ", of which\n%9ld comparisons failed\n",
+ failed_comparisons );
+ else std::fputs( "\n all comparisons passed\n", stdout );
+ }
+ else std::fputc( '\n', stdout );
+ }
+ return 0;
+ }
+
+
+/* Test zeroed blocks of given size in LZMA streams in file.
+ Unless verbosity >= 1, print only the bytes with interesting results.
+ For each member tested, a block of 'sector_size' bytes is saved, set to
+ zero, the rest of the member is tested with a copy of the master tester,
+ and the block is restored. This is repeated at every byte position from
+ Lzip_header::size + 1 up to (not including) msize - sector_size - 20.
+ Results that pass the test are then compared against the MD5 digest of the
+ original data.
+ Returns 0 after testing all the members, 1 if the file can't be opened,
+ read_member fails, or the master tester can't be advanced, 2 if a member
+ fails the initial check, or lzip_index.retval() if that is not 0. */
+int lunzcrash_block( const char * const input_filename,
+ const Cl_options & cl_opts, const int sector_size )
+ {
+ struct stat in_stats; // not used
+ const int infd = open_instream( input_filename, &in_stats, false, true );
+ if( infd < 0 ) return 1;
+
+ const Lzip_index lzip_index( infd, cl_opts );
+ if( lzip_index.retval() != 0 )
+ { show_file_error( input_filename, lzip_index.error().c_str() );
+ return lzip_index.retval(); }
+ if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename );
+
+ const long long cdata_size = lzip_index.cdata_size();
+ long decompressions = 0, successes = 0, failed_comparisons = 0;
+ int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100; // next percentage to show
+ uint8_t * const block = new uint8_t[sector_size]; // holds the bytes being zeroed
+ for( long i = 0; i < lzip_index.members(); ++i )
+ {
+ const long long mpos = lzip_index.mblock( i ).pos();
+ const long long msize = lzip_index.mblock( i ).size();
+ // skip members with LZMA stream smaller than sector_size
+ if( msize - Lzip_header::size - 1 - 20 <= sector_size ) continue;
+ uint8_t * const mbuffer = read_member( infd, mpos, msize, input_filename );
+ if( !mbuffer ) return 1;
+ const unsigned dictionary_size = lzip_index.dictionary_size( i );
+ md5_type md5_orig; // digest of the unmodified member data
+ if( !check_member( mbuffer, msize, dictionary_size, input_filename,
+ md5_orig ) ) return 2;
+ long pct_pos = next_pct_pos( lzip_index, i, pct, sector_size );
+ long pos = Lzip_header::size + 1;
+ const long end = msize - sector_size - 20; // first position not tested
+ if( verbosity >= 0 ) // give a clue of the range being tested
+ std::printf( "Testing blocks of size %u from pos %llu to %llu\n",
+ sector_size, mpos + pos, mpos + end - 1 );
+ LZ_mtester master( mbuffer, msize, dictionary_size );
+ uint8_t * const buffer2 = new uint8_t[dictionary_size]; // buffer for the copies of master
+ for( ; pos < end; ++pos )
+ {
+ const long pos_limit = pos - 16; // master stays 16 bytes behind pos
+ if( pos_limit > 0 && master.test_member( pos_limit ) != -1 )
+ { show_error( "Can't advance master." ); return 1; }
+ if( verbosity >= 0 && pos >= pct_pos )
+ { std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct;
+ pct_pos = next_pct_pos( lzip_index, i, pct, sector_size ); }
+ std::memcpy( block, mbuffer + pos, sector_size ); // save block
+ std::memset( mbuffer + pos, 0, sector_size ); // zero the block
+ ++decompressions;
+ long failure_pos = 0;
+ const int result =
+ test_member_rest( master, buffer2, &failure_pos, mpos + pos );
+ if( result <= 0 ) // 0 = passed, -1 = passed but did not finish
+ {
+ ++successes;
+ if( verbosity >= 0 )
+ {
+ std::printf( "block %llu,%u passed the test%s",
+ mpos + pos, sector_size, ( result < 0 ) ? "" : "\n" );
+ if( result < 0 )
+ std::printf( ", but only consumed %lu bytes of %llu\n",
+ failure_pos, msize );
+ }
+ if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
+ md5_orig ) ) ++failed_comparisons;
+ }
+ else if( result == 1 )
+ {
+ if( verbosity >= 3 || // the lower the verbosity, the later the error must be to be shown
+ ( verbosity >= 2 && failure_pos - pos >= sector_size ) ||
+ ( verbosity >= 1 && failure_pos - pos >= 10000 ) ||
+ ( verbosity >= 0 && failure_pos - pos >= 50000 ) )
+ std::printf( "block %llu,%u\nDecoder error at pos %llu\n",
+ mpos + pos, sector_size, mpos + failure_pos );
+ }
+ else if( result == 3 || result == 4 ) // test_member printed the error
+ {}
+ else if( verbosity >= 0 )
+ {
+ std::printf( "block %llu,%u\n", mpos + pos, sector_size );
+ if( result == 2 )
+ std::printf( "File ends unexpectedly at pos %llu\n",
+ mpos + failure_pos );
+ else
+ std::printf( "Unknown error code '%d'\n", result );
+ }
+ std::memcpy( mbuffer + pos, block, sector_size ); // restore block
+ }
+ delete[] buffer2;
+ if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos, md5_orig ) ) // the member must be intact again
+ internal_error( "Block was not properly restored." );
+ delete[] mbuffer;
+ }
+ delete[] block;
+
+ if( verbosity >= 0 ) // summary
+ {
+ std::printf( "\n%9ld blocks tested\n%9ld total decompressions"
+ "\n%9ld decompressions returned with zero status",
+ decompressions, decompressions, successes );
+ if( successes > 0 )
+ {
+ if( failed_comparisons > 0 )
+ std::printf( ", of which\n%9ld comparisons failed\n",
+ failed_comparisons );
+ else std::fputs( "\n all comparisons passed\n", stdout );
+ }
+ else std::fputc( '\n', stdout );
+ }
+ return 0;
+ }
+
+
+/* Print to stdout the MD5 digest (32 hex digits) and the name of each file.
+   The name "-" stands for standard input, which is read at most once.
+   Throws Error on read error. Returns 1 at once if a file can't be closed,
+   else the value accumulated in 'retval' through set_retval. */
+int md5sum_files( const std::vector< std::string > & filenames )
+  {
+  bool stdin_used = false;
+  int retval = 0;
+
+  for( unsigned n = 0; n < filenames.size(); ++n )
+    {
+    const std::string & arg = filenames[n];
+    const bool from_stdin = ( arg == "-" );
+    if( from_stdin )
+      {
+      if( stdin_used ) continue;
+      stdin_used = true;
+      }
+    const char * const input_filename = arg.c_str();
+    struct stat in_stats;				// not used
+    const int infd = from_stdin ? STDIN_FILENO :
+      open_instream( input_filename, &in_stats, false );
+    if( infd < 0 ) { set_retval( retval, 1 ); continue; }
+
+    enum { buffer_size = 16384 };
+    uint8_t buffer[buffer_size];
+    md5_type md5_digest;
+    MD5SUM md5sum;
+    int len;
+    do {			// a short read marks the end of the input
+      len = readblock( infd, buffer, buffer_size );
+      if( len != buffer_size && errno ) throw Error( "Read error" );
+      if( len > 0 ) md5sum.md5_update( buffer, len );
+      }
+    while( len >= buffer_size );
+    md5sum.md5_finish( md5_digest );
+    if( close( infd ) != 0 )
+      {
+      show_file_error( input_filename, "Error closing input file", errno );
+      return 1;
+      }
+
+    for( int k = 0; k < 16; ++k ) std::printf( "%02x", md5_digest[k] );
+    std::printf( " %s\n", input_filename );
+    std::fflush( stdout );
+    }
+  return retval;
+  }
diff --git a/lzip.h b/lzip.h
new file mode 100644
index 0000000..fb910ba
--- /dev/null
+++ b/lzip.h
@@ -0,0 +1,538 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "common.h"
+
+/* Coder state: an integer in the range 0 to states - 1. States below 7 mean
+   that the last symbol was a literal (see is_char). */
+class State
+  {
+  int st;
+
+public:
+  enum { states = 12 };
+  State() : st( 0 ) {}
+  int operator()() const { return st; }
+  bool is_char() const { return st < 7; }
+
+  // same transitions as the table { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }
+  void set_char()
+    {
+    if( st < 4 ) st = 0;
+    else if( st < 10 ) st -= 3;
+    else st -= 6;
+    }
+  bool is_char_set_char() { set_char(); return st < 4; }
+  void set_match() { st = is_char() ? 7 : 10; }
+  void set_rep() { st = is_char() ? 8 : 11; }
+  void set_short_rep() { st = is_char() ? 9 : 11; }
+  };
+
+
+enum { // compile-time constants shared by the code that includes this header
+ min_dictionary_bits = 12, // log2 of min_dictionary_size
+ min_dictionary_size = 1 << min_dictionary_bits, // 4 KiB; >= modeled_distances
+ max_dictionary_bits = 29, // log2 of max_dictionary_size
+ max_dictionary_size = 1 << max_dictionary_bits, // 512 MiB
+ min_member_size = 36, // smallest member_size accepted by Lzip_trailer::check_consistency
+ literal_context_bits = 3, // see get_lit_state
+ literal_pos_state_bits = 0, // not used
+ pos_state_bits = 2,
+ pos_states = 1 << pos_state_bits, // 4
+ pos_state_mask = pos_states - 1, // 3
+
+ len_states = 4, // see get_len_state
+ dis_slot_bits = 6,
+ start_dis_model = 4,
+ end_dis_model = 14,
+ modeled_distances = 1 << ( end_dis_model / 2 ), // 128
+ dis_align_bits = 4,
+ dis_align_size = 1 << dis_align_bits, // 16
+
+ len_low_bits = 3,
+ len_mid_bits = 3,
+ len_high_bits = 8,
+ len_low_symbols = 1 << len_low_bits, // 8
+ len_mid_symbols = 1 << len_mid_bits, // 8
+ len_high_symbols = 1 << len_high_bits, // 256
+ max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, // 272
+
+ min_match_len = 2, // must be 2
+ max_match_len = min_match_len + max_len_symbols - 1, // 273
+ min_match_len_limit = 5 };
+
+// Return len - min_match_len, limited to a maximum of len_states - 1.
+inline int get_len_state( const int len )
+  {
+  const int top = len_states - 1;
+  const int idx = len - min_match_len;
+  return ( idx < top ) ? idx : top;
+  }
+
+// Return the literal_context_bits most significant bits of prev_byte.
+inline int get_lit_state( const uint8_t prev_byte )
+  {
+  enum { shift = 8 - literal_context_bits };
+  return prev_byte >> shift;
+  }
+
+
+enum { bit_model_move_bits = 5,
+ bit_model_total_bits = 11,
+ bit_model_total = 1 << bit_model_total_bits }; // 2048
+
+struct Bit_model // one probability, initialized to half of bit_model_total
+ {
+ int probability;
+ Bit_model() : probability( bit_model_total / 2 ) {} // 1024
+ };
+
+struct Len_model // group of bit models, presumably used to code match lengths
+ {
+ Bit_model choice1;
+ Bit_model choice2;
+ Bit_model bm_low[pos_states][len_low_symbols]; // 4 x 8 models
+ Bit_model bm_mid[pos_states][len_mid_symbols]; // 4 x 8 models
+ Bit_model bm_high[len_high_symbols]; // 256 models
+ };
+
+
+// defined in main.cc
+extern int verbosity;
+
+/* Keeps the name of the current file and a padded version of it sized after
+   the longest name. The name "-" (or an empty name) is replaced by
+   "(stdin)". Requires global var 'int verbosity'. */
+class Pretty_print
+  {
+  std::string name_;
+  std::string padded_name;
+  const char * const stdin_name;
+  unsigned longest_name;
+  mutable bool first_post;
+
+public:
+  // Find the length of the longest name, but only if verbosity > 0.
+  Pretty_print( const std::vector< std::string > & filenames )
+    : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
+    {
+    if( verbosity > 0 )
+      {
+      const unsigned stdin_name_len = std::strlen( stdin_name );
+      for( unsigned n = 0; n < filenames.size(); ++n )
+        {
+        const std::string & fname = filenames[n];
+        const unsigned len = ( fname == "-" ) ? stdin_name_len : fname.size();
+        if( len > longest_name ) longest_name = len;
+        }
+      if( longest_name == 0 ) longest_name = stdin_name_len;
+      }
+    }
+
+  // Use the length of 'filename' as longest name and set it as current name.
+  Pretty_print( const std::string & filename )
+    : stdin_name( "(stdin)" ), first_post( false )
+    {
+    const unsigned stdin_name_len = std::strlen( stdin_name );
+    if( filename == "-" ) longest_name = stdin_name_len;
+    else longest_name = filename.size();
+    if( longest_name == 0 ) longest_name = stdin_name_len;
+    set_name( filename );
+    }
+
+  // Set the current name and rebuild the padded name.
+  void set_name( const std::string & filename )
+    {
+    if( filename.size() == 0 || filename == "-" ) name_ = stdin_name;
+    else name_ = filename;
+    padded_name = " ";
+    padded_name += name_;
+    padded_name += ": ";
+    if( name_.size() < longest_name )
+      padded_name.append( longest_name - name_.size(), ' ' );
+    first_post = true;
+    }
+
+  void reset() const { if( name_.size() ) first_post = true; }
+  const char * name() const { return name_.c_str(); }
+  void operator()( const char * const msg = 0, FILE * const f = stderr ) const;
+  };
+
+
+/* Table-driven CRC32 using the reflected polynomial 0xEDB88320. The caller
+   provides the running value of the CRC; this class only updates it. */
+class CRC32
+  {
+  uint32_t data[256];		// Table of CRCs of all 8-bit messages.
+
+public:
+  CRC32()
+    {
+    const unsigned poly = 0xEDB88320U;
+    for( unsigned n = 0; n < 256; ++n )
+      {
+      unsigned c = n;
+      for( int k = 8; k > 0; --k )
+        c = ( c & 1 ) ? ( poly ^ ( c >> 1 ) ) : ( c >> 1 );
+      data[n] = c;
+      }
+    }
+
+  uint32_t operator[]( const uint8_t byte ) const { return data[byte]; }
+
+  void update_byte( uint32_t & crc, const uint8_t byte ) const
+    { crc = ( crc >> 8 ) ^ data[(crc^byte)&0xFF]; }
+
+  // Update 'crc' with the first 'size' bytes of 'buffer'.
+  void update_buf( uint32_t & crc, const uint8_t * const buffer,
+                   const int size ) const
+    {
+    uint32_t c = crc;		// work on a local copy
+    for( int i = 0; i < size; ++i ) update_byte( c, buffer[i] );
+    crc = c;
+    }
+  };
+
+extern const CRC32 crc32;
+
+
+// Return true if dictionary_size is in the range
+// min_dictionary_size to max_dictionary_size (both included).
+inline bool isvalid_ds( const unsigned dictionary_size )
+  {
+  if( dictionary_size < min_dictionary_size ) return false;
+  return dictionary_size <= max_dictionary_size;
+  }
+
+
+// Return the number of bits needed to represent value (0 for value == 0).
+inline int real_bits( unsigned value )
+  {
+  int bits = 0;
+  for( ; value > 0; value >>= 1 ) ++bits;
+  return bits;
+  }
+
+
+const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };	// "LZIP"
+
+/* The 6-byte header of a member: magic bytes, version, and coded dictionary
+   size. The low 5 bits of byte 5 are the base 2 logarithm of the base size;
+   the high 3 bits are the number of sixteenths of the base size to subtract
+   from it. */
+struct Lzip_header
+  {
+  enum { size = 6 };
+  uint8_t data[size];			// 0-3 magic bytes
+					//   4 version
+					//   5 coded dictionary size
+
+  void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
+  bool check_magic() const { return std::memcmp( data, lzip_magic, 4 ) == 0; }
+
+  // Return true if the first 'sz' bytes (at most 4) match the magic bytes.
+  bool check_prefix( const int sz ) const	// detect (truncated) header
+    {
+    for( int i = 0; i < sz && i < 4; ++i )
+      if( data[i] != lzip_magic[i] ) return false;
+    return sz > 0;
+    }
+
+  // Return true if 2 or 3 of the 4 magic bytes match.
+  bool check_corrupt() const			// detect corrupt header
+    {
+    int matches = 0;
+    for( int i = 0; i < 4; ++i )
+      if( data[i] == lzip_magic[i] ) ++matches;
+    return matches > 1 && matches < 4;
+    }
+
+  uint8_t version() const { return data[4]; }
+  bool check_version() const { return data[4] == 1; }
+
+  /* Decode the dictionary size. The exponent comes from unchecked header
+     data and may be as large as 31, so the shift is done on an unsigned
+     operand (shifting a signed 1 by 31 bits overflows int). */
+  unsigned dictionary_size() const
+    {
+    unsigned sz = 1U << ( data[5] & 0x1F );
+    if( sz > min_dictionary_size )
+      sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
+    return sz;
+    }
+
+  /* Encode 'sz' as the smallest representable size >= sz.
+     Return false, leaving the header unchanged, if 'sz' is not valid. */
+  bool dictionary_size( const unsigned sz )
+    {
+    if( !isvalid_ds( sz ) ) return false;
+    data[5] = real_bits( sz - 1 );
+    if( sz > min_dictionary_size )
+      {
+      const unsigned base_size = 1U << data[5];
+      const unsigned fraction = base_size / 16;
+      for( unsigned i = 7; i >= 1; --i )	// try the largest reduction first
+        if( base_size - ( i * fraction ) >= sz )
+          { data[5] |= i << 5; break; }
+      }
+    return true;
+    }
+
+  // Check magic and version, and the dictionary size unless ignore_bad_ds.
+  bool check( const bool ignore_bad_ds = false ) const
+    { return check_magic() && check_version() &&
+             ( ignore_bad_ds || isvalid_ds( dictionary_size() ) ); }
+  };
+
+
+/* The 20-byte trailer of a member. All the fields are stored with the least
+   significant byte first. */
+struct Lzip_trailer
+  {
+  enum { size = 20 };
+  uint8_t data[size];	//  0-3  CRC32 of the uncompressed data
+			//  4-11 size of the uncompressed data
+			// 12-19 member size including header and trailer
+
+  unsigned data_crc() const
+    {
+    unsigned crc = data[3];
+    for( int i = 2; i >= 0; --i ) crc = ( crc << 8 ) | data[i];
+    return crc;
+    }
+
+  void data_crc( unsigned crc )
+    { for( int i = 0; i < 4; ++i ) data[i] = (uint8_t)( crc >> ( 8 * i ) ); }
+
+  unsigned long long data_size() const
+    {
+    unsigned long long sz = data[11];
+    for( int i = 10; i >= 4; --i ) sz = ( sz << 8 ) | data[i];
+    return sz;
+    }
+
+  void data_size( unsigned long long sz )
+    { for( int i = 0; i < 8; ++i ) data[4+i] = (uint8_t)( sz >> ( 8 * i ) ); }
+
+  unsigned long long member_size() const
+    {
+    unsigned long long sz = data[19];
+    for( int i = 18; i >= 12; --i ) sz = ( sz << 8 ) | data[i];
+    return sz;
+    }
+
+  void member_size( unsigned long long sz )
+    { for( int i = 0; i < 8; ++i ) data[12+i] = (uint8_t)( sz >> ( 8 * i ) ); }
+
+  /* Check internal consistency: the CRC and the data size must be both zero
+     or both nonzero, the member size must be at least min_member_size, and
+     neither size may exceed the limit derived from the other. A limit is
+     ignored if it has wrapped around. */
+  bool check_consistency() const
+    {
+    const unsigned long long dsize = data_size();
+    const bool zero_crc = ( data_crc() == 0 );
+    const bool zero_size = ( dsize == 0 );
+    if( zero_crc != zero_size ) return false;
+    const unsigned long long msize = member_size();
+    if( msize < min_member_size ) return false;
+    const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
+    const bool member_too_large = ( mlimit > dsize && msize > mlimit );
+    if( member_too_large ) return false;
+    const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
+    const bool data_too_large = ( dlimit > msize && dsize > dlimit );
+    return !data_too_large;
+    }
+  };
+
+
+struct Cl_options // command-line options
+ {
+ bool ignore_empty; // if false, Lzip_index rejects a member whose data size is 0 (empty_msg)
+ bool ignore_errors;
+ bool ignore_marking; // passed to Lzip_index::read_header; if false, a nonzero byte read after the header is rejected (marking_msg)
+ bool ignore_trailing; // if false, Lzip_index::skip_gap rejects trailing data (trailing_msg)
+ bool loose_trailing; // if false, Lzip_index::skip_gap reports a corrupt header after the last member (corrupt_mm_msg)
+
+ Cl_options() // defaults: ignore_empty, ignore_marking and ignore_trailing are true; the other two are false
+ : ignore_empty( true ), ignore_errors( false ), ignore_marking( true ),
+ ignore_trailing( true ), loose_trailing( false ) {}
+ };
+
+
+#ifndef INT64_MAX // fallback used only if no header included so far has defined INT64_MAX
+#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
+#endif
+
+class Block // a range of positions described by a start position and a size
+ {
+ long long pos_, size_; // pos >= 0, size >= 0, pos + size <= INT64_MAX
+
+public:
+ Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} // the invariant above is not checked here
+
+ long long pos() const { return pos_; }
+ long long size() const { return size_; }
+ long long end() const { return pos_ + size_; } // first position after the block
+
+ void pos( const long long p ) { pos_ = p; }
+ void size( const long long s ) { size_ = s; }
+
+ bool operator==( const Block & b ) const
+ { return pos_ == b.pos_ && size_ == b.size_; }
+ bool operator!=( const Block & b ) const
+ { return pos_ != b.pos_ || size_ != b.size_; }
+
+ bool operator<( const Block & b ) const { return pos_ < b.pos_; } // orders by position only; size is not compared
+
+ bool includes( const long long pos ) const // true if pos_ <= pos < end()
+ { return pos_ <= pos && end() > pos; }
+ bool overlaps( const Block & b ) const // true if each block starts before the other one ends
+ { return pos_ < b.end() && b.pos_ < end(); }
+ bool overlaps( const long long pos, const long long size ) const // same test against the range [pos, pos + size)
+ { return pos_ < pos + size && pos < end(); }
+
+ Block split( const long long pos ); // declared only; the definition is not in this header
+ };
+
+
+struct Member_list // members/gaps/tdata to be dumped/removed/stripped
+ {
+ bool damaged;
+ bool empty;
+ bool tdata;
+ bool in, rin; // value returned by includes() on a match in range_vector / rrange_vector
+ std::vector< Block > range_vector, rrange_vector; // ranges counted from the first block / from the last block
+
+ Member_list() : damaged( false ), empty( false ), tdata( false ),
+ in( true ), rin( true ) {}
+ void parse_ml( const char * const arg, const char * const option_name,
+ Cl_options & cl_opts ); // declared only; the definition is not in this header
+
+ bool range() const { return range_vector.size() || rrange_vector.size(); } // true if at least one range has been stored
+
+ // blocks is the sum of members + gaps, excluding trailing data
+ bool includes( const long i, const long blocks ) const
+ {
+ for( unsigned j = 0; j < range_vector.size(); ++j )
+ {
+ if( range_vector[j].pos() > i ) break; // stops at the first range starting after i (assumes ranges are sorted by position; TODO confirm)
+ if( range_vector[j].end() > i ) return in; // i lies inside this range
+ }
+ if( i >= 0 && i < blocks ) // reverse ranges only apply to indexes inside the file
+ for( unsigned j = 0; j < rrange_vector.size(); ++j )
+ {
+ if( rrange_vector[j].pos() > blocks - i - 1 ) break; // blocks - i - 1 is the index of i counted from the last block
+ if( rrange_vector[j].end() > blocks - i - 1 ) return rin;
+ }
+ return !in || !rin; // no range matched: true if either in or rin is false
+ }
+ };
+
+
+struct Error // error object carrying a message
+ {
+ const char * const msg; // only the pointer is stored; the string is not copied
+ explicit Error( const char * const s ) : msg( s ) {}
+ };
+
+inline unsigned long long positive_diff( const unsigned long long x,
+ const unsigned long long y )
+ { return ( x > y ) ? x - y : 0; } // x - y, or 0 when x <= y (never wraps around)
+
+inline void set_retval( int & retval, const int new_val )
+ { if( retval < new_val ) retval = new_val; } // raises retval to new_val; never lowers it
+
+const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; // set by Lzip_index when check_magic fails
+const char * const bad_dict_msg = "Invalid dictionary size in member header."; // set by Lzip_index::check_header
+const char * const corrupt_mm_msg = "Corrupt header in multimember file."; // set by Lzip_index::skip_gap
+const char * const empty_msg = "Empty member not allowed."; // set by Lzip_index when ignore_empty is false
+const char * const marking_msg = "Marking data not allowed."; // set by Lzip_index::read_header
+const char * const trailing_msg = "Trailing data not allowed."; // set by Lzip_index::skip_gap
+
+// defined in alone_to_lz.cc
+int alone_to_lz( const int infd, const Pretty_print & pp );
+
+// defined in byte_repair.cc
+long seek_write( const int fd, const uint8_t * const buf, const long size,
+ const long long pos );
+uint8_t * read_member( const int infd, const long long mpos,
+ const long long msize, const char * const filename );
+int byte_repair( const std::string & input_filename,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts,
+ const char terminator, const bool force );
+int debug_delay( const char * const input_filename,
+ const Cl_options & cl_opts, Block range,
+ const char terminator );
+int debug_byte_repair( const char * const input_filename,
+ const Cl_options & cl_opts, const Bad_byte & bad_byte,
+ const char terminator );
+int debug_decompress( const char * const input_filename,
+ const Cl_options & cl_opts, const Bad_byte & bad_byte,
+ const bool show_packets );
+
+// defined in decoder.cc
+long readblock( const int fd, uint8_t * const buf, const long size );
+long writeblock( const int fd, const uint8_t * const buf, const long size );
+
+// defined in dump_remove.cc
+int dump_members( const std::vector< std::string > & filenames,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts, const Member_list & member_list,
+ const bool force, const bool strip, const bool to_stdout );
+int remove_members( const std::vector< std::string > & filenames,
+ const Cl_options & cl_opts, const Member_list & member_list );
+int clear_marking( const std::vector< std::string > & filenames,
+ const Cl_options & cl_opts );
+
+// defined in list.cc
+int list_files( const std::vector< std::string > & filenames,
+ const Cl_options & cl_opts );
+
+// defined in lzip_index.cc
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos );
+
+// defined in lunzcrash.cc
+int lunzcrash_bit( const char * const input_filename,
+ const Cl_options & cl_opts );
+int lunzcrash_block( const char * const input_filename,
+ const Cl_options & cl_opts, const int sector_size );
+int md5sum_files( const std::vector< std::string > & filenames );
+
+// defined in main.cc
+extern const char * const program_name;
+extern std::string output_filename; // global vars for output file
+extern int outfd;
+struct stat;
+bool fits_in_size_t( const unsigned long long size );
+const char * bad_version( const unsigned version );
+const char * format_ds( const unsigned dictionary_size );
+void show_header( const unsigned dictionary_size );
+int open_instream( const char * const name, struct stat * const in_statsp,
+ const bool one_to_one, const bool reg_only = false );
+int open_truncable_stream( const char * const name,
+ struct stat * const in_statsp );
+bool open_outstream( const bool force, const bool protect,
+ const bool rw = false, const bool skipping = true,
+ const bool to_file = false );
+bool output_file_exists();
+void cleanup_and_fail( const int retval );
+bool check_tty_out();
+void set_signal_handler();
+bool close_outstream( const struct stat * const in_statsp );
+std::string insert_fixed( std::string name );
+void show_2file_error( const char * const msg1, const char * const name1,
+ const char * const name2, const char * const msg2 );
+class Range_decoder;
+void show_dprogress( const unsigned long long cfile_size = 0,
+ const unsigned long long partial_size = 0,
+ const Range_decoder * const d = 0,
+ const Pretty_print * const p = 0 );
+
+// defined in merge.cc
+bool copy_file( const int infd, const int outfd,
+ const long long max_size = -1 );
+int test_member_from_file( const int infd, const unsigned long long msize,
+ long long * const failure_posp = 0 );
+int merge_files( const std::vector< std::string > & filenames,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts, const char terminator,
+ const bool force );
+
+// defined in nrep_stats.cc
+int print_nrep_stats( const std::vector< std::string > & filenames,
+ const Cl_options & cl_opts, const int repeated_byte );
+
+// defined in range_dec.cc
+const char * format_num( unsigned long long num,
+ unsigned long long limit = -1ULL,
+ const int set_prefix = 0 );
+bool safe_seek( const int fd, const long long pos,
+ const char * const filename );
+int range_decompress( const std::string & input_filename,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts, Block range,
+ const bool force, const bool to_stdout );
+
+// defined in reproduce.cc
+int reproduce_file( const std::string & input_filename,
+ const std::string & default_output_filename,
+ const char * const lzip_name,
+ const char * const reference_filename,
+ const Cl_options & cl_opts, const int lzip_level,
+ const char terminator, const bool force );
+int debug_reproduce_file( const char * const input_filename,
+ const char * const lzip_name,
+ const char * const reference_filename,
+ const Cl_options & cl_opts, const Block & range,
+ const int sector_size, const int lzip_level );
+
+// defined in split.cc
+int split_file( const std::string & input_filename,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts, const bool force );
diff --git a/lzip_index.cc b/lzip_index.cc
new file mode 100644
index 0000000..459338e
--- /dev/null
+++ b/lzip_index.cc
@@ -0,0 +1,366 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "lzip_index.h"
+
+
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos ) // read up to size bytes from fd into buf, starting at absolute offset pos
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos )
+ return readblock( fd, buf, size ); // result of readblock (defined in decoder.cc)
+ return 0; // seek failed: nothing was read
+ }
+
+
+bool Lzip_index::check_header( const Lzip_header & header,
+ const bool ignore_bad_ds ) // on the first failed test, sets error_ and retval_ = 2 and returns false
+ {
+ if( !header.check_magic() )
+ { error_ = bad_magic_msg; retval_ = 2; return false; }
+ if( !header.check_version() )
+ { error_ = bad_version( header.version() ); retval_ = 2; return false; }
+ if( !ignore_bad_ds && !isvalid_ds( header.dictionary_size() ) ) // dictionary size is not tested if ignore_bad_ds
+ { error_ = bad_dict_msg; retval_ = 2; return false; }
+ return true;
+ }
+
+void Lzip_index::set_errno_error( const char * const msg ) // record an error described by the current errno
+ {
+ error_ = msg; error_ += std::strerror( errno ); // msg followed by the system description of errno
+ retval_ = 1;
+ }
+
+void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) // record msg followed by num in decimal
+ {
+ char buf[80]; // snprintf truncates the text if it does not fit
+ snprintf( buf, sizeof buf, "%s%llu", msg, num );
+ error_ = buf;
+ retval_ = 2;
+ }
+
+
+bool Lzip_index::read_header( const int fd, Lzip_header & header,
+ const long long pos, const bool ignore_marking ) // read the header at offset pos of fd; on failure sets error_ and returns false
+ {
+ if( seek_read( fd, header.data, header.size, pos ) != header.size )
+ { set_errno_error( "Error reading member header: " ); return false; }
+ uint8_t byte;
+ if( !ignore_marking && readblock( fd, &byte, 1 ) == 1 && byte != 0 ) // next byte in fd (presumably the one right after the header) must be 0; a failed read of it is not an error
+ { error_ = marking_msg; retval_ = 2; return false; }
+ return true;
+ }
+
+bool Lzip_index::read_trailer( const int fd, Lzip_trailer & trailer,
+ const long long pos ) // read the trailer.size bytes ending at offset pos of fd
+ {
+ if( seek_read( fd, trailer.data, trailer.size, pos - trailer.size ) !=
+ trailer.size )
+ { set_errno_error( "Error reading member trailer: " ); return false; }
+ return true;
+ }
+
+
+/* Skip backwards the gap or trailing data ending at pos.
+ 'ignore_gaps' also ignores format errors and a truncated last member.
+ If successful, push member preceding gap and set pos to member header. */
+bool Lzip_index::skip_gap( const int fd, unsigned long long & pos,
+ const Cl_options & cl_opts,
+ const bool ignore_bad_ds, const bool ignore_gaps )
+ {
+ if( pos < min_member_size ) // no room for a member before pos
+ {
+ if( ignore_gaps && !member_vector.empty() ) { pos = 0; return true; } // the short leading block is left as a gap
+ return false;
+ }
+ enum { block_size = 16384,
+ buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
+ uint8_t buffer[buffer_size];
+ int bsize = pos % block_size; // total bytes in buffer
+ if( bsize <= buffer_size - block_size ) bsize += block_size; // a short remainder is extended by one full block
+ int search_size = bsize; // bytes to search for trailer
+ int rd_size = bsize; // bytes to read from file
+ unsigned long long ipos = pos - rd_size; // aligned to block_size
+
+ while( true ) // each iteration examines one chunk, moving towards the start of the file
+ {
+ if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
+ { set_errno_error( "Error seeking member trailer: " ); return false; }
+ const uint8_t max_msb = ( ipos + search_size ) >> 56; // top byte of the largest member size that could end inside this chunk
+ for( int i = search_size; i >= Lzip_trailer::size; --i ) // i is the candidate end of a trailer inside buffer
+ if( buffer[i-1] <= max_msb ) // most significant byte of member_size
+ {
+ const Lzip_trailer & trailer =
+ *(const Lzip_trailer *)( buffer + i - trailer.size ); // view the 20 bytes ending at i as a trailer
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size == 0 ) // skip trailing zeros
+ { while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; }
+ if( member_size > ipos + i || !trailer.check_consistency() ) continue; // member would start before offset 0, or trailer is inconsistent
+ Lzip_header header;
+ if( !read_header( fd, header, ipos + i - member_size,
+ cl_opts.ignore_marking ) ) return false;
+ if( !header.check( ignore_bad_ds ) ) continue; // no valid header where this trailer says the member starts
+ const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); // bytes following the candidate trailer
+ const bool full_h2 = bsize - i >= header.size; // true if a whole header fits in the buffered bytes after i
+ if( header2.check_prefix( bsize - i ) ) // next header
+ {
+ if( !ignore_gaps && member_vector.empty() ) // last member
+ {
+ if( !full_h2 ) error_ = "Last member in input file is truncated.";
+ else if( check_header( header2, ignore_bad_ds ) )
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; return false; // if check_header failed it has already set error_
+ }
+ const unsigned dictionary_size =
+ full_h2 ? header2.dictionary_size() : 0;
+ const unsigned long long member_size = pos - ( ipos + i ); // size of the block from the found header up to the old pos
+ pos = ipos + i;
+ // approximate data and member sizes for '-i -D'
+ member_vector.push_back( Member( 0, member_size, pos,
+ member_size, dictionary_size ) );
+ }
+ if( !ignore_gaps && member_vector.empty() ) // no member found yet: the bytes after this trailer are trailing data
+ {
+ if( !cl_opts.loose_trailing && full_h2 && header2.check_corrupt() )
+ { error_ = corrupt_mm_msg; retval_ = 2; return false; }
+ if( !cl_opts.ignore_trailing )
+ { error_ = trailing_msg; retval_ = 2; return false; }
+ }
+ const unsigned long long data_size = trailer.data_size();
+ if( !cl_opts.ignore_empty && data_size == 0 )
+ { error_ = empty_msg; retval_ = 2; return false; }
+ pos = ipos + i - member_size; // good member
+ const unsigned dictionary_size = header.dictionary_size();
+ if( dictionary_size_ < dictionary_size ) // keep track of the largest dictionary size seen
+ dictionary_size_ = dictionary_size;
+ member_vector.push_back( Member( 0, data_size, pos, member_size,
+ dictionary_size ) );
+ return true;
+ }
+ if( ipos == 0 ) // the chunk starting at offset 0 has been searched without success
+ {
+ if( ignore_gaps && !member_vector.empty() )
+ {
+ const Lzip_header * header = (const Lzip_header *)buffer; // the header checked by the constructor at offset 0
+ const unsigned dictionary_size = header->dictionary_size();
+ // approximate data and member sizes for '-i -D'
+ member_vector.push_back( Member( 0, pos, 0, pos, dictionary_size ) );
+ pos = 0; return true;
+ }
+ set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
+ return false;
+ }
+ bsize = buffer_size;
+ search_size = bsize - Lzip_header::size;
+ rd_size = block_size;
+ ipos -= rd_size;
+ std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); // keep the first bytes of the old chunk after the block read next, so data spanning both is seen
+ }
+ }
+
+
+Lzip_index::Lzip_index( const int infd, const Cl_options & cl_opts,
+ const bool ignore_bad_ds, const bool ignore_gaps,
+ const long long max_pos ) // build the member index of infd by walking it backwards from its end (or from max_pos)
+ : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ) // insize is the offset returned by seeking to the end
+ {
+ if( insize < 0 )
+ { set_errno_error( "Input file is not seekable: " ); return; }
+ if( insize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ if( insize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+
+ Lzip_header header;
+ if( !read_header( infd, header, 0, cl_opts.ignore_marking ) ||
+ !check_header( header, ignore_bad_ds ) ) return; // the file must begin with a valid header
+
+ // pos always points to a header or to ( EOF || max_pos )
+ unsigned long long pos = ( max_pos > 0 ) ? max_pos : insize;
+ while( pos >= min_member_size ) // one member (or one skipped gap) per iteration
+ {
+ Lzip_trailer trailer;
+ if( !read_trailer( infd, trailer, pos ) ) break;
+ const unsigned long long member_size = trailer.member_size();
+ // if gaps are being ignored, check consistency of last trailer only.
+ if( member_size > pos || member_size < min_member_size ||
+ ( ( !ignore_gaps || member_vector.empty() ) &&
+ !trailer.check_consistency() ) ) // bad trailer
+ {
+ if( ignore_gaps || member_vector.empty() )
+ { if( skip_gap( infd, pos, cl_opts, ignore_bad_ds, ignore_gaps ) )
+ continue; else return; } // on failure member_vector is left as is; skip_gap may not have set retval_
+ set_num_error( "Bad trailer at pos ", pos - trailer.size ); break;
+ }
+ if( !read_header( infd, header, pos - member_size, cl_opts.ignore_marking ) )
+ break;
+ if( !header.check( ignore_bad_ds ) ) // bad header
+ {
+ if( ignore_gaps || member_vector.empty() )
+ { if( skip_gap( infd, pos, cl_opts, ignore_bad_ds, ignore_gaps ) )
+ continue; else return; }
+ set_num_error( "Bad header at pos ", pos - member_size ); break;
+ }
+ const unsigned long long data_size = trailer.data_size();
+ if( !cl_opts.ignore_empty && data_size == 0 )
+ { error_ = empty_msg; retval_ = 2; break; }
+ pos -= member_size; // good member
+ const unsigned dictionary_size = header.dictionary_size();
+ if( dictionary_size_ < dictionary_size ) // keep track of the largest dictionary size seen
+ dictionary_size_ = dictionary_size;
+ member_vector.push_back( Member( 0, data_size, pos, member_size,
+ dictionary_size ) ); // data position 0 is a placeholder fixed up below
+ }
+ // block at pos == 0 must be a member unless shorter than min_member_size
+ if( pos >= min_member_size || ( pos != 0 && !ignore_gaps ) ||
+ member_vector.empty() || retval_ != 0 )
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ std::reverse( member_vector.begin(), member_vector.end() ); // members were collected from last to first
+ for( unsigned long i = 0; ; ++i ) // set each data block position to the end of the previous data block
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ if( i + 1 >= member_vector.size() ) break;
+ member_vector[i+1].dblock.pos( end );
+ if( member_vector[i].mblock.end() > member_vector[i+1].mblock.pos() )
+ internal_error( "two mblocks overlap after constructing a Lzip_index." );
+ }
+ }
+
+
+// All files in 'infd_vector' must be at least 'fsize' bytes long.
+Lzip_index::Lzip_index( const std::vector< int > & infd_vector,
+ const long long fsize ) // build one index from several files, taking each header and trailer from any file where it checks out
+ : insize( fsize ), retval_( 0 ), dictionary_size_( 0 ) // DS not used
+ {
+ if( insize < 0 )
+ { set_errno_error( "Input file is not seekable: " ); return; }
+ if( insize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ if( insize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+
+ const int files = infd_vector.size();
+ Lzip_header header;
+ bool done = false;
+ for( int i = 0; i < files && !done; ++i ) // stop at the first file with valid magic and version at offset 0
+ {
+ const int infd = infd_vector[i];
+ if( !read_header( infd, header, 0 ) ) return;
+ if( header.check_magic() && header.check_version() ) done = true;
+ }
+ if( !done )
+ { error_ = bad_magic_msg; retval_ = 2; return; }
+
+ long long pos = insize; // always points to a header or to EOF
+ while( pos >= min_member_size ) // one member per iteration, walking backwards
+ {
+ unsigned long long member_size;
+ Lzip_trailer trailer;
+ done = false;
+ for( int it = 0; it < files && !done; ++it ) // try the trailer of each file in turn
+ {
+ const int tfd = infd_vector[it];
+ if( !read_trailer( tfd, trailer, pos ) ) goto error;
+ member_size = trailer.member_size();
+ if( member_size <= (unsigned long long)pos && trailer.check_consistency() )
+ for( int ih = 0; ih < files && !done; ++ih ) // look in every file for a valid header at the start of the member
+ {
+ const int hfd = infd_vector[ih];
+ if( !read_header( hfd, header, pos - member_size ) ) goto error;
+ if( header.check_magic() && header.check_version() ) done = true;
+ }
+ }
+ if( !done )
+ {
+ if( member_vector.empty() ) { --pos; continue; } // maybe trailing data
+ set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 );
+ break;
+ }
+ if( member_vector.empty() && insize > pos ) // bytes remain after the last member found
+ {
+ const int size = std::min( (long long)header.size, insize - pos );
+ for( int i = 0; i < files; ++i )
+ {
+ const int infd = infd_vector[i];
+ if( seek_read( infd, header.data, size, pos ) == size &&
+ header.check_prefix( size ) ) // those bytes look like the start of another header
+ {
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; goto error;
+ }
+ }
+ }
+ pos -= member_size;
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, 0 ) ); // data position 0 is a placeholder fixed up below
+ }
+error: // also reached when the loop above ends normally
+ if( pos != 0 || member_vector.empty() || retval_ != 0 )
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ std::reverse( member_vector.begin(), member_vector.end() ); // members were collected from last to first
+ for( unsigned long i = 0; ; ++i ) // set each data block position to the end of the previous data block
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ if( i + 1 >= member_vector.size() ) break;
+ member_vector[i+1].dblock.pos( end );
+ }
+ }
+
+
+// Return members + gaps [+ trailing data].
+long Lzip_index::blocks( const bool count_tdata ) const
+ {
+ long n = member_vector.size() + ( count_tdata && cdata_size() < file_size() ); // trailing data adds one block when requested and present
+ if( member_vector.size() && member_vector[0].mblock.pos() > 0 ) ++n; // gap before the first member
+ for( unsigned long i = 1; i < member_vector.size(); ++i )
+ if( member_vector[i-1].mblock.end() < member_vector[i].mblock.pos() ) ++n; // gap between two consecutive members
+ return n;
+ }
diff --git a/lzip_index.h b/lzip_index.h
new file mode 100644
index 0000000..95e277d
--- /dev/null
+++ b/lzip_index.h
@@ -0,0 +1,96 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class Lzip_index // index of the members found in a file, with the error state of its construction
+ {
+ struct Member
+ {
+ Block dblock, mblock; // data block, member block
+ unsigned dictionary_size;
+
+ Member( const long long dpos, const long long dsize,
+ const long long mpos, const long long msize,
+ const unsigned dict_size )
+ : dblock( dpos, dsize ), mblock( mpos, msize ),
+ dictionary_size( dict_size ) {}
+
+ bool operator==( const Member & m ) const { return ( mblock == m.mblock ); } // compares mblock only; dblock and dictionary_size are ignored
+ bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
+ };
+
+ // member_vector only contains members with a valid header.
+ // Garbage between members is represented by gaps between mblocks.
+ std::vector< Member > member_vector;
+ std::string error_; // message describing why the index could not be built
+ long long insize; // size of the input file as given to the constructor or found by lseek
+ int retval_; // 0 on success; the error setters use 1 for errno errors and 2 for format errors
+ unsigned dictionary_size_; // largest dictionary size in the file
+
+ bool check_header( const Lzip_header & header, const bool ignore_bad_ds );
+ void set_errno_error( const char * const msg );
+ void set_num_error( const char * const msg, unsigned long long num );
+ bool read_header( const int fd, Lzip_header & header, const long long pos,
+ const bool ignore_marking = true );
+ bool read_trailer( const int fd, Lzip_trailer & trailer,
+ const long long pos );
+ bool skip_gap( const int fd, unsigned long long & pos,
+ const Cl_options & cl_opts,
+ const bool ignore_bad_ds, const bool ignore_gaps );
+
+public:
+ Lzip_index() // a default-constructed index is in error state (retval 2, "No index")
+ : error_( "No index" ), insize( 0 ), retval_( 2 ), dictionary_size_( 0 ) {}
+ Lzip_index( const int infd, const Cl_options & cl_opts,
+ const bool ignore_bad_ds = false, const bool ignore_gaps = false,
+ const long long max_pos = 0 ); // max_pos <= 0 means scan from the end of the file
+ Lzip_index( const std::vector< int > & infd_vector, const long long fsize );
+
+ long members() const { return member_vector.size(); }
+ long blocks( const bool count_tdata ) const; // members + gaps [+ tdata]
+ const std::string & error() const { return error_; }
+ int retval() const { return retval_; }
+ unsigned dictionary_size() const { return dictionary_size_; }
+
+ bool operator==( const Lzip_index & li ) const // equal only if both are error-free, with the same insize and the same member blocks
+ {
+ if( retval_ || li.retval_ || insize != li.insize ||
+ member_vector.size() != li.member_vector.size() ) return false;
+ for( unsigned long i = 0; i < member_vector.size(); ++i )
+ if( member_vector[i] != li.member_vector[i] ) return false;
+ return true;
+ }
+ bool operator!=( const Lzip_index & li ) const { return !( *this == li ); }
+
+ long long udata_size() const // end of the last data block, or 0 if there are no members
+ { if( member_vector.empty() ) return 0;
+ return member_vector.back().dblock.end(); }
+
+ long long cdata_size() const // end of the last member block, or 0 if there are no members
+ { if( member_vector.empty() ) return 0;
+ return member_vector.back().mblock.end(); }
+
+ // total size including trailing data (if any)
+ long long file_size() const
+ { if( insize >= 0 ) return insize; else return 0; }
+
+ const Block & dblock( const long i ) const // i is not range-checked
+ { return member_vector[i].dblock; }
+ const Block & mblock( const long i ) const // i is not range-checked
+ { return member_vector[i].mblock; }
+ unsigned dictionary_size( const long i ) const // i is not range-checked
+ { return member_vector[i].dictionary_size; }
+ };
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000..f82118f
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,1126 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ Exit status: 0 for a normal exit, 1 for environmental problems
+ (file not found, invalid command-line options, I/O errors, etc), 2 to
+ indicate a corrupt or invalid input file, 3 for an internal consistency
+ error (e.g., bug) which caused lziprecover to panic.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <climits> // SSIZE_MAX
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <new>
+#include <string>
+#include <vector>
+#include <fcntl.h>
+#include <stdint.h> // SIZE_MAX
+#include <unistd.h>
+#include <utime.h>
+#include <sys/stat.h>
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
+#include <io.h>
+#if defined __MSVCRT__
+#define fchmod(x,y) 0
+#define fchown(x,y,z) 0
+#define SIGHUP SIGTERM
+#define S_ISSOCK(x) 0
+#ifndef S_IRGRP
+#define S_IRGRP 0
+#define S_IWGRP 0
+#define S_IROTH 0
+#define S_IWOTH 0
+#endif
+#endif
+#if defined __DJGPP__
+#define S_ISSOCK(x) 0
+#define S_ISVTX 0
+#endif
+#endif
+
+#include "arg_parser.h"
+#include "lzip.h"
+#include "decoder.h"
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#if CHAR_BIT != 8
+#error "Environments where CHAR_BIT != 8 are not supported."
+#endif
+
+#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \
+ ( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX )
+#error "Environments where 'size_t' is narrower than 'long' are not supported."
+#endif
+
+/* Return true if 'size' fits in a size_t and also in the widest signed
+   type (long, else int) that is not wider than size_t. */
+bool fits_in_size_t( const unsigned long long size )
+  {
+  const bool long_is_ok = ( sizeof (long) <= sizeof (size_t) );
+  const bool int_is_ok = ( sizeof (int) <= sizeof (size_t) );
+  if( long_is_ok && size <= LONG_MAX ) return true;
+  return int_is_ok && size <= INT_MAX;
+  }
+
+// Message level: -1 after '-q' (quiet), incremented by each '-v' up to 4.
+int verbosity = 0;
+
+// Name used as prefix in diagnostics.
+const char * const program_name = "lziprecover";
+std::string output_filename; // global vars for output file
+int outfd = -1; // see 'delete_output_on_interrupt' below
+
+namespace {
+
+const char * invocation_name = program_name; // default value
+
+/* Extensions recognized at the end of a file name ('from') and the text
+   that replaces them in the output name ('to'); see set_d_outname.
+   The table ends with a null entry. */
+const struct { const char * from; const char * to; } known_extensions[] = {
+ { ".lz", "" },
+ { ".tlz", ".tar" },
+ { 0, 0 } };
+
+/* Operation selected on the command line. m_none means that no operation
+   has been chosen yet; set_mode rejects a second, different operation. */
+enum Mode { m_none, m_alone_to_lz, m_byte_repair, m_clear_marking,
+ m_debug_byte_repair, m_debug_decompress, m_debug_delay,
+ m_decompress, m_dump, m_list, m_md5sum, m_merge, m_nrep_stats,
+ m_range_dec, m_remove, m_reproduce, m_show_packets, m_split,
+ m_strip, m_test, m_unzcrash_bit, m_unzcrash_block };
+
+/* Variable used in signal handler context.
+ It is not declared volatile because the handler never returns. */
+bool delete_output_on_interrupt = false;
+
+
+/* Print to stdout the program description, the usage line (using
+   invocation_name), the list of options, and the notes about file names,
+   number multipliers, and exit status.
+   The debug options are listed only if verbosity >= 1. */
+void show_help()
+ {
+ std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
+ "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
+ "files (up to one single-byte error per member), produce a correct file by\n"
+ "merging the good parts of two or more damaged copies, reproduce a missing\n"
+ "(zeroed) sector using a reference file, extract data from damaged files,\n"
+ "decompress files, and test integrity of files.\n"
+ "\nWith the help of lziprecover, losing an entire archive just because of a\n"
+ "corrupt byte near the beginning is a thing of the past.\n"
+ "\nLziprecover can remove the damaged members from multimember files, for\n"
+ "example multimember tar.lz archives.\n"
+ "\nLziprecover provides random access to the data in multimember files; it only\n"
+ "decompresses the members containing the desired data.\n"
+ "\nLziprecover facilitates the management of metadata stored as trailing data\n"
+ "in lzip files.\n"
+ "\nLziprecover is not a replacement for regular backups, but a last line of\n"
+ "defense for the case where the backups are also damaged.\n"
+ "\nUsage: %s [options] [files]\n", invocation_name );
+ std::printf( "\nOptions:\n"
+ " -h, --help display this help and exit\n"
+ " -V, --version output version information and exit\n"
+ " -a, --trailing-error exit with error status if trailing data\n"
+ " -A, --alone-to-lz convert lzma-alone files to lzip format\n"
+ " -c, --stdout write to standard output, keep input files\n"
+ " -d, --decompress decompress, test compressed file integrity\n"
+ " -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
+ " -e, --reproduce try to reproduce a zeroed sector in file\n"
+ " --lzip-level=N|a|m[N] reproduce one level, all, or match length\n"
+ " --lzip-name=<name> name of lzip executable for --reproduce\n"
+ " --reference-file=<file> reference file for --reproduce\n"
+ " -f, --force overwrite existing output files\n"
+ " -i, --ignore-errors ignore some errors in -d, -D, -l, -t, --dump\n"
+ " -k, --keep keep (don't delete) input files\n"
+ " -l, --list print (un)compressed file sizes\n"
+ " -m, --merge repair errors in file using several copies\n"
+ " -o, --output=<file> place the output into <file>\n"
+ " -q, --quiet suppress all messages\n"
+ " -R, --byte-repair try to repair a corrupt byte in file\n"
+ " -s, --split split multimember file in single-member files\n"
+ " -t, --test test compressed file integrity\n"
+ " -v, --verbose be verbose (a 2nd -v gives more)\n"
+ " --dump=<list>:d:e:t dump members, damaged/empty, tdata to stdout\n"
+ " --remove=<list>:d:e:t remove members, tdata from files in place\n"
+ " --strip=<list>:d:e:t copy files to stdout stripping members given\n"
+ " --empty-error exit with error status if empty member in file\n"
+ " --marking-error exit with error status if 1st LZMA byte not 0\n"
+ " --loose-trailing allow trailing data seeming corrupt header\n"
+ " --clear-marking reset the first LZMA byte of each member\n" );
+ // the options below are meant for debugging and are shown only with '-v'
+ if( verbosity >= 1 )
+ {
+ std::printf( "\nDebug options for experts:\n"
+ " -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
+ " -M, --md5sum print the MD5 digests of the input files\n"
+ " -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
+ " -U, --unzcrash=1|B<size> test 1-bit or block errors in input file\n"
+ " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
+ " -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
+ " -Y, --debug-delay=<range> find max error detection delay in <range>\n"
+ " -Z, --debug-byte-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
+ }
+ std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
+ "from standard input to standard output.\n"
+ "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+ "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
+ "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
+ "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
+ "\nExit status: 0 for a normal exit, 1 for environmental problems\n"
+ "(file not found, invalid command-line options, I/O errors, etc), 2 to\n"
+ "indicate a corrupt or invalid input file, 3 for an internal consistency\n"
+ "error (e.g., bug) which caused lziprecover to panic.\n"
+ "\nReport bugs to lzip-bug@nongnu.org\n"
+ "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
+ }
+
+} // end namespace
+
+/* Print 'msg' followed by a newline to 'f'. The first call after a reset
+   prints the padded file name first; if 'msg' is null, only the name is
+   printed and 'f' is flushed. Nothing is printed in quiet mode. */
+void Pretty_print::operator()( const char * const msg, FILE * const f ) const
+  {
+  if( verbosity >= 0 )
+    {
+    if( first_post )
+      {
+      first_post = false;
+      std::fputs( padded_name.c_str(), f );
+      if( msg == 0 ) std::fflush( f );
+      }
+    if( msg != 0 ) std::fprintf( f, "%s\n", msg );
+    }
+  }
+
+
+/* Return a message stating that member format 'version' is not supported.
+   The message is stored in a static buffer that is overwritten by the next
+   call. */
+const char * bad_version( const unsigned version )
+  {
+  enum { msg_size = 80 };
+  static char msg[msg_size];
+  snprintf( msg, msg_size, "Version %u member format not supported.", version );
+  return msg;
+  }
+
+
+/* Format 'dictionary_size' as a number of up to 4 digits followed by a unit
+   (B, KiB, MiB, or GiB). The size is divided by 1024 while it has more than
+   4 digits, or while it is an exact multiple of 1024.
+   The result is stored in a static buffer overwritten by the next call. */
+const char * format_ds( const unsigned dictionary_size )
+  {
+  enum { bufsize = 16, kibi = 1024, units = 3 };
+  static const char * const unit[units] = { "Ki", "Mi", "Gi" };
+  static char buf[bufsize];
+  unsigned value = dictionary_size;
+  bool divisible = ( value % kibi == 0 );
+  int used = 0;			// number of divisions by 1024 performed
+
+  while( used < units && ( value > 9999 || ( divisible && value >= kibi ) ) )
+    {
+    value /= kibi;
+    divisible = divisible && ( value % kibi == 0 );
+    ++used;
+    }
+  // plain bytes are padded on the left to make up for the missing prefix
+  if( used == 0 ) snprintf( buf, bufsize, "%s%4u %sB", " ", value, "" );
+  else snprintf( buf, bufsize, "%s%4u %sB", "", value, unit[used-1] );
+  return buf;
+  }
+
+
+// Print the formatted dictionary size to stderr, without a newline.
+void show_header( const unsigned dictionary_size )
+  {
+  const char * const ds = format_ds( dictionary_size );
+  std::fprintf( stderr, "dict %s, ", ds );
+  }
+
+
+#include "main_common.cc"
+
+
+// Colon-separated list of "damaged", "empty", "tdata", [r][^]<list> (1 1,3-5)
+/* Parse 'arg' and record the selection in this Member_list:
+   the keywords set 'damaged', 'empty', or 'tdata' and also the matching
+   ignore_* flag in 'cl_opts'; number lists are appended as Blocks to
+   range_vector, or to rrange_vector if prefixed by 'r'; a '^' clears 'in'
+   (or 'rin' for an 'r' list).
+   On an invalid list, show_option_error is called and the program exits
+   with status 1. */
+void Member_list::parse_ml( const char * const arg,
+ const char * const option_name,
+ Cl_options & cl_opts )
+ {
+ const char * p = arg; // points to current char
+ while( true )
+ {
+ const char * tp = p; // points to terminator (':' or '\0')
+ while( *tp && *tp != ':' ) ++tp;
+ const unsigned len = tp - p;
+ if( std::islower( *(const unsigned char *)p ) )
+ {
+ // comparing only 'len' chars accepts any abbreviation of a keyword
+ if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
+ { damaged = true; cl_opts.ignore_errors = true; goto next; }
+ if( len <= 5 && std::strncmp( "empty", p, len ) == 0 )
+ { empty = true; cl_opts.ignore_empty = true; goto next; }
+ if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
+ { tdata = true; cl_opts.ignore_trailing = true; goto next; }
+ }
+ {
+ const bool reverse = ( *p == 'r' );
+ if( reverse ) ++p;
+ if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
+ std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
+ while( std::isdigit( *(const unsigned char *)p ) )
+ {
+ const char * tail;
+ // the number read is at least 1; 'pos' is stored 0-based
+ // NOTE(review): assumes getnum leaves 'tail' just past the number -- confirm
+ const long pos = getnum( p, option_name, 0, 1, LONG_MAX, &tail ) - 1;
+ // a position before the end of the previous block is rejected
+ if( rvp->size() && pos < rvp->back().end() ) break;
+ const long size = (*tail == '-') ?
+ getnum( tail + 1, option_name, 0, pos + 1, LONG_MAX, &tail ) - pos : 1;
+ rvp->push_back( Block( pos, size ) );
+ if( tail == tp ) goto next;
+ if( *tail == ',' ) p = tail + 1; else break;
+ }
+ }
+ // reached when the item is neither a keyword nor a valid list
+ show_option_error( arg, "Invalid list of members in", option_name );
+ std::exit( 1 );
+next:
+ // skip the ':' and parse the next item, or return at the end of 'arg'
+ if( *(p = tp) != 0 ) ++p; else return;
+ }
+ }
+
+
+namespace {
+
+/* Recognized formats: <digit> 'a' m[<match_length>]
+   Return the first character of 'arg' for a digit or 'a', -1 for a bare
+   'm', or the negated match length for 'm<match_length>'.
+   Any other argument is reported as an error and the program exits with
+   status 1. */
+int parse_lzip_level( const char * const arg, const char * const option_name )
+  {
+  const unsigned char first = *(const unsigned char *)arg;
+  if( first == 'm' )
+    {
+    if( arg[1] == 0 ) return -1;		// all match lengths
+    return -getnum( arg + 1, option_name, 0, min_match_len_limit,
+                    max_match_len );
+    }
+  if( first == 'a' || std::isdigit( first ) ) return *arg;
+  show_option_error( arg, "Invalid argument in", option_name );
+  std::exit( 1 );
+  }
+
+
+/* Recognized format: <range>[,<sector_size>]
+ range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
+ Return a pointer to the byte following the bytes parsed.
+ The range parsed is stored in 'range'. A sector size is parsed (into
+ '*sector_sizep') only if 'sector_sizep' is not null.
+ On an invalid range an error is shown and the program exits with status 1.
+*/
+const char * parse_range( const char * const arg, const char * const pn,
+ Block & range, int * const sector_sizep = 0 )
+ {
+ const char * tail = arg;
+ // a leading ',' means that <begin> is omitted and taken as 0
+ long long value =
+ ( arg[0] == ',' ) ? 0 : getnum( arg, pn, 0, 0, INT64_MAX - 1, &tail );
+ if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' || tail[0] == ':' )
+ {
+ range.pos( value );
+ // only <begin> given: the range extends up to INT64_MAX
+ if( tail[0] == 0 || tail[0] == ':' )
+ { range.size( INT64_MAX - value ); return tail; }
+ const bool is_size = ( tail[0] == ',' );
+ // an empty second field followed by ',' (only when a sector size is
+ // allowed) also means 'up to INT64_MAX'
+ if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
+ else value = getnum( tail + 1, pn, 0, 1, INT64_MAX, &tail ); // size
+ if( !is_size && value <= range.pos() )
+ { show_option_error( arg, "Begin must be < end in", pn ); std::exit( 1 ); }
+ if( !is_size ) value -= range.pos(); // size = end - pos
+ // accept the range only if pos + size does not exceed INT64_MAX
+ if( INT64_MAX - value >= range.pos() )
+ {
+ range.size( value );
+ if( sector_sizep && tail[0] == ',' )
+ *sector_sizep = getnum( tail + 1, pn, 0, 8, INT_MAX, &tail );
+ return tail;
+ }
+ }
+ show_option_error( arg, "Invalid decompression range in", pn );
+ std::exit( 1 );
+ }
+
+
+// Show an error and exit with status 1 unless exactly one file was given.
+void one_file( const int files )
+  {
+  if( files == 1 ) return;
+  show_error( "You must specify exactly 1 file.", 0, true );
+  std::exit( 1 );
+  }
+
+// Show an error and exit with status 1 if no file was given.
+void at_least_one_file( const int files )
+  {
+  if( files >= 1 ) return;
+  show_error( "You must specify at least 1 file.", 0, true );
+  std::exit( 1 );
+  }
+
+
+/* Select 'new_mode' as the operation to perform. Selecting the same
+   operation again is allowed; selecting a different one is an error that
+   terminates the program with status 1. */
+void set_mode( Mode & program_mode, const Mode new_mode )
+  {
+  const bool conflict =
+    ( program_mode != m_none && program_mode != new_mode );
+  if( conflict )
+    {
+    show_error( "Only one operation can be specified.", 0, true );
+    std::exit( 1 );
+    }
+  program_mode = new_mode;
+  }
+
+
+/* Parse the argument of option '--unzcrash'.
+   "1" selects the 1-bit error test. "B<size>" selects the block error test
+   and stores <size> in 'sector_size'. Anything else is reported as an
+   error and the program exits with status 1. */
+void parse_u( const char * const arg, const char * const option_name,
+              Mode & program_mode, int & sector_size )
+  {
+  switch( arg[0] )
+    {
+    case '1': set_mode( program_mode, m_unzcrash_bit ); break;
+    case 'B': set_mode( program_mode, m_unzcrash_block );
+              sector_size = getnum( arg + 1, option_name, 0, 1, INT_MAX );
+              break;
+    default: show_option_error( arg, "Invalid argument in", option_name );
+             std::exit( 1 );
+    }
+  }
+
+
+/* Return the index in 'known_extensions' of the extension that 'name' ends
+   with, or -1 if none matches. A name consisting of just the extension
+   does not match. */
+int extension_index( const std::string & name )
+  {
+  for( int i = 0; known_extensions[i].from != 0; ++i )
+    {
+    const char * const ext = known_extensions[i].from;
+    const std::string::size_type ext_len = std::strlen( ext );
+    if( ext_len < name.size() &&
+        name.compare( name.size() - ext_len, ext_len, ext ) == 0 ) return i;
+    }
+  return -1;
+  }
+
+
+/* Set 'output_filename' from 'name' for the conversion to lzip format:
+   ".lzma" becomes ".lz", ".tlz" becomes ".tar.lz", a name already ending
+   in ".lz" is kept, and any other name gets ".lz" appended. */
+void set_a_outname( const std::string & name )
+  {
+  const std::string::size_type len = name.size();
+  output_filename = name;
+  if( len > 5 && name.compare( len - 5, 5, ".lzma" ) == 0 )
+    { output_filename.erase( len - 2 ); return; }	// drop "ma"
+  if( len > 4 && name.compare( len - 4, 4, ".tlz" ) == 0 )
+    { output_filename.insert( len - 2, "ar." ); return; }
+  const bool has_lz = ( len > 3 && name.compare( len - 3, 3, ".lz" ) == 0 );
+  if( !has_lz ) output_filename += known_extensions[0].from;
+  }
+
+
+/* Set 'output_filename' to the decompressed name of 'name'.
+   If 'eindex' is a valid index in 'known_extensions', the extension is
+   replaced as the table says. Else ".out" is appended, and a notice is
+   printed if verbosity >= 1. */
+void set_d_outname( const std::string & name, const int eindex )
+  {
+  if( eindex >= 0 )
+    {
+    const std::string::size_type ext_len =
+      std::strlen( known_extensions[eindex].from );
+    if( name.size() > ext_len )
+      {
+      output_filename = name.substr( 0, name.size() - ext_len );
+      output_filename.append( known_extensions[eindex].to );
+      return;
+      }
+    }
+  output_filename = name + ".out";
+  if( verbosity < 1 ) return;
+  std::fprintf( stderr, "%s: %s: Can't guess original name -- using '%s'\n",
+                program_name, name.c_str(), output_filename.c_str() );
+  }
+
+} // end namespace
+
+/* Open 'name' for reading and fill '*in_statsp' with its fstat data.
+   Regular files are always accepted. Block and character devices, FIFOs,
+   and sockets are accepted only if both 'reg_only' and 'one_to_one' are
+   false.
+   Return the file descriptor, or -1 after reporting the problem (the
+   "not a regular file" diagnostic is omitted in quiet mode).
+*/
+int open_instream( const char * const name, struct stat * const in_statsp,
+                   const bool one_to_one, const bool reg_only )
+  {
+  int infd = open( name, O_RDONLY | O_BINARY );
+  if( infd < 0 )
+    { show_file_error( name, "Can't open input file", errno ); return infd; }
+
+  // If fstat fails '*in_statsp' is not filled in, so st_mode must not be
+  // read; an empty mode makes every file type test below fail.
+  const bool stat_ok = ( fstat( infd, in_statsp ) == 0 );
+  const mode_t mode = stat_ok ? in_statsp->st_mode : 0;
+  const bool can_read = ( stat_ok && !reg_only &&
+                          ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
+                            S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
+  if( !stat_ok || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: %s: Input file is not a regular file%s.\n",
+                    program_name, name, ( can_read && one_to_one ) ?
+                    ",\n and neither '-c' nor '-o' were specified" : "" );
+    close( infd );
+    infd = -1;
+    }
+  return infd;
+  }
+
+
+/* Open the regular file 'name' for reading and writing and fill
+   '*in_statsp' with its fstat data.
+   Return the file descriptor, or -1 after showing an error if the file
+   can't be opened or is not a regular file.
+*/
+int open_truncable_stream( const char * const name,
+                           struct stat * const in_statsp )
+  {
+  int fd = open( name, O_RDWR | O_BINARY );
+  if( fd < 0 )
+    { show_file_error( name, "Can't open input file", errno ); return fd; }
+
+  // st_mode is read only if fstat succeeded and filled '*in_statsp'
+  if( fstat( fd, in_statsp ) != 0 || !S_ISREG( in_statsp->st_mode ) )
+    { show_file_error( name, "Not a regular file." ); close( fd ); fd = -1; }
+  return fd;
+  }
+
+namespace {
+
+/* Create the directories leading to file 'name' that do not exist yet.
+   The last component of 'name' (the file itself) is not created.
+   Return true on success. Return false if a component exists but is not a
+   directory (errno is set to ENOTDIR) or if mkdir fails with an error other
+   than EEXIST. */
+bool make_dirs( const std::string & name )
+ {
+ int i = name.size();
+ while( i > 0 && name[i-1] != '/' ) --i; // remove last component
+ while( i > 0 && name[i-1] == '/' ) --i; // remove slash(es)
+ const int dirsize = i; // size of dirname without trailing slash(es)
+
+ for( i = 0; i < dirsize; ) // if dirsize == 0, dirname is '/' or empty
+ {
+ while( i < dirsize && name[i] == '/' ) ++i;
+ const int first = i;
+ while( i < dirsize && name[i] != '/' ) ++i;
+ if( first < i )
+ {
+ // 'partial' is the path from the start of 'name' up to this component
+ const std::string partial( name, 0, i );
+ const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+ struct stat st;
+ if( stat( partial.c_str(), &st ) == 0 )
+ { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } }
+ else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST )
+ return false; // if EEXIST, another process created the dir
+ }
+ }
+ return true;
+ }
+
+// Diagnostic shown when the output file exists and '--force' was not given.
+const char * const force_msg =
+ "Output file already exists. Use '--force' to overwrite it.";
+
+} // end namespace
+
+/* Create the file 'output_filename' and store its descriptor in 'outfd'.
+   force:    truncate an existing file instead of failing (O_TRUNC vs O_EXCL)
+   protect:  create the file readable and writable by the owner only
+   rw:       open for reading and writing instead of write-only
+   skipping: selects the message shown if the file already exists
+   to_file:  together with 'protect', decides whether the intermediate
+             directories are created
+   On success, set 'delete_output_on_interrupt' and return true.
+   On failure, show an error and return false with 'outfd' set to -1. */
+bool open_outstream( const bool force, const bool protect,
+ const bool rw, const bool skipping, const bool to_file )
+ {
+ const mode_t usr_rw = S_IRUSR | S_IWUSR;
+ const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
+ const mode_t outfd_mode = protect ? usr_rw : all_rw;
+ int flags = O_CREAT | ( rw ? O_RDWR : O_WRONLY ) | O_BINARY;
+ if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
+
+ outfd = -1;
+ // a name ending in '/' can't be a file; report it as EISDIR below
+ if( output_filename.size() &&
+ output_filename[output_filename.size()-1] == '/' ) errno = EISDIR;
+ else {
+ if( ( !protect || to_file ) && !make_dirs( output_filename ) )
+ { show_file_error( output_filename.c_str(),
+ "Error creating intermediate directory", errno ); return false; }
+ outfd = open( output_filename.c_str(), flags, outfd_mode );
+ if( outfd >= 0 ) { delete_output_on_interrupt = true; return true; }
+ if( errno == EEXIST )
+ { show_file_error( output_filename.c_str(), skipping ?
+ "Output file already exists, skipping." : force_msg ); return false; }
+ }
+ // reached with errno set either to EISDIR above or by the failed open
+ show_file_error( output_filename.c_str(), "Can't create output file", errno );
+ return false;
+ }
+
+
+/* Return true, after showing 'force_msg', if stat succeeds on
+   'output_filename'. Else return false silently. */
+bool output_file_exists()
+  {
+  struct stat st;
+  const bool exists = ( stat( output_filename.c_str(), &st ) == 0 );
+  if( exists ) show_file_error( output_filename.c_str(), force_msg );
+  return exists;
+  }
+
+
+// Install 'action' as the handler of SIGHUP, SIGINT, and SIGTERM.
+void set_signals( void (*action)(int) )
+  {
+  const int signums[] = { SIGHUP, SIGINT, SIGTERM };
+  const unsigned count = sizeof signums / sizeof signums[0];
+  for( unsigned i = 0; i < count; ++i ) std::signal( signums[i], action );
+  }
+
+
+/* Terminate the program with exit status 'retval'.
+   If an output file is being written ('delete_output_on_interrupt' is set),
+   close it and delete it first. This function does not return; it is also
+   called from signal_handler. */
+void cleanup_and_fail( const int retval )
+ {
+ set_signals( SIG_IGN ); // ignore signals
+ if( delete_output_on_interrupt )
+ {
+ // clear the flag first so that the deletion is attempted only once
+ delete_output_on_interrupt = false;
+ show_file_error( output_filename.c_str(),
+ "Deleting output file, if it exists." );
+ if( outfd >= 0 ) { close( outfd ); outfd = -1; }
+ // a file that does not exist (ENOENT) is not worth a warning
+ if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
+ show_error( "warning: deletion of output file failed", errno );
+ }
+ std::exit( retval );
+ }
+
+
+/* Return true if 'outfd' is not a terminal. Else show an error naming the
+   output file, or "(stdout)" if 'output_filename' is empty, and return
+   false. */
+bool check_tty_out()
+  {
+  if( !isatty( outfd ) ) return true;
+  const char * const name =
+    output_filename.empty() ? "(stdout)" : output_filename.c_str();
+  show_file_error( name, "I won't write compressed data to a terminal." );
+  return false;
+  }
+
+namespace {
+
+/* Handler installed by set_signal_handler. It shows a message and calls
+   cleanup_and_fail( 1 ), which ends the program, so it never returns. */
+extern "C" void signal_handler( int )
+ {
+ show_error( "Control-C or similar caught, quitting." );
+ cleanup_and_fail( 1 );
+ }
+
+
+/* Return true if 'infd' is not a terminal. Else show an error, close
+   'infd', and raise 'retval' to 2 through set_retval. Then return false
+   when testing; in any other mode the program is terminated through
+   cleanup_and_fail. */
+bool check_tty_in( const char * const input_filename, const int infd,
+                   const Mode program_mode, int & retval )
+  {
+  if( !isatty( infd ) ) return true;	// all modes read compressed data
+  show_file_error( input_filename,
+                   "I won't read compressed data from a terminal." );
+  close( infd );
+  set_retval( retval, 2 );
+  if( program_mode != m_test ) cleanup_and_fail( retval );
+  return false;
+  }
+
+// Only mode 'm_alone_to_lz' needs the terminal check of ::check_tty_out.
+bool check_tty_out( const Mode program_mode )
+  {
+  if( program_mode != m_alone_to_lz ) return true;
+  return ::check_tty_out();
+  }
+
+
+// Set permissions, owner, and times.
+/* Close the output file 'outfd'. If 'in_statsp' is not null, first copy to
+   the output file the owner, group, and mode found in '*in_statsp', and
+   after closing copy the access and modification times.
+   A failure to close is fatal (cleanup_and_fail( 1 )). A failure to copy
+   the attributes only produces a warning, shown if verbosity >= 1. */
+void close_and_set_permissions( const struct stat * const in_statsp )
+ {
+ bool warning = false;
+ if( in_statsp )
+ {
+ const mode_t mode = in_statsp->st_mode;
+ // fchown in many cases returns with EPERM, which can be safely ignored.
+ if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
+ { if( fchmod( outfd, mode ) != 0 ) warning = true; }
+ else
+ // the owner could not be set: drop the set-id and sticky bits
+ if( errno != EPERM ||
+ fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
+ warning = true;
+ }
+ if( close( outfd ) != 0 )
+ { show_file_error( output_filename.c_str(), "Error closing output file",
+ errno ); cleanup_and_fail( 1 ); }
+ outfd = -1;
+ // the file is complete; it must not be deleted on interrupt anymore
+ delete_output_on_interrupt = false;
+ if( in_statsp )
+ {
+ struct utimbuf t;
+ t.actime = in_statsp->st_atime;
+ t.modtime = in_statsp->st_mtime;
+ if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
+ }
+ if( warning && verbosity >= 1 )
+ show_file_error( output_filename.c_str(),
+ "warning: can't change output file attributes", errno );
+ }
+
+
+// Return the uppercase hex digit for 'value' (0 to 15), or 0 if out of range.
+unsigned char xdigit( const unsigned value )
+  {
+  static const char digits[] = "0123456789ABCDEF";
+  return ( value < 16 ) ? digits[value] : 0;
+  }
+
+
+/* Show the 'size' bytes at 'data' in hexadecimal and as quoted text, with
+   non-printable bytes shown as '.'. If 'all' is false the message says that
+   these are just the first bytes of the trailing data.
+   'ignore_trailing': > 0 ignore (the data are shown only if
+   verbosity >= 4), 0 show the data and 'trailing_msg', -1 just show.
+   Return true if the trailing data are to be ignored. */
+bool show_trailing_data( const uint8_t * const data, const int size,
+                         const Pretty_print & pp, const bool all,
+                         const int ignore_trailing )
+  {
+  const bool ignored = ( ignore_trailing > 0 );
+  if( ignored && verbosity < 4 ) return true;		// nothing to show
+
+  std::string hex, text;
+  for( int i = 0; i < size; ++i )
+    {
+    const uint8_t byte = data[i];
+    hex += xdigit( byte >> 4 );
+    hex += xdigit( byte & 0x0F );
+    hex += ' ';
+    if( std::isprint( byte ) ) text += byte; else text += '.';
+    }
+  std::string msg( all ? "" : "first bytes of " );
+  msg += "trailing data = ";
+  msg += hex; msg += '\''; msg += text; msg += '\'';
+  pp( msg.c_str() );
+  if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
+  return ignored;
+  }
+
+
+/* Decompress (or test) all the members read from 'infd', writing the
+   decoded data through an LZ_decoder created on 'outfd'.
+   'cfile_size' is only passed on to show_dprogress. 'pp' prints the
+   diagnostics. 'testing' selects "ok" instead of "done" as final message.
+   Return 0 on success, or 2 if the input is corrupt or invalid. With
+   'cl_opts.ignore_errors' the loop continues after most errors and the
+   value returned is 0 even if errors were found. */
+int decompress( const unsigned long long cfile_size, const int infd,
+ const Cl_options & cl_opts, const Pretty_print & pp,
+ const bool testing )
+ {
+ unsigned long long partial_file_pos = 0;
+ Range_decoder rdec( infd );
+ int retval = 0;
+
+ for( bool first_member = true; ; first_member = false )
+ {
+ Lzip_header header;
+ rdec.reset_member_position();
+ const int size = rdec.read_header_carefully( header, cl_opts.ignore_errors );
+ // no complete header could be read (nor found by find_header)
+ if( rdec.finished() || // End Of File
+ ( size < header.size && !rdec.find_header( header ) ) )
+ {
+ if( first_member )
+ { show_file_error( pp.name(), "File ends unexpectedly at member header." );
+ retval = 2; }
+ else if( header.check_prefix( size ) )
+ { pp( "Truncated header in multimember file." );
+ show_trailing_data( header.data, size, pp, true, -1 ); retval = 2; }
+ else if( size > 0 && !show_trailing_data( header.data, size, pp, true,
+ cl_opts.ignore_trailing ) ) retval = 2;
+ break;
+ }
+ // after the first member, a bad magic means trailing data
+ if( !header.check_magic() )
+ {
+ if( first_member )
+ { show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
+ else if( !cl_opts.loose_trailing && header.check_corrupt() )
+ { pp( corrupt_mm_msg );
+ show_trailing_data( header.data, size, pp, false, -1 ); retval = 2; }
+ else if( !show_trailing_data( header.data, size, pp, false,
+ cl_opts.ignore_trailing ) ) retval = 2;
+ if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break;
+ }
+ if( !header.check_version() )
+ { pp( bad_version( header.version() ) ); retval = 2;
+ if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; }
+ const unsigned dictionary_size = header.dictionary_size();
+ if( !isvalid_ds( dictionary_size ) )
+ { pp( bad_dict_msg ); retval = 2;
+ if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break; }
+
+ if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();
+
+ LZ_decoder decoder( rdec, dictionary_size, outfd );
+ show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
+ const int result = decoder.decode_member( cl_opts, pp );
+ partial_file_pos += rdec.member_position();
+ if( result != 0 )
+ {
+ retval = 2;
+ // result 2 is reported as an unexpected end of file and any other
+ // result <= 2 as a decoder error; results 5 and 6 mean an empty
+ // member and a marked member, and stop the loop
+ if( verbosity >= 0 && result <= 2 )
+ {
+ pp();
+ std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
+ "File ends unexpectedly" : "Decoder error",
+ partial_file_pos );
+ }
+ else if( result == 5 ) { pp( empty_msg ); break; }
+ else if( result == 6 ) { pp( marking_msg ); break; }
+ if( cl_opts.ignore_errors ) { pp.reset(); continue; } else break;
+ }
+ if( verbosity >= 2 )
+ { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
+ }
+ if( verbosity == 1 && retval == 0 )
+ std::fputs( testing ? "ok\n" : "done\n", stderr );
+ // errors found while ignoring errors are not reported in the status
+ if( retval == 2 && cl_opts.ignore_errors ) retval = 0;
+ return retval;
+ }
+
+} // end namespace
+
+// Install signal_handler for the signals set by set_signals.
+void set_signal_handler() { set_signals( signal_handler ); }
+
+/* Close the output. If an output file is open
+   ('delete_output_on_interrupt' is set), it is closed through
+   close_and_set_permissions, which receives 'in_statsp'. Any descriptor
+   still open in 'outfd' is closed after that.
+   Return false, after showing an error, if this last close fails. */
+bool close_outstream( const struct stat * const in_statsp )
+  {
+  if( delete_output_on_interrupt ) close_and_set_permissions( in_statsp );
+  const bool failed = ( outfd >= 0 && close( outfd ) != 0 );
+  if( failed ) show_error( "Error closing stdout", errno );
+  else outfd = -1;
+  return !failed;
+  }
+
+
+/* Return 'name' with "_fixed" inserted before its extension (".tar.lz",
+   ".lz", or ".tlz", tried in this order). If 'name' has none of these
+   extensions, or consists of just the extension, "_fixed.lz" is appended. */
+std::string insert_fixed( std::string name )
+  {
+  static const char * const tails[] = { ".tar.lz", ".lz", ".tlz" };
+  for( int i = 0; i < 3; ++i )
+    {
+    const std::string::size_type tlen = std::strlen( tails[i] );
+    if( name.size() > tlen &&
+        name.compare( name.size() - tlen, tlen, tails[i] ) == 0 )
+      { name.insert( name.size() - tlen, "_fixed" ); return name; }
+    }
+  name += "_fixed.lz";
+  return name;
+  }
+
+
+/* Print to stderr a diagnostic of the form
+   "program_name: msg1 'name1' and 'name2' msg2", unless in quiet mode. */
+void show_2file_error( const char * const msg1, const char * const name1,
+                       const char * const name2, const char * const msg2 )
+  {
+  if( verbosity < 0 ) return;
+  std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n",
+                program_name, msg1, name1, name2, msg2 );
+  }
+
+
+/* Show the decompression progress on stderr.
+   A call with a non-null 'p' (re)initializes the static state with the
+   arguments given. Later calls may pass null pointers; they just update
+   the display using the decoder and printer remembered.
+   The display is disabled for the rest of the run if, at initialization,
+   verbosity < 2 or stderr is not a terminal. */
+void show_dprogress( const unsigned long long cfile_size,
+ const unsigned long long partial_size,
+ const Range_decoder * const d,
+ const Pretty_print * const p )
+ {
+ static unsigned long long csize = 0; // file_size / 100
+ static unsigned long long psize = 0;
+ static const Range_decoder * rdec = 0;
+ static const Pretty_print * pp = 0;
+ static int counter = 0;
+ static bool enabled = true;
+
+ if( !enabled ) return;
+ if( p ) // initialize static vars
+ {
+ if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
+ csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0;
+ }
+ if( rdec && pp && --counter <= 0 )
+ {
+ // position in the file = data of previous members + current member
+ const unsigned long long pos = psize + rdec->member_position();
+ counter = 7; // update display every 114688 bytes
+ // the percentage is shown only if the (scaled) file size is known
+ if( csize > 0 )
+ std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
+ else
+ std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
+ pp->reset(); (*pp)(); // restore cursor position
+ }
+ }
+
+
+int main( const int argc, const char * const argv[] )
+ {
+ Block range( 0, 0 );
+ int sector_size = INT_MAX; // default larger than practical range
+ Bad_byte bad_byte;
+ Member_list member_list;
+ std::string default_output_filename;
+ const char * lzip_name = "lzip"; // default is lzip
+ const char * reference_filename = 0;
+ Mode program_mode = m_none;
+ int lzip_level = 0; // 0 = test all levels and match lengths
+ // '0'..'9' = level, 'a' = all levels
+ // -5..-273 = match length, -1 = all lengths
+ int repeated_byte = -1; // 0 to 255, or -1 for all values
+ Cl_options cl_opts; // command-line options
+ bool force = false;
+ bool keep_input_files = false;
+ bool to_stdout = false;
+ if( argc > 0 ) invocation_name = argv[0];
+
+ enum { opt_cm = 256, opt_du, opt_eer, opt_lt, opt_lzl, opt_lzn, opt_mer,
+ opt_ref, opt_rem, opt_st };
+ const Arg_parser::Option options[] =
+ {
+ { 'a', "trailing-error", Arg_parser::no },
+ { 'A', "alone-to-lz", Arg_parser::no },
+ { 'c', "stdout", Arg_parser::no },
+ { 'd', "decompress", Arg_parser::no },
+ { 'D', "range-decompress", Arg_parser::yes },
+ { 'e', "reproduce", Arg_parser::no },
+ { 'E', "debug-reproduce", Arg_parser::yes },
+ { 'f', "force", Arg_parser::no },
+ { 'h', "help", Arg_parser::no },
+ { 'i', "ignore-errors", Arg_parser::no },
+ { 'k', "keep", Arg_parser::no },
+ { 'l', "list", Arg_parser::no },
+ { 'm', "merge", Arg_parser::no },
+ { 'M', "md5sum", Arg_parser::no },
+ { 'n', "threads", Arg_parser::yes },
+ { 'o', "output", Arg_parser::yes },
+ { 'q', "quiet", Arg_parser::no },
+ { 'R', "byte-repair", Arg_parser::no },
+ { 'R', "repair", Arg_parser::no },
+ { 's', "split", Arg_parser::no },
+ { 'S', "nrep-stats", Arg_parser::maybe },
+ { 't', "test", Arg_parser::no },
+ { 'U', "unzcrash", Arg_parser::yes },
+ { 'v', "verbose", Arg_parser::no },
+ { 'V', "version", Arg_parser::no },
+ { 'W', "debug-decompress", Arg_parser::yes },
+ { 'X', "show-packets", Arg_parser::maybe },
+ { 'Y', "debug-delay", Arg_parser::yes },
+ { 'Z', "debug-byte-repair", Arg_parser::yes },
+ { opt_cm, "clear-marking", Arg_parser::no },
+ { opt_du, "dump", Arg_parser::yes },
+ { opt_eer, "empty-error", Arg_parser::no },
+ { opt_lt, "loose-trailing", Arg_parser::no },
+ { opt_lzl, "lzip-level", Arg_parser::yes },
+ { opt_lzn, "lzip-name", Arg_parser::yes },
+ { opt_mer, "marking-error", Arg_parser::no },
+ { opt_ref, "reference-file", Arg_parser::yes },
+ { opt_rem, "remove", Arg_parser::yes },
+ { opt_st, "strip", Arg_parser::yes },
+ { 0, 0, Arg_parser::no } };
+
+ const Arg_parser parser( argc, argv, options );
+ if( parser.error().size() ) // bad option
+ { show_error( parser.error().c_str(), 0, true ); return 1; }
+
+ int argind = 0;
+ for( ; argind < parser.arguments(); ++argind )
+ {
+ const int code = parser.code( argind );
+ if( !code ) break; // no more options
+ const char * const pn = parser.parsed_name( argind ).c_str();
+ const std::string & sarg = parser.argument( argind );
+ const char * const arg = sarg.c_str();
+ switch( code )
+ {
+ case 'a': cl_opts.ignore_trailing = false; break;
+ case 'A': set_mode( program_mode, m_alone_to_lz ); break;
+ case 'c': to_stdout = true; break;
+ case 'd': set_mode( program_mode, m_decompress ); break;
+ case 'D': set_mode( program_mode, m_range_dec );
+ parse_range( arg, pn, range ); break;
+ case 'e': set_mode( program_mode, m_reproduce ); break;
+ case 'E': set_mode( program_mode, m_reproduce );
+ parse_range( arg, pn, range, &sector_size ); break;
+ case 'f': force = true; break;
+ case 'h': show_help(); return 0;
+ case 'i': cl_opts.ignore_errors = true; break;
+ case 'k': keep_input_files = true; break;
+ case 'l': set_mode( program_mode, m_list ); break;
+ case 'm': set_mode( program_mode, m_merge ); break;
+ case 'M': set_mode( program_mode, m_md5sum ); break;
+ case 'n': break;
+ case 'o': if( sarg == "-" ) to_stdout = true;
+ else { default_output_filename = sarg; } break;
+ case 'q': verbosity = -1; break;
+ case 'R': set_mode( program_mode, m_byte_repair ); break;
+ case 's': set_mode( program_mode, m_split ); break;
+ case 'S': if( arg[0] ) repeated_byte = getnum( arg, pn, 0, 0, 255 );
+ set_mode( program_mode, m_nrep_stats ); break;
+ case 't': set_mode( program_mode, m_test ); break;
+ case 'U': parse_u( arg, pn, program_mode, sector_size ); break;
+ case 'v': if( verbosity < 4 ) ++verbosity; break;
+ case 'V': show_version(); return 0;
+ case 'W': set_mode( program_mode, m_debug_decompress );
+ bad_byte.parse_bb( arg, pn ); break;
+ case 'X': set_mode( program_mode, m_show_packets );
+ if( arg[0] ) { bad_byte.parse_bb( arg, pn ); } break;
+ case 'Y': set_mode( program_mode, m_debug_delay );
+ parse_range( arg, pn, range ); break;
+ case 'Z': set_mode( program_mode, m_debug_byte_repair );
+ bad_byte.parse_bb( arg, pn ); break;
+ case opt_cm: set_mode( program_mode, m_clear_marking );
+ cl_opts.ignore_marking = true; break;
+ case opt_du: set_mode( program_mode, m_dump );
+ member_list.parse_ml( arg, pn, cl_opts ); break;
+ case opt_eer: cl_opts.ignore_empty = false; break;
+ case opt_lt: cl_opts.loose_trailing = true; break;
+ case opt_lzl: lzip_level = parse_lzip_level( arg, pn ); break;
+ case opt_lzn: lzip_name = arg; break;
+ case opt_mer: cl_opts.ignore_marking = false; break;
+ case opt_ref: reference_filename = arg; break;
+ case opt_rem: set_mode( program_mode, m_remove );
+ member_list.parse_ml( arg, pn, cl_opts ); break;
+ case opt_st: set_mode( program_mode, m_strip );
+ member_list.parse_ml( arg, pn, cl_opts ); break;
+ default: internal_error( "uncaught option." );
+ }
+ } // end process options
+
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
+ setmode( STDIN_FILENO, O_BINARY );
+ setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+ if( program_mode == m_none )
+ {
+ show_error( "You must specify the operation to be performed.", 0, true );
+ return 1;
+ }
+
+ std::vector< std::string > filenames;
+ bool filenames_given = false;
+ for( ; argind < parser.arguments(); ++argind )
+ {
+ filenames.push_back( parser.argument( argind ) );
+ if( filenames.back() != "-" ) filenames_given = true;
+ }
+
+ const char terminator = isatty( STDOUT_FILENO ) ? '\r' : '\n';
+ try {
+ switch( program_mode )
+ {
+ case m_none: internal_error( "invalid operation." ); break;
+ case m_alone_to_lz: break;
+ case m_byte_repair:
+ one_file( filenames.size() );
+ return byte_repair( filenames[0], default_output_filename, cl_opts,
+ terminator, force );
+ case m_clear_marking:
+ at_least_one_file( filenames.size() );
+ return clear_marking( filenames, cl_opts );
+ case m_debug_byte_repair:
+ one_file( filenames.size() );
+ return debug_byte_repair( filenames[0].c_str(), cl_opts, bad_byte, terminator );
+ case m_debug_decompress:
+ one_file( filenames.size() );
+ return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, false );
+ case m_debug_delay:
+ one_file( filenames.size() );
+ return debug_delay( filenames[0].c_str(), cl_opts, range, terminator );
+ case m_decompress: break;
+ case m_dump:
+ case m_strip:
+ at_least_one_file( filenames.size() );
+ return dump_members( filenames, default_output_filename, cl_opts,
+ member_list, force, program_mode == m_strip, to_stdout );
+ case m_list: break;
+ case m_md5sum: break;
+ case m_merge:
+ if( filenames.size() < 2 )
+ { show_error( "You must specify at least 2 files.", 0, true ); return 1; }
+ return merge_files( filenames, default_output_filename, cl_opts,
+ terminator, force );
+ case m_nrep_stats:
+ return print_nrep_stats( filenames, cl_opts, repeated_byte );
+ case m_range_dec:
+ one_file( filenames.size() );
+ return range_decompress( filenames[0], default_output_filename,
+ cl_opts, range, force, to_stdout );
+ case m_remove:
+ at_least_one_file( filenames.size() );
+ return remove_members( filenames, cl_opts, member_list );
+ case m_reproduce:
+ one_file( filenames.size() );
+ if( !reference_filename || !reference_filename[0] )
+ { show_error( "You must specify a reference file.", 0, true ); return 1; }
+ if( range.size() > 0 )
+ return debug_reproduce_file( filenames[0].c_str(), lzip_name,
+ reference_filename, cl_opts, range, sector_size, lzip_level );
+ else
+ return reproduce_file( filenames[0], default_output_filename, lzip_name,
+ reference_filename, cl_opts, lzip_level, terminator, force );
+ case m_show_packets:
+ one_file( filenames.size() );
+ return debug_decompress( filenames[0].c_str(), cl_opts, bad_byte, true );
+ case m_split:
+ one_file( filenames.size() );
+ return split_file( filenames[0], default_output_filename, cl_opts, force );
+ case m_test: break;
+ case m_unzcrash_bit:
+ one_file( filenames.size() );
+ return lunzcrash_bit( filenames[0].c_str(), cl_opts );
+ case m_unzcrash_block:
+ one_file( filenames.size() );
+ return lunzcrash_block( filenames[0].c_str(), cl_opts, sector_size );
+ }
+ }
+ catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }
+ catch( Error & e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
+
+ if( filenames.empty() ) filenames.push_back("-");
+
+ if( program_mode == m_list ) return list_files( filenames, cl_opts );
+ if( program_mode == m_md5sum ) return md5sum_files( filenames );
+
+ if( program_mode != m_alone_to_lz && program_mode != m_decompress &&
+ program_mode != m_test )
+ internal_error( "invalid decompressor operation." );
+
+ if( program_mode == m_test ) to_stdout = false; // apply overrides
+ if( program_mode == m_test || to_stdout ) default_output_filename.clear();
+
+ if( to_stdout && program_mode != m_test ) // check tty only once
+ { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
+ else outfd = -1;
+
+ const bool to_file = !to_stdout && program_mode != m_test &&
+ default_output_filename.size();
+ if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
+ set_signals( signal_handler );
+
+ Pretty_print pp( filenames );
+
+ int failed_tests = 0;
+ int retval = 0;
+ const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
+ bool stdin_used = false;
+ struct stat in_stats;
+ for( unsigned i = 0; i < filenames.size(); ++i )
+ {
+ std::string input_filename;
+ int infd;
+
+ pp.set_name( filenames[i] );
+ if( filenames[i] == "-" )
+ {
+ if( stdin_used ) continue; else stdin_used = true;
+ infd = STDIN_FILENO;
+ if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
+ if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
+ }
+ else
+ {
+ input_filename = filenames[i];
+ infd = open_instream( input_filename.c_str(), &in_stats, one_to_one );
+ if( infd < 0 ) { set_retval( retval, 1 ); continue; }
+ if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
+ if( one_to_one ) // open outfd after checking infd
+ {
+ if( program_mode == m_alone_to_lz ) set_a_outname( input_filename );
+ else set_d_outname( input_filename, extension_index( input_filename ) );
+ if( !open_outstream( force, true ) )
+ { close( infd ); set_retval( retval, 1 ); continue; }
+ }
+ }
+
+ if( one_to_one && !check_tty_out( program_mode ) )
+ { set_retval( retval, 1 ); return retval; } // don't delete a tty
+
+ if( to_file && outfd < 0 ) // open outfd after checking infd
+ {
+ output_filename = default_output_filename;
+ if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
+ return 1; // check tty only once and don't try to delete a tty
+ }
+
+ const struct stat * const in_statsp =
+ ( input_filename.size() && one_to_one ) ? &in_stats : 0;
+ const unsigned long long cfile_size =
+ ( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
+ ( in_stats.st_size + 99 ) / 100 : 0;
+ int tmp;
+ try {
+ if( program_mode == m_alone_to_lz )
+ tmp = alone_to_lz( infd, pp );
+ else
+ tmp = decompress( cfile_size, infd, cl_opts, pp, program_mode == m_test );
+ }
+ catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; }
+ catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
+ if( close( infd ) != 0 )
+ { show_file_error( pp.name(), "Error closing input file", errno );
+ set_retval( tmp, 1 ); }
+ set_retval( retval, tmp );
+ if( tmp )
+ { if( program_mode != m_test ) cleanup_and_fail( retval );
+ else ++failed_tests; }
+
+ if( delete_output_on_interrupt && one_to_one )
+ close_and_set_permissions( in_statsp );
+ if( input_filename.size() && !keep_input_files && one_to_one &&
+ ( program_mode != m_decompress || !cl_opts.ignore_errors ) )
+ std::remove( input_filename.c_str() );
+ }
+ if( delete_output_on_interrupt ) // -o
+ close_and_set_permissions( ( retval == 0 && !stdin_used &&
+ filenames_given && filenames.size() == 1 ) ? &in_stats : 0 );
+ else if( outfd >= 0 && close( outfd ) != 0 ) // -c
+ {
+ show_error( "Error closing stdout", errno );
+ set_retval( retval, 1 );
+ }
+ if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
+ std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
+ program_name, failed_tests,
+ ( failed_tests == 1 ) ? "file" : "files" );
+ return retval;
+ }
diff --git a/main_common.cc b/main_common.cc
new file mode 100644
index 0000000..dfaccac
--- /dev/null
+++ b/main_common.cc
@@ -0,0 +1,197 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+namespace {
+
+const char * const program_year = "2024";
+
+ // Print the program name and version, the copyright line, and the license
+ // summary to standard output.
+ void show_version()
+   {
+   std::printf( "%s %s\n"
+                "Copyright (C) %s Antonio Diaz Diaz.\n",
+                program_name, PROGVERSION, program_year );
+   std::fputs( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
+               "This is free software: you are free to change and redistribute it.\n"
+               "There is NO WARRANTY, to the extent permitted by law.\n", stdout );
+   }
+
+
+ // Format 'num' as decimal text in one of a small ring of static buffers, so
+ // that several results can be passed to a single printf call.
+ // Values beyond +/-1024 that are exact multiples of 1024 (or, failing that,
+ // of 1000) are reduced and given a binary ("Ki", "Mi", ...) or SI ("k",
+ // "M", ...) suffix. What remains is split in groups of 3 digits with '_'
+ // when it has 5 or more digits.
+ const char * format_num3( long long num )
+   {
+   enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
+   static const char si_prefix[] = "kMGTPEZYRQ";
+   static const char binary_prefix[] = "KMGTPEZYRQ";
+   static char buffer[buffers][bufsize];	// ring of result buffers
+   static int current = 0;
+
+   char * const buf = buffer[current];
+   current = ( current + 1 ) % buffers;
+   char * p = buf + bufsize - 1;		// the text is written right to left
+   *p = 0;				// string terminator
+   const bool negative = ( num < 0 );
+   if( num > 1024 || num < -1024 )
+     {
+     char prefix = 0;
+     int i = 0;				// binary multipliers are tried first
+     while( i < n && num != 0 && num % 1024 == 0 )
+       { num /= 1024; prefix = binary_prefix[i]; ++i; }
+     if( prefix ) *(--p) = 'i';
+     else
+       {
+       i = 0;				// then decimal multipliers
+       while( i < n && num != 0 && num % 1000 == 0 )
+         { num /= 1000; prefix = si_prefix[i]; ++i; }
+       }
+     if( prefix ) *(--p) = prefix;
+     }
+   const bool split = ( num >= 10000 || num <= -10000 );
+
+   int group = 0;			// digits written since the last '_'
+   while( true )
+     {
+     const long long digit = num % 10;	// has the sign of num
+     num /= 10;
+     *(--p) = llabs( digit ) + '0';
+     if( num == 0 ) break;
+     if( split && ++group >= 3 ) { group = 0; *(--p) = '_'; }
+     }
+   if( negative ) *(--p) = '-';
+   return p;
+   }
+
+
+ // Report a problem with the argument 'arg' of option 'option_name' on
+ // stderr. Nothing is printed when 'verbosity' is negative.
+ void show_option_error( const char * const arg, const char * const msg,
+                         const char * const option_name )
+   {
+   if( verbosity < 0 ) return;
+   std::fprintf( stderr, "%s: '%s': %s option '%s'.\n",
+                 program_name, arg, msg, option_name );
+   }
+
+
+ // Parse the integer at the start of 'arg' with strtoll (base 0) and scale
+ // it by an optional multiplier suffix.
+ // Recognized formats: <num>k[Bs], <num>Ki[Bs], <num>[MGTPEZYRQ][i][Bs]
+ // A 'B' unit leaves the value unchanged; an 's' unit multiplies it by
+ // 'hardbs' and is rejected when 'hardbs' <= 0.
+ // If 'tailp' is non-null, unrecognized trailing text is tolerated and
+ // '*tailp' is set to the first unparsed character. Otherwise any trailing
+ // text is an error.
+ // On any error (no number, bad multiplier, overflow, or a result outside
+ // ['llimit', 'ulimit']) the problem is reported, naming 'option_name', and
+ // the program exits with status 1.
+ long long getnum( const char * const arg, const char * const option_name,
+ const int hardbs, const long long llimit = LLONG_MIN,
+ const long long ulimit = LLONG_MAX,
+ const char ** const tailp = 0 )
+ {
+ char * tail;
+ errno = 0;
+ long long result = strtoll( arg, &tail, 0 );
+ // strtoll consumed nothing: there is no number at the start of 'arg'
+ if( tail == arg )
+ { show_option_error( arg, "Bad or missing numerical argument in",
+ option_name ); std::exit( 1 ); }
+
+ if( !errno && tail[0] )
+ {
+ // p points to the first suffix character, tail to the one after it
+ char * const p = tail++;
+ int factor = 1000; // default factor
+ int exponent = -1; // -1 = bad multiplier
+ char usuf = 0; // 'B' or 's' unit suffix is present
+ switch( *p )
+ {
+ case 'Q': exponent = 10; break;
+ case 'R': exponent = 9; break;
+ case 'Y': exponent = 8; break;
+ case 'Z': exponent = 7; break;
+ case 'E': exponent = 6; break;
+ case 'P': exponent = 5; break;
+ case 'T': exponent = 4; break;
+ case 'G': exponent = 3; break;
+ case 'M': exponent = 2; break;
+ // 'K' is accepted only as "Ki" (1024); 'k' only when not followed by 'i'
+ case 'K': if( tail[0] == 'i' ) { ++tail; factor = 1024; exponent = 1; } break;
+ case 'k': if( tail[0] != 'i' ) exponent = 1; break;
+ case 'B':
+ case 's': usuf = *p; exponent = 0; break;
+ // unknown character: accepted only if the caller asked for the tail
+ default: if( tailp ) { tail = p; exponent = 0; }
+ }
+ // an 'i' after M..Q selects powers of 1024 instead of powers of 1000
+ if( exponent > 1 && tail[0] == 'i' ) { ++tail; factor = 1024; }
+ // a unit ('B' or 's') may follow a multiplier
+ if( exponent > 0 && usuf == 0 && ( tail[0] == 'B' || tail[0] == 's' ) )
+ { usuf = tail[0]; ++tail; }
+ if( exponent < 0 || ( usuf == 's' && hardbs <= 0 ) ||
+ ( !tailp && tail[0] != 0 ) )
+ { show_option_error( arg, "Bad multiplier in numerical argument of",
+ option_name ); std::exit( 1 ); }
+ // multiply one factor at a time, checking for overflow before each product
+ for( int i = 0; i < exponent; ++i )
+ {
+ if( ( result >= 0 && LLONG_MAX / factor >= result ) ||
+ ( result < 0 && LLONG_MIN / factor <= result ) ) result *= factor;
+ else { errno = ERANGE; break; }
+ }
+ if( usuf == 's' )
+ {
+ if( ( result >= 0 && LLONG_MAX / hardbs >= result ) ||
+ ( result < 0 && LLONG_MIN / hardbs <= result ) ) result *= hardbs;
+ else errno = ERANGE;
+ }
+ }
+ // errno may have been set by strtoll or by the overflow checks above
+ if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
+ if( errno )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
+ "option '%s'.\n", program_name, arg, format_num3( llimit ),
+ format_num3( ulimit ), option_name );
+ std::exit( 1 );
+ }
+ if( tailp ) *tailp = tail;
+ return result;
+ }
+
+} // end namespace
+
+
+ // Parse a bad-byte specification and store it in this object.
+ // Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
+ // which select the modes literal, delta, and flip respectively.
+ // A malformed argument is reported and ends the program with status 1.
+ void Bad_byte::parse_bb( const char * const arg, const char * const pn )
+   {
+   argument = arg;
+   option_name = pn;
+   const char * rest;			// text following <pos>
+   pos = getnum( arg, option_name, 0, 0, LLONG_MAX, &rest );
+   if( rest[0] != ',' )
+     {
+     const char * const msg = ( rest[0] == 0 ) ? "Missing <val> in" :
+                              "Missing comma between <pos> and <val> in";
+     show_option_error( arg, msg, option_name );
+     std::exit( 1 );
+     }
+   const char * valp = rest + 1;	// first character after the comma
+   switch( valp[0] )
+     {
+     case '+': mode = delta; ++valp; break;
+     case 'f': mode = flip; ++valp; break;
+     default: mode = literal;
+     }
+   value = getnum( valp, option_name, 0, 0, 255 );
+   }
+
+
+ // Print 'msg' (if not empty) to stderr, followed by the description of
+ // 'errcode' when it is positive. If 'help' is true, also print a hint to
+ // try '--help'. Nothing is printed when 'verbosity' is negative.
+ void show_error( const char * const msg, const int errcode, const bool help )
+   {
+   if( verbosity >= 0 )
+     {
+     if( msg && msg[0] )
+       {
+       const bool with_errno = ( errcode > 0 );
+       std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
+                     with_errno ? ": " : "",
+                     with_errno ? std::strerror( errcode ) : "" );
+       }
+     if( help )
+       std::fprintf( stderr, "Try '%s --help' for more information.\n",
+                     invocation_name );
+     }
+   }
+
+
+ // Print "<program>: <filename>: <msg>" to stderr, followed by the
+ // description of 'errcode' when it is positive.
+ // Nothing is printed when 'verbosity' is negative.
+ void show_file_error( const char * const filename, const char * const msg,
+                       const int errcode )
+   {
+   if( verbosity < 0 ) return;
+   const bool with_errno = ( errcode > 0 );
+   const char * const separator = with_errno ? ": " : "";
+   const char * const reason = with_errno ? std::strerror( errcode ) : "";
+   std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
+                 separator, reason );
+   }
+
+
+ // Report an internal inconsistency on stderr (unless 'verbosity' is
+ // negative) and terminate the program with exit status 3.
+ void internal_error( const char * const msg )
+   {
+   const bool quiet = ( verbosity < 0 );
+   if( !quiet )
+     std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
+   std::exit( 3 );
+   }
diff --git a/md5.cc b/md5.cc
new file mode 100644
index 0000000..e0a8afb
--- /dev/null
+++ b/md5.cc
@@ -0,0 +1,206 @@
+/* Functions to compute MD5 message digest of memory blocks according to the
+ definition of MD5 in RFC 1321 from April 1992.
+ Copyright (C) 2020-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cstring>
+#include <stdint.h>
+
+#include "md5.h"
+
+
+namespace {
+
+ /* These are the four functions used in the four steps of the MD5 algorithm
+ as defined in RFC 1321.
+ The macro arguments are expanded without parentheses, so they must be
+ simple operands. The uses in this file pass plain variables. */
+ #define F(x, y, z) ((x & y) | (~x & z))
+ #define G(x, y, z) ((x & z) | (y & ~z))
+ #define H(x, y, z) (x ^ y ^ z)
+ #define I(x, y, z) (y ^ (x | ~z))
+
+ /* Rotate x left n bits.
+ It is unfortunate that C++ does not provide an operator for rotation.
+ Hope the compiler is smart enough.
+ Note that the rotated value is assigned back to x. The uses in this file
+ apply it to 32-bit variables with n between 4 and 23. */
+ #define ROTATE_LEFT(x, n) (x = (x << n) | (x >> (32 - n)))
+
+ // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+ // Each one adds the round function of (b, c, d), the message word x, and
+ // the constant ac to a, rotates a left by s bits, and then adds b to a.
+ #define FF(a, b, c, d, x, s, ac) \
+ { a += F(b, c, d) + x + ac; ROTATE_LEFT(a, s); a += b; }
+ #define GG(a, b, c, d, x, s, ac) \
+ { a += G(b, c, d) + x + ac; ROTATE_LEFT(a, s); a += b; }
+ #define HH(a, b, c, d, x, s, ac) \
+ { a += H(b, c, d) + x + ac; ROTATE_LEFT(a, s); a += b; }
+ #define II(a, b, c, d, x, s, ac) \
+ { a += I(b, c, d) + x + ac; ROTATE_LEFT(a, s); a += b; }
+
+} // end namespace
+
+
+ // Process one 64-byte block of input and fold it into 'state'.
+ // The block is read as sixteen 32-bit little endian words, which are then
+ // run through the 64 steps (4 rounds of 16) listed below. The order of the
+ // steps and of the rotating (a, b, c, d) arguments is significant.
+ void MD5SUM::md5_process_block( const uint8_t block[64] )
+ {
+ uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+ for( int i = 0, j = 0; i < 16; ++i, j += 4 ) // fill x in little endian
+ x[i] = block[j] | (block[j+1] << 8) | (block[j+2] << 16) | (block[j+3] << 24);
+
+ // arguments of each step: working variables, message word, shift, constant
+ /* Round 1 */
+ FF (a, b, c, d, x[ 0], 7, 0xD76AA478); // 1
+ FF (d, a, b, c, x[ 1], 12, 0xE8C7B756); // 2
+ FF (c, d, a, b, x[ 2], 17, 0x242070DB); // 3
+ FF (b, c, d, a, x[ 3], 22, 0xC1BDCEEE); // 4
+ FF (a, b, c, d, x[ 4], 7, 0xF57C0FAF); // 5
+ FF (d, a, b, c, x[ 5], 12, 0x4787C62A); // 6
+ FF (c, d, a, b, x[ 6], 17, 0xA8304613); // 7
+ FF (b, c, d, a, x[ 7], 22, 0xFD469501); // 8
+ FF (a, b, c, d, x[ 8], 7, 0x698098D8); // 9
+ FF (d, a, b, c, x[ 9], 12, 0x8B44F7AF); // 10
+ FF (c, d, a, b, x[10], 17, 0xFFFF5BB1); // 11
+ FF (b, c, d, a, x[11], 22, 0x895CD7BE); // 12
+ FF (a, b, c, d, x[12], 7, 0x6B901122); // 13
+ FF (d, a, b, c, x[13], 12, 0xFD987193); // 14
+ FF (c, d, a, b, x[14], 17, 0xA679438E); // 15
+ FF (b, c, d, a, x[15], 22, 0x49B40821); // 16
+
+ /* Round 2 */
+ GG (a, b, c, d, x[ 1], 5, 0xF61E2562); // 17
+ GG (d, a, b, c, x[ 6], 9, 0xC040B340); // 18
+ GG (c, d, a, b, x[11], 14, 0x265E5A51); // 19
+ GG (b, c, d, a, x[ 0], 20, 0xE9B6C7AA); // 20
+ GG (a, b, c, d, x[ 5], 5, 0xD62F105D); // 21
+ GG (d, a, b, c, x[10], 9, 0x02441453); // 22
+ GG (c, d, a, b, x[15], 14, 0xD8A1E681); // 23
+ GG (b, c, d, a, x[ 4], 20, 0xE7D3FBC8); // 24
+ GG (a, b, c, d, x[ 9], 5, 0x21E1CDE6); // 25
+ GG (d, a, b, c, x[14], 9, 0xC33707D6); // 26
+ GG (c, d, a, b, x[ 3], 14, 0xF4D50D87); // 27
+ GG (b, c, d, a, x[ 8], 20, 0x455A14ED); // 28
+ GG (a, b, c, d, x[13], 5, 0xA9E3E905); // 29
+ GG (d, a, b, c, x[ 2], 9, 0xFCEFA3F8); // 30
+ GG (c, d, a, b, x[ 7], 14, 0x676F02D9); // 31
+ GG (b, c, d, a, x[12], 20, 0x8D2A4C8A); // 32
+
+ /* Round 3 */
+ HH (a, b, c, d, x[ 5], 4, 0xFFFA3942); // 33
+ HH (d, a, b, c, x[ 8], 11, 0x8771F681); // 34
+ HH (c, d, a, b, x[11], 16, 0x6D9D6122); // 35
+ HH (b, c, d, a, x[14], 23, 0xFDE5380C); // 36
+ HH (a, b, c, d, x[ 1], 4, 0xA4BEEA44); // 37
+ HH (d, a, b, c, x[ 4], 11, 0x4BDECFA9); // 38
+ HH (c, d, a, b, x[ 7], 16, 0xF6BB4B60); // 39
+ HH (b, c, d, a, x[10], 23, 0xBEBFBC70); // 40
+ HH (a, b, c, d, x[13], 4, 0x289B7EC6); // 41
+ HH (d, a, b, c, x[ 0], 11, 0xEAA127FA); // 42
+ HH (c, d, a, b, x[ 3], 16, 0xD4EF3085); // 43
+ HH (b, c, d, a, x[ 6], 23, 0x04881D05); // 44
+ HH (a, b, c, d, x[ 9], 4, 0xD9D4D039); // 45
+ HH (d, a, b, c, x[12], 11, 0xE6DB99E5); // 46
+ HH (c, d, a, b, x[15], 16, 0x1FA27CF8); // 47
+ HH (b, c, d, a, x[ 2], 23, 0xC4AC5665); // 48
+
+ /* Round 4 */
+ II (a, b, c, d, x[ 0], 6, 0xF4292244); // 49
+ II (d, a, b, c, x[ 7], 10, 0x432AFF97); // 50
+ II (c, d, a, b, x[14], 15, 0xAB9423A7); // 51
+ II (b, c, d, a, x[ 5], 21, 0xFC93A039); // 52
+ II (a, b, c, d, x[12], 6, 0x655B59C3); // 53
+ II (d, a, b, c, x[ 3], 10, 0x8F0CCC92); // 54
+ II (c, d, a, b, x[10], 15, 0xFFEFF47D); // 55
+ II (b, c, d, a, x[ 1], 21, 0x85845DD1); // 56
+ II (a, b, c, d, x[ 8], 6, 0x6FA87E4F); // 57
+ II (d, a, b, c, x[15], 10, 0xFE2CE6E0); // 58
+ II (c, d, a, b, x[ 6], 15, 0xA3014314); // 59
+ II (b, c, d, a, x[13], 21, 0x4E0811A1); // 60
+ II (a, b, c, d, x[ 4], 6, 0xF7537E82); // 61
+ II (d, a, b, c, x[11], 10, 0xBD3AF235); // 62
+ II (c, d, a, b, x[ 2], 15, 0x2AD7D2BB); // 63
+ II (b, c, d, a, x[ 9], 21, 0xEB86D391); // 64
+
+ // add the processed values to the context
+ state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+ }
+
+
+ /* Feed the next 'len' bytes of 'buffer' into the context.
+    'len' may be any value; it does not need to be a multiple of 64.
+    Complete 64-byte blocks are processed at once. The bytes left over are
+    kept in 'ibuf' until enough input arrives to complete a block.
+ */
+ void MD5SUM::md5_update( const uint8_t * const buffer, const unsigned long len )
+   {
+   unsigned index = count & 0x3F;	// bytes already waiting in ibuf
+   count += len;				// update total data length
+   const unsigned rest = 64 - index;	// bytes needed to complete ibuf
+   unsigned long i = 0;			// bytes of 'buffer' consumed so far
+
+   if( len >= rest )
+     {
+     std::memcpy( ibuf + index, buffer, rest );	// complete the pending block
+     md5_process_block( ibuf );
+     i = rest;
+     while( i + 63 < len )		// whole blocks straight from 'buffer'
+       { md5_process_block( buffer + i ); i += 64; }
+     index = 0;
+     }
+
+   std::memcpy( ibuf + index, buffer + i, len - i );	// keep the remainder
+   }
+
+
+ // Finish the computation: append the padding and the data length in bits,
+ // then store the resulting state in 'digest' in little endian byte order.
+ void MD5SUM::md5_finish( md5_type & digest )
+   {
+   uint8_t padding[64] = { 0x80 };	// 0x80 followed by zeros
+   uint8_t bits[8];
+   const uint64_t bit_count = count << 3;	// data length in bits
+   for( int i = 0; i < 8; ++i )
+     bits[i] = (uint8_t)( bit_count >> ( 8 * i ) );
+
+   const unsigned index = count & 0x3F;	// data length in bytes mod 64
+   const unsigned len = ( ( index < 56 ) ? 56 : 120 ) - index;
+   md5_update( padding, len );		// pad to 56 mod 64
+   md5_update( bits, 8 );		// append data length in bits
+
+   for( int i = 0; i < 4; ++i )		// store state in digest
+     for( int k = 0; k < 4; ++k )
+       digest[4*i+k] = (uint8_t)( state[i] >> ( 8 * k ) );
+   }
+
+
+ // Compute the MD5 digest of the 'len' bytes at 'buffer' and store it in
+ // 'digest'. 'buffer' is not accessed when 'len' is 0.
+ void compute_md5( const uint8_t * const buffer, const unsigned long len,
+                   md5_type & digest )
+   {
+   MD5SUM hasher;
+   if( len != 0 ) { hasher.md5_update( buffer, len ); }
+   hasher.md5_finish( digest );
+   }
+
+
+ // Return true if the MD5 digest of the 'len' bytes at 'buffer' is equal
+ // to 'digest'.
+ bool check_md5( const uint8_t * const buffer, const unsigned long len,
+                 const md5_type & digest )
+   {
+   md5_type computed;
+   compute_md5( buffer, len, computed );
+   const bool match = ( digest == computed );
+   return match;
+   }
diff --git a/md5.h b/md5.h
new file mode 100644
index 0000000..9f3b598
--- /dev/null
+++ b/md5.h
@@ -0,0 +1,61 @@
+/* Functions to compute MD5 message digest of memory blocks according to the
+ definition of MD5 in RFC 1321 from April 1992.
+ Copyright (C) 2020-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+ // Holder for a 128-bit MD5 digest, comparable for equality and writable
+ // byte by byte through operator[].
+ struct md5_type
+   {
+   uint8_t data[16];			// 128-bit md5 digest
+
+   bool operator==( const md5_type & d ) const
+     { return !std::memcmp( data, d.data, sizeof data ); }
+   bool operator!=( const md5_type & d ) const
+     { return std::memcmp( data, d.data, sizeof data ) != 0; }
+ //  const uint8_t & operator[]( const int i ) const { return data[i]; }
+   uint8_t & operator[]( const int i ) { return data[i]; }
+   };
+
+
+ // Context for computing an MD5 digest incrementally: create (or reset) it,
+ // call md5_update any number of times, then call md5_finish.
+ class MD5SUM
+   {
+   uint64_t count;		// data length in bytes, modulo 2^64
+   uint32_t state[4];		// state (ABCD)
+   uint8_t ibuf[64];		// input buffer with space for a block
+
+   void md5_process_block( const uint8_t block[64] );
+
+ public:
+   MD5SUM() { reset(); }
+
+   // Return the context to its initial state. 'ibuf' is not cleared.
+   void reset()
+     {
+     static const uint32_t initial_state[4] =	// magic initialization constants
+       { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476 };
+     for( int i = 0; i < 4; ++i ) state[i] = initial_state[i];
+     count = 0;
+     }
+
+   void md5_update( const uint8_t * const buffer, const unsigned long len );
+   void md5_finish( md5_type & digest );
+   };
+
+ // Compute the MD5 digest of the 'len' bytes at 'buffer' into 'digest'.
+ void compute_md5( const uint8_t * const buffer, const unsigned long len,
+ md5_type & digest );
+
+ // Return true if the MD5 digest of the 'len' bytes at 'buffer' equals 'digest'.
+ bool check_md5( const uint8_t * const buffer, const unsigned long len,
+ const md5_type & digest );
diff --git a/merge.cc b/merge.cc
new file mode 100644
index 0000000..2951b50
--- /dev/null
+++ b/merge.cc
@@ -0,0 +1,658 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "decoder.h"
+#include "lzip_index.h"
+
+
+ // Split this block at 'pos'. If 'pos' lies strictly inside the block, the
+ // part before 'pos' is returned and this block is reduced to the part
+ // starting at 'pos'. Otherwise this block is left untouched and an empty
+ // Block( 0, 0 ) is returned.
+ Block Block::split( const long long pos )
+   {
+   if( pos <= pos_ || pos >= end() ) return Block( 0, 0 );
+   const Block head( pos_, pos - pos_ );
+   size_ -= head.size_;
+   pos_ = pos;
+   return head;
+   }
+
+namespace {
+
+ bool pending_newline = false;
+
+ // If a newline is pending and 'terminator' is not itself a newline, print
+ // one to stdout. The pending flag is cleared in every case.
+ void print_pending_newline( const char terminator )
+   {
+   const bool needed = pending_newline && terminator != '\n';
+   pending_newline = false;
+   if( needed ) std::fputc( '\n', stdout );
+   }
+
+
+ // Compute in 'crc' the CRC32 of the data read from 'infd' from its current
+ // position to end of file. Returns false (after reporting the error with
+ // 'filename') if reading fails.
+ bool file_crc( uint32_t & crc, const int infd, const char * const filename )
+   {
+   const int buffer_size = 65536;
+   uint8_t * const buffer = new uint8_t[buffer_size];
+   bool ok = true;
+   int rd;
+
+   crc = 0xFFFFFFFFU;
+   do {
+     rd = readblock( infd, buffer, buffer_size );
+     if( rd != buffer_size && errno )	// short read caused by an error
+       {
+       show_file_error( filename, "Error reading input file", errno );
+       ok = false;
+       break;
+       }
+     if( rd > 0 ) crc32.update_buf( crc, buffer, rd );
+     }
+   while( rd >= buffer_size );		// a short read means EOF
+   delete[] buffer;
+   crc ^= 0xFFFFFFFFU;
+   return ok;
+   }
+
+
+ // Add 'bv' to 'block_vector' splitting blocks as needed to keep all the
+ // edges (pos and end of every block).
+ // 'block_vector' contains the result. 'bv' is destroyed.
+ // Both vectors are walked in parallel with the indices i1 and i2.
+ // NOTE(review): this presumably requires both vectors to be sorted by
+ // position and that Block::operator< orders blocks by position. Neither
+ // is visible here, so confirm against lzip.h.
+ void combine( std::vector< Block > & block_vector, std::vector< Block > & bv )
+ {
+ if( block_vector.empty() ) { block_vector.swap( bv ); return; }
+ unsigned i1 = 0, i2 = 0;
+ while( i1 < block_vector.size() && i2 < bv.size() )
+ {
+ // b1 and b2 are re-bound on every iteration and are not used after an
+ // insertion into 'block_vector' within the same iteration
+ Block & b1 = block_vector[i1];
+ Block & b2 = bv[i2];
+ if( b1.overlaps( b2 ) )
+ {
+ if( b1 < b2 )
+ {
+ // cut off the part of b1 before b2 and insert it as a block of its own
+ Block b = b1.split( b2.pos() );
+ block_vector.insert( block_vector.begin() + i1, b ); ++i1;
+ }
+ else if( b2 < b1 )
+ {
+ // the part of b2 before b1 becomes a new block; b2 keeps the rest
+ Block b( b2.pos(), b1.pos() - b2.pos() );
+ b2.split( b1.pos() );
+ block_vector.insert( block_vector.begin() + i1, b ); ++i1;
+ }
+ // b1 ends first: drop from b2 the part up to the end of b1
+ else if( b1.end() < b2.end() ) { b2.split( b1.end() ); ++i1; }
+ else if( b2.end() < b1.end() )
+ {
+ // b2 ends first: split b1 at the end of b2 to keep that edge
+ Block b = b1.split( b2.end() );
+ block_vector.insert( block_vector.begin() + i1, b ); ++i1; ++i2;
+ }
+ else { ++i1; ++i2; } // blocks are identical
+ }
+ else if( b1 < b2 ) ++i1;
+ // b2 does not overlap b1 and is not after it: insert b2 before b1
+ else { block_vector.insert( block_vector.begin() + i1, b2 ); ++i1; ++i2; }
+ }
+ if( i2 < bv.size() ) // tail copy
+ block_vector.insert( block_vector.end(), bv.begin() + i2, bv.end() );
+ }
+
+
+ // Compare the member at file position 'mpos' (of size 'msize') between
+ // every pair of input files and record where they differ.
+ // Each run of differing bytes becomes a block. Runs separated by a single
+ // equal byte are merged into one block; two consecutive equal bytes end a
+ // block. The blocks of every pair are merged into 'block_vector' with
+ // combine().
+ // positions in 'block_vector' are absolute file positions.
+ // blocks in 'block_vector' are ascending and don't overlap.
+ // 'color_vector' receives one color per file. Files whose copies of the
+ // member are identical get the same color, and pairs already known to be
+ // identical are not compared again.
+ // Returns false if seeking or reading any file fails.
+ bool diff_member( const long long mpos, const long long msize,
+                   const std::vector< std::string > & filenames,
+                   const std::vector< int > & infd_vector,
+                   std::vector< Block > & block_vector,
+                   std::vector< int > & color_vector )
+   {
+   const int files = infd_vector.size();
+   const int buffer_size = 65536;
+   uint8_t * const buffer1 = new uint8_t[buffer_size];
+   uint8_t * const buffer2 = new uint8_t[buffer_size];
+   int next_color = 1;
+
+   bool error = false;
+   for( int i1 = 0; i1 < files && !error; ++i1 )
+     {
+     for( int i2 = i1 + 1; i2 < files && !error; ++i2 )
+       {
+       if( color_vector[i1] != 0 && color_vector[i1] == color_vector[i2] )
+         continue;			// already known to be identical
+       std::vector< Block > bv;
+       long long partial_pos = 0;	// offset inside the member
+       const char * const filename1 = filenames[i1].c_str();
+       const char * const filename2 = filenames[i2].c_str();
+       const int fd1 = infd_vector[i1], fd2 = infd_vector[i2];
+       // Offset of the begin of the current block. -1 means no block.
+       // It must be as wide as 'partial_pos'; an int would truncate the
+       // offsets of differences beyond INT_MAX in large members.
+       long long begin = -1;
+       bool prev_equal = true;
+       if( !safe_seek( fd1, mpos, filename1 ) ||
+           !safe_seek( fd2, mpos, filename2 ) ) { error = true; break; }
+
+       while( partial_pos < msize )
+         {
+         const int size = std::min( (long long)buffer_size, msize - partial_pos );
+         const int rd = readblock( fd1, buffer1, size );
+         if( rd != size && errno )
+           { show_file_error( filename1, "Error reading input file", errno );
+             error = true; break; }
+         if( rd > 0 )
+           {
+           if( readblock( fd2, buffer2, rd ) != rd )
+             { show_file_error( filename2, "Error reading input file", errno );
+               error = true; break; }
+           for( int i = 0; i < rd; ++i )
+             {
+             if( buffer1[i] != buffer2[i] )
+               {
+               prev_equal = false;
+               if( begin < 0 ) begin = partial_pos + i;	// begin block
+               }
+             else if( !prev_equal ) prev_equal = true;	// first equal byte
+             else if( begin >= 0 )			// end block
+               {
+               // the two trailing equal bytes are not part of the block
+               Block b( mpos + begin, partial_pos + i - 1 - begin );
+               begin = -1;
+               bv.push_back( b );
+               }
+             }
+           partial_pos += rd;
+           }
+         if( rd < buffer_size ) break;			// EOF
+         }
+       if( begin >= 0 )					// finish last block
+         {
+         // a trailing equal byte (prev_equal) is excluded from the block
+         Block b( mpos + begin, partial_pos - prev_equal - begin );
+         bv.push_back( b );
+         }
+       if( bv.empty() )	// members are identical, set to same color
+         {
+         if( color_vector[i1] == 0 )
+           {
+           if( color_vector[i2] != 0 ) color_vector[i1] = color_vector[i2];
+           else color_vector[i1] = color_vector[i2] = next_color++;
+           }
+         else if( color_vector[i2] == 0 ) color_vector[i2] = color_vector[i1];
+         else internal_error( "different colors assigned to identical members." );
+         }
+       combine( block_vector, bv );
+       }
+     if( color_vector[i1] == 0 ) color_vector[i1] = next_color++;
+     }
+   delete[] buffer2; delete[] buffer1;
+   return !error;
+   }
+
+
+ // Return 'base' raised to 'exponent', saturating at LONG_MAX instead of
+ // overflowing. ipow( x, 0 ) is 1 for every x, and ipow( 0, n ) is 0 for
+ // every n > 0.
+ long ipow( const unsigned base, const unsigned exponent )
+   {
+   if( exponent == 0 ) return 1;
+   // Handled apart because once 'result' became 0 the overflow check below
+   // would divide by zero.
+   if( base == 0 ) return 0;
+   unsigned long result = 1;
+   for( unsigned i = 0; i < exponent; ++i )
+     {
+     if( LONG_MAX / result >= base ) result *= base;	// product fits
+     else { result = LONG_MAX; break; }			// saturate
+     }
+   return result;
+   }
+
+
+ // Open all the files in 'filenames' for merging, storing the descriptors
+ // in 'infd_vector' and the stat data of the first file in '*in_statsp',
+ // and verify that they are usable copies of the same file.
+ // 'lzip_index' receives the index of the first file whose format is
+ // intact, or an index built from all the files if none is intact.
+ // Return values as seen in this function:
+ //    1  a file can't be opened, read, or seeked
+ //    2  the set of files is unsuitable (same name given twice, identical
+ //       contents, different indexes or sizes, file too short, or format
+ //       damaged in all files)
+ //    0  some input file has no errors in any member
+ //   -1  every input file has at least one member that fails the test
+ // NOTE(review): -1 presumably tells the caller to go on with the merge;
+ // confirm against merge_files.
+ int open_input_files( const std::vector< std::string > & filenames,
+ std::vector< int > & infd_vector,
+ const Cl_options & cl_opts, Lzip_index & lzip_index,
+ struct stat * const in_statsp )
+ {
+ const int files = filenames.size();
+ // reject a file name given more than once
+ for( int i = 0; i + 1 < files; ++i )
+ for( int j = i + 1; j < files; ++j )
+ if( filenames[i] == filenames[j] )
+ { show_file_error( filenames[i].c_str(), "Input file given twice." );
+ return 2; }
+ {
+ // open the files; two files with the same CRC are considered identical
+ std::vector< uint32_t > crc_vector( files );
+ for( int i = 0; i < files; ++i )
+ {
+ struct stat in_stats; // not used
+ infd_vector[i] = open_instream( filenames[i].c_str(),
+ ( i == 0 ) ? in_statsp : &in_stats, false, true );
+ if( infd_vector[i] < 0 ) return 1;
+ if( !file_crc( crc_vector[i], infd_vector[i], filenames[i].c_str() ) )
+ return 1;
+ for( int j = 0; j < i; ++j )
+ if( crc_vector[i] == crc_vector[j] )
+ { show_2file_error( "Input files", filenames[j].c_str(),
+ filenames[i].c_str(), "are identical." ); return 2; }
+ }
+ }
+
+ // all the files must have the same size and, when their format is intact,
+ // the same index
+ long long insize = 0;
+ int good_i = -1; // index of the first file with intact format
+ for( int i = 0; i < files; ++i )
+ {
+ long long tmp; // size of file i
+ const Lzip_index li( infd_vector[i], cl_opts, true );
+ if( li.retval() == 0 ) // file format is intact
+ {
+ if( good_i < 0 ) { good_i = i; lzip_index = li; }
+ else if( lzip_index != li )
+ { show_2file_error( "Input files", filenames[good_i].c_str(),
+ filenames[i].c_str(), "are different." ); return 2; }
+ tmp = lzip_index.file_size();
+ }
+ else // file format is damaged
+ {
+ tmp = lseek( infd_vector[i], 0, SEEK_END );
+ if( tmp < 0 )
+ {
+ show_file_error( filenames[i].c_str(), "Input file is not seekable." );
+ return 1;
+ }
+ }
+ if( tmp < min_member_size )
+ { show_file_error( filenames[i].c_str(), "Input file is too short." );
+ return 2; }
+ if( i == 0 ) insize = tmp;
+ else if( insize != tmp )
+ { show_2file_error( "Sizes of input files", filenames[0].c_str(),
+ filenames[i].c_str(), "are different." ); return 2; }
+ }
+
+ // no file gave a valid index: try to build one from all the files
+ if( lzip_index.retval() != 0 )
+ {
+ const Lzip_index li( infd_vector, insize );
+ if( li.retval() == 0 ) // file format could be recovered
+ lzip_index = li;
+ else
+ { show_error( "Format damaged in all input files." ); return 2; }
+ }
+
+ // test every member of every file; one file without errors is enough
+ for( int i = 0; i < files; ++i )
+ {
+ const char * const filename = filenames[i].c_str();
+ const int infd = infd_vector[i];
+ bool error = false;
+ for( long j = 0; j < lzip_index.members(); ++j )
+ {
+ const long long mpos = lzip_index.mblock( j ).pos();
+ const long long msize = lzip_index.mblock( j ).size();
+ if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( test_member_from_file( infd, msize ) != 0 ) { error = true; break; }
+ }
+ if( !error )
+ {
+ if( verbosity >= 1 )
+ std::printf( "Input file '%s' has no errors. Recovery is not needed.\n",
+ filename );
+ return 0;
+ }
+ }
+ return -1;
+ }
+
+
+/* Reduce the number of differing blocks when there are more than 16.
+   Each pass finds the smallest gap between consecutive blocks and joins
+   every pair of blocks separated by exactly that gap; the bytes of the gap
+   become part of the joined block. The passes stop when at most 16 blocks
+   remain or when all the remaining gaps have the same size.
+   If verbosity >= 1 and some blocks were joined, a summary is printed. */
+void maybe_cluster_blocks( std::vector< Block > & block_vector )
+  {
+  const unsigned long old_size = block_vector.size();
+  if( old_size <= 16 ) return;
+  do {
+    int min_gap = INT_MAX;
+    bool same = true;			// all gaps have the same size
+    for( unsigned i = 1; i < block_vector.size(); ++i )
+      {
+      const long long gap = block_vector[i].pos() - block_vector[i-1].end();
+      if( gap < min_gap )
+        { if( min_gap < INT_MAX ) same = false; min_gap = gap; }
+      else if( gap != min_gap ) same = false;
+      }
+    if( min_gap >= INT_MAX || same ) break;
+    // walk backwards so that erasing block i does not disturb the blocks
+    // still to be visited
+    for( unsigned i = block_vector.size() - 1; i > 0; --i )
+      {
+      const long long gap = block_vector[i].pos() - block_vector[i-1].end();
+      if( gap == min_gap )
+        {
+        block_vector[i-1].size( block_vector[i-1].size() + gap +
+                                block_vector[i].size() );
+        block_vector.erase( block_vector.begin() + i );
+        }
+      }
+    } while( block_vector.size() > 16 );
+  if( verbosity >= 1 && old_size > block_vector.size() )
+    // both arguments must be unsigned long to match the %lu conversions
+    std::printf( " %lu errors have been grouped in %lu clusters.\n",
+                 old_size, (unsigned long)block_vector.size() );
+  }
+
+
+/* Return true if the color of element 'i' already appears in some element
+   of 'color_vector' with a lower index. */
+bool color_done( const std::vector< int > & color_vector, const int i )
+  {
+  bool seen = false;
+  for( int k = 0; k < i && !seen; ++k )
+    seen = ( color_vector[k] == color_vector[i] );
+  return seen;
+  }
+
+
+// try dividing blocks in 2 color groups at every gap
+/* For every ordered pair of files (i1, i2) having different colors, all the
+   differing blocks are first copied from file i2 to the output file. Then
+   the blocks are replaced one at a time, in increasing order, with the
+   data from file i1, and the member at 'mpos' is tested after each
+   replacement. The last block is never replaced.
+   Returns true as soon as the member in the output file tests OK.
+   Calls cleanup_and_fail( 1 ) if seeking or copying fails. */
+bool try_merge_member2( const std::vector< std::string > & filenames,
+ const long long mpos, const long long msize,
+ const std::vector< Block > & block_vector,
+ const std::vector< int > & color_vector,
+ const std::vector< int > & infd_vector,
+ const char terminator )
+ {
+ const int blocks = block_vector.size();
+ const int files = infd_vector.size();
+ const int variations = files * ( files - 1 ); // number of ordered pairs
+
+ for( int i1 = 0; i1 < files; ++i1 )
+ for( int i2 = 0; i2 < files; ++i2 )
+ {
+ // skip pairs of equal color and files whose color was already tried
+ if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
+ color_done( color_vector, i1 ) ) continue;
+ // start with every block taken from file i2
+ for( int bi = 0; bi < blocks; ++bi )
+ if( !safe_seek( infd_vector[i2], block_vector[bi].pos(), filenames[i2].c_str() ) ||
+ !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
+ !copy_file( infd_vector[i2], outfd, block_vector[bi].size() ) )
+ cleanup_and_fail( 1 );
+ const int infd = infd_vector[i1];
+ // 1-based number of the pair; the slot i2 == i1 is not counted
+ const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
+ for( int bi = 0; bi + 1 < blocks; ++bi )
+ {
+ if( verbosity >= 2 )
+ {
+ std::printf( " Trying variation %d of %d, block %d %c",
+ var, variations, bi + 1, terminator );
+ std::fflush( stdout ); pending_newline = true;
+ }
+ // take block bi from file i1, then rewind the output to the member
+ if( !safe_seek( infd, block_vector[bi].pos(), filenames[i1].c_str() ) ||
+ !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
+ !copy_file( infd, outfd, block_vector[bi].size() ) ||
+ !safe_seek( outfd, mpos, output_filename.c_str() ) )
+ cleanup_and_fail( 1 );
+ long long failure_pos = 0;
+ if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
+ return true;
+ // the failure is before the end of block bi; presumably replacing
+ // later blocks can't help, so give up on this pair
+ if( mpos + failure_pos < block_vector[bi].end() ) break;
+ }
+ }
+ return false;
+ }
+
+
+// merge block by block
+/* Search the combinations obtained by taking each differing block from any
+   of the files. 'file_idx' works like a counter with one digit per block
+   (most significant digit first), each digit being the index of the file
+   that block is read from. After a failed test, the digit of the last
+   block starting at or before the failure position is advanced.
+   Returns true as soon as the member in the output file tests OK, false
+   when the counter is exhausted.
+   Calls cleanup_and_fail( 2 ) if ipow( files, blocks ) saturates at
+   LONG_MAX, and cleanup_and_fail( 1 ) if seeking or copying fails. */
+bool try_merge_member( const std::vector< std::string > & filenames,
+ const long long mpos, const long long msize,
+ const std::vector< Block > & block_vector,
+ const std::vector< int > & color_vector,
+ const std::vector< int > & infd_vector,
+ const char terminator )
+ {
+ const int blocks = block_vector.size();
+ const int files = infd_vector.size();
+ const long variations = ipow( files, blocks );
+ if( variations >= LONG_MAX )
+ {
+ if( files > 2 )
+ show_error( "Too many damaged blocks. Try merging fewer files." );
+ else
+ show_error( "Too many damaged blocks. Merging is not possible." );
+ cleanup_and_fail( 2 );
+ }
+ int bi = 0; // block index
+ std::vector< int > file_idx( blocks, 0 ); // file to read each block from
+
+ while( bi >= 0 )
+ {
+ if( verbosity >= 2 )
+ {
+ // number of the current combination, reading file_idx as a number
+ // in base 'files'
+ long var = 0;
+ for( int i = 0; i < blocks; ++i )
+ var = ( var * files ) + file_idx[i];
+ std::printf( " Trying variation %ld of %ld %c",
+ var + 1, variations, terminator );
+ std::fflush( stdout ); pending_newline = true;
+ }
+ // (re)write blocks bi to blocks-1 from the files currently selected
+ while( bi < blocks )
+ {
+ const int infd = infd_vector[file_idx[bi]];
+ if( !safe_seek( infd, block_vector[bi].pos(), filenames[file_idx[bi]].c_str() ) ||
+ !safe_seek( outfd, block_vector[bi].pos(), output_filename.c_str() ) ||
+ !copy_file( infd, outfd, block_vector[bi].size() ) )
+ cleanup_and_fail( 1 );
+ ++bi;
+ }
+ if( !safe_seek( outfd, mpos, output_filename.c_str() ) )
+ cleanup_and_fail( 1 );
+ long long failure_pos = 0;
+ if( test_member_from_file( outfd, msize, &failure_pos ) == 0 ) return true;
+ // step back over the blocks that begin after the failure position
+ while( bi > 0 && mpos + failure_pos < block_vector[bi-1].pos() ) --bi;
+ // advance the counter: next file not yet tried by color for block bi,
+ // or reset the digit to 0 and carry to the previous block
+ while( --bi >= 0 )
+ {
+ while( ++file_idx[bi] < files &&
+ color_done( color_vector, file_idx[bi] ) );
+ if( file_idx[bi] < files ) break;
+ file_idx[bi] = 0;
+ }
+ }
+ return false; // bi < 0: all the combinations have been tried
+ }
+
+
+// merge a single block split at every possible position
+/* Only applies when there is exactly one differing block and it is larger
+   than one byte; otherwise returns false without doing anything.
+   For every ordered pair of files (i1, i2) having different colors, the
+   block is first copied from file i2 to the output file and then
+   overwritten byte by byte with the data from file i1, testing the member
+   at 'mpos' after each byte. The last byte of the block is never replaced.
+   Returns true as soon as the member in the output file tests OK.
+   Calls cleanup_and_fail( 1 ) if seeking, reading, or writing fails. */
+bool try_merge_member1( const std::vector< std::string > & filenames,
+ const long long mpos, const long long msize,
+ const std::vector< Block > & block_vector,
+ const std::vector< int > & color_vector,
+ const std::vector< int > & infd_vector,
+ const char terminator )
+ {
+ if( block_vector.size() != 1 || block_vector[0].size() <= 1 ) return false;
+ const long long pos = block_vector[0].pos();
+ const long long size = block_vector[0].size();
+ const int files = infd_vector.size();
+ const int variations = files * ( files - 1 ); // number of ordered pairs
+ uint8_t byte;
+
+ for( int i1 = 0; i1 < files; ++i1 )
+ for( int i2 = 0; i2 < files; ++i2 )
+ {
+ // skip pairs of equal color and files whose color was already tried
+ if( i1 == i2 || color_vector[i1] == color_vector[i2] ||
+ color_done( color_vector, i1 ) ) continue;
+ const int infd = infd_vector[i1];
+ // position file i1 at the block once; it is then read one byte per
+ // iteration without seeking again
+ if( !safe_seek( infd, pos, filenames[i1].c_str() ) ||
+ !safe_seek( infd_vector[i2], pos, filenames[i2].c_str() ) ||
+ !safe_seek( outfd, pos, output_filename.c_str() ) ||
+ !copy_file( infd_vector[i2], outfd, size ) )
+ cleanup_and_fail( 1 );
+ // 1-based number of the pair; the slot i2 == i1 is not counted
+ const int var = ( i1 * ( files - 1 ) ) + i2 - ( i2 > i1 ) + 1;
+ for( long long i = 0; i + 1 < size; ++i )
+ {
+ if( verbosity >= 2 )
+ {
+ std::printf( " Trying variation %d of %d, position %lld %c",
+ var, variations, pos + i, terminator );
+ std::fflush( stdout ); pending_newline = true;
+ }
+ // replace the byte at pos + i, then rewind the output to the member
+ if( !safe_seek( outfd, pos + i, output_filename.c_str() ) ||
+ readblock( infd, &byte, 1 ) != 1 ||
+ writeblock( outfd, &byte, 1 ) != 1 ||
+ !safe_seek( outfd, mpos, output_filename.c_str() ) )
+ cleanup_and_fail( 1 );
+ long long failure_pos = 0;
+ if( test_member_from_file( outfd, msize, &failure_pos ) == 0 )
+ return true;
+ // the failure is at or before the byte just replaced; give up on
+ // this pair
+ if( mpos + failure_pos <= pos + i ) break;
+ }
+ }
+ return false;
+ }
+
+} // end namespace
+
+
+/* Copy data from 'infd' to 'outfd' in chunks of up to 64 KiB.
+   If max_size >= 0, exactly 'max_size' bytes must be copied; reaching the
+   end of the input earlier is reported as an error.
+   If max_size < 0, data is copied until the end of the input.
+   Both descriptors may refer to the same file as long as the data is moved
+   to a lower file position or the source and destination don't overlap.
+   Returns true on success, false after reporting an error. */
+bool copy_file( const int infd, const int outfd, const long long max_size )
+  {
+  enum { chunk_size = 65536 };
+  const bool limited = ( max_size >= 0 );
+  uint8_t * const chunk = new uint8_t[chunk_size];
+  // bytes still to be read; never decreases when there is no size limit
+  long long pending = limited ? max_size : chunk_size;
+  long long total = 0;			// bytes written so far
+  bool ok = true;
+
+  while( ok && pending > 0 )
+    {
+    const int wanted = ( pending < chunk_size ) ? (int)pending : (int)chunk_size;
+    if( limited ) pending -= wanted;
+    const int got = readblock( infd, chunk, wanted );
+    if( got != wanted && errno )
+      { show_error( "Error reading input file", errno ); ok = false; }
+    else
+      {
+      if( got > 0 )
+        {
+        const int put = writeblock( outfd, chunk, got );
+        if( put == got ) total += got;
+        else { show_error( "Error writing output file", errno ); ok = false; }
+        }
+      if( got < wanted ) break;			// EOF
+      }
+    }
+  delete[] chunk;
+  if( ok && limited && total != max_size )
+    { show_error( "Input file ends unexpectedly." ); ok = false; }
+  return ok;
+  }
+
+
+/* Decode the member starting at the current position of 'infd', discarding
+   the decoded data and silencing all messages while decoding.
+   Return value: 0 = OK, 1 = bad msize, 2 = data error.
+   On failure, if 'failure_posp' is not null, '*failure_posp' receives the
+   position reached by the decoder, relative to the beginning of the
+   member. */
+int test_member_from_file( const int infd, const unsigned long long msize,
+                           long long * const failure_posp )
+  {
+  Range_decoder rdec( infd );
+  Lzip_header header;
+  rdec.read_data( header.data, header.size );
+  const unsigned dict_size = header.dictionary_size();
+  const bool header_ok = !rdec.finished() && header.check_magic() &&
+                         header.check_version() && isvalid_ds( dict_size );
+  bool decoded = false;
+  if( header_ok )
+    {
+    LZ_decoder decoder( rdec, dict_size, -1 );
+    const int old_verbosity = verbosity;
+    verbosity = -1;				// keep the decoder quiet
+    decoded = ( decoder.decode_member() == 0 );
+    verbosity = old_verbosity;
+    if( decoded && rdec.member_position() == msize ) return 0;
+    }
+  if( failure_posp != 0 ) *failure_posp = rdec.member_position();
+  if( decoded ) return 1;		// decoded fine, but the size is wrong
+  return 2;
+  }
+
+
+/* Try to build a correct file from the damaged copies in 'filenames'.
+   The first file is copied to the output file ('default_output_filename',
+   or the name returned by insert_fixed for the first file if that is
+   empty). Then, for every member, the blocks differing among the copies
+   are located and the merging strategies are tried in this order:
+   try_merge_member2 (only if there is more than one block),
+   try_merge_member, and try_merge_member1.
+   Return value: 0 = files merged (or a file has no errors), otherwise the
+   1 or 2 returned by open_input_files, or 1 if the output can't be opened
+   or closed. Unrecoverable conditions found later are handled through
+   cleanup_and_fail, which presumably does not return. */
+int merge_files( const std::vector< std::string > & filenames,
+                 const std::string & default_output_filename,
+                 const Cl_options & cl_opts, const char terminator,
+                 const bool force )
+  {
+  const int files = filenames.size();
+  std::vector< int > infd_vector( files );
+  Lzip_index lzip_index;
+  struct stat in_stats;
+  const int retval =
+    open_input_files( filenames, infd_vector, cl_opts, lzip_index, &in_stats );
+  if( retval >= 0 ) return retval;	// error, or merging is not needed
+  if( !safe_seek( infd_vector[0], 0, filenames[0].c_str() ) ) return 1;
+
+  const bool to_file = default_output_filename.size();
+  output_filename =
+    to_file ? default_output_filename : insert_fixed( filenames[0] );
+  set_signal_handler();
+  if( !open_outstream( force, true, true, false, to_file ) ) return 1;
+  if( !copy_file( infd_vector[0], outfd ) )		// copy whole file
+    cleanup_and_fail( 1 );
+
+  for( long j = 0; j < lzip_index.members(); ++j )
+    {
+    const long long mpos = lzip_index.mblock( j ).pos();
+    const long long msize = lzip_index.mblock( j ).size();
+    // vector of data blocks differing among the copies of the current member
+    std::vector< Block > block_vector;
+    // different color means members are different
+    std::vector< int > color_vector( files, 0 );
+    if( !diff_member( mpos, msize, filenames, infd_vector, block_vector,
+                      color_vector ) ||
+        !safe_seek( outfd, mpos, output_filename.c_str() ) )
+      cleanup_and_fail( 1 );
+
+    if( block_vector.empty() )		// the member is equal in all files
+      {
+      if( lzip_index.members() > 1 &&
+          test_member_from_file( outfd, msize ) == 0 )
+        continue;
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "Member %ld is damaged and identical in all files."
+                      " Merging is not possible.\n", j + 1 );
+      cleanup_and_fail( 2 );
+      }
+
+    if( verbosity >= 2 )
+      {
+      // the size is cast to unsigned long to match the %lu conversion
+      std::printf( "Merging member %ld of %ld (%lu error%s)\n",
+                   j + 1, lzip_index.members(),
+                   (unsigned long)block_vector.size(),
+                   ( block_vector.size() == 1 ) ? "" : "s" );
+      std::fflush( stdout );
+      }
+
+    bool done = false;
+    if( block_vector.size() > 1 )
+      {
+      maybe_cluster_blocks( block_vector );
+      done = try_merge_member2( filenames, mpos, msize, block_vector,
+                                color_vector, infd_vector, terminator );
+      print_pending_newline( terminator );
+      }
+    // With just one member and one differing block the merge can't succeed.
+    if( !done && ( lzip_index.members() > 1 || block_vector.size() > 1 ) )
+      {
+      done = try_merge_member( filenames, mpos, msize, block_vector,
+                               color_vector, infd_vector, terminator );
+      print_pending_newline( terminator );
+      }
+    if( !done )
+      {
+      done = try_merge_member1( filenames, mpos, msize, block_vector,
+                                color_vector, infd_vector, terminator );
+      print_pending_newline( terminator );
+      }
+    if( !done )
+      {
+      if( verbosity >= 3 )
+        for( unsigned i = 0; i < block_vector.size(); ++i )
+          // 'i + 1' is unsigned, so it is printed with %u
+          std::fprintf( stderr, "area %2u from position %6lld to %6lld\n", i + 1,
+                        block_vector[i].pos(), block_vector[i].end() - 1 );
+      show_error( "Some error areas overlap. Merging is not possible." );
+      cleanup_and_fail( 2 );
+      }
+    }
+
+  if( !close_outstream( &in_stats ) ) return 1;
+  if( verbosity >= 1 )
+    std::fputs( "Input files merged successfully.\n", stdout );
+  return 0;
+  }
diff --git a/mtester.cc b/mtester.cc
new file mode 100644
index 0000000..fb9eb97
--- /dev/null
+++ b/mtester.cc
@@ -0,0 +1,373 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "mtester.h"
+
+
+namespace {
+
+/* Format 'byte' as text for use as a printf argument.
+   Bytes in the ranges 0x20-0x7E and 0xA0-0xFF are shown as a quoted
+   character followed by the hexadecimal value; the rest are shown as the
+   hexadecimal value alone.
+   The result lives in one of 8 static buffers used in rotation, so up to 8
+   results can be used in the same printf call. */
+const char * format_byte( const uint8_t byte )
+  {
+  enum { slots = 8, slot_size = 16 };
+  static char ring[slots][slot_size];
+  static int next_slot = 0;
+  char * const out = ring[next_slot];
+  next_slot = ( next_slot + 1 ) % slots;
+  const bool unprintable = ( byte < 0x20 ) || ( byte > 0x7E && byte < 0xA0 );
+  if( unprintable )
+    snprintf( out, slot_size, " (0x%02X)", byte );
+  else
+    snprintf( out, slot_size, "'%c' (0x%02X)", byte, byte );
+  return out;
+  }
+
+} // end namespace
+
+
+/* Print to stdout, between double quotes, the last 'len' bytes obtained
+   through peek, oldest byte first. Bytes outside the ranges 0x20-0x7E and
+   0xA0-0xFF are printed as '.'. */
+void LZ_mtester::print_block( const int len )
+  {
+  std::fputs( " \"", stdout );
+  int distance = len;
+  while( --distance >= 0 )
+    {
+    const uint8_t raw = peek( distance );
+    const bool printable = ( raw >= 0x20 && raw <= 0x7E ) || raw >= 0xA0;
+    std::fputc( printable ? raw : '.', stdout );
+    }
+  std::fputs( "\"\n", stdout );
+  }
+
+
+/* Make 'buffer2' the output buffer of this object.
+   If some data has been decoded, the first min( data_position(),
+   dictionary_size ) bytes of the current buffer are copied to 'buffer2'.
+   Else only the last byte of 'buffer2' is zeroed, as it is the byte
+   preceding the first one. The buffer is then marked as external. */
+void LZ_mtester::duplicate_buffer( uint8_t * const buffer2 )
+  {
+  const unsigned long long produced = data_position();
+  if( produced == 0 )
+    buffer2[dictionary_size-1] = 0;		// prev_byte of first byte
+  else
+    {
+    const unsigned long long limit = dictionary_size;
+    std::memcpy( buffer2, buffer, ( produced < limit ) ? produced : limit );
+    }
+  buffer = buffer2;
+  buffer_is_external = true;
+  }
+
+
+/* Process the bytes decoded since the last flush (those in the range
+   [stream_pos, pos) of the buffer): update the CRC, update the MD5 sum if
+   there is one, and write the bytes to 'outfd' if it is a valid descriptor.
+   When the buffer is full, its size is added to 'partial_data_pos' and the
+   position wraps to 0. Throws Error if the write fails. */
+void LZ_mtester::flush_data()
+  {
+  if( pos <= stream_pos ) return;		// nothing new to flush
+  uint8_t * const pending = buffer + stream_pos;
+  const int pending_size = pos - stream_pos;
+  crc32.update_buf( crc_, pending, pending_size );
+  if( md5sum != 0 ) md5sum->md5_update( pending, pending_size );
+  if( outfd >= 0 && writeblock( outfd, pending, pending_size ) != pending_size )
+    throw Error( "Write error" );
+  if( pos >= dictionary_size )
+    {
+    partial_data_pos += pos;
+    pos = 0;
+    pos_wrapped = true;
+    }
+  stream_pos = pos;
+  }
+
+
+/* Get the member trailer from the range decoder and compare the data CRC,
+   data size, and member size stored in it with the values computed while
+   decoding. Returns true if the trailer is available and the three values
+   match.
+   If verbosity >= 0 and 'f' is not null, a message is written to 'f' for
+   each problem found. If 'byte_pos' is nonzero, it is written once, before
+   the first message. */
+bool LZ_mtester::check_trailer( FILE * const f, unsigned long long byte_pos )
+  {
+  const Lzip_trailer * const trailer = rdec.get_trailer();
+  const bool report = ( verbosity >= 0 && f != 0 );
+  if( trailer == 0 )
+    {
+    if( report )
+      {
+      if( byte_pos != 0 ) std::fprintf( f, "byte %llu\n", byte_pos );
+      std::fputs( "Can't get trailer.\n", f );
+      }
+    return false;
+    }
+  bool ok = true;
+
+  const unsigned stored_crc = trailer->data_crc();
+  if( stored_crc != crc() )
+    {
+    ok = false;
+    if( report )
+      {
+      if( byte_pos != 0 )
+        { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
+      std::fprintf( f, "CRC mismatch; stored %08X, computed %08X\n",
+                    stored_crc, crc() );
+      }
+    }
+
+  const unsigned long long computed_dsize = data_position();
+  const unsigned long long stored_dsize = trailer->data_size();
+  if( stored_dsize != computed_dsize )
+    {
+    ok = false;
+    if( report )
+      {
+      if( byte_pos != 0 )
+        { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
+      std::fprintf( f, "Data size mismatch; stored %llu (0x%llX), computed %llu (0x%llX)\n",
+                    stored_dsize, stored_dsize, computed_dsize, computed_dsize );
+      }
+    }
+
+  const unsigned long computed_msize = rdec.member_position();
+  const unsigned long long stored_msize = trailer->member_size();
+  if( stored_msize != computed_msize )
+    {
+    ok = false;
+    if( report )
+      {
+      if( byte_pos != 0 )
+        { std::fprintf( f, "byte %llu\n", byte_pos ); byte_pos = 0; }
+      std::fprintf( f, "Member size mismatch; stored %llu (0x%llX), computed %lu (0x%lX)\n",
+                    stored_msize, stored_msize, computed_msize, computed_msize );
+      }
+    }
+  return ok;
+  }
+
+
+/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
+ 3 = trailer error, 4 = unknown marker found,
+ -1 = pos_limit reached.
+ Decodes the member until member_position() reaches 'mpos_limit' or
+ data_position() reaches 'dpos_limit'; -1 is also returned, without
+ decoding anything, if 'mpos_limit' is smaller than the header size
+ plus 5.
+ 'f' and 'byte_pos' are passed to check_trailer and are also used to
+ report an unsupported marker when verbosity >= 0 and 'f' is not null. */
+int LZ_mtester::test_member( const unsigned long mpos_limit,
+ const unsigned long long dpos_limit,
+ FILE * const f, const unsigned long long byte_pos )
+ {
+ if( mpos_limit < Lzip_header::size + 5 ) return -1;
+ if( member_position() == Lzip_header::size ) rdec.load(); // first call
+ while( !rdec.finished() )
+ {
+ if( member_position() >= mpos_limit || data_position() >= dpos_limit )
+ { flush_data(); return -1; }
+ const int pos_state = data_position() & pos_state_mask;
+ if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
+ {
+ // literal byte
+ Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
+ if( state.is_char_set_char() )
+ put_byte( rdec.decode_tree8( bm ) );
+ else
+ put_byte( rdec.decode_matched( bm, peek( rep0 ) ) );
+ continue;
+ }
+ // match or repeated match
+ int len;
+ if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
+ {
+ if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
+ {
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+ { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+ }
+ else
+ {
+ // move the selected distance (rep1, rep2, or rep3) to rep0 and
+ // shift the distances that were ahead of it
+ unsigned distance;
+ if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
+ distance = rep1;
+ else
+ {
+ if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
+ distance = rep2;
+ else
+ { distance = rep3; rep3 = rep2; }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state.set_rep();
+ len = rdec.decode_len( rep_len_model, pos_state );
+ }
+ else // match
+ {
+ len = rdec.decode_len( match_len_model, pos_state );
+ unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( distance >= start_dis_model )
+ {
+ const unsigned dis_slot = distance;
+ const int direct_bits = ( dis_slot >> 1 ) - 1;
+ distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ if( dis_slot < end_dis_model )
+ distance += rdec.decode_tree_reversed(
+ bm_dis + ( distance - dis_slot ), direct_bits );
+ else
+ {
+ distance +=
+ rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ distance += rdec.decode_tree_reversed4( bm_align );
+ if( distance == 0xFFFFFFFFU ) // marker found
+ {
+ rdec.normalize();
+ flush_data();
+ if( len == min_match_len ) // End Of Stream marker
+ { if( check_trailer( f, byte_pos ) ) return 0; else return 3; }
+ if( verbosity >= 0 && f )
+ {
+ if( byte_pos ) std::fprintf( f, "byte %llu\n", byte_pos );
+ std::fprintf( f, "Unsupported marker code '%d'\n", len );
+ }
+ return 4;
+ }
+ }
+ }
+ rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
+ if( rep0 > max_rep0 ) max_rep0 = rep0; // largest distance seen
+ state.set_match();
+ // the distance points outside the dictionary or before the first byte
+ if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
+ { if( outfd >= 0 ) { flush_data(); } return 1; }
+ }
+ copy_block( rep0, len );
+ }
+ if( outfd >= 0 ) flush_data(); // else no need to flush if error
+ return 2;
+ }
+
+
+/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
+ 3 = trailer error, 4 = unknown marker found.
+ Decodes the whole member while counting the packets and recording the
+ largest packet, the largest marker, and the largest match distance.
+ 'dpos' and 'mpos' are added to the data and member positions that are
+ printed or recorded. If 'show_packets' is true, a line describing each
+ packet is printed to stdout. */
+int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
+ const bool show_packets )
+ {
+ rdec.load();
+ unsigned old_tmpos = member_position(); // truncated member position
+ while( !rdec.finished() )
+ {
+ const unsigned long long dp = data_position() + dpos;
+ // NOTE(review): the 4 presumably accounts for the bytes that the range
+ // decoder keeps loaded in 'code' ahead of the packet - confirm
+ const unsigned long long mp = member_position() + mpos - 4;
+ const unsigned tmpos = member_position();
+ set_max_packet( tmpos - old_tmpos, mp ); // size of the previous packet
+ old_tmpos = tmpos;
+ ++total_packets_;
+ const int pos_state = data_position() & pos_state_mask;
+ if( rdec.decode_bit( bm_match[state()][pos_state] ) == 0 ) // 1st bit
+ {
+ // literal byte
+ Bit_model * const bm = bm_literal[get_lit_state(peek_prev())];
+ if( state.is_char_set_char() )
+ {
+ const uint8_t cur_byte = rdec.decode_tree8( bm );
+ put_byte( cur_byte );
+ if( show_packets )
+ std::printf( "%6llu %6llu literal %s\n",
+ mp, dp, format_byte( cur_byte ) );
+ }
+ else
+ {
+ const uint8_t match_byte = peek( rep0 );
+ const uint8_t cur_byte = rdec.decode_matched( bm, match_byte );
+ put_byte( cur_byte );
+ if( show_packets )
+ std::printf( "%6llu %6llu literal %s, match byte %6llu %s\n",
+ mp, dp, format_byte( cur_byte ), dp - rep0 - 1,
+ format_byte( match_byte ) );
+ }
+ continue;
+ }
+ // match or repeated match
+ int len;
+ if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
+ {
+ int rep = 0; // which of rep0-rep3 was selected
+ if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
+ {
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+ {
+ if( show_packets )
+ std::printf( "%6llu %6llu shortrep %s %6u (%6llu)\n",
+ mp, dp, format_byte( peek( rep0 ) ),
+ rep0 + 1, dp - rep0 - 1 );
+ state.set_short_rep(); put_byte( peek( rep0 ) ); continue;
+ }
+ }
+ else
+ {
+ unsigned distance;
+ if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
+ { distance = rep1; rep = 1; }
+ else
+ {
+ if( rdec.decode_bit( bm_rep2[state()] ) == 0 ) // 5th bit
+ { distance = rep2; rep = 2; }
+ else
+ { distance = rep3; rep3 = rep2; rep = 3; }
+ rep2 = rep1;
+ }
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ state.set_rep();
+ len = rdec.decode_len( rep_len_model, pos_state );
+ if( show_packets )
+ std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
+ mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
+ }
+ else // match
+ {
+ len = rdec.decode_len( match_len_model, pos_state );
+ unsigned distance = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] );
+ if( distance >= start_dis_model )
+ {
+ const unsigned dis_slot = distance;
+ const int direct_bits = ( dis_slot >> 1 ) - 1;
+ distance = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ if( dis_slot < end_dis_model )
+ distance += rdec.decode_tree_reversed(
+ bm_dis + ( distance - dis_slot ), direct_bits );
+ else
+ {
+ distance +=
+ rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
+ distance += rdec.decode_tree_reversed4( bm_align );
+ if( distance == 0xFFFFFFFFU ) // marker found
+ {
+ rdec.normalize();
+ flush_data();
+ const unsigned tmpos = member_position();
+ set_max_marker( tmpos - old_tmpos );
+ old_tmpos = tmpos;
+ if( show_packets )
+ std::printf( "%6llu %6llu marker code '%d'\n", mp, dp, len );
+ if( len == min_match_len ) // End Of Stream marker
+ {
+ if( show_packets )
+ std::printf( "%6llu %6llu member trailer\n",
+ mpos + member_position(), dpos + data_position() );
+ if( check_trailer( show_packets ? stdout : 0 ) ) return 0;
+ return 3;
+ }
+ if( len == min_match_len + 1 ) // Sync Flush marker
+ { rdec.load(); continue; }
+ return 4;
+ }
+ }
+ }
+ rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = distance;
+ if( rep0 > max_rep0 ) { max_rep0 = rep0; max_rep0_pos = mp; }
+ state.set_match();
+ // printed before the distance is validated below, so the position in
+ // parentheses is shown even for an invalid distance
+ if( show_packets )
+ std::printf( "%6llu %6llu match %6u,%3d (%6lld)",
+ mp, dp, rep0 + 1, len, dp - rep0 - 1 );
+ if( rep0 >= dictionary_size || ( rep0 >= pos && !pos_wrapped ) )
+ { flush_data(); if( show_packets ) std::fputc( '\n', stdout );
+ return 1; }
+ }
+ copy_block( rep0, len );
+ if( show_packets ) print_block( len ); // completes the rep/match line
+ }
+ flush_data();
+ return 2;
+ }
diff --git a/mtester.h b/mtester.h
new file mode 100644
index 0000000..49c50b1
--- /dev/null
+++ b/mtester.h
@@ -0,0 +1,395 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Range decoder that reads the compressed data from a memory buffer.
+   The buffer is not owned by this class. Reading starts at offset
+   Lzip_header::size of the buffer, and reads past the end of the buffer
+   return 0xFF instead of failing. */
+class Range_mtester
+ {
+ const uint8_t * const buffer; // input buffer
+ const long buffer_size;
+ long pos; // current pos in buffer
+ uint32_t code;
+ uint32_t range;
+ bool at_stream_end; // not referenced by the member functions below
+
+public:
+ Range_mtester( const uint8_t * const buf, const long buf_size )
+ :
+ buffer( buf ),
+ buffer_size( buf_size ),
+ pos( Lzip_header::size ),
+ code( 0 ),
+ range( 0xFFFFFFFFU ),
+ at_stream_end( false )
+ {}
+
+ // true when all the bytes of the buffer have been consumed
+ bool finished() { return pos >= buffer_size; }
+ // offset in the buffer of the next byte to be read
+ unsigned long member_position() const { return pos; }
+
+ uint8_t get_byte()
+ {
+ // 0xFF avoids decoder error if member is truncated at EOS marker
+ if( finished() ) return 0xFF;
+ return buffer[pos++];
+ }
+
+ /* Return a pointer to the trailer at the current position and skip it,
+ or 0 if fewer than Lzip_trailer::size bytes remain in the buffer. */
+ const Lzip_trailer * get_trailer()
+ {
+ if( buffer_size - pos < Lzip_trailer::size ) return 0;
+ const Lzip_trailer * const p = (const Lzip_trailer *)( buffer + pos );
+ pos += Lzip_trailer::size;
+ return p;
+ }
+
+ // reset the decoder state and read the first 5 bytes of the stream
+ void load()
+ {
+ code = 0;
+ range = 0xFFFFFFFFU;
+ get_byte(); // discard first byte of the LZMA stream
+ for( int i = 0; i < 4; ++i ) code = ( code << 8 ) | get_byte();
+ }
+
+ // shift in one more input byte when the top 8 bits of 'range' are zero
+ void normalize()
+ {
+ if( range <= 0x00FFFFFFU )
+ { range <<= 8; code = ( code << 8 ) | get_byte(); }
+ }
+
+ // decode 'num_bits' bits without using a bit model, first bit decoded
+ // being the most significant one
+ unsigned decode( const int num_bits )
+ {
+ unsigned symbol = 0;
+ for( int i = num_bits; i > 0; --i )
+ {
+ normalize();
+ range >>= 1;
+// symbol <<= 1;
+// if( code >= range ) { code -= range; symbol |= 1; }
+ // same as the two lines above, written without a branch
+ const bool bit = ( code >= range );
+ symbol <<= 1; symbol += bit;
+ code -= range & ( 0U - bit );
+ }
+ return symbol;
+ }
+
+ // decode one bit using 'bm' and adapt the probability of 'bm'
+ bool decode_bit( Bit_model & bm )
+ {
+ normalize();
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability +=
+ ( bit_model_total - bm.probability ) >> bit_model_move_bits;
+ return 0;
+ }
+ else
+ {
+ code -= bound;
+ range -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ return 1;
+ }
+ }
+
+ // decode one bit using 'bm' and append it to 'symbol' as its lowest bit
+ void decode_symbol_bit( Bit_model & bm, unsigned & symbol )
+ {
+ normalize();
+ symbol <<= 1;
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability +=
+ ( bit_model_total - bm.probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ code -= bound;
+ range -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ symbol |= 1;
+ }
+ }
+
+ // decode one bit using 'bm'; the bit is appended to 'model' as its lowest
+ // bit and stored in bit 'i' of 'symbol'
+ void decode_symbol_bit_reversed( Bit_model & bm, unsigned & model,
+ unsigned & symbol, const int i )
+ {
+ normalize();
+ model <<= 1;
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability +=
+ ( bit_model_total - bm.probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ code -= bound;
+ range -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ model |= 1;
+ symbol |= 1 << i;
+ }
+ }
+
+ // decode a 6-bit symbol; the bits decoded so far select the next model
+ unsigned decode_tree6( Bit_model bm[] )
+ {
+ unsigned symbol = 1;
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ return symbol & 0x3F; // remove the leading 1
+ }
+
+ // decode an 8-bit symbol; the bits decoded so far select the next model
+ unsigned decode_tree8( Bit_model bm[] )
+ {
+ unsigned symbol = 1;
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ return symbol & 0xFF; // remove the leading 1
+ }
+
+ // decode a symbol of 'num_bits' bits, least significant bit first
+ unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
+ {
+ unsigned model = 1;
+ unsigned symbol = 0;
+ for( int i = 0; i < num_bits; ++i )
+ decode_symbol_bit_reversed( bm[model], model, symbol, i );
+ return symbol;
+ }
+
+ // unrolled version of decode_tree_reversed for 4 bits
+ unsigned decode_tree_reversed4( Bit_model bm[] )
+ {
+ unsigned model = 1;
+ unsigned symbol = 0;
+ decode_symbol_bit_reversed( bm[model], model, symbol, 0 );
+ decode_symbol_bit_reversed( bm[model], model, symbol, 1 );
+ decode_symbol_bit_reversed( bm[model], model, symbol, 2 );
+ decode_symbol_bit_reversed( bm[model], model, symbol, 3 );
+ return symbol;
+ }
+
+ /* Decode a byte using the bits of 'match_byte' to select the models.
+ Once a decoded bit differs from the corresponding bit of 'match_byte',
+ the remaining bits are decoded with the models at the start of 'bm'. */
+ unsigned decode_matched( Bit_model bm[], unsigned match_byte )
+ {
+ Bit_model * const bm1 = bm + 0x100;
+ unsigned symbol = 1;
+ while( symbol < 0x100 )
+ {
+ const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
+ const bool bit = decode_bit( bm1[symbol+match_bit] );
+ symbol <<= 1; symbol |= bit;
+ if( match_bit >> 8 != bit )
+ {
+ while( symbol < 0x100 ) decode_symbol_bit( bm[symbol], symbol );
+ break;
+ }
+ }
+ return symbol & 0xFF;
+ }
+
+ /* Decode a match length. Two choice bits select the low, mid, or high
+ range; the low and mid ranges take 3 more bits and the high range
+ takes 8. The result is offset by min_match_len plus the number of
+ symbols in the ranges below the one selected. */
+ unsigned decode_len( Len_model & lm, const int pos_state )
+ {
+ Bit_model * bm;
+ unsigned mask, offset, symbol = 1;
+
+ if( decode_bit( lm.choice1 ) == 0 )
+ { bm = lm.bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
+ if( decode_bit( lm.choice2 ) == 0 )
+ { bm = lm.bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
+ bm = lm.bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+len3: // the last 3 bits are decoded the same way for the three ranges
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ decode_symbol_bit( bm[symbol], symbol );
+ return ( symbol & mask ) + min_match_len + offset;
+ }
+ };
+
+class MD5SUM; // forward declaration
+
+// Decoder/tester for one member held in memory. Compressed bytes are read
+// through 'rdec' and decoded into a circular buffer of 'dictionary_size'
+// bytes, while statistics about packets and distances are recorded and
+// exposed through the accessors below.
+class LZ_mtester
+ {
+ unsigned long long partial_data_pos;
+ Range_mtester rdec;
+ const unsigned dictionary_size;
+ uint8_t * buffer; // output buffer
+ unsigned pos; // current pos in buffer
+ unsigned stream_pos; // first byte not yet written to file
+ uint32_t crc_;
+ const int outfd; // output file descriptor
+ unsigned rep0; // rep[0-3] latest four distances
+ unsigned rep1; // used for efficient coding of
+ unsigned rep2; // repeated distances
+ unsigned rep3;
+ State state;
+ MD5SUM * const md5sum;
+ unsigned long long total_packets_; // total number of packets in member
+ unsigned long long max_rep0_pos; // file position of maximum distance
+ unsigned max_rep0; // maximum distance found
+ std::vector< unsigned long long > max_packet_posv_; // file pos of large packets
+ unsigned max_packet_size_; // maximum packet size found
+ unsigned max_marker_size_; // maximum marker size found
+ bool pos_wrapped;
+ bool buffer_is_external;
+
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
+ Bit_model bm_match[State::states][pos_states];
+ Bit_model bm_rep[State::states];
+ Bit_model bm_rep0[State::states];
+ Bit_model bm_rep1[State::states];
+ Bit_model bm_rep2[State::states];
+ Bit_model bm_len[State::states][pos_states];
+ Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
+ Bit_model bm_dis[modeled_distances-end_dis_model+1];
+ Bit_model bm_align[dis_align_size];
+
+ Len_model match_len_model;
+ Len_model rep_len_model;
+
+ void print_block( const int len );
+ void flush_data();
+ bool check_trailer( FILE * const f = 0, unsigned long long byte_pos = 0 );
+
+ // Return the byte just before 'pos'; when pos is 0 this is the last byte
+ // of the buffer (index dictionary_size - 1).
+ uint8_t peek_prev() const
+ { return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
+
+ // Return the byte located 'distance' + 1 positions before 'pos', adding
+ // dictionary_size to the index when pos <= distance (wrap around).
+ uint8_t peek( const unsigned distance ) const
+ {
+ const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) +
+ pos - distance - 1;
+ return buffer[i];
+ }
+
+ // Store 'b' at 'pos' and call flush_data() once pos reaches dictionary_size.
+ void put_byte( const uint8_t b )
+ {
+ buffer[pos] = b;
+ if( ++pos >= dictionary_size ) flush_data();
+ }
+
+ // Copy 'len' bytes starting 'distance' + 1 positions before 'pos' to 'pos'.
+ // 'fast' means neither source nor destination reaches the end of the
+ // buffer; 'fast2' additionally means the two ranges do not overlap, so
+ // memcpy can be used. Otherwise bytes are copied one at a time.
+ void copy_block( const unsigned distance, unsigned len )
+ {
+ unsigned lpos = pos, i = lpos - distance - 1;
+ bool fast, fast2;
+ if( lpos > distance )
+ {
+ fast = ( len < dictionary_size - lpos );
+ fast2 = ( fast && len <= lpos - i );
+ }
+ else
+ {
+ // source index underflowed; bring it back inside the buffer
+ i += dictionary_size;
+ fast = ( len < dictionary_size - i ); // (i == pos) may happen
+ fast2 = ( fast && len <= i - lpos );
+ }
+ if( fast ) // no wrap
+ {
+ pos += len;
+ if( fast2 ) // no wrap, no overlap
+ std::memcpy( buffer + lpos, buffer + i, len );
+ else
+ for( ; len > 0; --len ) buffer[lpos++] = buffer[i++];
+ }
+ // slow path: either index may reach dictionary_size during the copy
+ else for( ; len > 0; --len )
+ {
+ buffer[pos] = buffer[i];
+ if( ++pos >= dictionary_size ) flush_data();
+ if( ++i >= dictionary_size ) i = 0;
+ }
+ }
+
+// Record a packet of 'new_size' bytes ending at 'pos'. Sizes of 0 or smaller
+// than the current maximum are ignored; a larger size resets the list of
+// positions; an equal size appends to it.
+void set_max_packet( const unsigned new_size, const unsigned long long pos )
+ {
+ if( max_packet_size_ > new_size || new_size == 0 ) return;
+ if( max_packet_size_ < new_size ) // new max size
+ { max_packet_size_ = new_size; max_packet_posv_.clear(); }
+ max_packet_posv_.push_back( pos - new_size ); // pos of first byte
+ }
+
+// Keep the largest marker size seen so far.
+void set_max_marker( const unsigned new_size )
+ { if( max_marker_size_ < new_size ) max_marker_size_ = new_size; }
+
+public:
+ // Allocate a buffer of 'dict_size' bytes and initialize the decoder state.
+ // 'ofd' and 'md5sum_' are optional (default -1 and 0).
+ LZ_mtester( const uint8_t * const ibuf, const long ibuf_size,
+ const unsigned dict_size, const int ofd = -1,
+ MD5SUM * const md5sum_ = 0 )
+ :
+ partial_data_pos( 0 ),
+ rdec( ibuf, ibuf_size ),
+ dictionary_size( dict_size ),
+ buffer( new uint8_t[dictionary_size] ),
+ pos( 0 ),
+ stream_pos( 0 ),
+ crc_( 0xFFFFFFFFU ),
+ outfd( ofd ),
+ rep0( 0 ),
+ rep1( 0 ),
+ rep2( 0 ),
+ rep3( 0 ),
+ md5sum( md5sum_ ),
+ total_packets_( -1ULL ), // don't count EOS marker
+ max_rep0_pos( 0 ),
+ max_rep0( 0 ),
+ max_packet_size_( 0 ),
+ max_marker_size_( 0 ),
+ pos_wrapped( false ), buffer_is_external( false )
+ // prev_byte of first byte; also for peek( 0 ) on corrupt file
+ { buffer[dictionary_size-1] = 0; }
+
+ // The buffer is freed only if this object still owns it.
+ ~LZ_mtester() { if( !buffer_is_external ) delete[] buffer; }
+
+ unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
+ unsigned long long data_position() const { return partial_data_pos + pos; }
+ bool finished() { return rdec.finished(); }
+ unsigned long member_position() const { return rdec.member_position(); }
+ unsigned long long total_packets() const { return total_packets_; }
+ unsigned long long max_distance_pos() const { return max_rep0_pos; }
+ unsigned max_distance() const { return max_rep0 + 1; }
+ const std::vector< unsigned long long > & max_packet_posv() const
+ { return max_packet_posv_; }
+ unsigned max_packet_size() const { return max_packet_size_; }
+ unsigned max_marker_size() const { return max_marker_size_; }
+
+ // Expose the buffer as two parts. Returns 'buffer' with *sizep bytes
+ // ('pos', or dictionary_size if the buffer has wrapped and pos is 0).
+ // *prev_bufferp is set to buffer + pos with *prev_sizep bytes
+ // (dictionary_size - pos if wrapped and pos > 0, else 0).
+ const uint8_t * get_buffers( const uint8_t ** const prev_bufferp,
+ int * const sizep, int * const prev_sizep ) const
+ { *sizep = ( pos_wrapped && pos == 0 ) ? dictionary_size : pos;
+ *prev_sizep = ( pos_wrapped && pos > 0 ) ? dictionary_size - pos : 0;
+ *prev_bufferp = buffer + pos; return buffer; }
+
+ void duplicate_buffer( uint8_t * const buffer2 );
+
+ // these two functions set max_rep0
+ int test_member( const unsigned long mpos_limit = LONG_MAX,
+ const unsigned long long dpos_limit = LLONG_MAX,
+ FILE * const f = 0, const unsigned long long byte_pos = 0 );
+ /* this function also sets max_rep0_pos, total_packets_, max_packet_size_,
+ max_packet_posv_, and max_marker_size_ */
+ int debug_decode_member( const long long dpos, const long long mpos,
+ const bool show_packets );
+ };
diff --git a/nrep_stats.cc b/nrep_stats.cc
new file mode 100644
index 0000000..2c97d4d
--- /dev/null
+++ b/nrep_stats.cc
@@ -0,0 +1,121 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "lzip_index.h"
+
+
+/* Show how well the frequency of sequences of N repeated bytes in LZMA data
+   matches the value expected for random data. ( 1 / 2^( 8 * N ) )
+   Print cumulative data for all files followed by the name of the first
+   file with the longest sequence.
+   If 'repeated_byte' is in the range 0 to 255, only sequences of that byte
+   value are counted; any other value counts sequences of every byte value.
+   Return 0 if all files were processed, else the worst status set through
+   set_retval.
+*/
+int print_nrep_stats( const std::vector< std::string > & filenames,
+                      const Cl_options & cl_opts, const int repeated_byte )
+  {
+  std::vector< unsigned long > len_vector;	// len_vector[len] = times found
+  unsigned long long lzma_size = 0;		// total size of LZMA data
+  unsigned long best_pos = 0;
+  int best_name = -1, retval = 0;
+  const bool count_all = ( repeated_byte < 0 || repeated_byte >= 256 );
+  bool stdin_used = false;
+  for( unsigned i = 0; i < filenames.size(); ++i )
+    {
+    const bool from_stdin = ( filenames[i] == "-" );
+    if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
+    const char * const input_filename =
+      from_stdin ? "(stdin)" : filenames[i].c_str();
+    struct stat in_stats;				// not used
+    const int infd = from_stdin ? STDIN_FILENO :
+      open_instream( input_filename, &in_stats, false, true );
+    if( infd < 0 ) { set_retval( retval, 1 ); continue; }
+
+    const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
+                                 cl_opts.ignore_errors );
+    if( lzip_index.retval() != 0 )
+      {
+      show_file_error( input_filename, lzip_index.error().c_str() );
+      set_retval( retval, lzip_index.retval() );
+      close( infd );
+      continue;
+      }
+    const unsigned long long cdata_size = lzip_index.cdata_size();
+    if( !fits_in_size_t( cdata_size ) )		// mmap uses size_t
+      { show_file_error( input_filename, "Input file is too large for mmap." );
+        set_retval( retval, 1 ); close( infd ); continue; }
+    const uint8_t * const buffer =
+      (const uint8_t *)mmap( 0, cdata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
+    close( infd );			// the mapping stays valid after close
+    if( buffer == MAP_FAILED )
+      { show_file_error( input_filename, "Can't mmap", errno );
+        set_retval( retval, 1 ); continue; }
+    for( long j = 0; j < lzip_index.members(); ++j )
+      {
+      const Block & mb = lzip_index.mblock( j );
+      long pos = mb.pos() + 7;		// skip header (+1 byte) and
+      const long end = mb.end() - 20;	// trailer of each member
+      lzma_size += end - pos;
+      while( pos < end )
+        {
+        const uint8_t byte = buffer[pos++];
+        if( buffer[pos] == byte )
+          {
+          unsigned len = 2;
+          ++pos;
+          while( pos < end && buffer[pos] == byte ) { ++pos; ++len; }
+          if( !count_all && repeated_byte != (int)byte ) continue;
+          // a new longest sequence grows the vector and becomes the best
+          if( len >= len_vector.size() ) { len_vector.resize( len + 1 );
+            best_name = i; best_pos = pos - len; }
+          ++len_vector[len];
+          }
+        }
+      }
+    munmap( (void *)buffer, cdata_size );
+    }
+
+  if( verbosity < 0 ) return retval;
+  if( count_all )
+    std::fputs( "\nShowing repeated sequences of any byte value.\n", stdout );
+  else
+    std::printf( "\nShowing repeated sequences of the byte value 0x%02X\n",
+                 repeated_byte );
+  std::printf( "Total size of LZMA data: %llu bytes (%sBytes)\n",
+               lzma_size, format_num( lzma_size, 999 ) );
+  for( unsigned len = 2; len < len_vector.size(); ++len )
+    if( len_vector[len] > 0 )
+      {
+      /* The expected interval is 2^bits bytes. Shifting a 64-bit value by
+         64 or more bits is undefined, so long sequences are printed as a
+         power of two instead of being passed to format_num. */
+      const unsigned bits = 8 * ( len - count_all );
+      if( bits < 64 )
+        std::printf( "len %u found %lu times, 1 every %llu bytes "
+                     "(expected 1 every %sB)\n",
+                     len, len_vector[len], lzma_size / len_vector[len],
+                     format_num( 1ULL << bits, -1ULL, -1 ) );
+      else
+        std::printf( "len %u found %lu times, 1 every %llu bytes "
+                     "(expected 1 every 2^%u B)\n",
+                     len, len_vector[len], lzma_size / len_vector[len], bits );
+      }
+  if( best_name >= 0 )
+    std::printf( "Longest sequence found at position %lu of '%s'\n",
+                 best_pos, filenames[best_name].c_str() );
+  return retval;
+  }
diff --git a/range_dec.cc b/range_dec.cc
new file mode 100644
index 0000000..0d03264
--- /dev/null
+++ b/range_dec.cc
@@ -0,0 +1,186 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "decoder.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+/* Decompress from 'infd' the member starting at file position 'mpos',
+   passing 'outskip' and 'outend' to the decoder to select the bytes wanted.
+   Diagnostics go through 'pp' and stderr. Return true on success. */
+bool decompress_member( const int infd, const Cl_options & cl_opts,
+          const Pretty_print & pp, const unsigned long long mpos,
+          const unsigned long long outskip, const unsigned long long outend )
+  {
+  Range_decoder rdec( infd );
+  Lzip_header header;
+  rdec.read_data( header.data, header.size );
+  if( rdec.finished() )				// End Of File
+    {
+    pp( "File ends unexpectedly at member header." );
+    return false;
+    }
+  if( !header.check_magic() )
+    { pp( bad_magic_msg ); return false; }
+  if( !header.check_version() )
+    { pp( bad_version( header.version() ) ); return false; }
+  const unsigned dict_size = header.dictionary_size();
+  if( !isvalid_ds( dict_size ) )
+    { pp( bad_dict_msg ); return false; }
+
+  if( verbosity >= 2 ) pp();
+
+  LZ_decoder decoder( rdec, dict_size, outfd, outskip, outend );
+  const int status = decoder.decode_member( cl_opts, pp );
+  if( status != 0 )
+    {
+    // only statuses 1 and 2 are reported here
+    if( status <= 2 && verbosity >= 0 )
+      {
+      const char * const what =
+        ( status == 2 ) ? "File ends unexpectedly" : "Decoder error";
+      pp();
+      std::fprintf( stderr, "%s at pos %llu\n", what,
+                    mpos + rdec.member_position() );
+      }
+    return false;
+    }
+  const unsigned long long requested = outend - outskip;
+  if( decoder.data_position() < requested )	// member is too short
+    {
+    if( verbosity >= 0 )
+      {
+      const char * const lead = ( verbosity >= 2 ) ? "\n" : "";
+      pp();
+      std::fprintf( stderr,
+        "%sMember at pos %llu contains only %llu bytes of %llu requested.\n",
+        lead, mpos, decoder.data_position() - outskip, requested );
+      }
+    return false;
+    }
+  if( verbosity >= 2 ) std::fputs( "done\n", stderr );
+  return true;
+  }
+
+} // end namespace
+
+
+/* Format 'num' with an SI or binary multiplier into one of a small ring of
+   static buffers and return a pointer to it.
+   A multiplier is applied while the scaled value is at least the factor
+   (1000 or 1024); scaling stops early if the value is <= 'limit' and is not
+   an exact multiple of the next multiplier.
+   'set_prefix' > 0 selects SI prefixes, < 0 binary prefixes, 0 keeps the
+   last selection (initially SI). */
+const char * format_num( unsigned long long num,
+                         unsigned long long limit,
+                         const int set_prefix )
+  {
+  enum { buffers = 8, bufsize = 32, n = 10 };
+  const char * const decimal_names[n] =
+    { "k", "M", "G", "T", "P", "E", "Z", "Y", "R", "Q" };
+  const char * const binary_names[n] =
+    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi", "Ri", "Qi" };
+  static char ring[buffers][bufsize];	// circle of static buffers for printf
+  static int slot = 0;
+  static bool use_si = true;
+
+  if( set_prefix > 0 ) use_si = true;
+  else if( set_prefix < 0 ) use_si = false;
+
+  const unsigned factor = use_si ? 1000 : 1024;
+  const char * const * const names = use_si ? decimal_names : binary_names;
+  char * const out = ring[slot];
+  slot = ( slot + 1 ) % buffers;
+
+  unsigned long long den = 1;
+  const char * unit = "";
+  int idx = 0;
+  // the last condition stops before 'den' would overflow
+  while( idx < n && num / den >= factor && den * factor > den )
+    {
+    const unsigned long long next = den * factor;
+    if( num / den <= limit && num % next != 0 ) break;
+    den = next;
+    unit = names[idx];
+    ++idx;
+    }
+
+  if( num % den != 0 )
+    snprintf( out, bufsize, "%3.2f %s", (double)num / den, unit );
+  else
+    snprintf( out, bufsize, "%llu %s", num / den, unit );
+  return out;
+  }
+
+
+// Seek 'fd' to absolute offset 'pos'. Report a "Seek error" for 'filename'
+// and return false if lseek does not land exactly on 'pos'.
+bool safe_seek( const int fd, const long long pos,
+                const char * const filename )
+  {
+  const bool ok = ( lseek( fd, pos, SEEK_SET ) == pos );
+  if( !ok ) show_file_error( filename, "Seek error", errno );
+  return ok;
+  }
+
+
+/* Decompress the bytes of 'range' (positions in the uncompressed data) from
+   'input_filename' to standard output or to 'default_output_filename'.
+   The range is truncated to the size of the uncompressed data.
+   Return 0 on success (or if errors were ignored through
+   cl_opts.ignore_errors), 1 if a file can't be opened, or the index error
+   status. Other failures end in cleanup_and_fail.
+   The input descriptor is closed on every early return taken before
+   decompression starts. */
+int range_decompress( const std::string & input_filename,
+                      const std::string & default_output_filename,
+                      const Cl_options & cl_opts, Block range,
+                      const bool force, const bool to_stdout )
+  {
+  const char * const filename = input_filename.c_str();
+  struct stat in_stats;
+  const int infd = open_instream( filename, &in_stats, false, true );
+  if( infd < 0 ) return 1;
+
+  const Lzip_index lzip_index( infd, cl_opts, cl_opts.ignore_errors,
+                               cl_opts.ignore_errors );
+  if( lzip_index.retval() != 0 )
+    { show_file_error( filename, lzip_index.error().c_str() );
+      close( infd ); return lzip_index.retval(); }
+
+  const long long udata_size = lzip_index.udata_size();
+  if( range.end() > udata_size )		// truncate range to data size
+    range.size( std::max( 0LL, udata_size - range.pos() ) );
+  if( range.size() <= 0 )
+    { if( udata_size > 0 ) show_file_error( filename, "Nothing to do." );
+      close( infd ); return 0; }
+
+  if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO;
+  else
+    {
+    output_filename = default_output_filename;
+    set_signal_handler();
+    if( !open_outstream( force, true, false, false, true ) )
+      { close( infd ); return 1; }
+    }
+
+  if( verbosity >= 1 )
+    std::fprintf( stderr, "Decompressing range %sB to %sB (%sB of %sBytes)\n",
+                  format_num( range.pos() ),
+                  format_num( range.pos() + range.size() ),
+                  format_num( range.size() ), format_num( udata_size ) );
+
+  Pretty_print pp( input_filename );
+  bool error = false;
+  for( long i = 0; i < lzip_index.members(); ++i )
+    {
+    const Block & db = lzip_index.dblock( i );
+    if( range.overlaps( db ) )
+      {
+      if( verbosity >= 3 && lzip_index.members() > 1 )
+        std::fprintf( stderr, "Decompressing member %3ld\n", i + 1 );
+      // part of the range falling inside this member, relative to its start
+      const long long outskip = std::max( 0LL, range.pos() - db.pos() );
+      const long long outend = std::min( db.size(), range.end() - db.pos() );
+      const long long mpos = lzip_index.mblock( i ).pos();
+      if( !safe_seek( infd, mpos, filename ) ) cleanup_and_fail( 1 );
+      if( !decompress_member( infd, cl_opts, pp, mpos, outskip, outend ) )
+        { if( cl_opts.ignore_errors ) error = true; else cleanup_and_fail( 2 ); }
+      pp.reset();
+      }
+    }
+  if( close( infd ) != 0 )
+    { show_file_error( filename, "Error closing input file", errno );
+      cleanup_and_fail( 1 ); }
+  if( !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
+  if( verbosity >= 2 && !error )
+    std::fputs( "Byte range decompressed successfully.\n", stderr );
+  return 0;			// either no error or ignored
+  }
diff --git a/reproduce.cc b/reproduce.cc
new file mode 100644
index 0000000..5ca91a7
--- /dev/null
+++ b/reproduce.cc
@@ -0,0 +1,783 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "lzip.h"
+#include "md5.h"
+#include "mtester.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+// Message stored by try_reproduce when a reproduction looks plausible but
+// the member still fails the test.
+const char * final_msg = 0;
+
+// True while a progress line has been printed without a final newline.
+bool pending_newline = false;
+
+// Finish the pending progress line, unless the terminator already is a
+// newline, and clear the flag.
+void print_pending_newline( const char terminator )
+  {
+  const bool needs_newline = ( pending_newline && terminator != '\n' );
+  if( needs_newline ) std::fputc( '\n', stdout );
+  pending_newline = false;
+  }
+
+// First nonzero status passed to fatal().
+int fatal_retval = 0;
+
+// Remember 'retval' as the fatal status if none is recorded yet, and
+// return 'retval' unchanged.
+int fatal( const int retval )
+  {
+  if( fatal_retval == 0 )
+    fatal_retval = retval;
+  return retval;
+  }
+
+/* Find the first run of at least 'minlen' identical bytes located after the
+   member header. Store the length of the run in '*sizep' and the repeated
+   value in '*valuep'.
+   Return the position of the damaged area in the member, or -1 if error. */
+long zeroed_sector_pos( const uint8_t * const mbuffer, const long msize,
+                        const char * const input_filename,
+                        long * const sizep, uint8_t * const valuep )
+  {
+  enum { minlen = 8 };		// min number of consecutive identical bytes
+  const long scan_end = msize - minlen;
+  long begin = -1;
+  long size = 0;
+  uint8_t value = 0;
+
+  for( long i = Lzip_header::size; i < scan_end; )
+    {
+    const long start = i;
+    const uint8_t first = mbuffer[i++];
+    if( mbuffer[i] != first ) continue;		// no run starts here
+    // advance i to the first differing byte (or to the end of the member)
+    do ++i; while( i < msize && mbuffer[i] == first );
+    const long run = i - start;
+    if( run < minlen ) continue;
+    if( size > 0 )
+      {
+      show_file_error( input_filename,
+                       "Member contains more than one damaged area." );
+      return -1;
+      }
+    begin = start;
+    size = run;
+    value = first;
+    break;
+    }
+
+  if( begin < 0 || size <= 0 )
+    {
+    show_file_error( input_filename, "Can't locate damaged area." );
+    return -1;
+    }
+  *sizep = size;
+  *valuep = value;
+  return begin;
+  }
+
+
+/* Build a decoder that has decoded as much of the member as possible
+   without its member position surpassing 'begin'.
+   A first decoder is used to find the largest usable position limit; a
+   fresh decoder is then run up to that limit and returned.
+   Return 0 if no such decoder can be prepared. The caller owns the result. */
+const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
+                                    const long msize, const long begin,
+                                    const unsigned dictionary_size )
+  {
+  const unsigned long ubegin = begin;
+  long pos_limit = std::max( begin - 16, (long)Lzip_header::size );
+
+  LZ_mtester * const probe = new LZ_mtester( mbuffer, msize, dictionary_size );
+  const bool usable = ( probe->test_member( pos_limit ) == -1 &&
+                        probe->member_position() <= ubegin );
+  if( usable )
+    // decompress as much data as possible without surpassing begin
+    while( pos_limit < begin && probe->test_member( pos_limit + 1 ) == -1 &&
+           probe->member_position() <= ubegin )
+      ++pos_limit;
+  delete probe;
+  if( !usable ) return 0;
+
+  LZ_mtester * const master = new LZ_mtester( mbuffer, msize, dictionary_size );
+  if( master->test_member( pos_limit ) != -1 ||
+      master->member_position() > ubegin )
+    { delete master; return 0; }
+  return master;
+  }
+
+
+/* Locate in the reference file (rbuf) the truncated data in the dictionary.
+   The reference file must match from the last byte decoded back to the
+   beginning of the file or to the beginning of the dictionary.
+   Choose the match nearest to the beginning of the file.
+   As a fallback, locate the longest partial match at least 512 bytes long.
+   The dictionary is obtained from 'master' as two parts: 'dec_buffer'
+   (the most recent 'dec_size' bytes) preceded by 'prev_buffer' (the
+   'prev_size' bytes before them).
+   Return the offset in file of the first undecoded byte, or -1 if no match. */
+long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
+                 const long rsize, const char * const reference_filename )
+  {
+  const uint8_t * prev_buffer;
+  int dec_size, prev_size;
+  const uint8_t * const dec_buffer =
+    master.get_buffers( &prev_buffer, &dec_size, &prev_size );
+  if( dec_size < 4 )
+    { if( verbosity >= 1 )
+        { std::printf( "'%s' can't match: not enough data in dictionary.\n",
+                       reference_filename ); pending_newline = false; }
+      return -1; }
+  long offset = -1;		// offset in file of the first undecoded byte
+  bool multiple = false;
+  const uint8_t last_byte = dec_buffer[dec_size-1];
+  for( long i = rsize - 1; i >= 3; --i )	// match at least 4 bytes at bof
+    if( rbuf[i] == last_byte )
+      {
+      // compare file with the two parts of the dictionary
+      int len = std::min( (long)dec_size - 1, i );
+      if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 )
+        {
+        int len2 = std::min( (long)prev_size, i - len );
+        if( len2 <= 0 || !prev_buffer ||
+            std::memcmp( rbuf + i - len - len2,
+                         prev_buffer + prev_size - len2, len2 ) == 0 )
+          {
+          if( offset >= 0 ) multiple = true;
+          offset = i + 1;
+          i -= len + len2;	// continue the search before this match
+          }
+        }
+      }
+  if( offset >= 0 )
+    {
+    if( multiple && verbosity >= 1 )
+      { std::printf( "warning: %s: Multiple matches. Using match at offset %ld\n",
+                     reference_filename, offset ); std::fflush( stdout ); }
+    if( !multiple && verbosity >= 2 )
+      { std::printf( "%s: Match found at offset %ld\n",
+                     reference_filename, offset ); std::fflush( stdout ); }
+    return offset;
+    }
+  int maxlen = 0;		// choose longest match in reference file
+  for( long i = rsize - 1; i >= 0; --i )
+    if( rbuf[i] == last_byte )
+      {
+      // compare file with the two parts of the dictionary
+      const int size1 = std::min( (long)dec_size, i + 1 );
+      int len = 1;
+      while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len;
+      if( len == size1 )
+        {
+        int size2 = std::min( (long)prev_size, i + 1 - size1 );
+        /* The byte preceding the first part is the last byte of the second
+           part, prev_buffer[prev_size-1]. Without the '- 1' the first
+           comparison would read prev_buffer[prev_size], one byte past the
+           end of the dictionary. */
+        while( len < size1 + size2 &&
+               rbuf[i-len] == prev_buffer[prev_size+size1-len-1] ) ++len;
+        }
+      if( len > maxlen ) { maxlen = len; offset = i + 1; i -= len; }
+      }
+  if( maxlen >= 512 && offset >= 0 )
+    {
+    if( verbosity >= 1 )
+      { std::printf( "warning: %s: Partial match found at offset %ld, len %d."
+                     " Reference data may be mixed with other data.\n",
+                     reference_filename, offset, maxlen );
+        std::fflush( stdout ); }
+    return offset;
+    }
+  if( verbosity >= 1 )
+    { std::printf( "'%s' does not match with decoded data.\n",
+                   reference_filename ); pending_newline = false; }
+  return -1;
+  }
+
+
+// Report on stderr (unless quiet) that the output of 'prog_name' could not
+// be closed, followed by the description of errno.
+void show_close_error( const char * const prog_name = "data feeder" )
+  {
+  if( verbosity < 0 ) return;
+  std::fprintf( stderr, "%s: Error closing output of %s: %s\n",
+                program_name, prog_name, std::strerror( errno ) );
+  }
+
+
+// Report on stderr (unless quiet) that 'prog_name' could not be executed,
+// followed by the description of errno.
+void show_exec_error( const char * const prog_name )
+  {
+  if( verbosity < 0 ) return;
+  std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
+                program_name, prog_name, std::strerror( errno ) );
+  }
+
+
+// Report on stderr (unless quiet) that the process for 'prog_name' could
+// not be forked, followed by the description of errno.
+void show_fork_error( const char * const prog_name )
+  {
+  if( verbosity < 0 ) return;
+  std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
+                program_name, prog_name, std::strerror( errno ) );
+  }
+
+
+/* Poll child process 'pid' without blocking.
+   Return -1 if the child has not terminated, 1 in case of error (or if the
+   child did not exit normally), or the exit status of the child.
+*/
+int child_status( const pid_t pid, const char * const name )
+  {
+  int status;
+  int rc;
+  do {
+    rc = waitpid( pid, &status, WNOHANG );
+    if( rc == 0 ) return -1;			// child not terminated
+    if( rc == -1 && errno != EINTR )
+      {
+      if( verbosity >= 0 )
+        std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
+                      program_name, name, std::strerror( errno ) );
+      return 1;
+      }
+    } while( rc != pid );			// retry if interrupted
+  return WIFEXITED( status ) ? WEXITSTATUS( status ) : 1;
+  }
+
+
+// Block until child process 'pid' terminates, retrying when interrupted.
+// Return the exit status of the child, or 1 in case of error (or if the
+// child did not exit normally).
+int wait_for_child( const pid_t pid, const char * const name )
+  {
+  int status;
+  for( ; ; )
+    {
+    if( waitpid( pid, &status, 0 ) != -1 ) break;
+    if( errno == EINTR ) continue;
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
+                    program_name, name, std::strerror( errno ) );
+    return 1;
+    }
+  return WIFEXITED( status ) ? WEXITSTATUS( status ) : 1;
+  }
+
+
+/* Collect the status of child 'pid' (nothing is done if 'pid' is 0).
+   If 'finished' is true, wait for the child to terminate. Otherwise poll
+   it, and if it is still running send it SIGTERM and wait for it; a child
+   terminated this way is not considered an error.
+   Return false, after printing a diagnostic, if the child ended with
+   nonzero status. */
+bool good_status( const pid_t pid, const char * const name, const bool finished )
+  {
+  if( !pid ) return true;
+  bool failed;
+  if( finished )
+    failed = ( wait_for_child( pid, name ) != 0 );
+  else
+    {
+    const int st = child_status( pid, name );
+    failed = ( st > 0 );			// child status != 0
+    if( st < 0 )				// child not terminated
+      { kill( pid, SIGTERM ); wait_for_child( pid, name ); }
+    }
+  if( failed && verbosity >= 0 )
+    std::fprintf( stderr, "%s: %s: Child terminated with error status.\n",
+                  program_name, name );
+  return !failed;
+  }
+
+
+/* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize'
+   (master->data_position) followed by the reference data from byte at
+   offset 'offset' of reference file, up to a total of 'dsize' bytes.
+   Return false, after printing a diagnostic, if the prefix can't be
+   decompressed exactly or the reference data can't be written. */
+bool feed_data( uint8_t * const mbuffer, const long msize,
+                const long long dsize, const unsigned long long good_dsize,
+                const uint8_t * const rbuf, const long rsize,
+                const long offset, const unsigned dictionary_size,
+                const int ofd )
+  {
+  LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd );
+  const bool prefix_ok =
+    ( mtester.test_member( LONG_MAX, good_dsize ) == -1 &&
+      good_dsize == mtester.data_position() );
+  if( !prefix_ok )
+    {
+    show_error( "Error decompressing prefix data for compressor." );
+    return false;
+    }
+  // limit reference data to remaining decompressed data in member
+  const unsigned long long ref_avail = (unsigned long long)rsize - offset;
+  const unsigned long long wanted = dsize - good_dsize;
+  const long size = ( wanted < ref_avail ) ? wanted : ref_avail;
+  if( writeblock( ofd, rbuf + offset, size ) == size ) return true;
+  show_error( "Error writing reference data to compressor", errno );
+  return false;
+  }
+
+
+/* Try to reproduce the zeroed sector.
+ Two child processes are forked: a feeder that writes the decompressed
+ prefix plus reference data (feed_data) into a pipe, and the compressor
+ 'lzip_argv' reading from that pipe. The parent reads the compressor
+ output, compares it with 'mbuffer' before 'begin', copies it into
+ 'mbuffer' from 'begin' to 'end', and finally tests the whole member.
+ If 'auto0' is true, a failing child yields -1 instead of 1.
+ Return value: -1 = failure, 0 = success, > 0 = fatal error. */
+int try_reproduce( uint8_t * const mbuffer, const long msize,
+ const long long dsize, const unsigned long long good_dsize,
+ const long begin, const long end,
+ const uint8_t * const rbuf, const long rsize,
+ const long offset, const unsigned dictionary_size,
+ const char ** const lzip_argv, MD5SUM * const md5sump,
+ const char terminator, const bool auto0 = false )
+ {
+ int fda[2]; // pipe to compressor
+ int fda2[2]; // pipe from compressor
+ if( pipe( fda ) < 0 || pipe( fda2 ) < 0 )
+ { show_error( "Can't create pipe", errno ); return fatal( 1 ); }
+ const pid_t pid = fork();
+ if( pid == 0 ) // child 1 (compressor feeder)
+ {
+ // the feeder only needs the write end of 'fda'
+ if( close( fda[0] ) != 0 ||
+ close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
+ !feed_data( mbuffer, msize, dsize, good_dsize, rbuf, rsize, offset,
+ dictionary_size, fda[1] ) )
+ { close( fda[1] ); _exit( 2 ); }
+ if( close( fda[1] ) != 0 )
+ { show_close_error(); _exit( 2 ); }
+ _exit( 0 );
+ }
+ if( pid < 0 ) // parent
+ { show_fork_error( "data feeder" ); return fatal( 1 ); }
+
+ const pid_t pid2 = fork();
+ if( pid2 == 0 ) // child 2 (compressor)
+ {
+ // connect stdin/stdout to the pipes, then close the original descriptors
+ if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
+ dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
+ close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
+ close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
+ execvp( lzip_argv[0], (char **)lzip_argv );
+ // reached only if the redirection or execvp failed
+ show_exec_error( lzip_argv[0] );
+ _exit( 2 );
+ }
+ if( pid2 < 0 ) // parent
+ { show_fork_error( lzip_argv[0] ); return fatal( 1 ); }
+
+ // the parent only reads from the compressor (fda2[0])
+ close( fda[0] ); close( fda[1] ); close( fda2[1] );
+ // also compare up to 4 bytes after the zeroed sector (limited to msize)
+ const long xend = std::min( end + 4, msize );
+ int retval = 0; // -1 = mismatch
+ bool first_post = true;
+ bool same_ds = true; // reproduced DS == header DS
+ bool tail_mismatch = false; // mismatch after end
+ for( long i = 0; i < xend; )
+ {
+ enum { buffer_size = 16384 }; // 65536 makes it slower
+ uint8_t buffer[buffer_size];
+ if( verbosity >= 2 && i >= 65536 && terminator )
+ {
+ if( first_post )
+ { first_post = false; print_pending_newline( terminator ); }
+ std::printf( " Reproducing position %ld %c", i, terminator );
+ std::fflush( stdout ); pending_newline = true;
+ }
+ const int rd = readblock( fda2[0], buffer, buffer_size );
+ // not enough reference data to fill zeroed sector at this level
+ if( rd <= 0 ) { if( i < end ) retval = -1; break; }
+ int j = 0;
+ /* Compare reproduced bytes with data in mbuffer.
+ Do not fail because of a mismatch beyond the end of the zeroed sector
+ to prevent the reproduction from failing because of the reference file
+ just covering the zeroed sector. */
+ for( ; j < rd && i < begin; ++j, ++i )
+ if( mbuffer[i] != buffer[j] ) // mismatch
+ {
+ // NOTE(review): position 5 is presumably the dictionary size byte of
+ // the header; a difference there is only recorded in 'same_ds'
+ if( i != 5 ) { retval = -1; goto done; } // ignore different DS
+ const Lzip_header * header = (const Lzip_header *)buffer;
+ if( header->dictionary_size() != dictionary_size ) same_ds = false;
+ }
+ // copy reproduced bytes into zeroed sector of mbuffer
+ for( ; j < rd && i < end; ++j, ++i ) mbuffer[i] = buffer[j];
+ // a mismatch after the sector stops reading but does not set retval
+ for( ; j < rd && i < xend; ++j, ++i )
+ if( mbuffer[i] != buffer[j] ) { tail_mismatch = true; goto done; }
+ }
+done:
+ if( !first_post && terminator ) print_pending_newline( terminator );
+ if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; }
+ // both children are polled with finished = false (see good_status)
+ if( !good_status( pid, "data feeder", false ) ||
+ !good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? -1 : 1;
+ if( retval == 0 ) // test whole member after reproduction
+ {
+ if( md5sump ) md5sump->reset();
+ LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump );
+ if( mtester.test_member() != 0 || !mtester.finished() )
+ {
+ // remember a hint for the user; a tail mismatch never replaces an
+ // existing message
+ if( verbosity >= 2 && same_ds && begin >= 4096 && terminator )
+ {
+ if( !tail_mismatch )
+ final_msg = " Zeroed sector reproduced, but CRC does not match."
+ " (Multiple damages in file?).\n";
+ else if( !final_msg )
+ final_msg = " Zeroed sector reproduced, but data after it does not"
+ " match. (Maybe wrong reference data or lzip version).\n";
+ }
+ retval = -1; // incorrect reproduction of zeroed sector
+ }
+ }
+ return retval;
+ }
+
+
+/* Try to reproduce the damaged area ('size' bytes starting at 'begin') of
+   the member in 'mbuffer' using the data in 'reference_filename'.
+   If 'lzip_level' >= 0, compression levels '0' to '9' are tried (only the
+   level 'lzip_level' if it is a digit character). If 'lzip_level' <= 0,
+   match length limits are tried (only -lzip_level if it is < -1).
+   The reference file is unmapped on every return path.
+   Return value: -1 = master failed, 0 = success, > 0 = failure */
+int reproduce_member( uint8_t * const mbuffer, const long msize,
+                      const long long dsize, const char * const lzip_name,
+                      const char * const reference_filename,
+                      const long begin, const long size,
+                      const int lzip_level, MD5SUM * const md5sump,
+                      const char terminator )
+  {
+  struct stat st;
+  const int rfd = open_instream( reference_filename, &st, false, true );
+  if( rfd < 0 ) return fatal( 1 );
+  if( !fits_in_size_t( st.st_size ) )		// mmap uses size_t
+    { show_file_error( reference_filename, "Reference file is too large for mmap." );
+      close( rfd ); return fatal( 1 ); }
+  const long rsize = st.st_size;
+  const uint8_t * const rbuf =
+    (const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
+  close( rfd );				// the mapping stays valid after close
+  if( rbuf == MAP_FAILED )
+    { show_file_error( reference_filename, "Can't mmap", errno );
+      return fatal( 1 ); }
+
+  const Lzip_header & header = *(const Lzip_header *)mbuffer;
+  const unsigned dictionary_size = header.dictionary_size();
+  const LZ_mtester * const master =
+    prepare_master2( mbuffer, msize, begin, dictionary_size );
+  if( !master ) { munmap( (void *)rbuf, rsize ); return -1; }
+  if( verbosity >= 2 )
+    {
+    std::printf( " (master mpos = %lu, dpos = %llu)\n",
+                 master->member_position(), master->data_position() );
+    std::fflush( stdout );
+    }
+
+  const long offset = match_file( *master, rbuf, rsize, reference_filename );
+  if( offset < 0 )						// no match
+    { delete master; munmap( (void *)rbuf, rsize ); return 2; }
+  // Reference data from offset must be at least as large as zeroed sector
+  // minus member trailer if trailer is inside the zeroed sector.
+  const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
+  if( rsize - offset < size - t )
+    { show_file_error( reference_filename, "Not enough reference data after match." );
+      delete master; munmap( (void *)rbuf, rsize ); return 2; }
+
+  const unsigned long long good_dsize = master->data_position();
+  const long end = begin + size;
+  char level_str[8] = "-0";	// compression level or match length limit
+  char dict_str[16];
+  snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size );
+  // level 0 is run without the dictionary size option
+  const char * lzip0_argv[3] = { lzip_name, "-0", 0 };
+  const char * lzip_argv[4] = { lzip_name, level_str, dict_str, 0 };
+  if( lzip_level >= 0 )
+    for( unsigned char level = '0'; level <= '9'; ++level )
+      {
+      if( std::isdigit( lzip_level ) && level != lzip_level ) continue;
+      level_str[1] = level;
+      if( verbosity >= 1 && terminator )
+        {
+        std::printf( "Trying level %s %c", level_str, terminator );
+        std::fflush( stdout ); pending_newline = true;
+        }
+      const bool level0 = level == '0';
+      // level 0 tried automatically, not explicitly requested
+      const bool auto0 = ( level0 && lzip_level != '0' );
+      int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
+                               rbuf, rsize, offset, dictionary_size,
+                               level0 ? lzip0_argv : lzip_argv, md5sump, terminator, auto0 );
+      if( ret >= 0 )				// success or fatal error
+        { delete master; munmap( (void *)rbuf, rsize ); return ret; }
+      }
+  if( lzip_level <= 0 )
+    {
+    for( int len = min_match_len_limit; len <= max_match_len; ++len )
+      {
+      if( lzip_level < -1 && -lzip_level != len ) continue;
+      snprintf( level_str, sizeof level_str, "-m%u", len );
+      if( verbosity >= 1 && terminator )
+        {
+        std::printf( "Trying match length limit %d %c", len, terminator );
+        std::fflush( stdout ); pending_newline = true;
+        }
+      int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
+                               rbuf, rsize, offset, dictionary_size,
+                               lzip_argv, md5sump, terminator );
+      if( ret >= 0 )				// success or fatal error
+        { delete master; munmap( (void *)rbuf, rsize ); return ret; }
+      }
+    }
+  delete master;
+  munmap( (void *)rbuf, rsize );
+  return 2;
+  }
+
+} // end namespace
+
+/* Tries to repair the damaged member of 'input_filename' by reproducing
+   its zeroed sector with reproduce_member, using the compressor
+   'lzip_name' and the data in 'reference_filename'. Only the first
+   damaged member found is reproduced. The repaired copy is written to
+   'default_output_filename' or, if that is empty, to the name returned by
+   insert_fixed( input_filename ). An existing output file makes the
+   function return 1 unless 'force' is true.
+   Returns 0 if the file has no errors or a member was reproduced, 1 on
+   environmental problems (open, seek, mmap, can't prepare master), 2 if
+   the damaged area can't be located, else the status of Lzip_index or
+   the positive status returned by reproduce_member. */
+int reproduce_file( const std::string & input_filename,
+ const std::string & default_output_filename,
+ const char * const lzip_name,
+ const char * const reference_filename,
+ const Cl_options & cl_opts, const int lzip_level,
+ const char terminator, const bool force )
+ {
+ const char * const filename = input_filename.c_str();
+ struct stat in_stats;
+ const int infd = open_instream( filename, &in_stats, false, true );
+ if( infd < 0 ) return 1;
+
+ const Lzip_index lzip_index( infd, cl_opts, true );
+ if( lzip_index.retval() != 0 )
+ { show_file_error( filename, lzip_index.error().c_str() );
+ return lzip_index.retval(); }
+
+ const bool to_file = default_output_filename.size(); // true if an output name was given
+ output_filename =
+ to_file ? default_output_filename : insert_fixed( input_filename );
+ if( !force && output_file_exists() ) return 1;
+ outfd = -1; // the output file is opened only after a member is reproduced
+ int errors = 0; // damaged members found so far
+ const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
+ for( long i = 0; i < lzip_index.members(); ++i ) // test every member in turn
+ {
+ const long long dsize = lzip_index.dblock( i ).size();
+ const long long mpos = lzip_index.mblock( i ).pos();
+ const long long msize = lzip_index.mblock( i ).size();
+ if( verbosity >= 1 && lzip_index.members() > 1 )
+ {
+ std::printf( "Testing member %ld of %ld %c",
+ i + 1, lzip_index.members(), terminator );
+ std::fflush( stdout ); pending_newline = true;
+ }
+ if( !safe_seek( infd, mpos, filename ) ) return 1;
+ long long failure_pos = 0;
+ if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
+ continue; // member is not damaged
+ print_pending_newline( terminator );
+ if( ++errors > 1 ) break; // only one member can be reproduced
+ if( failure_pos < Lzip_header::size ) // End Of File
+ { show_file_error( filename, "Unexpected end of file." ); return 2; }
+ if( !fits_in_size_t( msize + page_size ) ) // mmap uses size_t
+ { show_file_error( filename,
+ "Input file contains member too large for mmap." ); return 1; }
+
+ // without mmap, 3 times more memory are required because of fork
+ const long mpos_rem = mpos % page_size; // distance from the preceding page boundary
+ uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
+ if( mbuffer_base == MAP_FAILED )
+ { show_file_error( filename, "Can't mmap", errno ); return 1; }
+ uint8_t * const mbuffer = mbuffer_base + mpos_rem; // first byte of the member
+ long size = 0; // set by zeroed_sector_pos
+ uint8_t value = 0; // set by zeroed_sector_pos
+ const long begin =
+ zeroed_sector_pos( mbuffer, msize, filename, &size, &value );
+ if( begin < 0 ) return 2;
+ if( failure_pos < begin ) // the decoder failed before reaching the sector
+ { show_file_error( filename, "Data error found before damaged area." );
+ return 2; }
+ if( verbosity >= 1 )
+ {
+ std::printf( "Reproducing bad area in member %ld of %ld\n"
+ " (begin = %ld, size = %ld, value = 0x%02X)\n",
+ i + 1, lzip_index.members(), begin, size, value );
+ std::fflush( stdout );
+ }
+ const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
+ reference_filename, begin, size, lzip_level, 0, terminator );
+ if( ret <= 0 ) print_pending_newline( terminator );
+ if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
+ if( ret == 0 ) // the sector in mbuffer now holds the reproduced data
+ {
+ if( outfd < 0 ) // first damaged member reproduced
+ {
+ if( !safe_seek( infd, 0, filename ) ) return 1;
+ set_signal_handler();
+ if( !open_outstream( true, true, false, true, to_file ) ) return 1;
+ if( !copy_file( infd, outfd ) ) // copy whole file
+ cleanup_and_fail( 1 );
+ }
+ // overwrite just the reproduced sector in the copy
+ if( seek_write( outfd, mbuffer + begin, size, mpos + begin ) != size )
+ { show_file_error( output_filename.c_str(), "Error writing file", errno );
+ cleanup_and_fail( 1 ); }
+ if( verbosity >= 1 )
+ std::fputs( "Member reproduced successfully.\n", stdout );
+ }
+ munmap( mbuffer_base, msize + mpos_rem );
+ if( ret > 0 ) // reproduce_member could not rebuild the sector
+ {
+ if( final_msg )
+ { std::fputs( final_msg, stdout ); std::fflush( stdout ); }
+ show_file_error( filename, "Unable to reproduce member." ); return ret;
+ }
+ }
+
+ if( outfd < 0 ) // no output was created, so no damaged member was found
+ {
+ if( verbosity >= 1 )
+ std::printf( "Input file '%s' has no errors. Recovery is not needed.\n",
+ filename );
+ return 0;
+ }
+ if( !close_outstream( &in_stats ) ) return 1;
+ if( verbosity >= 0 )
+ {
+ if( errors > 1 ) // the loop stopped at a second damaged member
+ std::fputs( "One member reproduced."
+ " Copy of input file still contains errors.\n", stdout );
+ else
+ std::fputs( "Copy of input file reproduced successfully.\n", stdout );
+ }
+ return 0;
+ }
+
+
+/* Tests the reproduction code on 'input_filename'. For each member
+   overlapping 'range', every sector of 'sector_size' bytes in the overlap
+   (the last one may be shorter, but never shorter than 8 bytes) is zeroed
+   in a private mapping of the member, reproduced with reproduce_member,
+   and checked against the MD5 sums of the original member. One result
+   line is printed per sector, and the statistics are printed at the end.
+   Passes a 0 terminator to other functions to prevent intramember feedback.
+   Exits only in case of fatal error (reference file too large, etc).
+   Returns 1 or 2 if the test can't be set up or a sector can't be
+   located, else the value of 'fatal_retval'. */
+int debug_reproduce_file( const char * const input_filename,
+                          const char * const lzip_name,
+                          const char * const reference_filename,
+                          const Cl_options & cl_opts, const Block & range,
+                          const int sector_size, const int lzip_level )
+  {
+  struct stat in_stats;					// not used
+  const int infd = open_instream( input_filename, &in_stats, false, true );
+  if( infd < 0 ) return 1;
+
+  const Lzip_index lzip_index( infd, cl_opts );
+  if( lzip_index.retval() != 0 )
+    { show_file_error( input_filename, lzip_index.error().c_str() );
+      return lzip_index.retval(); }
+
+  const long long cdata_size = lzip_index.cdata_size();
+  if( range.pos() >= cdata_size )
+    { show_file_error( input_filename, "Range is beyond end of last member." );
+      return 1; }
+
+  const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
+  /* Estimate of the number of sectors to test; used only for the
+     percentage shown on stderr. A sector is tested while at least 8 bytes
+     remain (see 'sector_pos + 8 <= rm_end' below), so n bytes contain
+     ( n - 8 ) / sector_size + 1 sectors. Subtracting 9 instead of 8 would
+     undercount by one whenever n - 8 is a multiple of sector_size, and
+     would yield 0 for n == 8, making the percentage divide by zero. */
+  const long long positions_to_test =
+    ( ( std::min( range.size(), cdata_size - range.pos() ) ) +
+      sector_size - 8 ) / sector_size;
+  long positions = 0, successes = 0, failed_comparisons = 0;
+  long alternative_reproductions = 0;
+  // progress goes to stderr only when it is a terminal and stdout is not
+  const bool pct_enabled = cdata_size > sector_size &&
+                           isatty( STDERR_FILENO ) && !isatty( STDOUT_FILENO );
+  for( long i = 0; i < lzip_index.members(); ++i )
+    {
+    const long long mpos = lzip_index.mblock( i ).pos();
+    const long long msize = lzip_index.mblock( i ).size();
+    if( !range.overlaps( mpos, msize ) ) continue;
+    if( !fits_in_size_t( msize + page_size ) )		// mmap uses size_t
+      { show_file_error( input_filename,
+          "Input file contains member too large for mmap." ); return 1; }
+    const long long dsize = lzip_index.dblock( i ).size();
+    const unsigned dictionary_size = lzip_index.dictionary_size( i );
+
+    // md5sums of original not damaged member (compressed and decompressed)
+    md5_type md5_digest_c, md5_digest_d;
+    bool md5_valid = false;		// digests are computed at the first sector
+    const long long rm_end = std::min( range.end(), mpos + msize );
+    for( long long sector_pos = std::max( range.pos(), mpos );
+         sector_pos + 8 <= rm_end; sector_pos += sector_size )
+      {
+      // without mmap, 3 times more memory are required because of fork
+      // The member is mapped anew for each sector and unmapped at the end
+      // of the iteration, so the changes made to one sector do not reach
+      // the next test.
+      const long mpos_rem = mpos % page_size;
+      uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
+        PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
+      if( mbuffer_base == MAP_FAILED )
+        { show_file_error( input_filename, "Can't mmap", errno ); return 1; }
+      uint8_t * const mbuffer = mbuffer_base + mpos_rem;
+      if( !md5_valid )
+        {
+        if( verbosity >= 0 )	// give a clue of the range being tested
+          { std::printf( "Reproducing: %s\nReference file: %s\nTesting "
+                         "sectors of size %llu at file positions %llu to %llu\n",
+                         input_filename, reference_filename,
+                         std::min( (long long)sector_size, rm_end - sector_pos ),
+                         sector_pos, rm_end - 1 ); std::fflush( stdout ); }
+        md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
+        MD5SUM md5sum;
+        LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
+        // a member that does not decode cleanly can't be used as reference
+        if( mtester.test_member() != 0 || !mtester.finished() )
+          {
+          if( verbosity >= 0 )
+            { std::printf( "Member %ld of %ld already damaged (failure pos "
+                           "= %llu)\n", i + 1, lzip_index.members(),
+                           mpos + mtester.member_position() );
+              std::fflush( stdout ); }
+          munmap( mbuffer_base, msize + mpos_rem ); break;
+          }
+        md5sum.md5_finish( md5_digest_d );
+        }
+      ++positions;
+      const int sector_sz =
+        std::min( (long long)sector_size, rm_end - sector_pos );
+      // set mbuffer[sector] to 0
+      std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz );
+      long size = 0;
+      uint8_t value = 0;
+      const long begin =
+        zeroed_sector_pos( mbuffer, msize, input_filename, &size, &value );
+      if( begin < 0 ) return 2;
+      MD5SUM md5sum;
+      const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
+                        reference_filename, begin, size, lzip_level, &md5sum, 0 );
+      if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
+      if( ret == 0 )
+        {
+        ++successes;
+        md5_type new_digest;
+        md5sum.md5_finish( new_digest );
+        if( md5_digest_d != new_digest )	// decompressed data differ
+          {
+          ++failed_comparisons;
+          if( verbosity >= 0 )
+            std::printf( "Comparison failed at pos %llu\n", sector_pos );
+          }
+        else if( !check_md5( mbuffer, msize, md5_digest_c ) )
+          {			// same data, different compressed member
+          ++alternative_reproductions;
+          if( verbosity >= 0 )
+            std::printf( "Alternative reproduction at pos %llu\n", sector_pos );
+          }
+        else if( verbosity >= 0 )
+          std::printf( "Reproduction succeeded at pos %llu\n", sector_pos );
+        }
+      else if( verbosity >= 0 )			// ret > 0
+        std::printf( "Unable to reproduce at pos %llu\n", sector_pos );
+      if( verbosity >= 0 )
+        {
+        std::fflush( stdout );			// flush result line
+        if( pct_enabled )				// show feedback
+          std::fprintf( stderr, "\r%ld sectors %ld successes %ld failcomp "
+                        "%ld altrep %3u%% done\r", positions, successes,
+                        failed_comparisons, alternative_reproductions,
+                        (unsigned)( ( positions * 100.0 ) / positions_to_test ) );
+        }
+      munmap( mbuffer_base, msize + mpos_rem );
+      if( fatal_retval ) goto done;
+      }
+    }
+done:
+  if( verbosity >= 0 )
+    {
+    std::printf( "\n%9ld sectors tested"
+                 "\n%9ld reproductions returned with zero status",
+                 positions, successes );
+    if( successes > 0 )
+      {
+      if( failed_comparisons > 0 )
+        std::printf( ", of which\n%9ld comparisons failed\n",
+                     failed_comparisons );
+      else std::fputs( "\n all comparisons passed\n", stdout );
+      if( alternative_reproductions > 0 )
+        std::printf( "%9ld alternative reproductions found\n",
+                     alternative_reproductions );
+      }
+    else std::fputc( '\n', stdout );
+    if( fatal_retval )
+      std::fputs( "Exiting because of a fatal error\n", stdout );
+    }
+  return fatal_retval;
+  }
diff --git a/split.cc b/split.cc
new file mode 100644
index 0000000..48d7915
--- /dev/null
+++ b/split.cc
@@ -0,0 +1,142 @@
+/* Lziprecover - Data recovery tool for the lzip format
+ Copyright (C) 2009-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "lzip.h"
+#include "lzip_index.h"
+
+
+namespace {
+
+/* Sets 'output_filename' to the name of the first file to be written:
+   'default_output_filename' if it is not empty, else 'input_filename',
+   with "rec" followed by the number 1, zero-padded to 'max_digits'
+   digits, inserted in front of the base name (after the last '/').
+   Returns true if 'default_output_filename' was used. */
+bool first_filename( const std::string & input_filename,
+                     const std::string & default_output_filename,
+                     const int max_digits )
+  {
+  const bool name_given = default_output_filename.size();
+  if( name_given ) output_filename = default_output_filename;
+  else output_filename = input_filename;
+
+  // position of the base name; 0 if the name contains no directory part
+  int base = 0;
+  const std::string::size_type slash = output_filename.rfind( '/' );
+  if( slash != std::string::npos ) base = slash + 1;
+
+  // build "rec", the leading zeros (if any), and the final digit '1'
+  std::string prefix( "rec" );
+  if( max_digits > 1 ) prefix.append( max_digits - 1, '0' );
+  prefix += '1';
+  output_filename.insert( base, prefix );
+  return name_given;
+  }
+
+
+/* Reports the file just written (if verbose) and increments the decimal
+   counter of 'max_digits' digits that follows "rec" at the start of the
+   base name of 'output_filename'.
+   Returns false if the counter wrapped around to all zeros. */
+bool next_filename( const int max_digits )
+  {
+  if( verbosity >= 1 )
+    { std::printf( "Member '%s' done \n", output_filename.c_str() );
+      std::fflush( stdout ); }
+
+  // find the start of the base name (just after the last '/')
+  int base = output_filename.size();
+  for( ; base > 0; --base ) if( output_filename[base-1] == '/' ) break;
+
+  const int first_digit = base + 3;			// skip "rec"
+  int pos = first_digit + max_digits - 1;		// least significant digit
+  while( pos >= first_digit )
+    {
+    char & digit = output_filename[pos];
+    if( digit < '9' ) { ++digit; return true; }
+    digit = '0';					// carry into the digit to the left
+    --pos;
+    }
+  return false;
+  }
+
+} // end namespace
+
+/* Splits 'input_filename' into separate files named by first_filename and
+   next_filename ("rec<number>" inserted before the base name of
+   'default_output_filename' or, if that is empty, of the input name).
+   One file is written per member, one per gap found before a member, and
+   one for the data following the last member, all in file order.
+   'force' is passed to open_outstream.
+   Returns 0 on success, 1 on I/O error, or the status of Lzip_index. */
+int split_file( const std::string & input_filename,
+ const std::string & default_output_filename,
+ const Cl_options & cl_opts, const bool force )
+ {
+ const char * const filename = input_filename.c_str();
+ struct stat in_stats;
+ const int infd = open_instream( filename, &in_stats, false, true );
+ if( infd < 0 ) return 1;
+
+ Lzip_index lzip_index( infd, cl_opts, true, true ); // not const; may be rebuilt below
+ if( lzip_index.retval() != 0 )
+ {
+ show_file_error( filename, lzip_index.error().c_str() );
+ return lzip_index.retval();
+ }
+ // check last member
+ const Block b = lzip_index.mblock( lzip_index.members() - 1 );
+ long long mpos = b.pos();
+ long long msize = b.size();
+ long long failure_pos = 0;
+ if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( test_member_from_file( infd, msize, &failure_pos ) == 1 )
+ { // corrupt or fake trailer
+ while( true )
+ {
+ mpos += failure_pos; msize -= failure_pos; // retry from the failure position
+ if( msize < min_member_size ) break; // trailing data
+ if( !safe_seek( infd, mpos, filename ) ) return 1;
+ if( test_member_from_file( infd, msize, &failure_pos ) != 1 ) break;
+ }
+ // rebuild the index passing mpos (NOTE(review): presumably the position
+ // where the index must stop trusting the file; confirm in lzip_index.h)
+ lzip_index = Lzip_index( infd, cl_opts, true, true, mpos );
+ if( lzip_index.retval() != 0 )
+ {
+ show_file_error( filename, lzip_index.error().c_str() );
+ return lzip_index.retval();
+ }
+ }
+
+ if( !safe_seek( infd, 0, filename ) ) return 1; // files are copied sequentially from pos 0
+ int max_digits = 1; // decimal digits of lzip_index.blocks( true )
+ for( long i = lzip_index.blocks( true ); i >= 10; i /= 10 ) ++max_digits;
+ bool to_file = // if true, create intermediate dirs
+ first_filename( input_filename, default_output_filename, max_digits );
+
+ long long stream_pos = 0; // first pos not yet written to file
+ set_signal_handler();
+ for( long i = 0; i < lzip_index.members(); ++i )
+ {
+ const Block & mb = lzip_index.mblock( i );
+ if( mb.pos() > stream_pos ) // gap
+ {
+ if( !open_outstream( force, true, false, false, to_file ) ) return 1;
+ if( !copy_file( infd, outfd, mb.pos() - stream_pos ) ||
+ !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
+ next_filename( max_digits ); to_file = false; // only the first file passes to_file as true
+ }
+ if( !open_outstream( force, true, false, false, to_file ) ) return 1; // member
+ if( !copy_file( infd, outfd, mb.size() ) ||
+ !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
+ next_filename( max_digits ); to_file = false;
+ stream_pos = mb.end();
+ }
+ if( lzip_index.file_size() > stream_pos ) // trailing data
+ {
+ if( !open_outstream( force, true, false, false, to_file ) ) return 1;
+ if( !copy_file( infd, outfd, lzip_index.file_size() - stream_pos ) ||
+ !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
+ next_filename( max_digits ); to_file = false;
+ }
+ close( infd );
+ return 0;
+ }
diff --git a/testsuite/check.sh b/testsuite/check.sh
new file mode 100755
index 0000000..a65f062
--- /dev/null
+++ b/testsuite/check.sh
@@ -0,0 +1,1538 @@
+#! /bin/sh
+# check script for Lziprecover - Data recovery tool for the lzip format
+# Copyright (C) 2009-2024 Antonio Diaz Diaz.
+#
+# This script is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+LC_ALL=C
+export LC_ALL
+objdir=`pwd` # directory the script is started from; it must hold the binary
+testdir=`cd "$1" ; pwd` # absolute path of the directory given as first argument
+LZIP="${objdir}"/lziprecover
+LZIPRECOVER="${LZIP}" # both names run the same lziprecover binary
+framework_failure() { echo "failure in testing framework" ; exit 1 ; }
+# Give up at once if the program under test is missing or not executable.
+if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then
+ echo "${LZIP}: cannot execute"
+ exit 1
+fi
+# The binary exists at this point, so a failure here means the shell lacks test -e.
+[ -e "${LZIP}" ] 2> /dev/null ||
+ {
+ echo "$0: a POSIX shell is required to run the tests"
+ echo "Try bash -c \"$0 $1 $2\""
+ exit 1
+ }
+# Run every test inside a fresh scratch directory named tmp.
+if [ -d tmp ] ; then rm -rf tmp ; fi
+mkdir tmp
+cd "${objdir}"/tmp || framework_failure
+# Local copy of the reference text, then shorthand names for the test files.
+cat "${testdir}"/test.txt > in || framework_failure
+in_lz="${testdir}"/test.txt.lz
+in_lzma="${testdir}"/test.txt.lzma
+in_em="${testdir}"/test_em.txt.lz
+inD="${testdir}"/test21723.txt
+bad1_lz="${testdir}"/test_bad1.lz
+bad2_lz="${testdir}"/test_bad2.lz
+bad3_lz="${testdir}"/test_bad3.lz
+bad4_lz="${testdir}"/test_bad4.lz
+bad5_lz="${testdir}"/test_bad5.lz
+bad6_lz="${testdir}"/test_bad6.lz
+bad7_lz="${testdir}"/test_bad7.lz
+bad8_lz="${testdir}"/test_bad8.lz
+bad9_lz="${testdir}"/test_bad9.lz
+fox_lz="${testdir}"/fox.lz
+fox6_lz="${testdir}"/fox6.lz
+f6b1="${testdir}"/fox6_bad1.txt
+f6b1_lz="${testdir}"/fox6_bad1.lz
+f6b2_lz="${testdir}"/fox6_bad2.lz
+f6b3_lz="${testdir}"/fox6_bad3.lz
+f6b4_lz="${testdir}"/fox6_bad4.lz
+f6b5_lz="${testdir}"/fox6_bad5.lz
+f6b6_lz="${testdir}"/fox6_bad6.lz
+f6s1_lz="${testdir}"/fox6_sc1.lz
+f6s2_lz="${testdir}"/fox6_sc2.lz
+f6s3_lz="${testdir}"/fox6_sc3.lz
+f6s4_lz="${testdir}"/fox6_sc4.lz
+f6s5_lz="${testdir}"/fox6_sc5.lz
+f6s6_lz="${testdir}"/fox6_sc6.lz
+f6mk_lz="${testdir}"/fox6_mark.lz
+num_lz="${testdir}"/numbers.lz
+nbt_lz="${testdir}"/numbersbt.lz
+fail=0 # set to 1 by test_failed
+# Record a failed test: set fail to 1 and print the line number passed in
+# $1, followed by the optional detail $2 in parentheses. Both are passed
+# to printf as data, never as the format, so that a '%' or a backslash in
+# a file name given as detail is printed literally.
+test_failed() { fail=1 ; printf " %s" "$1" ; [ -z "$2" ] || printf "(%s)" "$2" ; }
+
+# Description of test files for lziprecover:
+# single-member files with one or more errors
+# test_bad1.lz: byte at offset 66 changed from 0xA6 to 0x26
+# test_bad2.lz: [ 34- 65] --> copy of bytes [ 68- 99]
+# test_bad3.lz: [ 512-1535] --> zeroed [2560-3583] --> zeroed
+# test_bad4.lz: [3072-4095] --> random errors [4608-5631] --> zeroed
+# test_bad5.lz: [1024-2047] --> random errors [5120-6143] --> random data
+# test_bad6.lz: [ 512-1023] --> zeroed (reference test.txt [ 891- 2137])
+# test_bad7.lz: [6656-7167] --> zeroed (reference test.txt [20428-32231])
+# test_bad8.lz: [ 66- 73] --> zeroed (reference test.txt [ 89- 110])
+# test_bad9.lz: [6491-6498] --> zeroed (reference test.txt [17977-18120])
+#
+# test_em.txt.lz: test.txt split in 3, with 5 empty members (1,3,5-6,8)
+# test_3m.txt.lz.md5: md5sum of test_em.txt.lz after removing empty members
+#
+# 6-member files with one or more errors
+# fox6_bad1.lz: byte at offset 5 changed from 0x0C to 0x00 (DS)
+# byte at offset 142 changed from 0x50 to 0x70 (CRC)
+# byte at offset 224 changed from 0x2D to 0x2E (data_size)
+# byte at offset 268 changed from 0x34 to 0x33 (mid stream)
+# byte at offset 327 changed from 0x2A to 0x2B (byte 7)
+# byte at offset 458 changed from 0xA0 to 0x20 (EOS marker)
+# fox6_bad2.lz: [110-129] --> zeroed (member 2)
+# fox6_bad3.lz: [180-379] --> zeroed (members 3-5)
+# fox6_bad4.lz: [330-429] --> zeroed (members 5,6)
+# fox6_bad5.lz: [380-479] --> zeroed (members 5,6)
+# fox6_bad6.lz: [430-439] --> zeroed (member 6)
+#
+# fox6_mark.lz: 4 last members marked with bytes 'm', 'a', 'r', 'k'
+#
+# 6-member files "shortcircuited" by a corrupt or fake trailer
+# fox6_sc1.lz: (corrupt but consistent last trailer)
+# last CRC != 0 ; dsize = 4 * msize ; msize = 480 (file size)
+# fox6_sc2.lz: (appended fake but consistent trailer)
+# fake CRC != 0 ; dsize = 4 * msize ; msize = 500 (file size)
+# fox6_sc3.lz: fake CRC = 0
+# fox6_sc4.lz: fake dsize = 0
+# fox6_sc5.lz: fake dsize = 411 (< 8 * ( fake msize - 36 ) / 9)
+# fox6_sc6.lz: fake dsize = 3360660 (>= 7090 * ( fake msize - 26 ))
+#
+# 9-member files "one_" "two_" "three_" "four_" "five_" "six_" "seven_"
+# "eight_" "nine_"
+# numbers.lz : good file containing the 9 members shown above
+# numbersbt.lz: "gap" after "three_", "damaged" after "six_", "trailing data"
+
+printf "testing lziprecover-%s..." "$2" # the second argument is shown as the version
+# The argument 0N must be rejected with status 1; the values in the loop are accepted.
+"${LZIPRECOVER}" -q --nrep-stats=0N "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+for i in 0 255 0kB 0KiB 0M 0G 0T 0P 0E 0Z 0Y 0R 0Q ; do
+ "${LZIPRECOVER}" -q --nrep-stats=$i "${in_lz}" || test_failed $LINENO $i
+done
+"${LZIP}" -lq in # in is plain text, not lzip data: status 2 expected
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -tq in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -tq < in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -cdq in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -cdq < in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -dq -o in < "${in_lz}" # the output file in already exists: status 1 expected
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -dq -o in "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -dq -o out nx_file.lz # nonexistent input must not create out
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out ] || test_failed $LINENO
+# these are for code coverage
+"${LZIP}" -lt "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -cdl "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -cdt "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -t -- nx_file.lz 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -t "" < /dev/null 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" --help > /dev/null || test_failed $LINENO
+"${LZIP}" -n1 -V > /dev/null || test_failed $LINENO
+"${LZIP}" -m 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -z 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" --bad_option 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" --t 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" --test=2 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" --output= 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" --output 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null # status not checked
+printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null # status not checked
+printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null # status not checked
+# Misuse of lziprecover options: status 1 expected, or 2 when the input is the text file in.
+"${LZIPRECOVER}" -eq "${bad6_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -mq "${bad1_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -Rq
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -sq
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -t --remove=damaged "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged -t "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" --remove=tdata -t "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -t --strip=tdata "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=tdata --strip=damaged "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" --remove=tdata --strip=damaged "${in_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=damaged # no input file given
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=damaged in > out # /dev/null returns 1 on OS/2
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=damagedd "${in_lz}" > /dev/null # misspelled argument
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=empty
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged in > out # /dev/null returns 1 on OS/2
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damagedd "${in_lz}" > /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=damaged
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=damaged in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=damagedd "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=tdata
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=tdata in > /dev/null
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --dump=tdataa "${in_lz}" > /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=tdata
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=tdata in > out # /dev/null returns 1 on OS/2
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=tdataa "${in_lz}" > /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=tdata
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=tdata in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=tdataa "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+# Option -A: converting test.txt.lzma must give a file identical to test.txt.lz.
+"${LZIPRECOVER}" -Aq in
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -Aq < in > out # /dev/null returns 1 on OS/2
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -Aq < "${in_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -Aq "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIPRECOVER}" -Akq "${in_lzma}"
+[ $? = 1 ] || test_failed $LINENO
+rm -f out || framework_failure
+"${LZIPRECOVER}" -A "${in_lzma}" -o out.lz || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -Ac "${in_lzma}" > out.lz || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+rm -f out.lz || framework_failure
+"${LZIPRECOVER}" -A -o out.lz < "${in_lzma}" || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -A < "${in_lzma}" > out.lz || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+rm -f out.lz || framework_failure
+cat "${in_lzma}" > out.lzma || framework_failure
+"${LZIPRECOVER}" -Ak out.lzma || test_failed $LINENO # the result is expected in out.lz
+cmp "${in_lz}" out.lz || test_failed $LINENO
+printf "to be overwritten" > out.lz || framework_failure
+"${LZIPRECOVER}" -Af out.lzma || test_failed $LINENO # -f replaces the existing out.lz
+cmp "${in_lz}" out.lz || test_failed $LINENO
+rm -f out.lz || framework_failure
+cat "${in_lzma}" > out.tlz || framework_failure
+"${LZIPRECOVER}" -Ak out.tlz || test_failed $LINENO # the result is expected in out.tar.lz
+cmp "${in_lz}" out.tar.lz || test_failed $LINENO
+printf "to be overwritten" > out.tar.lz || framework_failure
+"${LZIPRECOVER}" -Af out.tlz || test_failed $LINENO
+cmp "${in_lz}" out.tar.lz || test_failed $LINENO
+rm -f out.tar.lz || framework_failure
+cat in in > in2 || framework_failure # in2 is the text twice, reused by later tests
+"${LZIPRECOVER}" -A -o out2.lz - "${in_lzma}" - < "${in_lzma}" ||
+ test_failed $LINENO
+"${LZIP}" -cd out2.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+rm -f out2.lz out2 || framework_failure
+"${LZIPRECOVER}" -A "${in_lzma}" -o a/b/c/out.lz || test_failed $LINENO # a/b/c does not exist yet
+cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
+rm -rf a || framework_failure
+# Decompression tests start here.
+printf "\ntesting decompression..."
+# Both files must list, test, and decompress to the text in, whatever the output path.
+for i in "${in_lz}" "${in_em}" ; do
+ "${LZIP}" -lq "$i" || test_failed $LINENO "$i"
+ "${LZIP}" -t "$i" || test_failed $LINENO "$i"
+ "${LZIP}" -d "$i" -o out || test_failed $LINENO "$i"
+ cmp in out || test_failed $LINENO "$i"
+ "${LZIP}" -cd "$i" > out || test_failed $LINENO "$i"
+ cmp in out || test_failed $LINENO "$i"
+ "${LZIP}" -d "$i" -o - > out || test_failed $LINENO "$i"
+ cmp in out || test_failed $LINENO "$i"
+ "${LZIP}" -d < "$i" > out || test_failed $LINENO "$i"
+ cmp in out || test_failed $LINENO "$i"
+ rm -f out || framework_failure
+done
+# The file with empty members: fixed line counts with -vv, status 2 with --empty-error.
+lines=`"${LZIP}" -tvv "${in_em}" 2>&1 | wc -l` || test_failed $LINENO
+[ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}"
+"${LZIP}" -tq "${in_em}" --empty-error
+[ $? = 2 ] || test_failed $LINENO
+
+lines=`"${LZIP}" -lvv "${in_em}" | wc -l` || test_failed $LINENO
+[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
+"${LZIP}" -lq "${in_em}" --empty-error
+[ $? = 2 ] || test_failed $LINENO
+# Decompression to files named after the input, with and without existing output.
+cat "${in_lz}" > out.lz || framework_failure
+"${LZIP}" -dk out.lz || test_failed $LINENO
+cmp in out || test_failed $LINENO
+rm -f out || framework_failure
+"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
+cat fox > copy || framework_failure
+cat "${in_lz}" > copy.lz || framework_failure
+"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+cmp fox copy || test_failed $LINENO
+cmp in out || test_failed $LINENO
+"${LZIP}" -df copy.lz || test_failed $LINENO
+[ ! -e copy.lz ] || test_failed $LINENO
+cmp in copy || test_failed $LINENO
+rm -f copy out || framework_failure
+# Overwriting with -f, and an output file whose name is a dash.
+printf "to be overwritten" > out || framework_failure
+"${LZIP}" -df -o out < "${in_lz}" || test_failed $LINENO
+cmp in out || test_failed $LINENO
+rm -f out || framework_failure
+"${LZIP}" -d -o ./- "${in_lz}" || test_failed $LINENO
+cmp in ./- || test_failed $LINENO
+rm -f ./- || framework_failure
+"${LZIP}" -d -o ./- < "${in_lz}" || test_failed $LINENO
+cmp in ./- || test_failed $LINENO
+rm -f ./- || framework_failure
+# An input name without .lz extension is expected to produce anyothername.out.
+cat "${in_lz}" > anyothername || framework_failure
+"${LZIP}" -dv - anyothername - < "${in_lz}" > out 2> /dev/null ||
+ test_failed $LINENO
+cmp in out || test_failed $LINENO
+cmp in anyothername.out || test_failed $LINENO
+rm -f out anyothername.out || framework_failure
+# One bad or missing file among good ones: status 2 for bad data, 1 for a missing file.
+"${LZIP}" -lq in "${in_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -lq nx_file.lz "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -tq in "${in_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -tq nx_file.lz "${in_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${LZIP}" -cdq in "${in_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+cat out in | cmp in - || test_failed $LINENO # out must be empty
+"${LZIP}" -cdq nx_file.lz "${in_lz}" > out # skip nx_file, decompress in
+[ $? = 1 ] || test_failed $LINENO
+cmp in out || test_failed $LINENO
+rm -f out || framework_failure
+cat "${in_lz}" > out.lz || framework_failure
+for i in 1 2 3 4 5 6 7 ; do # append 1 to 7 bytes of trailing data
+ printf "g" >> out.lz || framework_failure
+ "${LZIP}" -alvv out.lz "${in_lz}" > /dev/null 2>&1
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${LZIP}" -atvvvv out.lz "${in_lz}" 2> /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+done
+"${LZIP}" -dq in out.lz # the bad file in stops the run: out.lz is kept and nothing is written
+[ $? = 2 ] || test_failed $LINENO
+[ -e out.lz ] || test_failed $LINENO
+[ ! -e out ] || test_failed $LINENO
+[ ! -e in.out ] || test_failed $LINENO
+"${LZIP}" -dq nx_file.lz out.lz # the missing file is skipped: out.lz is decompressed and removed
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+[ ! -e nx_file ] || test_failed $LINENO
+cmp in out || test_failed $LINENO
+rm -f out || framework_failure
+
+"${LZIP}" -lq "${in_lz}" "${in_lz}" || test_failed $LINENO
+"${LZIP}" -t "${in_lz}" "${in_lz}" || test_failed $LINENO
+"${LZIP}" -cd "${in_lz}" "${in_lz}" -o out > out2 || test_failed $LINENO
+[ ! -e out ] || test_failed $LINENO # override -o
+cmp in2 out2 || test_failed $LINENO
+rm -f out2 || framework_failure
+"${LZIP}" -d "${in_lz}" "${in_lz}" -o out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+rm -f out2 || framework_failure
+
+cat "${in_lz}" "${in_lz}" > out2.lz || framework_failure
+printf "\ngarbage" >> out2.lz || framework_failure
+"${LZIP}" -tvvvv out2.lz 2> /dev/null || test_failed $LINENO
+"${LZIPRECOVER}" -aD0 -q out2.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -alq out2.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -atq out2.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -atq < out2.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -adkq out2.lz
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out2 ] || test_failed $LINENO
+"${LZIP}" -adkq -o out2 < out2.lz
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out2 ] || test_failed $LINENO
+printf "to be overwritten" > out2 || framework_failure
+"${LZIP}" -df out2.lz || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+rm -f out2 || framework_failure
+
+"${LZIPRECOVER}" -D ,18000 "${in_lz}" > out || test_failed $LINENO # first part of the data
+"${LZIPRECOVER}" -D 18000 "${in_lz}" >> out || test_failed $LINENO # the rest, appended; together they must equal 'in'
+cmp in out || test_failed $LINENO
+"${LZIPRECOVER}" -D 21723-22120 -fo out "${in_lz}" || test_failed $LINENO # 'begin-end' form
+cmp "${inD}" out || test_failed $LINENO
+"${LZIPRECOVER}" -D 21723,397 "${in_lz}" > out || test_failed $LINENO # 'begin,size' form, same expected bytes as above
+cmp "${inD}" out || test_failed $LINENO
+"${LZIPRECOVER}" -D 21723,397 "${in_em}" > out || test_failed $LINENO # in_em is defined outside this section
+cmp "${inD}" out || test_failed $LINENO
+"${LZIPRECOVER}" -q -D 21723,397 --empty-error "${in_em}"
+[ $? = 2 ] || test_failed $LINENO # --empty-error turns the same input into a status 2 failure
+"${LZIP}" -D 0 "${in_lz}" -o a/b/c/out || test_failed $LINENO # directories a/b/c do not exist beforehand
+cmp in a/b/c/out || test_failed $LINENO
+rm -rf a || framework_failure
+
+"${LZIP}" -cd "${fox6_lz}" > out || test_failed $LINENO
+"${LZIP}" -cd "${f6mk_lz}" > copy || test_failed $LINENO
+cmp copy out || test_failed $LINENO # both files must decompress to the same data
+rm -f copy out || framework_failure
+"${LZIP}" -lq "${f6mk_lz}" --marking-error
+[ $? = 2 ] || test_failed $LINENO # --marking-error must reject f6mk_lz with status 2
+"${LZIP}" -tq "${f6mk_lz}" --marking-error
+[ $? = 2 ] || test_failed $LINENO
+cat "${f6mk_lz}" > f6mk.lz || framework_failure # work on copies; --clear-marking modifies its arguments
+cat "${f6mk_lz}" > f6mk2.lz || framework_failure
+cmp -s "${fox6_lz}" f6mk.lz && test_failed $LINENO # the files must differ before clearing
+"${LZIPRECOVER}" --clear-marking f6mk.lz f6mk2.lz || test_failed $LINENO
+cmp "${fox6_lz}" f6mk.lz || test_failed $LINENO # after clearing, both copies must be identical to fox6_lz
+cmp "${fox6_lz}" f6mk2.lz || test_failed $LINENO
+rm -f f6mk.lz f6mk2.lz || framework_failure
+
+"${LZIP}" -d "${fox_lz}" -o a/b/c/fox || test_failed $LINENO # output path inside directories that do not exist yet
+cmp fox a/b/c/fox || test_failed $LINENO
+rm -rf a || framework_failure
+"${LZIP}" -d -o a/b/c/fox < "${fox_lz}" || test_failed $LINENO # same, reading from standard input
+cmp fox a/b/c/fox || test_failed $LINENO
+rm -rf a || framework_failure
+"${LZIP}" -dq "${fox_lz}" -o a/b/c/
+[ $? = 1 ] || test_failed $LINENO # an output name ending in '/' must fail with status 1
+[ ! -e a ] || test_failed $LINENO # and must not create any directory
+
+printf "\ntesting bad input..."
+
+headers='LZIp LZiP LZip LzIP LzIp LziP lZIP lZIp lZiP lzIP' # case variants of the "LZIP" magic used below
+body='\001\014\000\203\377\373\377\377\300\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000$\000\000\000\000\000\000\000'
+cat "${in_lz}" > int.lz || framework_failure
+printf "LZIP${body}" >> int.lz || framework_failure # relies on printf expanding the octal escapes in ${body}
+if "${LZIP}" -tq int.lz ; then # sanity check; otherwise the whole header test is skipped (see else branch)
+ for header in ${headers} ; do
+ printf "${header}${body}" > int.lz || framework_failure
+ "${LZIP}" -lq int.lz # first member
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq < int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq int.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -lq --loose-trailing int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header} # --loose-trailing does not help when the bad header is the first member
+ "${LZIP}" -tq --loose-trailing int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq --loose-trailing < int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq --loose-trailing int.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ cat "${in_lz}" > int.lz || framework_failure
+ printf "${header}${body}" >> int.lz || framework_failure
+ "${LZIP}" -lq int.lz # trailing data
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq < int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq int.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -lq --loose-trailing int.lz ||
+ test_failed $LINENO ${header}
+ "${LZIP}" -t --loose-trailing int.lz ||
+ test_failed $LINENO ${header}
+ "${LZIP}" -t --loose-trailing < int.lz ||
+ test_failed $LINENO ${header}
+ "${LZIP}" -cd --loose-trailing int.lz > /dev/null ||
+ test_failed $LINENO ${header}
+ "${LZIP}" -lq --loose-trailing --trailing-error int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header} # adding --trailing-error makes the accepted trailing data an error again
+ "${LZIP}" -tq --loose-trailing --trailing-error int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -tq --loose-trailing --trailing-error < int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIP}" -cdq --loose-trailing --trailing-error int.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIPRECOVER}" -q --dump=tdata int.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIPRECOVER}" -q --strip=tdata int.lz > out # /dev/null returns 1 on OS/2
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIPRECOVER}" --dump=tdata --loose-trailing int.lz > \
+ /dev/null || test_failed $LINENO ${header}
+ "${LZIPRECOVER}" --strip=tdata --loose-trailing int.lz > \
+ out || test_failed $LINENO ${header}
+ "${LZIPRECOVER}" -q --remove=tdata int.lz
+ [ $? = 2 ] || test_failed $LINENO ${header}
+ "${LZIPRECOVER}" --remove=tdata --loose-trailing int.lz ||
+ test_failed $LINENO ${header}
+ cmp "${in_lz}" int.lz || test_failed $LINENO ${header} # removing the trailing data must leave exactly the first member
+ done
+else
+ printf "\nwarning: skipping header test: 'printf' does not work on your system."
+fi
+rm -f int.lz out || framework_failure
+
+for i in fox_v2.lz fox_s11.lz fox_de20.lz \
+ fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
+ "${LZIP}" -tq "${testdir}"/$i
+ [ $? = 2 ] || test_failed $LINENO $i # every one of these files must fail the integrity test
+done
+
+for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
+ "${LZIP}" -cdq "${testdir}"/$i > out
+ [ $? = 2 ] || test_failed $LINENO $i
+ cmp fox out || test_failed $LINENO $i # the decoded data is still correct although status 2 is reported
+ "${LZIPRECOVER}" -tq -i "${testdir}"/$i || test_failed $LINENO $i # with -i the same files are accepted
+ "${LZIPRECOVER}" -cdq -i "${testdir}"/$i > out || test_failed $LINENO $i
+ cmp fox out || test_failed $LINENO $i
+done
+rm -f fox out || framework_failure
+
+cat "${in_lz}" "${in_lz}" > in2.lz || framework_failure
+cat "${in_lz}" "${in_lz}" "${in_lz}" > in3.lz || framework_failure
+if dd if=in3.lz of=trunc.lz bs=14752 count=1 2> /dev/null &&
+ [ -e trunc.lz ] && cmp in2.lz trunc.lz > /dev/null 2>&1 ; then # dd sanity check: 14752 bytes must be exactly two members
+ for i in 6 20 14734 14753 14754 14755 14756 14757 14758 ; do # truncation lengths in bytes
+ dd if=in3.lz of=trunc.lz bs=$i count=1 2> /dev/null
+ "${LZIP}" -lq trunc.lz
+ [ $? = 2 ] || test_failed $LINENO $i # every truncated file must be rejected with status 2
+ "${LZIP}" -tq trunc.lz
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${LZIP}" -tq < trunc.lz
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${LZIP}" -cdq trunc.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${LZIP}" -dq < trunc.lz > /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+ done
+else
+ printf "\nwarning: skipping truncation test: 'dd' does not work on your system."
+fi
+rm -f in3.lz trunc.lz || framework_failure # in2.lz is intentionally kept
+
+for i in "${f6s1_lz}" "${f6s2_lz}" ; do # NOTE(review): the '|| test_failed' inside the backticks sees the pipeline (wc) status, not lzip's
+ lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`
+ [ "${lines}" -eq 2 ] || test_failed $LINENO "$i ${lines}" # listing must consist of exactly 2 lines
+done
+for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
+ lines=`"${LZIP}" -lvv "$i" | wc -l || test_failed $LINENO "$i"`
+ [ "${lines}" -eq 9 ] || test_failed $LINENO "$i ${lines}" # listing must consist of exactly 9 lines
+done
+
+cat "${in_lz}" > ingin.lz || framework_failure
+printf "g" >> ingin.lz || framework_failure # one stray byte between two valid members
+cat "${in_lz}" >> ingin.lz || framework_failure
+"${LZIP}" -lq ingin.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -atq ingin.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -atq < ingin.lz
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -acdq ingin.lz > /dev/null
+[ $? = 2 ] || test_failed $LINENO
+"${LZIP}" -adq < ingin.lz > /dev/null
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -lq -i ingin.lz || test_failed $LINENO # listing succeeds once -i is given
+"${LZIP}" -t ingin.lz || test_failed $LINENO # without -a the test succeeds
+"${LZIP}" -t < ingin.lz || test_failed $LINENO
+"${LZIP}" -cd ingin.lz > out || test_failed $LINENO
+cmp in out || test_failed $LINENO # only the first member is decoded
+"${LZIP}" -d < ingin.lz > out || test_failed $LINENO
+cmp in out || test_failed $LINENO
+"${LZIPRECOVER}" -cd -i ingin.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO # with -i the output must equal in2 (presumably two copies of 'in')
+
+"${LZIPRECOVER}" -D0 -q "${f6b1_lz}" -fo out
+[ $? = 2 ] || test_failed $LINENO # the damaged file fails with status 2 unless -i is given
+cmp -s "${f6b1}" out && test_failed $LINENO # and the output must not match the reference ${f6b1}
+"${LZIPRECOVER}" -D0 -q "${f6b1_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+cmp -s "${f6b1}" out && test_failed $LINENO
+"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" -fo out || test_failed $LINENO # with -i the run succeeds
+cmp "${f6b1}" out || test_failed $LINENO # and produces exactly the reference ${f6b1}
+"${LZIPRECOVER}" -D0 -iq "${f6b1_lz}" > out || test_failed $LINENO
+cmp "${f6b1}" out || test_failed $LINENO
+
+touch empty || framework_failure # zero-length reference file, also used by later sections
+"${LZIPRECOVER}" -D0 -q ingin.lz > out
+[ $? = 2 ] || test_failed $LINENO
+cmp empty out || test_failed $LINENO # on failure nothing at all may be written
+"${LZIPRECOVER}" -D0 -i ingin.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+printf "LZIP\001+" > in2t.lz || framework_failure # gap size < 36 bytes
+cat "${in_lz}" in "${in_lz}" >> in2t.lz || framework_failure # uncompressed 'in' acts as a gap between the members
+printf "LZIP\001-" >> in2t.lz || framework_failure # truncated member
+"${LZIPRECOVER}" -D0 -iq in2t.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO # only the two intact members contribute to the output
+"${LZIPRECOVER}" -cd -iq in2t.lz > out2 || test_failed $LINENO
+cmp in2 out2 || test_failed $LINENO
+"${LZIPRECOVER}" -t -iq in2t.lz || test_failed $LINENO
+rm -f in2 in2t.lz out out2 || framework_failure
+
+printf "\ntesting --merge..."
+
+rm -f out.lz || framework_failure
+"${LZIPRECOVER}" -m -o out.lz "${fox6_lz}" "${f6b1_lz}" || test_failed $LINENO # exits 0 when fox6_lz is among the inputs
+[ ! -e out.lz ] || test_failed $LINENO # but no output file is produced in that case
+"${LZIPRECOVER}" -m -o out.lz "${f6b1_lz}" "${fox6_lz}" || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q # same file given twice
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" "${bad2_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+cat "${bad2_lz}" > bad2.lz || framework_failure # identical contents under a different name
+"${LZIPRECOVER}" -m -o out.lz "${bad1_lz}" "${bad2_lz}" bad2.lz -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+rm -f bad2.lz || framework_failure
+"${LZIPRECOVER}" -m -o out.lz "${f6b1_lz}" "${f6b5_lz}" -q # these combinations are expected to be unmergeable
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -m -o out.lz "${f6b3_lz}" "${f6b5_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -m -o out.lz "${bad3_lz}" "${bad4_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+
+"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b4_lz}" || test_failed $LINENO
+cmp "${fox6_lz}" out.lz || test_failed $LINENO # the merged result must be identical to fox6_lz
+"${LZIPRECOVER}" -mf -o out.lz "${f6b4_lz}" "${f6b1_lz}" || test_failed $LINENO # argument order must not matter
+cmp "${fox6_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -m -o a/b/c/out.lz "${f6b1_lz}" "${f6b4_lz}" ||
+ test_failed $LINENO
+cmp "${fox6_lz}" a/b/c/out.lz || test_failed $LINENO # output inside directories that did not exist
+rm -rf a || framework_failure
+
+for i in "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" "${f6b6_lz}" ; do # pair f6b2_lz with each other damaged copy
+ "${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "$i" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -mf -o out.lz "$i" "${f6b2_lz}" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+done
+
+for i in "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" "${f6b6_lz}" ; do # all six orderings of f6b1_lz, f6b2_lz and $i
+ "${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b2_lz}" "$i" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "$i" "${f6b2_lz}" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "${f6b1_lz}" "$i" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "$i" "${f6b1_lz}" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -mf -o out.lz "$i" "${f6b1_lz}" "${f6b2_lz}" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -mf -o out.lz "$i" "${f6b2_lz}" "${f6b1_lz}" ||
+ test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out.lz || test_failed $LINENO "$i" # result must always be identical to fox6_lz
+done
+
+"${LZIPRECOVER}" -mf -o out.lz "${f6b3_lz}" "${f6b4_lz}" "${f6b5_lz}" ||
+ test_failed $LINENO
+cmp "${fox6_lz}" out.lz || test_failed $LINENO # three damaged copies
+"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b3_lz}" "${f6b4_lz}" \
+ "${f6b5_lz}" || test_failed $LINENO
+cmp "${fox6_lz}" out.lz || test_failed $LINENO # four damaged copies
+"${LZIPRECOVER}" -mf -o out.lz "${f6b2_lz}" "${f6b3_lz}" "${f6b4_lz}" \
+ "${f6b5_lz}" || test_failed $LINENO
+cmp "${fox6_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out.lz "${f6b1_lz}" "${f6b2_lz}" "${f6b3_lz}" \
+ "${f6b4_lz}" "${f6b5_lz}" || test_failed $LINENO
+cmp "${fox6_lz}" out.lz || test_failed $LINENO # five damaged copies
+
+"${LZIPRECOVER}" -mf -o out.lz "${bad1_lz}" "${bad2_lz}" || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO # bad1_lz and bad2_lz merge into in_lz, in either order
+"${LZIPRECOVER}" -mf -o out.lz "${bad2_lz}" "${bad1_lz}" || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+
+cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > in4.lz || framework_failure # expected result; also used by the --reproduce tests
+cat "${bad1_lz}" "${in_lz}" "${bad1_lz}" "${bad1_lz}" > bad11.lz || framework_failure
+cat "${bad1_lz}" "${in_lz}" "${bad2_lz}" "${in_lz}" > bad12.lz || framework_failure
+cat "${bad2_lz}" "${in_lz}" "${bad2_lz}" "${bad2_lz}" > bad22.lz || framework_failure
+"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || test_failed $LINENO # all six orderings of the three files follow
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+rm -f bad11.lz bad12.lz bad22.lz || framework_failure # in4.lz and out4.lz are kept for later sections
+
+for i in "${bad1_lz}" "${bad2_lz}" ; do # every pairing of {bad1,bad2} with {bad3,bad4,bad5}, in both argument orders
+ for j in "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ; do
+ "${LZIPRECOVER}" -mf -o out.lz "$i" "$j" ||
+ test_failed $LINENO "$i $j"
+ cmp "${in_lz}" out.lz || test_failed $LINENO "$i $j"
+ "${LZIPRECOVER}" -mf -o out.lz "$j" "$i" ||
+ test_failed $LINENO "$i $j"
+ cmp "${in_lz}" out.lz || test_failed $LINENO "$i $j" # merged result must equal in_lz
+ done
+done
+
+"${LZIPRECOVER}" -mf -o out.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" ||
+ test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO # bad3/bad4/bad5 together recover in_lz; all six orderings are tried
+"${LZIPRECOVER}" -mf -o out.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" ||
+ test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" ||
+ test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" ||
+ test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" ||
+ test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" ||
+ test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO
+
+cat "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" "${in_lz}" > bad345.lz || framework_failure # rotations of the three damaged members plus one intact member
+cat "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" "${in_lz}" > bad453.lz || framework_failure
+cat "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" "${in_lz}" > bad534.lz || framework_failure
+"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz ||
+ test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO # every ordering must rebuild the four-member in4.lz
+"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz ||
+ test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz ||
+ test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz ||
+ test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz ||
+ test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz ||
+ test_failed $LINENO
+cmp in4.lz out4.lz || test_failed $LINENO
+rm -f bad345.lz bad453.lz bad534.lz out4.lz || framework_failure # in4.lz stays for the --reproduce tests
+
+printf "\ntesting --byte-repair..."
+
+rm -f out.lz || framework_failure
+"${LZIPRECOVER}" -R -o out.lz "${fox6_lz}" || test_failed $LINENO # exits 0 for fox6_lz
+[ ! -e out.lz ] || test_failed $LINENO # but writes no output file
+"${LZIPRECOVER}" -R -o out.lz "${bad2_lz}" -q # bad2/bad3/bad4 are expected to be unrepairable by -R
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -R -o out.lz "${bad3_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -R -o out.lz "${bad4_lz}" -q
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e out.lz ] || test_failed $LINENO
+"${LZIPRECOVER}" -Rf -o out.lz "${f6b1_lz}" || test_failed $LINENO
+cmp "${fox6_lz}" out.lz || test_failed $LINENO # repaired f6b1_lz must equal fox6_lz
+"${LZIPRECOVER}" -Rf -o out.lz "${bad1_lz}" || test_failed $LINENO
+cmp "${in_lz}" out.lz || test_failed $LINENO # repaired bad1_lz must equal in_lz
+"${LZIPRECOVER}" -R -o a/b/c/out.lz "${bad1_lz}" || test_failed $LINENO # output inside directories that do not exist yet
+cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
+rm -rf a || framework_failure
+
+cat "${f6b1_lz}" > out.lz || framework_failure
+"${LZIPRECOVER}" -R out.lz || test_failed $LINENO # no -o: the output name is derived from the input name
+[ -e out_fixed.lz ] || test_failed $LINENO # out.lz -> out_fixed.lz
+mv out.lz out.tar.lz || framework_failure
+"${LZIPRECOVER}" -R out.tar.lz || test_failed $LINENO
+[ -e out_fixed.tar.lz ] || test_failed $LINENO # out.tar.lz -> out_fixed.tar.lz
+mv out.tar.lz out.tlz || framework_failure
+"${LZIPRECOVER}" -R out.tlz || test_failed $LINENO
+[ -e out_fixed.tlz ] || test_failed $LINENO # out.tlz -> out_fixed.tlz
+rm -f out.tlz out_fixed.lz out_fixed.tar.lz out_fixed.tlz ||
+ framework_failure
+
+printf "\ntesting --reproduce..."
+
+if [ -z "${LZIP_NAME}" ] ; then LZIP_NAME=lzip ; fi # default external compressor
+if /bin/sh -c "${LZIP_NAME} -s18KiB" < in > out 2> /dev/null &&
+ cmp "${in_lz}" out > /dev/null 2>&1 ; then # run only if the external lzip reproduces in_lz byte for byte
+ rm -f out || framework_failure
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o out \
+ --reference-file=foo "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
+ [ ! -e out ] || test_failed $LINENO # exits 0 for in_lz without writing an output file
+ "${LZIPRECOVER}" --reproduce --lzip-name="${LZIP_NAME}" -o a/b/c/out \
+ --reference-file=in "${bad6_lz}" > /dev/null ||
+ test_failed $LINENO "${LZIP_NAME}"
+ cmp "${in_lz}" a/b/c/out || test_failed $LINENO "${LZIP_NAME}"
+ rm -rf a || framework_failure
+
+ for i in 6 7 8 9 ; do
+ for f in "${testdir}"/test_bad${i}.txt in ; do # reference: the matching test_bad$i.txt, then 'in'
+ rm -f out || framework_failure
+ "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
+ --reference-file="$f" "${testdir}"/test_bad${i}.lz -o out ||
+ test_failed $LINENO "${LZIP_NAME} $i $f"
+ cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f"
+ rm -f out || framework_failure
+ "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
+ --reference-file="$f" "${testdir}"/test_bad${i}.lz -o out \
+ --lzip-level=6 || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
+ cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f level=6"
+ rm -f out || framework_failure
+ "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
+ --reference-file="$f" "${testdir}"/test_bad${i}.lz -o out \
+ --lzip-level=m36 || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
+ cmp "${in_lz}" out || test_failed $LINENO "${LZIP_NAME} $i $f level=m36"
+ done
+ done
+
+ # multimember reproduction using test_bad[6789].txt as reference
+ cat "${bad6_lz}" "${bad7_lz}" "${bad8_lz}" "${bad9_lz}" > mm_bad.lz ||
+ framework_failure
+ rm -f out || framework_failure
+ for i in 6 7 8 9 ; do # reproduce one member each time
+ "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" \
+ --reference-file="${testdir}"/test_bad${i}.txt mm_bad.lz -o out ||
+ test_failed $LINENO "${LZIP_NAME} $i"
+ mv out mm_bad.lz || framework_failure
+ done
+ cmp in4.lz mm_bad.lz || test_failed $LINENO "${LZIP_NAME}" # in4.lz was created in the --merge section
+
+ # multimember reproduction using test.txt as reference
+ cat "${bad6_lz}" "${bad7_lz}" "${bad8_lz}" "${bad9_lz}" > mm_bad.lz ||
+ framework_failure
+ rm -f out || framework_failure
+ for i in 6 7 8 9 ; do # reproduce one member each time
+ "${LZIPRECOVER}" -q --reproduce --lzip-name="${LZIP_NAME}" -o out \
+ --reference-file=in mm_bad.lz || test_failed $LINENO "${LZIP_NAME} $i"
+ mv out mm_bad.lz || framework_failure
+ done
+ cmp in4.lz mm_bad.lz || test_failed $LINENO "${LZIP_NAME}"
+ rm -f mm_bad.lz || framework_failure
+
+ "${LZIPRECOVER}" -q --debug-reproduce=13-7356 --lzip-name="${LZIP_NAME}" \
+ --reference-file=in "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
+
+ "${LZIPRECOVER}" --debug-reproduce=512,5120,512 --lzip-name="${LZIP_NAME}" \
+ -q --reference-file=in "${in_lz}" || test_failed $LINENO "${LZIP_NAME}"
+else
+ printf "\nwarning: skipping --reproduce test: %s not found or not the right version.\n" "${LZIP_NAME}" # name passed as data: it is user-supplied and may contain '%' or '\'
+ ${LZIP_NAME} -V
+ printf "\nTry 'make LZIP_NAME=<name_of_lzip_executable> check'."
+fi
+rm -f in4.lz || framework_failure
+
+printf "\ntesting --split..."
+
+cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" \
+ "${in_lz}" "${in_lz}" "${in_lz}" > in9.lz || framework_failure
+"${LZIPRECOVER}" -s -o a/b/c/in9.lz in9.lz || test_failed $LINENO # pieces are written as a/b/c/rec<N>in9.lz
+for i in 1 2 3 4 5 6 7 8 9 ; do
+ cmp "${in_lz}" a/b/c/rec${i}in9.lz || test_failed $LINENO $i # each piece is exactly one member
+ "${LZIP}" -cd a/b/c/rec${i}in9.lz > out || test_failed $LINENO $i
+ cmp in out || test_failed $LINENO $i
+done
+cat a/b/c/rec*in9.lz | cmp in9.lz - || test_failed $LINENO # the pieces concatenated must reproduce the input
+rm -rf a || framework_failure
+
+cat in9.lz > in9t.lz || framework_failure
+printf "garbage" >> in9t.lz || framework_failure # trailing bytes become a tenth piece
+"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO
+for i in 01 02 03 04 05 06 07 08 09 ; do # piece numbers are two digits wide now that there are 10 pieces
+ cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i
+ "${LZIP}" -cd rec${i}in9t.lz > out || test_failed $LINENO $i
+ cmp in out || test_failed $LINENO $i
+done
+[ -e rec10in9t.lz ] || test_failed $LINENO
+[ ! -e rec11in9t.lz ] || test_failed $LINENO
+cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO
+rm -f rec*in9t.lz in9t.lz || framework_failure
+
+printf "LZIP\001+" > in9t.lz || framework_failure # gap size < 36 bytes
+cat "${in_lz}" "${in_lz}" "${in_lz}" in "${in_lz}" "${in_lz}" "${in_lz}" \
+ "${in_lz}" "${in_lz}" "${in_lz}" in >> in9t.lz || framework_failure
+"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO
+for i in 02 03 04 06 07 08 09 10 11 ; do # pieces that must be intact members
+ cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i
+ "${LZIP}" -cd rec${i}in9t.lz > out || test_failed $LINENO $i
+ cmp in out || test_failed $LINENO $i
+done
+cmp in rec05in9t.lz || test_failed $LINENO # the uncompressed 'in' between members is a piece of its own
+cmp in rec12in9t.lz || test_failed $LINENO # as is the trailing 'in'
+[ -e rec01in9t.lz ] || test_failed $LINENO # piece 01 holds the leading gap
+[ ! -e rec13in9t.lz ] || test_failed $LINENO
+cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO
+rm -f rec*in9t.lz in9t.lz || framework_failure
+
+cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" \
+ "${in_lz}" "${in_lz}" in "${in_lz}" > in9t.lz || framework_failure
+printf "LZIP\001-" >> in9t.lz || framework_failure # truncated member
+"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO
+for i in 01 02 03 04 05 06 07 08 10 ; do # pieces that must be intact members
+ cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i
+ "${LZIP}" -cd rec${i}in9t.lz > out || test_failed $LINENO $i
+ cmp in out || test_failed $LINENO $i
+done
+cmp in rec09in9t.lz || test_failed $LINENO # the uncompressed 'in' between members 8 and 10
+[ -e rec11in9t.lz ] || test_failed $LINENO # the truncated member at the end
+[ ! -e rec12in9t.lz ] || test_failed $LINENO
+cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO
+rm -f rec*in9t.lz in9t.lz || framework_failure
+
+cat "${in_lz}" "${in_lz}" "${in_lz}" in "${in_lz}" > in9t.lz || framework_failure
+printf "LZIP\001-" >> in9t.lz || framework_failure # truncated member
+cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" >> in9t.lz ||
+ framework_failure
+"${LZIPRECOVER}" -s in9t.lz || test_failed $LINENO
+for i in 01 02 03 05 07 08 09 10 11 ; do # pieces that must be intact members
+ cmp "${in_lz}" rec${i}in9t.lz || test_failed $LINENO $i
+ "${LZIP}" -cd rec${i}in9t.lz > out || test_failed $LINENO $i
+ cmp in out || test_failed $LINENO $i
+done
+cmp in rec04in9t.lz || test_failed $LINENO # the uncompressed 'in'
+[ -e rec06in9t.lz ] || test_failed $LINENO # the truncated member in the middle of the file
+[ ! -e rec12in9t.lz ] || test_failed $LINENO
+cat rec*in9t.lz | cmp in9t.lz - || test_failed $LINENO
+rm -f rec*in9t.lz in9t.lz || framework_failure
+
+"${LZIPRECOVER}" -s "${f6b1_lz}" -o f6.lz || test_failed $LINENO # -o f6.lz gives pieces named rec<N>f6.lz
+for i in 1 2 3 4 5 6 ; do
+ [ -e rec${i}f6.lz ] || test_failed $LINENO # only existence is checked for f6b1_lz
+done
+[ ! -e rec7f6.lz ] || test_failed $LINENO
+cat rec*f6.lz | cmp "${f6b1_lz}" - || test_failed $LINENO
+rm -f rec*f6.lz || framework_failure
+
+"${LZIPRECOVER}" -s "${f6b2_lz}" -o f6.lz || test_failed $LINENO
+for i in 1 3 4 5 6 ; do
+ cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO # these pieces must be identical to fox_lz
+done
+[ -e rec2f6.lz ] || test_failed $LINENO # piece 2 is not compared with fox_lz
+[ ! -e rec7f6.lz ] || test_failed $LINENO
+cat rec*f6.lz | cmp "${f6b2_lz}" - || test_failed $LINENO
+rm -f rec*f6.lz || framework_failure
+
+"${LZIPRECOVER}" -s "${f6b3_lz}" -o f6.lz || test_failed $LINENO
+for i in 1 2 4 ; do
+ cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO
+done
+[ -e rec3f6.lz ] || test_failed $LINENO # piece 3 is not compared with fox_lz
+[ ! -e rec5f6.lz ] || test_failed $LINENO # f6b3_lz yields only four pieces
+cat rec*f6.lz | cmp "${f6b3_lz}" - || test_failed $LINENO
+rm -f rec*f6.lz || framework_failure
+
+for i in "${f6b4_lz}" "${f6b5_lz}" ; do
+ "${LZIPRECOVER}" -s "$i" -o f6.lz || test_failed $LINENO
+ for j in 1 2 3 4 ; do
+ cmp "${fox_lz}" rec${j}f6.lz || test_failed $LINENO # first four pieces must be identical to fox_lz
+ done
+ [ -e rec5f6.lz ] || test_failed $LINENO # piece 5 is not compared with fox_lz
+ [ ! -e rec6f6.lz ] || test_failed $LINENO
+ cat rec*f6.lz | cmp "$i" - || test_failed $LINENO # pieces concatenated must reproduce the input
+ rm -f rec*f6.lz || framework_failure
+done
+
+"${LZIPRECOVER}" -s "${f6b6_lz}" -o f6.lz || test_failed $LINENO
+for i in 1 2 3 4 5 ; do
+ cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO # first five pieces must be identical to fox_lz
+done
+[ -e rec6f6.lz ] || test_failed $LINENO # piece 6 is not compared with fox_lz
+[ ! -e rec7f6.lz ] || test_failed $LINENO
+cat rec*f6.lz | cmp "${f6b6_lz}" - || test_failed $LINENO
+rm -f rec*f6.lz || framework_failure
+
+"${LZIPRECOVER}" -s "${f6s1_lz}" -o f6.lz || test_failed $LINENO
+for i in 1 2 3 4 5 ; do
+ cmp "${fox_lz}" rec${i}f6.lz || test_failed $LINENO
+done
+[ -e rec6f6.lz ] || test_failed $LINENO # f6s1_lz: six pieces, the last one not compared with fox_lz
+[ ! -e rec7f6.lz ] || test_failed $LINENO
+cat rec*f6.lz | cmp "${f6s1_lz}" - || test_failed $LINENO
+rm -f rec*f6.lz || framework_failure
+for i in "${f6s2_lz}" "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
+ "${LZIPRECOVER}" -s "$i" -o f6.lz || test_failed $LINENO "$i"
+ for j in 1 2 3 4 5 6 ; do
+ cmp "${fox_lz}" rec${j}f6.lz || test_failed $LINENO "$i $j" # six pieces identical to fox_lz
+ done
+ [ -e rec7f6.lz ] || test_failed $LINENO "$i" # plus a seventh piece that is not compared with fox_lz
+ [ ! -e rec8f6.lz ] || test_failed $LINENO "$i"
+ cat rec*f6.lz | cmp "$i" - || test_failed $LINENO "$i"
+ rm -f rec*f6.lz || framework_failure
+done
+
+"${LZIPRECOVER}" -s ingin.lz || test_failed $LINENO # ingin.lz = member, the byte "g", member
+cmp "${in_lz}" rec1ingin.lz || test_failed $LINENO
+cmp "${in_lz}" rec3ingin.lz || test_failed $LINENO
+printf "g" | cmp rec2ingin.lz - || test_failed $LINENO # the single stray byte becomes its own piece
+[ ! -e rec4ingin.lz ] || test_failed $LINENO
+cat rec*ingin.lz | cmp ingin.lz - || test_failed $LINENO
+rm -f rec*ingin.lz || framework_failure
+
+printf "\ntesting --*=damaged..."
+
+cat "${in_lz}" > in.lz || framework_failure # one intact member
+cat "${in_lz}" in > int.lz || framework_failure # the same member followed by trailing data
+"${LZIPRECOVER}" --dump=damaged in.lz > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO # nothing is dumped from an undamaged file
+"${LZIPRECOVER}" --dump=damage int.lz > out || test_failed $LINENO # shortened spellings of 'damaged' are exercised from here on
+cmp empty out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damag in.lz > out || test_failed $LINENO
+cmp in.lz out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=dama int.lz > out || test_failed $LINENO
+cmp int.lz out || test_failed $LINENO # trailing data is preserved by --strip=damaged
+# strip trailing data from all but the last file
+"${LZIPRECOVER}" --strip=dam int.lz int.lz > out || test_failed $LINENO
+cat "${in_lz}" "${in_lz}" in | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --remove=da in.lz || test_failed $LINENO # --remove modifies the file in place
+cmp "${in_lz}" in.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=d int.lz || test_failed $LINENO
+cat "${in_lz}" in | cmp int.lz - || test_failed $LINENO
+rm -f in.lz int.lz || framework_failure
+
+cat in9.lz in > in9t.lz || framework_failure # nine intact members plus trailing data
+"${LZIPRECOVER}" --dump=damaged in9.lz > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO
+"${LZIPRECOVER}" --dump=damaged in9t.lz > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO # trailing data is not reported as damaged
+"${LZIPRECOVER}" --strip=damaged in9.lz > out || test_failed $LINENO
+cmp in9.lz out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged in9t.lz > out || test_failed $LINENO
+cmp in9t.lz out || test_failed $LINENO
+"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO
+cat in9.lz in | cmp in9t.lz - || test_failed $LINENO # file must be unchanged
+cat in9.lz > in9t.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO
+cmp in9.lz in9t.lz || test_failed $LINENO
+rm -f in9t.lz || framework_failure
+
+printf "LZIP\001+" > in9t.lz || framework_failure # gap size < 36 bytes
+cat "${in_lz}" "${in_lz}" "${in_lz}" in "${in_lz}" "${in_lz}" "${in_lz}" \
+ "${in_lz}" "${in_lz}" "${in_lz}" >> in9t.lz || framework_failure
+printf "LZIP\001-" >> in9t.lz || framework_failure # truncated member
+printf "LZIP\001+" > gaps || framework_failure # 'gaps' = the three non-member spans of in9t.lz, in file order
+cat in >> gaps || framework_failure
+printf "LZIP\001-" >> gaps || framework_failure
+"${LZIPRECOVER}" --dump=damaged in9t.lz > out || test_failed $LINENO
+cmp gaps out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged in9t.lz > out || test_failed $LINENO
+cmp in9.lz out || test_failed $LINENO # only the nine intact members remain
+"${LZIPRECOVER}" --remove=damaged in9t.lz || test_failed $LINENO
+cmp in9.lz in9t.lz || test_failed $LINENO
+rm -f in9.lz in9t.lz gaps || framework_failure
+
+"${LZIPRECOVER}" --dump=damaged "${f6b1_lz}" > out || test_failed $LINENO
+cmp "${f6b1_lz}" out || test_failed $LINENO # for f6b1_lz the dump is the whole file
+cat "${f6b1_lz}" in > f6bt.lz || framework_failure # same file with trailing data appended
+"${LZIPRECOVER}" --dump=damaged f6bt.lz > out || test_failed $LINENO
+cmp "${f6b1_lz}" out || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged "${f6b1_lz}" > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO # nothing is left after stripping
+"${LZIPRECOVER}" -q --strip=damaged f6bt.lz > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO
+cat "${f6b1_lz}" > f6b.lz || framework_failure
+"${LZIPRECOVER}" -q --remove=damaged f6b.lz
+[ $? = 2 ] || test_failed $LINENO # --remove fails with status 2 here
+cmp "${f6b1_lz}" f6b.lz || test_failed $LINENO # and leaves the file untouched
+"${LZIPRECOVER}" -q --remove=damaged f6bt.lz
+[ $? = 2 ] || test_failed $LINENO
+cat "${f6b1_lz}" in | cmp f6bt.lz - || test_failed $LINENO
+rm -f f6b.lz f6bt.lz || framework_failure
+
+"${LZIPRECOVER}" --dump=damaged "${f6b2_lz}" > out || test_failed $LINENO
+cat "${fox_lz}" out "${fox_lz}" "${fox_lz}" "${fox_lz}" \
+ "${fox_lz}" | cmp "${f6b2_lz}" - || test_failed $LINENO # the dump must be exactly the second of six members
+cat "${f6b2_lz}" in > f6bt.lz || framework_failure
+"${LZIPRECOVER}" --dump=damaged f6bt.lz > out || test_failed $LINENO
+cat "${fox_lz}" out "${fox_lz}" "${fox_lz}" "${fox_lz}" \
+ "${fox_lz}" | cmp "${f6b2_lz}" - || test_failed $LINENO # trailing data must not appear in the dump
+cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox5.lz ||
+ framework_failure
+"${LZIPRECOVER}" --strip=damaged "${f6b2_lz}" > out || test_failed $LINENO
+cmp fox5.lz out || test_failed $LINENO # five intact members remain
+"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
+cat fox5.lz in | cmp out - || test_failed $LINENO
+cat "${f6b2_lz}" > f6b.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO
+cmp fox5.lz f6b.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO
+cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO
+rm -f f6b.lz f6bt.lz || framework_failure # fox5.lz is kept for the f6b6_lz tests
+
+"${LZIPRECOVER}" --dump=damaged "${f6b3_lz}" > out || test_failed $LINENO
+cat "${fox_lz}" "${fox_lz}" out "${fox_lz}" | cmp "${f6b3_lz}" - ||
+ test_failed $LINENO
+cat "${f6b3_lz}" in > f6bt.lz || framework_failure
+"${LZIPRECOVER}" --dump=damaged f6bt.lz > out || test_failed $LINENO # trailing data must not appear in the dump
+cat "${fox_lz}" "${fox_lz}" out "${fox_lz}" | cmp "${f6b3_lz}" - ||
+ test_failed $LINENO
+cat "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox3.lz || framework_failure
+"${LZIPRECOVER}" --strip=damaged "${f6b3_lz}" > out || test_failed $LINENO
+cmp fox3.lz out || test_failed $LINENO # three intact members remain
+"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
+cat fox3.lz in | cmp out - || test_failed $LINENO
+cat "${f6b3_lz}" > f6b.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO
+cmp fox3.lz f6b.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO
+cat fox3.lz in | cmp f6bt.lz - || test_failed $LINENO
+rm -f f6b.lz f6bt.lz fox3.lz || framework_failure
+
+cat "${fox_lz}" "${fox_lz}" "${fox_lz}" "${fox_lz}" > fox4.lz ||
+ framework_failure
+for i in "${f6b4_lz}" "${f6b5_lz}" ; do
+ "${LZIPRECOVER}" --dump=damaged "$i" > out || test_failed $LINENO "$i"
+ cat fox4.lz out | cmp "$i" - || test_failed $LINENO "$i" # the dump is everything after the first four members
+ cat "$i" in > f6bt.lz || framework_failure
+ "${LZIPRECOVER}" --dump=damaged f6bt.lz > out ||
+ test_failed $LINENO "$i"
+ cat fox4.lz out | cmp f6bt.lz - || test_failed $LINENO "$i" # here the appended 'in' is part of the dump
+ "${LZIPRECOVER}" --strip=damaged "$i" > out || test_failed $LINENO "$i"
+ cmp fox4.lz out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --strip=damaged f6bt.lz > out ||
+ test_failed $LINENO "$i"
+ cmp fox4.lz out || test_failed $LINENO "$i" # and the appended 'in' is stripped as well
+ cat "$i" > f6b.lz || framework_failure
+ "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i"
+ cmp fox4.lz f6b.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i"
+ cmp fox4.lz f6bt.lz || test_failed $LINENO "$i"
+done
+rm -f f6b.lz f6bt.lz fox4.lz || framework_failure
+
+"${LZIPRECOVER}" --dump=damaged "${f6b6_lz}" > out || test_failed $LINENO
+cat fox5.lz out | cmp "${f6b6_lz}" - || test_failed $LINENO
+cat "${f6b6_lz}" in > f6bt.lz || framework_failure
+"${LZIPRECOVER}" --dump=damaged f6bt.lz > out || test_failed $LINENO
+cat fox5.lz out | cmp "${f6b6_lz}" - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged "${f6b6_lz}" > out || test_failed $LINENO
+cmp fox5.lz out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged f6bt.lz > out || test_failed $LINENO
+cat fox5.lz in | cmp out - || test_failed $LINENO
+cat "${f6b6_lz}" > f6b.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO
+cmp fox5.lz f6b.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO
+cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO
+rm -f f6b.lz f6bt.lz || framework_failure
+
+for i in "${f6s1_lz}" "${f6s2_lz}" ; do
+ "${LZIPRECOVER}" --dump=damaged "$i" > out || test_failed $LINENO "$i"
+ cmp "$i" out || test_failed $LINENO "$i"
+ cat "$i" in > f6bt.lz || framework_failure
+ "${LZIPRECOVER}" --dump=damaged f6bt.lz > out ||
+ test_failed $LINENO "$i"
+ cmp "$i" out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -q --strip=damaged "$i" > out ||
+ test_failed $LINENO "$i"
+ cmp empty out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -q --strip=damaged f6bt.lz > out ||
+ test_failed $LINENO "$i"
+ cmp empty out || test_failed $LINENO "$i"
+ cat "$i" > f6b.lz || framework_failure
+ "${LZIPRECOVER}" -q --remove=damaged f6b.lz
+ [ $? = 2 ] || test_failed $LINENO "$i"
+ cmp "$i" f6b.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" -q --remove=damaged f6bt.lz
+ [ $? = 2 ] || test_failed $LINENO "$i"
+ cat "$i" in | cmp f6bt.lz - || test_failed $LINENO "$i"
+done
+rm -f f6b.lz f6bt.lz || framework_failure
+
+for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
+ "${LZIPRECOVER}" --dump=damaged "$i" > out || test_failed $LINENO "$i"
+ cmp empty out || test_failed $LINENO "$i"
+ cat "$i" in > f6bt.lz || framework_failure
+ "${LZIPRECOVER}" --dump=damaged f6bt.lz > out ||
+ test_failed $LINENO "$i"
+ cmp empty out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --strip=damaged "$i" > out || test_failed $LINENO "$i"
+ cmp "$i" out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --strip=damaged f6bt.lz > out ||
+ test_failed $LINENO "$i"
+ cat "$i" in | cmp out - || test_failed $LINENO "$i"
+ cat "$i" > f6b.lz || framework_failure
+ "${LZIPRECOVER}" --remove=damaged f6b.lz || test_failed $LINENO "$i"
+ cmp "$i" f6b.lz || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --remove=damaged f6bt.lz || test_failed $LINENO "$i"
+ cat "$i" in | cmp f6bt.lz - || test_failed $LINENO "$i"
+done
+rm -f f6b.lz f6bt.lz || framework_failure
+
+cat ingin.lz "${inD}" > ingint.lz || framework_failure
+"${LZIPRECOVER}" --dump=damaged ingin.lz > out || test_failed $LINENO
+printf "g" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=damaged ingint.lz > out || test_failed $LINENO
+printf "g" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged ingin.lz > out || test_failed $LINENO
+cmp in2.lz out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged ingint.lz > out || test_failed $LINENO
+cat "${in_lz}" "${in_lz}" "${inD}" | cmp out - || test_failed $LINENO
+cat ingin.lz > ingin2.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged ingin2.lz || test_failed $LINENO
+cmp in2.lz ingin2.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=damaged ingint.lz || test_failed $LINENO
+cat "${in_lz}" "${in_lz}" "${inD}" | cmp ingint.lz - || test_failed $LINENO
+rm -f ingin2.lz ingint.lz || framework_failure
+
+# concatenate output from several files
+"${LZIPRECOVER}" --dump=damaged "${f6b2_lz}" > out || test_failed $LINENO
+"${LZIPRECOVER}" --dump=damaged "${bad2_lz}" "${f6b2_lz}" > out2 ||
+ test_failed $LINENO
+cat "${bad2_lz}" out | cmp out2 - || test_failed $LINENO
+cat "${bad2_lz}" in > bad2t.lz || framework_failure
+cat "${f6b2_lz}" in > f6bt.lz || framework_failure
+"${LZIPRECOVER}" --dump=damaged bad2t.lz "${f6b2_lz}" "${bad2_lz}" \
+ f6bt.lz > out4 || test_failed $LINENO
+cat "${bad2_lz}" out "${bad2_lz}" out | cmp out4 - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=damaged "${f6b2_lz}" bad2t.lz f6bt.lz \
+ "${bad2_lz}" > out4 || test_failed $LINENO
+cat out "${bad2_lz}" out "${bad2_lz}" | cmp out4 - || test_failed $LINENO
+#
+"${LZIPRECOVER}" -q --strip=damaged "${bad2_lz}" "${f6b2_lz}" > out ||
+ test_failed $LINENO
+cmp fox5.lz out || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged bad2t.lz "${f6b2_lz}" > out ||
+ test_failed $LINENO
+cmp fox5.lz out || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged "${f6b2_lz}" bad2t.lz f6bt.lz > out ||
+ test_failed $LINENO
+cat fox5.lz fox5.lz in | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged "${f6b2_lz}" f6bt.lz bad2t.lz > out ||
+ test_failed $LINENO
+cat fox5.lz fox5.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged f6bt.lz bad2t.lz > out ||
+ test_failed $LINENO
+cmp fox5.lz out || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=damaged f6bt.lz "${in_lz}" > out ||
+ test_failed $LINENO
+cat fox5.lz "${in_lz}" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=damaged --strip=tdata f6bt.lz "${in_lz}" > out ||
+ test_failed $LINENO
+cat fox5.lz "${in_lz}" | cmp out - || test_failed $LINENO
+#
+cat "${f6b2_lz}" > f6b.lz || framework_failure
+"${LZIPRECOVER}" -q --remove=damaged f6b.lz bad2t.lz f6bt.lz
+[ $? = 2 ] || test_failed $LINENO
+cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO
+cmp fox5.lz f6b.lz || test_failed $LINENO
+cat fox5.lz in | cmp f6bt.lz - || test_failed $LINENO
+cat "${bad2_lz}" in > bad2t.lz || framework_failure
+cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure
+cat "${f6b1_lz}" in > f6abt.lz || framework_failure
+cat "${f6b2_lz}" > f6b.lz || framework_failure
+cat "${f6b2_lz}" in > f6bt.lz || framework_failure
+"${LZIPRECOVER}" -q --remove=d:t fox6t.lz f6abt.lz f6b.lz bad2t.lz f6bt.lz
+[ $? = 2 ] || test_failed $LINENO
+cat "${bad2_lz}" in | cmp bad2t.lz - || test_failed $LINENO
+cat "${f6b1_lz}" in | cmp f6abt.lz - || test_failed $LINENO
+cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO
+cmp fox5.lz f6b.lz || test_failed $LINENO
+cmp fox5.lz f6bt.lz || test_failed $LINENO
+rm -f fox6t.lz f6b.lz f6bt.lz bad2t.lz fox5.lz out2 out4 || framework_failure
+
+printf "\ntesting trailing data..."
+
+cat "${in_lz}" "${inD}" > int.lz || framework_failure
+"${LZIPRECOVER}" --dump=tdata int.lz > out || test_failed $LINENO
+cmp "${inD}" out || test_failed $LINENO
+rm -f out || framework_failure
+"${LZIPRECOVER}" --dump=tdat int.lz -o out || test_failed $LINENO
+cmp "${inD}" out || test_failed $LINENO
+cat "${fox6_lz}" "${inD}" > fox6t.lz || framework_failure
+cat "${inD}" "${inD}" > inD2 || framework_failure
+"${LZIPRECOVER}" --dump=tda int.lz fox6t.lz -f -o out || test_failed $LINENO
+cmp inD2 out || test_failed $LINENO
+rm -f inD2 || framework_failure
+cat ingin.lz "${inD}" > ingint.lz || framework_failure
+"${LZIPRECOVER}" -q --dump=td ingint.lz > /dev/null
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=t ingint.lz > out || test_failed $LINENO
+cmp "${inD}" out || test_failed $LINENO
+
+"${LZIPRECOVER}" --strip=tdata int.lz > out || test_failed $LINENO
+cmp "${in_lz}" out || test_failed $LINENO
+rm -f out || framework_failure
+"${LZIPRECOVER}" --strip=tdata int.lz -o out || test_failed $LINENO
+cmp "${in_lz}" out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=tdata fox6t.lz -f -o out || test_failed $LINENO
+cmp "${fox6_lz}" out || test_failed $LINENO
+"${LZIPRECOVER}" --strip=tdata int.lz int.lz -f -o out || test_failed $LINENO
+cmp in2.lz out || test_failed $LINENO
+rm -f in2.lz || framework_failure
+"${LZIPRECOVER}" --strip=tdata int.lz fox6t.lz > out || test_failed $LINENO
+cat "${in_lz}" "${fox6_lz}" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=tdata ingint.lz > out # /dev/null returns 1 on OS/2
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=tdata ingint.lz > out || test_failed $LINENO
+cmp ingin.lz out || test_failed $LINENO
+
+"${LZIPRECOVER}" --remove=tdata int.lz fox6t.lz || test_failed $LINENO
+cmp "${in_lz}" int.lz || test_failed $LINENO
+cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=tdata int.lz || test_failed $LINENO
+cmp "${in_lz}" int.lz || test_failed $LINENO
+"${LZIPRECOVER}" --remove=tdata fox6t.lz || test_failed $LINENO
+cmp "${fox6_lz}" fox6t.lz || test_failed $LINENO
+"${LZIPRECOVER}" -q --remove=tdata ingint.lz
+[ $? = 2 ] || test_failed $LINENO
+cmp -s ingin.lz ingint.lz && test_failed $LINENO
+"${LZIPRECOVER}" -i --remove=tdata ingint.lz || test_failed $LINENO
+cmp ingin.lz ingint.lz || test_failed $LINENO
+rm -f int.lz fox6t.lz ingint.lz ingin.lz || framework_failure
+
+for i in "${f6s3_lz}" "${f6s4_lz}" "${f6s5_lz}" "${f6s6_lz}" ; do
+ "${LZIPRECOVER}" --strip=tdata "$i" > out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --dump=tdata "$i" > tdata || test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out || test_failed $LINENO "$i"
+ cat out tdata | cmp "$i" - || test_failed $LINENO "$i"
+ cat "$i" "${inD}" > f6t.lz || framework_failure
+ "${LZIPRECOVER}" --strip=tdata f6t.lz > out || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --dump=tdata f6t.lz > tdata || test_failed $LINENO "$i"
+ cmp "${fox6_lz}" out || test_failed $LINENO "$i"
+ cat out tdata | cmp f6t.lz - || test_failed $LINENO "$i"
+ "${LZIPRECOVER}" --remove=tdata f6t.lz || test_failed $LINENO "$i"
+ cmp "${fox6_lz}" f6t.lz || test_failed $LINENO "$i"
+ rm -f out tdata f6t.lz || framework_failure
+done
+
+printf "\ntesting --dump/remove/strip..."
+
+"${LZIPRECOVER}" --dump=1 "${in_lz}" -o a/b/c/out.lz || test_failed $LINENO
+cmp "${in_lz}" a/b/c/out.lz || test_failed $LINENO
+rm -rf a || framework_failure
+
+"${LZIPRECOVER}" -s "${num_lz}" -o num.lz || test_failed $LINENO
+[ -e rec9num.lz ] || test_failed $LINENO
+[ ! -e rec10num.lz ] || test_failed $LINENO
+cat rec*num.lz | cmp "${num_lz}" - || test_failed $LINENO
+for i in 1 2 3 4 5 6 7 8 9 ; do
+ "${LZIPRECOVER}" --dump=$i "${num_lz}" | cmp rec${i}num.lz - ||
+ test_failed $LINENO $i
+ "${LZIPRECOVER}" --strip=^$i "${num_lz}" | cmp rec${i}num.lz - ||
+ test_failed $LINENO $i
+ cat "${num_lz}" > num.lz || framework_failure
+ "${LZIPRECOVER}" --remove=^$i num.lz || test_failed $LINENO $i
+ cmp rec${i}num.lz num.lz || test_failed $LINENO $i
+done
+"${LZIPRECOVER}" -q --dump=1 in "${num_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+cmp rec1num.lz out || test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=^1 in "${num_lz}" > out
+[ $? = 2 ] || test_failed $LINENO
+cmp rec1num.lz out || test_failed $LINENO
+
+"${LZIPRECOVER}" --dump=r1 "${num_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=d:r3 "${num_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=r5:d "${num_lz}" | cmp rec5num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=t:r9 "${num_lz}" | cmp rec1num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^1:t "${num_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=d:r^3:t "${num_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^5:d:t "${num_lz}" | cmp rec5num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=d:t:r^9 "${num_lz}" | cmp rec1num.lz - ||
+ test_failed $LINENO
+
+"${LZIPRECOVER}" --dump=1,5 "${num_lz}" > out || test_failed $LINENO
+cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=3,6 "${num_lz}" > out || test_failed $LINENO
+cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=2-4 "${num_lz}" > out || test_failed $LINENO
+cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=4,6,8 "${num_lz}" > out || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1,5 "${num_lz}" > out || test_failed $LINENO
+cat rec1num.lz rec5num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^3,6 "${num_lz}" > out || test_failed $LINENO
+cat rec3num.lz rec6num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^2-4 "${num_lz}" > out || test_failed $LINENO
+cat rec2num.lz rec3num.lz rec4num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --strip=^4,6,8 "${num_lz}" > out || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp out - || test_failed $LINENO
+
+# create a subset tarlz archive
+"${LZIPRECOVER}" --dump=1-2:r1:t "${num_lz}" > out || test_failed $LINENO
+cat rec1num.lz rec2num.lz rec9num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=4-5:r1:t "${num_lz}" > out || test_failed $LINENO
+cat rec4num.lz rec5num.lz rec9num.lz | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" --dump=7-8:r1:t "${num_lz}" > out || test_failed $LINENO
+cat rec7num.lz rec8num.lz rec9num.lz | cmp out - || test_failed $LINENO
+
+"${LZIPRECOVER}" --dump=1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=r1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=r1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=1-4:r1-4:5 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=^10 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^1-9 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=r^1-1000 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=^1-4:r^1-4:^5 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=10 "${num_lz}" | cmp "${num_lz}" - ||
+ test_failed $LINENO
+
+"${LZIPRECOVER}" -i --dump=r1 "${nbt_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=r3 "${nbt_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=r7 "${nbt_lz}" | cmp rec4num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^1:t "${nbt_lz}" | cmp rec9num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^3:t "${nbt_lz}" | cmp rec7num.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^7:t "${nbt_lz}" | cmp rec4num.lz - ||
+ test_failed $LINENO
+
+"${LZIPRECOVER}" -i --dump=4 -f -o out "${nbt_lz}" || test_failed $LINENO
+printf "gap" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=8 "${nbt_lz}" > out || test_failed $LINENO
+printf "damaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=tdata "${nbt_lz}" > out || test_failed $LINENO
+printf "trailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=4:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gaptrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=4,8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=4,8 "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=damaged "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --dump=d:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4:t -f -o out "${nbt_lz}" || test_failed $LINENO
+printf "gap" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "damaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=1-11 "${nbt_lz}" > out || test_failed $LINENO
+cmp empty out || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4 "${nbt_lz}" > out || test_failed $LINENO
+printf "gaptrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4,8 "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=^4,8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^4,8:t "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamaged" | cmp out - || test_failed $LINENO
+"${LZIPRECOVER}" -i --strip=r^4,8 "${nbt_lz}" > out || test_failed $LINENO
+printf "gapdamagedtrailing data" | cmp out - || test_failed $LINENO
+
+cat "${num_lz}" > num.lz || framework_failure
+"${LZIPRECOVER}" --remove=1-3,5,7,9 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+cat "${num_lz}" > num.lz || framework_failure
+"${LZIPRECOVER}" --remove=^4,6,8 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+cat "${num_lz}" > num.lz || framework_failure
+"${LZIPRECOVER}" --remove=r1,3,5,7-9 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+cat "${num_lz}" > num.lz || framework_failure
+"${LZIPRECOVER}" --remove=r^2,4,6 num.lz || test_failed $LINENO
+cat rec4num.lz rec6num.lz rec8num.lz | cmp num.lz - || test_failed $LINENO
+
+cat "${nbt_lz}" > nbt.lz || framework_failure
+"${LZIPRECOVER}" -i --remove=4,8:tdata nbt.lz || test_failed $LINENO
+cmp "${num_lz}" nbt.lz || test_failed $LINENO
+cat "${nbt_lz}" > nbt.lz || framework_failure
+"${LZIPRECOVER}" -i --remove=r4,8:tdata nbt.lz || test_failed $LINENO
+cmp "${num_lz}" nbt.lz || test_failed $LINENO
+cat "${nbt_lz}" > nbt.lz || framework_failure
+"${LZIPRECOVER}" --remove=damaged:tdata nbt.lz || test_failed $LINENO
+cmp "${num_lz}" nbt.lz || test_failed $LINENO
+rm -f rec*num.lz nbt.lz || framework_failure
+
+for i in 1 2 3 4 5 6 7 8 9 10 ; do
+ "${LZIPRECOVER}" -i --strip=1-$i "${nbt_lz}" > out ||
+ test_failed $LINENO $i
+ cat "${nbt_lz}" > nbt.lz || framework_failure
+ "${LZIPRECOVER}" -i --remove=1-$i nbt.lz || test_failed $LINENO $i
+ cmp nbt.lz out || test_failed $LINENO $i
+done
+rm -f nbt.lz || framework_failure
+
+cat "${in_em}" > test_3m.txt.lz || framework_failure
+"${LZIPRECOVER}" --remove=empty test_3m.txt.lz || test_failed $LINENO
+"${LZIPRECOVER}" -M test_3m.txt.lz | cmp "${testdir}"/test_3m.txt.lz.md5 - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --dump=2,4,7 "${in_em}" | cmp test_3m.txt.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=e "${in_em}" | cmp test_3m.txt.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --empty-error --strip=e "${in_em}" | cmp test_3m.txt.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" --strip=1,3,5-6,8 "${in_em}" | cmp test_3m.txt.lz - ||
+ test_failed $LINENO
+"${LZIPRECOVER}" -q --strip=1,3,5-6,8 --empty-error "${in_em}" > out
+[ $? = 2 ] || test_failed $LINENO
+"${LZIPRECOVER}" --dump=emp "${in_em}" | "${LZIP}" -d | cmp empty - ||
+ test_failed $LINENO
+rm -f test_3m.txt.lz empty out || framework_failure
+
+echo
+if [ ${fail} = 0 ] ; then
+ echo "tests completed successfully."
+ cd "${objdir}" && rm -r tmp
+else
+ echo "tests failed."
+fi
+exit ${fail}
diff --git a/testsuite/fox.lz b/testsuite/fox.lz
new file mode 100644
index 0000000..509da82
--- /dev/null
+++ b/testsuite/fox.lz
Binary files differ
diff --git a/testsuite/fox6.lz b/testsuite/fox6.lz
new file mode 100644
index 0000000..8401b99
--- /dev/null
+++ b/testsuite/fox6.lz
Binary files differ
diff --git a/testsuite/fox6_bad1.lz b/testsuite/fox6_bad1.lz
new file mode 100644
index 0000000..4e0d8fd
--- /dev/null
+++ b/testsuite/fox6_bad1.lz
Binary files differ
diff --git a/testsuite/fox6_bad1.txt b/testsuite/fox6_bad1.txt
new file mode 100644
index 0000000..14e5367
--- /dev/null
+++ b/testsuite/fox6_bad1.txt
@@ -0,0 +1,4 @@
+The quick brown fox jumps over the lazy dog.
+The quick brown fox jumps over the lazy dog.
+The quick brown fox c††zzzzzzzzzzzzzzzzzzzzzzVhe quick brown fox jumps over the lazy dog.
+The quick brown fox jumps over the lazy dog.
diff --git a/testsuite/fox6_bad2.lz b/testsuite/fox6_bad2.lz
new file mode 100644
index 0000000..bf8a04a
--- /dev/null
+++ b/testsuite/fox6_bad2.lz
Binary files differ
diff --git a/testsuite/fox6_bad3.lz b/testsuite/fox6_bad3.lz
new file mode 100644
index 0000000..2d3cff2
--- /dev/null
+++ b/testsuite/fox6_bad3.lz
Binary files differ
diff --git a/testsuite/fox6_bad4.lz b/testsuite/fox6_bad4.lz
new file mode 100644
index 0000000..e931d7d
--- /dev/null
+++ b/testsuite/fox6_bad4.lz
Binary files differ
diff --git a/testsuite/fox6_bad5.lz b/testsuite/fox6_bad5.lz
new file mode 100644
index 0000000..95f44f3
--- /dev/null
+++ b/testsuite/fox6_bad5.lz
Binary files differ
diff --git a/testsuite/fox6_bad6.lz b/testsuite/fox6_bad6.lz
new file mode 100644
index 0000000..085b2fd
--- /dev/null
+++ b/testsuite/fox6_bad6.lz
Binary files differ
diff --git a/testsuite/fox6_mark.lz b/testsuite/fox6_mark.lz
new file mode 100644
index 0000000..32b2ac0
--- /dev/null
+++ b/testsuite/fox6_mark.lz
Binary files differ
diff --git a/testsuite/fox6_sc1.lz b/testsuite/fox6_sc1.lz
new file mode 100644
index 0000000..278f8a8
--- /dev/null
+++ b/testsuite/fox6_sc1.lz
Binary files differ
diff --git a/testsuite/fox6_sc2.lz b/testsuite/fox6_sc2.lz
new file mode 100644
index 0000000..dc17461
--- /dev/null
+++ b/testsuite/fox6_sc2.lz
Binary files differ
diff --git a/testsuite/fox6_sc3.lz b/testsuite/fox6_sc3.lz
new file mode 100644
index 0000000..a602938
--- /dev/null
+++ b/testsuite/fox6_sc3.lz
Binary files differ
diff --git a/testsuite/fox6_sc4.lz b/testsuite/fox6_sc4.lz
new file mode 100644
index 0000000..d1a77f7
--- /dev/null
+++ b/testsuite/fox6_sc4.lz
Binary files differ
diff --git a/testsuite/fox6_sc5.lz b/testsuite/fox6_sc5.lz
new file mode 100644
index 0000000..35453c6
--- /dev/null
+++ b/testsuite/fox6_sc5.lz
Binary files differ
diff --git a/testsuite/fox6_sc6.lz b/testsuite/fox6_sc6.lz
new file mode 100644
index 0000000..c1fad92
--- /dev/null
+++ b/testsuite/fox6_sc6.lz
Binary files differ
diff --git a/testsuite/fox_bcrc.lz b/testsuite/fox_bcrc.lz
new file mode 100644
index 0000000..8f6a7c4
--- /dev/null
+++ b/testsuite/fox_bcrc.lz
Binary files differ
diff --git a/testsuite/fox_crc0.lz b/testsuite/fox_crc0.lz
new file mode 100644
index 0000000..1abe926
--- /dev/null
+++ b/testsuite/fox_crc0.lz
Binary files differ
diff --git a/testsuite/fox_das46.lz b/testsuite/fox_das46.lz
new file mode 100644
index 0000000..43ed9f9
--- /dev/null
+++ b/testsuite/fox_das46.lz
Binary files differ
diff --git a/testsuite/fox_de20.lz b/testsuite/fox_de20.lz
new file mode 100644
index 0000000..10949d8
--- /dev/null
+++ b/testsuite/fox_de20.lz
Binary files differ
diff --git a/testsuite/fox_mes81.lz b/testsuite/fox_mes81.lz
new file mode 100644
index 0000000..d50ef2e
--- /dev/null
+++ b/testsuite/fox_mes81.lz
Binary files differ
diff --git a/testsuite/fox_s11.lz b/testsuite/fox_s11.lz
new file mode 100644
index 0000000..dca909c
--- /dev/null
+++ b/testsuite/fox_s11.lz
Binary files differ
diff --git a/testsuite/fox_v2.lz b/testsuite/fox_v2.lz
new file mode 100644
index 0000000..8620981
--- /dev/null
+++ b/testsuite/fox_v2.lz
Binary files differ
diff --git a/testsuite/numbers.lz b/testsuite/numbers.lz
new file mode 100644
index 0000000..57460bc
--- /dev/null
+++ b/testsuite/numbers.lz
Binary files differ
diff --git a/testsuite/numbersbt.lz b/testsuite/numbersbt.lz
new file mode 100644
index 0000000..019e54d
--- /dev/null
+++ b/testsuite/numbersbt.lz
Binary files differ
diff --git a/testsuite/test.txt b/testsuite/test.txt
new file mode 100644
index 0000000..9196a3a
--- /dev/null
+++ b/testsuite/test.txt
@@ -0,0 +1,676 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz
new file mode 100644
index 0000000..22cea6e
--- /dev/null
+++ b/testsuite/test.txt.lz
Binary files differ
diff --git a/testsuite/test.txt.lzma b/testsuite/test.txt.lzma
new file mode 100644
index 0000000..53e54ea
--- /dev/null
+++ b/testsuite/test.txt.lzma
Binary files differ
diff --git a/testsuite/test21723.txt b/testsuite/test21723.txt
new file mode 100644
index 0000000..7194547
--- /dev/null
+++ b/testsuite/test21723.txt
@@ -0,0 +1,7 @@
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
diff --git a/testsuite/test_3m.txt.lz.md5 b/testsuite/test_3m.txt.lz.md5
new file mode 100644
index 0000000..5bec6bc
--- /dev/null
+++ b/testsuite/test_3m.txt.lz.md5
@@ -0,0 +1 @@
+6a6bb58464ec8567eab17015064d0c5b test_3m.txt.lz
diff --git a/testsuite/test_bad1.lz b/testsuite/test_bad1.lz
new file mode 100644
index 0000000..2129c90
--- /dev/null
+++ b/testsuite/test_bad1.lz
Binary files differ
diff --git a/testsuite/test_bad2.lz b/testsuite/test_bad2.lz
new file mode 100644
index 0000000..e013c34
--- /dev/null
+++ b/testsuite/test_bad2.lz
Binary files differ
diff --git a/testsuite/test_bad3.lz b/testsuite/test_bad3.lz
new file mode 100644
index 0000000..0ae9e7d
--- /dev/null
+++ b/testsuite/test_bad3.lz
Binary files differ
diff --git a/testsuite/test_bad4.lz b/testsuite/test_bad4.lz
new file mode 100644
index 0000000..ddb0d6b
--- /dev/null
+++ b/testsuite/test_bad4.lz
Binary files differ
diff --git a/testsuite/test_bad5.lz b/testsuite/test_bad5.lz
new file mode 100644
index 0000000..6fab91c
--- /dev/null
+++ b/testsuite/test_bad5.lz
Binary files differ
diff --git a/testsuite/test_bad6.lz b/testsuite/test_bad6.lz
new file mode 100644
index 0000000..cfea88c
--- /dev/null
+++ b/testsuite/test_bad6.lz
Binary files differ
diff --git a/testsuite/test_bad6.txt b/testsuite/test_bad6.txt
new file mode 100644
index 0000000..b47462e
--- /dev/null
+++ b/testsuite/test_bad6.txt
@@ -0,0 +1,26 @@
+) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to \ No newline at end of file
diff --git a/testsuite/test_bad7.lz b/testsuite/test_bad7.lz
new file mode 100644
index 0000000..77f2b85
--- /dev/null
+++ b/testsuite/test_bad7.lz
Binary files differ
diff --git a/testsuite/test_bad7.txt b/testsuite/test_bad7.txt
new file mode 100644
index 0000000..be54c7c
--- /dev/null
+++ b/testsuite/test_bad7.txt
@@ -0,0 +1,215 @@
+, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY \ No newline at end of file
diff --git a/testsuite/test_bad8.lz b/testsuite/test_bad8.lz
new file mode 100644
index 0000000..fca701b
--- /dev/null
+++ b/testsuite/test_bad8.lz
Binary files differ
diff --git a/testsuite/test_bad8.txt b/testsuite/test_bad8.txt
new file mode 100644
index 0000000..3cb3ff4
--- /dev/null
+++ b/testsuite/test_bad8.txt
@@ -0,0 +1,3 @@
+1
+
+ Copyright (C) 1989 \ No newline at end of file
diff --git a/testsuite/test_bad9.lz b/testsuite/test_bad9.lz
new file mode 100644
index 0000000..becb0ec
--- /dev/null
+++ b/testsuite/test_bad9.lz
Binary files differ
diff --git a/testsuite/test_bad9.txt b/testsuite/test_bad9.txt
new file mode 100644
index 0000000..b72a626
--- /dev/null
+++ b/testsuite/test_bad9.txt
@@ -0,0 +1,5 @@
+General
+Public License instead of this License.
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
diff --git a/testsuite/test_em.txt.lz b/testsuite/test_em.txt.lz
new file mode 100644
index 0000000..7e96250
--- /dev/null
+++ b/testsuite/test_em.txt.lz
Binary files differ
diff --git a/unzcrash.cc b/unzcrash.cc
new file mode 100644
index 0000000..107189f
--- /dev/null
+++ b/unzcrash.cc
@@ -0,0 +1,631 @@
+/* Unzcrash - Tests robustness of decompressors to corrupted data.
+ Inspired by unzcrash.c from Julian Seward's bzip2.
+ Copyright (C) 2008-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ Exit status: 0 for a normal exit, 1 for environmental problems
+ (file not found, invalid command-line options, I/O errors, etc), 2 to
+ indicate a corrupt or invalid input file, 3 for an internal consistency
+ error (e.g., bug) which caused unzcrash to panic.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <climits> // SSIZE_MAX
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h> // SIZE_MAX
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include "arg_parser.h"
+#include "common.h"
+
+#if CHAR_BIT != 8
+#error "Environments where CHAR_BIT != 8 are not supported."
+#endif
+
+#if ( defined SIZE_MAX && SIZE_MAX < ULONG_MAX ) || \
+ ( defined SSIZE_MAX && SSIZE_MAX < LONG_MAX )
+#error "Environments where 'size_t' is narrower than 'long' are not supported."
+#endif
+
+namespace {
+
+const char * const program_name = "unzcrash";
+const char * invocation_name = program_name; // default value
+
+int verbosity = 0;
+
+
+void show_help() // print the description, usage line and option summary to stdout
+ {
+ std::printf( "Unzcrash tests the robustness of decompressors to corrupted data.\n"
+ "\nBy default, unzcrash reads the file specified and then repeatedly\n"
+ "decompresses it, increasing 256 times each byte of the compressed data, so\n"
+ "as to test all possible one-byte errors. Note that it may take years or even\n"
+ "centuries to test all possible one-byte errors in a large file (tens of MB).\n"
+ "\nIf the option '--block' is given, unzcrash reads the file specified and\n"
+ "then repeatedly decompresses it, setting all bytes in each successive block\n"
+ "to the value given, so as to test all possible full sector errors.\n"
+ "\nIf the option '--truncate' is given, unzcrash reads the file specified\n"
+ "and then repeatedly decompresses it, truncating the file to increasing\n"
+ "lengths, so as to test all possible truncation points.\n"
+ "\nNone of the three test modes described above should cause any invalid memory\n"
+ "accesses. If any of them does, please, report it as a bug to the maintainers\n"
+ "of the decompressor being tested.\n"
+ "\nIf the decompressor returns with zero status, unzcrash compares the output\n"
+ "of the decompressor for the original and corrupt files. If the outputs\n"
+ "differ, it means that the decompressor returned a false negative; it failed\n"
+ "to recognize the corruption and produced garbage output. The only exception\n"
+ "is when a multimember file is truncated just after the last byte of a\n"
+ "member, producing a shorter but valid compressed file. Except in this latter\n"
+ "case, please, report any false negative as a bug.\n"
+ "\nIn order to compare the outputs, unzcrash needs a 'zcmp' program able to\n"
+ "understand the format being tested. For example the zcmp provided by zutils.\n"
+ "Use '--zcmp=false' to disable comparisons.\n"
+ "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name ); // %s is the name the program was invoked with
+ std::printf( "\nOptions:\n"
+ " -h, --help display this help and exit\n"
+ " -V, --version output version information and exit\n"
+ " -b, --bits=<range> test N-bit errors instead of full byte\n"
+ " -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n"
+ " -d, --delta=<n> test one byte/block/truncation every n bytes\n"
+ " -e, --set-byte=<pos>,<val> set byte at position <pos> to value <val>\n"
+ " -n, --no-check skip initial test of file.lz and zcmp\n"
+ " -p, --position=<bytes> first byte position to test [default 0]\n"
+ " -q, --quiet suppress all messages\n"
+ " -s, --size=<bytes> number of byte positions to test [all]\n"
+ " -t, --truncate test decompression of truncated file\n"
+ " -v, --verbose be verbose (a 2nd -v gives more)\n"
+ " -z, --zcmp=<command> set zcmp command name and options [zcmp]\n"
+ "Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n"
+ "A negative position is relative to the end of file.\n"
+ "A negative size is relative to the rest of the file.\n"
+ "\nExit status: 0 for a normal exit, 1 for environmental problems\n"
+ "(file not found, invalid command-line options, I/O errors, etc), 2 to\n"
+ "indicate a corrupt or invalid input file, 3 for an internal consistency\n"
+ "error (e.g., bug) which caused unzcrash to panic.\n"
+ "\nReport bugs to lzip-bug@nongnu.org\n"
+ "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
+ }
+
+} // end namespace
+
+#include "main_common.cc"
+
+namespace {
+
+void parse_block( const char * const arg, const char * const option_name,
+ long & size, uint8_t & value ) // parse "[<size>][,<value>]" into 'size' and 'value'
+ {
+ const char * tail = arg;
+
+ if( tail[0] != ',' ) // a leading comma means <size> was omitted; 'size' is left untouched
+ size = getnum( arg, option_name, 0, 1, INT_MAX, &tail ); // presumably leaves 'tail' just past the number -- confirm in getnum
+ if( tail[0] == ',' ) // <value> is read only when a comma is present
+ value = getnum( tail + 1, option_name, 0, 0, 255 ); // limits passed to getnum: 0 to 255
+ else if( tail[0] ) // any other character after <size> is an error
+ { show_option_error( arg, "Missing comma between <size> and <value> in",
+ option_name ); std::exit( 1 ); }
+ }
+
+
+/* Read all of 'filename' into memory. Return the address of a malloc'd
+ buffer containing the file data and the file size in '*file_sizep'.
+ In case of error, close the file, return 0 and do not modify '*file_sizep'.
+*/
+uint8_t * read_file( const char * const filename, long * const file_sizep )
+ {
+ FILE * const f = std::fopen( filename, "rb" );
+ if( !f )
+ { show_file_error( filename, "Can't open input file", errno ); return 0; }
+
+ long buffer_size = 65536; // initial capacity; doubled each time the buffer fills
+ uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
+ if( !buffer ) { show_error( mem_msg ); std::fclose( f ); return 0; }
+ long file_size = std::fread( buffer, 1, buffer_size, f );
+ while( file_size >= buffer_size || ( !std::ferror( f ) && !std::feof( f ) ) ) // buffer full, or short read without error/EOF
+ {
+ if( file_size >= buffer_size ) // may be false because of EINTR
+ {
+ if( buffer_size >= LONG_MAX ) // can't grow any further
+ { show_file_error( filename, "Input file is larger than LONG_MAX." );
+ std::free( buffer ); std::fclose( f ); return 0; }
+ buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX; // double without overflowing
+ uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
+ if( !tmp ) { show_error( mem_msg ); std::free( buffer ); std::fclose( f ); return 0; } // realloc failure leaves 'buffer' valid
+ buffer = tmp;
+ }
+ file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
+ }
+ if( std::ferror( f ) || !std::feof( f ) )
+ {
+ show_file_error( filename, "Error reading input file", errno ); // report before fclose can change errno
+ std::free( buffer ); std::fclose( f ); return 0;
+ }
+ std::fclose( f );
+ *file_sizep = file_size;
+ return buffer;
+ }
+
+
+class Bitset8 // 8 value bitset (1 to 8)
+ {
+ bool data[8]; // data[i-1] tells whether value i is in the set
+ static bool valid_digit( const unsigned char ch ) // true for the characters '1' to '8'
+ { return ( ch >= '1' && ch <= '8' ); }
+
+public:
+ Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; } // all 8 values are included by default
+
+ bool includes( const int i ) const // false for any i outside 1 to 8
+ { return ( i >= 1 && i <= 8 && data[i-1] ); }
+
+ // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 (replaces the current set; exits with status 1 on a malformed argument)
+ void parse_bs( const char * const arg, const char * const option_name )
+ {
+ const char * p = arg;
+ for( int i = 0; i < 8; ++i ) data[i] = false; // start from an empty set
+ while( true )
+ {
+ const unsigned char ch1 = *p++;
+ if( !valid_digit( ch1 ) ) break;
+ if( *p != '-' ) data[ch1-'1'] = true; // single value
+ else
+ {
+ ++p;
+ if( !valid_digit( *p ) || ch1 > *p ) break; // bad upper limit or descending range
+ for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true; // include every value of the range
+ ++p;
+ }
+ if( *p == 0 ) return; // whole argument parsed successfully
+ if( *p == ',' ) ++p; else break;
+ }
+ show_option_error( arg, "Invalid bit position or range in", option_name ); // reached only through a 'break' above
+ std::exit( 1 );
+ }
+
+ // number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1
+ void print() const
+ {
+ std::fflush( stderr ); // flush stderr before writing to stdout
+ int c = 0; // number of values in the set
+ for( int i = 0; i < 8; ++i ) if( data[i] ) ++c;
+ if( c == 8 ) std::fputs( "Testing full byte.\n", stdout );
+ else if( c == 0 ) std::fputs( "Nothing to test.\n", stdout );
+ else
+ {
+ std::fputs( "Testing ", stdout );
+ for( int i = 0; i < 8; ++i )
+ if( data[i] )
+ {
+ std::printf( "%d", i + 1 );
+ if( --c ) std::fputc( ',', stdout ); // comma after every value but the last
+ }
+ std::fputs( " bit errors.\n", stdout );
+ }
+ std::fflush( stdout );
+ }
+ };
+
+
+int differing_bits( const uint8_t byte1, const uint8_t byte2 ) // number of bit positions where the two bytes differ (0 to 8)
+ {
+ int count = 0;
+ uint8_t dif = byte1 ^ byte2; // a bit is set in 'dif' where the inputs differ
+ while( dif )
+ { count += ( dif & 1 ); dif >>= 1; } // add the lowest bit, then shift it out
+ return count;
+ }
+
+
+/* Return the number of bytes really written.
+ If (value returned < size), it is always an error.
+*/
+long writeblock( const int fd, const uint8_t * const buf, const long size )
+ {
+ long sz = 0; // bytes written so far
+ errno = 0;
+ while( sz < size )
+ {
+ const long n = write( fd, buf + sz, size - sz );
+ if( n > 0 ) sz += n; // partial write: go on with the rest
+ else if( n < 0 && errno != EINTR ) break; // real error: errno is left set for the caller
+ errno = 0; // clear errno (e.g. EINTR) before the next attempt
+ }
+ return sz;
+ }
+
+
+void show_exec_error( const char * const prog_name ) // report on stderr that 'prog_name' could not be executed
+ {
+ if( verbosity >= 0 ) // print nothing when verbosity is negative
+ std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
+ program_name, prog_name, std::strerror( errno ) ); // reason is taken from the current errno
+ }
+
+
+void show_fork_error( const char * const prog_name ) // report on stderr that no child could be created for 'prog_name'
+ {
+ if( verbosity >= 0 ) // print nothing when verbosity is negative
+ std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
+ program_name, prog_name, std::strerror( errno ) ); // reason is taken from the current errno
+ }
+
+
+int wait_for_child( const pid_t pid, const char * const name ) // return the exit status of child 'pid', or -1 on failure
+ {
+ int status;
+ while( waitpid( pid, &status, 0 ) == -1 ) // retry while interrupted by a signal
+ {
+ if( errno != EINTR )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
+ program_name, name, std::strerror( errno ) );
+ return -1;
+ }
+ }
+ if( WIFEXITED( status ) )
+ { const int ret = WEXITSTATUS( status ); if( ret != 255 ) return ret; } // exit status 255 is treated as failure
+ return -1; // child did not exit normally, or exited with status 255
+ }
+
+
+bool word_split( const char * const command, std::vector< std::string > & args )
+ { // append the words of 'command' to 'args'; return true if at least one word was added
+ const unsigned long old_size = args.size();
+ for( const char * p = command; *p; )
+ {
+ while( *p && std::isspace( (unsigned char)*p ) ) ++p; // strip leading space
+ if( !*p ) break;
+ if( *p == '\'' || *p == '"' ) // quoted name
+ {
+ const char quote = *p; // the word ends at the next occurrence of the same quote
+ const char * const begin = ++p; // skip leading quote
+ while( *p && *p != quote ) ++p;
+ if( !*p || begin == p ) return false; // unbalanced or empty (earlier words stay in 'args')
+ args.push_back( std::string( begin, p - begin ) );
+ ++p; continue; // skip trailing quote
+ }
+ const char * const begin = p++; // unquoted word: runs up to the next space
+ while( *p && !std::isspace( (unsigned char)*p ) ) ++p; // cast: isspace is undefined for negative char values
+ args.push_back( std::string( begin, p - begin ) );
+ }
+ return args.size() > old_size;
+ }
+
+
+// Run the program argv[0] with arguments 'argv' (null-terminated array) as
+// a child process, write the 'buffer_size' bytes of 'buffer' to its
+// standard input through a pipe, and wait for it to terminate.
+// If 'check' is false, a short write to the child is not an error (the
+// child may legitimately stop reading before the end of the data).
+// Return -1 if fatal error, 0 if OK, > 0 if error (exit status of child).
+// The pipe is always closed, and a started child is always waited for,
+// even when a fatal error is returned.
+// NOTE(review): after vfork the child calls dup2/close and, if exec fails,
+// show_exec_error (stdio). POSIX restricts what a vfork child may do;
+// confirm this is acceptable on all supported targets.
+int fork_and_feed( const uint8_t * const buffer, const long buffer_size,
+                   const char ** const argv, const bool check = false )
+  {
+  int fda[2];                           // pipe to child
+  if( pipe( fda ) < 0 )
+    { show_error( "Can't create pipe", errno ); return -1; }
+
+  const pid_t pid = vfork();
+  if( pid < 0 )                         // fork failed; still in the parent
+    {
+    show_fork_error( argv[0] );         // report first; close may change errno
+    close( fda[0] ); close( fda[1] );   // don't leak the pipe
+    return -1;
+    }
+  if( pid == 0 )                        // child
+    {
+    if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
+        close( fda[0] ) == 0 && close( fda[1] ) == 0 )
+      execvp( argv[0], const_cast< char ** >( argv ) );
+    show_exec_error( argv[0] );
+    _exit( 255 );                       // 255 means fatal error in wait_for_child
+    }
+
+  // parent (feed data to child)
+  bool failed = false;
+  if( close( fda[0] ) != 0 )
+    { show_error( "Error closing unused pipe", errno ); failed = true; }
+  else if( writeblock( fda[1], buffer, buffer_size ) != buffer_size && check )
+    { show_error( "Can't write to child process", errno ); failed = true; }
+  // always close the write end so that the child sees end-of-file
+  if( close( fda[1] ) != 0 && !failed )
+    { show_error( "Error closing pipe", errno ); failed = true; }
+
+  // always reap the child, even after a failure, to avoid leaving a zombie
+  const int ret = wait_for_child( pid, argv[0] );
+  return failed ? -1 : ret;
+  }
+
+} // end namespace
+
+
+// Entry point. Expects, after the options, exactly two arguments: a test
+// command (for example 'lzip -t') and the name of a file.
+// The file is read into memory and, unless checking is disabled, fed
+// unmodified to the command (and to zcmp) to verify that it passes.
+// Then modified copies of the file are fed to the command through its
+// standard input, in one of three modes:
+//   truncate  feeds the first i bytes of the file for each tested length i;
+//   block     overwrites a block of the file with 'block_value';
+//   byte      sets each tested byte to every other value whose differing
+//             bits are included in 'bits'.
+// Each time the command returns 0, the same data are fed to zcmp (unless
+// comparisons are disabled) and a failed comparison is counted.
+// A summary is printed to stderr at the end.
+// Returns 0 on success (also for --help, --version, and an empty domain),
+// 1 on error.
+int main( const int argc, const char * const argv[] )
+  {
+  enum Mode { m_block, m_byte, m_truncate };
+  const char * const mode_str[3] = { "block", "byte", "size" };
+  Bitset8 bits;         // if Bitset8::parse_bs not called test full byte
+  Bad_byte bad_byte;
+  const char * zcmp_program = "zcmp";
+  long pos = 0;
+  long max_size = LONG_MAX;
+  long delta = 0;                       // to be set later
+  long block_size = 512;
+  Mode program_mode = m_byte;
+  uint8_t block_value = 0;
+  bool check = true;
+  if( argc > 0 ) invocation_name = argv[0];
+
+  const Arg_parser::Option options[] =
+    {
+    { 'h', "help",      Arg_parser::no    },
+    { 'b', "bits",      Arg_parser::yes   },
+    { 'B', "block",     Arg_parser::maybe },
+    { 'd', "delta",     Arg_parser::yes   },
+    { 'e', "set-byte",  Arg_parser::yes   },
+    { 'n', "no-check",  Arg_parser::no    },
+    { 'n', "no-verify", Arg_parser::no    },
+    { 'p', "position",  Arg_parser::yes   },
+    { 'q', "quiet",     Arg_parser::no    },
+    { 's', "size",      Arg_parser::yes   },
+    { 't', "truncate",  Arg_parser::no    },
+    { 'v', "verbose",   Arg_parser::no    },
+    { 'V', "version",   Arg_parser::no    },
+    { 'z', "zcmp",      Arg_parser::yes   },
+    {  0 , 0,           Arg_parser::no    } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )                           // bad option
+    { show_error( parser.error().c_str(), 0, true ); return 1; }
+
+  int argind = 0;
+  for( ; argind < parser.arguments(); ++argind )
+    {
+    const int code = parser.code( argind );
+    if( !code ) break;                                  // no more options
+    const char * const pn = parser.parsed_name( argind ).c_str();
+    const char * const arg = parser.argument( argind ).c_str();
+    switch( code )
+      {
+      case 'h': show_help(); return 0;
+      case 'b': bits.parse_bs( arg, pn ); program_mode = m_byte; break;
+      case 'B': if( arg[0] ) parse_block( arg, pn, block_size, block_value );
+                program_mode = m_block; break;
+      case 'd': delta = getnum( arg, pn, block_size, 1, INT_MAX ); break;
+      case 'e': bad_byte.parse_bb( arg, pn ); break;
+      case 'n': check = false; break;
+      case 'p': pos = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
+      case 'q': verbosity = -1; break;
+      case 's': max_size = getnum( arg, pn, block_size, -LONG_MAX, LONG_MAX ); break;
+      case 't': program_mode = m_truncate; break;
+      case 'v': if( verbosity < 4 ) ++verbosity; break;
+      case 'V': show_version(); return 0;
+      case 'z': zcmp_program = arg; break;
+      default: internal_error( "uncaught option." );
+      }
+    } // end process options
+
+  if( parser.arguments() - argind != 2 )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
+    return 1;
+    }
+
+  // default step: one block in block mode, else one byte
+  if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1;
+
+  const char * const command = parser.argument( argind ).c_str();
+  std::vector< std::string > command_args;
+  if( !word_split( command, command_args ) )
+    { show_file_error( command, "Invalid command." ); return 1; }
+  // null-terminated argument array for exec; the last element stays null.
+  // 'command_args' must not be modified while the pointers are in use.
+  std::vector< const char * > command_argv_store( command_args.size() + 1 );
+  for( unsigned i = 0; i < command_args.size(); ++i )
+    command_argv_store[i] = command_args[i].c_str();
+  const char ** const command_argv = &command_argv_store[0];
+
+  const char * const filename = parser.argument( argind + 1 ).c_str();
+  long file_size = 0;
+  uint8_t * const buffer = read_file( filename, &file_size );
+  if( !buffer ) return 1;
+  std::string zcmp_command;     // used in diagnostics; empty means disabled
+  std::vector< std::string > zcmp_args;
+  std::vector< const char * > zcmp_argv_store;
+  const char ** zcmp_argv = 0;
+  if( std::strcmp( zcmp_program, "false" ) != 0 )
+    {
+    zcmp_command = zcmp_program;
+    zcmp_command += " '"; zcmp_command += filename; zcmp_command += "' -";
+    // Split only the program part. The file name is passed verbatim as a
+    // single argument so that quotes in it can't corrupt the arguments.
+    if( !word_split( zcmp_program, zcmp_args ) )
+      { show_file_error( zcmp_command.c_str(), "Invalid zcmp command." );
+        return 1; }
+    zcmp_args.push_back( filename );
+    zcmp_args.push_back( "-" );         // compare against standard input
+    zcmp_argv_store.resize( zcmp_args.size() + 1 );     // last stays null
+    for( unsigned i = 0; i < zcmp_args.size(); ++i )
+      zcmp_argv_store[i] = zcmp_args[i].c_str();
+    zcmp_argv = &zcmp_argv_store[0];
+    }
+
+  // check original file
+  if( verbosity >= 1 ) std::fprintf( stderr, "Testing file '%s'\n", filename );
+  if( check )
+    {
+    const int ret = fork_and_feed( buffer, file_size, command_argv, true );
+    if( ret != 0 )
+      {
+      if( verbosity >= 0 )
+        {
+        if( ret < 0 )
+          std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command );
+        else
+          std::fprintf( stderr, "%s: \"%s\" failed (%d).\n",
+                        program_name, command, ret );
+        }
+      return 1;
+      }
+    if( zcmp_command.size() )
+      {
+      const int cmp_ret = fork_and_feed( buffer, file_size, zcmp_argv, true );
+      if( cmp_ret != 0 )
+        {
+        if( verbosity >= 0 )
+          {
+          if( cmp_ret < 0 )
+            std::fprintf( stderr, "%s: Can't run '%s'.\n",
+                          program_name, zcmp_command.c_str() );
+          else
+            std::fprintf( stderr, "%s: \"%s\" failed (%d). Disabling comparisons.\n",
+                          program_name, zcmp_command.c_str(), cmp_ret );
+          }
+        if( cmp_ret < 0 ) return 1;
+        zcmp_command.clear();           // a failing zcmp disables comparisons
+        }
+      }
+    }
+
+  // from now on the child may exit before reading all the data
+  std::signal( SIGPIPE, SIG_IGN );
+
+  // a negative position is relative to the end of the file
+  if( pos < 0 ) pos = std::max( 0L, file_size + pos );
+  if( pos >= file_size || max_size == 0 ||
+      ( max_size < 0 && -max_size >= file_size - pos ) )
+    { show_error( "Nothing to do; domain is empty." ); return 0; }
+  // a negative size is relative to the rest of the file
+  if( max_size < 0 ) max_size += file_size - pos;
+  const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
+  if( bad_byte.pos >= file_size )
+    { show_option_error( bad_byte.argument, "Position is beyond end of file in",
+                         bad_byte.option_name ); return 1; }
+  if( bad_byte.pos >= 0 )
+    buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
+  long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
+  if( program_mode == m_truncate )
+    for( long i = pos; i < end; i += std::min( delta, end - i ) )
+      {
+      if( verbosity >= 1 ) std::fprintf( stderr, "length %ld\n", i );
+      ++positions; ++decompressions;
+      const int ret = fork_and_feed( buffer, i, command_argv );
+      if( ret < 0 ) return 1;
+      if( ret == 0 )
+        {
+        ++successes;
+        if( verbosity >= 0 )
+          std::fprintf( stderr, "length %ld passed the test\n", i );
+        if( zcmp_command.size() )
+          {
+          const int cmp_ret = fork_and_feed( buffer, i, zcmp_argv );
+          if( cmp_ret < 0 ) return 1;
+          if( cmp_ret > 0 )
+            {
+            ++failed_comparisons;
+            if( verbosity >= 0 )
+              std::fprintf( stderr, "length %ld comparison failed\n", i );
+            }
+          }
+        }
+      }
+  else if( program_mode == m_block )
+    {
+    // 'block' saves the original contents while they are overwritten
+    uint8_t * block = (uint8_t *)std::malloc( block_size );
+    if( !block ) { show_error( mem_msg ); return 1; }
+    for( long i = pos; i < end; i += std::min( delta, end - i ) )
+      {
+      const long size = std::min( block_size, file_size - i );
+      if( verbosity >= 1 ) std::fprintf( stderr, "block %ld,%ld\n", i, size );
+      ++positions; ++decompressions;
+      std::memcpy( block, buffer + i, size );
+      std::memset( buffer + i, block_value, size );
+      const int ret = fork_and_feed( buffer, file_size, command_argv );
+      if( ret < 0 ) return 1;
+      if( ret == 0 )
+        {
+        ++successes;
+        if( verbosity >= 0 )
+          std::fprintf( stderr, "block %ld,%ld passed the test\n", i, size );
+        if( zcmp_command.size() )
+          {
+          const int cmp_ret = fork_and_feed( buffer, file_size, zcmp_argv );
+          if( cmp_ret < 0 ) return 1;
+          if( cmp_ret > 0 )
+            {
+            ++failed_comparisons;
+            if( verbosity >= 0 )
+              std::fprintf( stderr, "block %ld,%ld comparison failed\n", i, size );
+            }
+          }
+        }
+      std::memcpy( buffer + i, block, size );           // restore the block
+      }
+    std::free( block );
+    }
+  else
+    {
+    if( verbosity >= 1 ) bits.print();
+    for( long i = pos; i < end; i += std::min( delta, end - i ) )
+      {
+      if( verbosity >= 1 ) std::fprintf( stderr, "byte %ld\n", i );
+      ++positions;
+      const uint8_t byte = buffer[i];
+      // 255 increments visit every value except the original one
+      for( int j = 1; j < 256; ++j )
+        {
+        ++buffer[i];
+        if( bits.includes( differing_bits( byte, buffer[i] ) ) )
+          {
+          ++decompressions;
+          if( verbosity >= 2 )
+            std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
+                          buffer[i], byte, j );
+          const int ret = fork_and_feed( buffer, file_size, command_argv );
+          if( ret < 0 ) return 1;
+          if( ret == 0 )
+            {
+            ++successes;
+            if( verbosity >= 0 )
+              { if( verbosity < 2 )     // else already printed above
+                  std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
+                                buffer[i], byte, j );
+                std::fprintf( stderr, "byte %ld passed the test\n", i ); }
+            if( zcmp_command.size() )
+              {
+              const int cmp_ret = fork_and_feed( buffer, file_size, zcmp_argv );
+              if( cmp_ret < 0 ) return 1;
+              if( cmp_ret > 0 )
+                {
+                ++failed_comparisons;
+                if( verbosity >= 0 )
+                  std::fprintf( stderr, "byte %ld comparison failed\n", i );
+                }
+              }
+            }
+          }
+        }
+      buffer[i] = byte;                                 // restore the byte
+      }
+    }
+
+  if( verbosity >= 0 )
+    {
+    std::fprintf( stderr, "\n%9ld %ss tested\n%9ld total decompressions"
+                          "\n%9ld decompressions returned with zero status",
+                  positions, mode_str[program_mode], decompressions, successes );
+    if( successes > 0 )
+      {
+      if( zcmp_command.empty() )
+        std::fputs( "\n          comparisons disabled\n", stderr );
+      else if( failed_comparisons > 0 )
+        std::fprintf( stderr, ", of which\n%9ld comparisons failed\n",
+                      failed_comparisons );
+      else std::fputs( "\n          all comparisons passed\n", stderr );
+      }
+    else std::fputc( '\n', stderr );
+    }
+
+  std::free( buffer );
+  return 0;
+  }