diff options
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | INSTALL | 21 | ||||
-rw-r--r-- | Makefile.in | 12 | ||||
-rw-r--r-- | NEWS | 13 | ||||
-rw-r--r-- | README | 31 | ||||
-rw-r--r-- | arg_parser.cc | 7 | ||||
-rw-r--r-- | arg_parser.h | 2 | ||||
-rwxr-xr-x | configure | 8 | ||||
-rw-r--r-- | decoder.cc | 42 | ||||
-rw-r--r-- | decoder.h | 43 | ||||
-rw-r--r-- | doc/lziprecover.1 | 12 | ||||
-rw-r--r-- | doc/lziprecover.info | 233 | ||||
-rw-r--r-- | doc/lziprecover.texi (renamed from doc/lziprecover.texinfo) | 37 | ||||
-rw-r--r-- | file_index.cc | 104 | ||||
-rw-r--r-- | file_index.h | 14 | ||||
-rw-r--r-- | lzip.h | 30 | ||||
-rw-r--r-- | main.cc | 94 | ||||
-rw-r--r-- | merge.cc | 35 | ||||
-rw-r--r-- | range_dec.cc | 58 | ||||
-rw-r--r-- | repair.cc | 7 | ||||
-rw-r--r-- | split.cc | 40 | ||||
-rwxr-xr-x | testsuite/check.sh | 105 | ||||
-rw-r--r-- | testsuite/unzcrash.cc | 9 |
23 files changed, 520 insertions, 446 deletions
@@ -1,3 +1,10 @@ +2014-04-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.16-pre1 released. + * main.cc (close_and_set_permissions): Behave like 'cp -p'. + * Minor improvements. + * lziprecover.texinfo: Renamed to lziprecover.texi. + 2013-09-14 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.15 released. @@ -83,7 +90,7 @@ * testsuite/unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -14,8 +14,8 @@ Procedure or lzip -cd lziprecover[version].tar.lz | tar -xf - -This creates the directory ./lziprecover[version] containing the source -from the main archive. +This creates the directory ./lziprecover[version] containing the source from +the main archive. 2. Change to lziprecover directory and run configure. (Try 'configure --help' for usage instructions). @@ -27,8 +27,7 @@ from the main archive. make -4. Optionally, type 'make check' to run the tests that come with - lziprecover. +4. Optionally, type 'make check' to run the tests that come with lziprecover. 5. Type 'make install' to install the program and any data files and documentation. @@ -44,12 +43,12 @@ from the main archive. Another way ----------- -You can also compile lziprecover into a separate directory. To do this, -you must use a version of 'make' that supports the 'VPATH' variable, -such as GNU 'make'. 'cd' to the directory where you want the object -files and executables to go and run the 'configure' script. 'configure' -automatically checks for the source code in '.', in '..' and in the -directory that 'configure' is in. +You can also compile lziprecover into a separate directory. +To do this, you must use a version of 'make' that supports the 'VPATH' +variable, such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..' and +in the directory that 'configure' is in. 'configure' recognizes the option '--srcdir=DIR' to control where to look for the sources. Usually 'configure' can determine that directory @@ -59,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index dbd1917..54e61a3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,8 +1,8 @@ DISTNAME = $(pkgname)-$(pkgversion) INSTALL = install -INSTALL_PROGRAM = $(INSTALL) -p -m 755 -INSTALL_DATA = $(INSTALL) -p -m 644 +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh @@ -51,8 +51,8 @@ doc : info man info : $(VPATH)/doc/$(pkgname).info -$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo - cd $(VPATH)/doc && makeinfo $(pkgname).texinfo +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi man : $(VPATH)/doc/$(progname).1 @@ -101,7 +101,7 @@ uninstall-man : dist : doc ln -sf $(VPATH) $(DISTNAME) - tar -cvf $(DISTNAME).tar \ + tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ $(DISTNAME)/AUTHORS \ $(DISTNAME)/COPYING \ $(DISTNAME)/ChangeLog \ @@ -112,7 +112,7 @@ dist : doc $(DISTNAME)/configure \ $(DISTNAME)/doc/$(progname).1 \ $(DISTNAME)/doc/$(pkgname).info \ - $(DISTNAME)/doc/$(pkgname).texinfo \ + $(DISTNAME)/doc/$(pkgname).texi \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/fox5.lz \ $(DISTNAME)/testsuite/fox5_bad[1-5].lz \ @@ -1,10 +1,9 @@ -Changes in version 1.15: +Changes in version 1.16: -Lziprecover can now repair multi-member files with up to one byte error -per member, without having to split them first. +Copying of file dates, permissions, and ownership now behaves like "cp -p". +(If the user ID or the group ID can't be duplicated, the file permission +bits S_ISUID and S_ISGID are cleared). -Lziprecover can now merge multi-member files without having to split -them first even if some copies have the header and the trailer damaged. +Some minor improvements have been made. -The chapters "Repairing files", "Merging files" and "Unzcrash" have been -added to the manual. +"lziprecover.texinfo" has been renamed to "lziprecover.texi". @@ -5,25 +5,36 @@ lzip compressed data format (.lz), able to repair slightly damaged files, recover badly damaged files from two or more copies, extract data from damaged files, decompress files and test integrity of files. -The lzip file format is designed for long-term data archiving. It is -clean, provides very safe 4 factor integrity checking, and is backed by -the recovery capabilities of lziprecover. +The lzip file format is designed for long-term data archiving, taking +into account both data integrity and decoder availability: + + * The lzip format provides very safe integrity checking and some data + recovery means. The lziprecover program can repair bit-flip errors + (one of the most common forms of data corruption) in lzip files, + and provides data recovery capabilities, including error-checked + merging of damaged copies of a file. + + * The lzip format is as simple as possible (but not simpler). The + lzip manual provides the code of a simple decompressor along with a + detailed explanation of how it works, so that with the only help of + the lzip manual it would be possible for a digital archaeologist to + extract the data from a lzip file long after quantum computers + eventually render LZMA obsolete. + + * Additionally lzip is copylefted, which guarantees that it will + remain free forever. Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. -Lziprecover makes lzip files resistant to bit-flip (one of the most -common forms of data corruption), and can safely merge multiple damaged -backup copies. - If the cause of file corruption is damaged media, the combination GNU ddrescue + lziprecover is the best option for recovering data from multiple damaged copies. If a file is too damaged for lziprecover to repair it, all the -recoverable data in all members of the file can be extracted with the -'-D' option. +recoverable data in all members of the file can be extracted in one step +with the '-D' option. Lziprecover is able to efficiently extract a range of bytes from a multi-member file, because it only decompresses the members containing @@ -56,7 +67,7 @@ unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/arg_parser.cc b/arg_parser.cc index 5cb98a9..44e158e 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -120,7 +120,7 @@ bool Arg_parser::parse_short_option( const char * const opt, const char * const if( index < 0 ) { - error_ = "invalid option -- "; error_ += c; + error_ = "invalid option -- '"; error_ += c; error_ += '\''; return false; } @@ -135,7 +135,8 @@ bool Arg_parser::parse_short_option( const char * const opt, const char * const { if( !arg || !arg[0] ) { - error_ = "option requires an argument -- "; error_ += c; + error_ = "option requires an argument -- '"; error_ += c; + error_ += '\''; return false; } data.back().argument = arg; ++argind; cind = 0; diff --git a/arg_parser.h b/arg_parser.h index 5248cb1..8d0d5b3 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -1,14 +1,14 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for lzip files -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lziprecover -pkgversion=1.15 +pkgversion=1.16-pre1 progname=lziprecover -srctrigger=doc/${pkgname}.texinfo +srctrigger=doc/${pkgname}.texi # clear some things potentially inherited from environment. LC_ALL=C @@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for lzip files -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,14 +46,14 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const std::fprintf( f, " " ); if( !msg ) std::fflush( f ); } - if( msg ) std::fprintf( f, "%s.\n", msg ); + if( msg ) std::fprintf( f, "%s\n", msg ); } } -// Returns the number of bytes really read. -// If (returned value < size) and (errno == 0), means EOF was reached. -// +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. +*/ int readblock( const int fd, uint8_t * const buf, const int size ) { int rest = size; @@ -63,16 +63,16 @@ int readblock( const int fd, uint8_t * const buf, const int size ) const int n = read( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; else if( n == 0 ) break; // EOF - else if( errno != EINTR && errno != EAGAIN ) break; + else if( errno != EINTR ) break; errno = 0; } return size - rest; } -// Returns the number of bytes really written. -// If (returned value < size), it is always an error. -// +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*/ int writeblock( const int fd, const uint8_t * const buf, const int size ) { int rest = size; @@ -81,7 +81,7 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) { const int n = write( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; - else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; + else if( n < 0 && errno != EINTR ) break; errno = 0; } return size - rest; @@ -127,8 +127,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { File_trailer trailer; const int trailer_size = File_trailer::size; - const unsigned long long member_size = - rdec.member_position() + trailer_size; + const unsigned long long member_size = rdec.member_position() + trailer_size; bool error = false; int size = rdec.read_data( trailer.data, trailer_size ); @@ -147,7 +146,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( !rdec.code_is_zero() ) { error = true; - pp( "Range decoder final code is not zero" ); + pp( "Range decoder final code is not zero." ); } if( trailer.data_crc() != crc() ) { @@ -191,8 +190,8 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const } -// Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, -// 3 = trailer error, 4 = unknown marker found. +/* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, + 3 = trailer error, 4 = unknown marker found. */ int LZ_decoder::decode_member( const Pretty_print & pp ) { Bit_model bm_literal[1<<literal_context_bits][0x300]; @@ -202,7 +201,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) Bit_model bm_rep1[State::states]; Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; - Bit_model bm_dis_slot[dis_states][1<<dis_slot_bits]; + Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; Len_model match_len_model; @@ -223,7 +222,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) if( state.is_char() ) { state.set_char1(); - put_byte( rdec.decode_tree( bm_literal[get_lit_state(prev_byte)], 8 ) ); + put_byte( rdec.decode_tree8( bm_literal[get_lit_state(prev_byte)] ) ); } else { @@ -235,9 +234,9 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) else { int len; - if( rdec.decode_bit( bm_rep[state()] ) == 1 ) // 2nd bit + if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) == 1 ) // 3rd bit + if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit { unsigned distance; if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit @@ -265,7 +264,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) { const unsigned rep0_saved = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); - const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_dis_state(len)] ); + const int dis_slot = rdec.decode_tree6( bm_dis_slot[get_len_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else { @@ -302,8 +301,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state.set_match(); - if( rep0 >= (unsigned)dictionary_size || - ( rep0 >= (unsigned)pos && !partial_data_pos ) ) + if( rep0 >= dictionary_size || rep0 >= data_position() ) { flush_data(); return 1; } } copy_block( rep0, len ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,6 +76,7 @@ public: code = 0; for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); range = 0xFFFFFFFFU; + code &= range; // make sure that first byte is discarded } void normalize() @@ -120,12 +121,13 @@ public: } } - int decode_tree( Bit_model bm[], const int num_bits ) + int decode_tree3( Bit_model bm[] ) { int symbol = 1; - for( int i = num_bits; i > 0; --i ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - return symbol - (1 << num_bits); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + return symbol & 7; } int decode_tree6( Bit_model bm[] ) @@ -137,7 +139,15 @@ public: symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); - return symbol - (1 << 6); + return symbol & 0x3F; + } + + int decode_tree8( Bit_model bm[] ) + { + int symbol = 1; + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + return symbol & 0xFF; } int decode_tree_reversed( Bit_model bm[], const int num_bits ) @@ -156,10 +166,9 @@ public: int decode_tree_reversed4( Bit_model bm[] ) { int model = 1; - int symbol = 0; + int symbol = decode_bit( bm[model] ); + model = (model << 1) + symbol; int bit = decode_bit( bm[model] ); - model = (model << 1) + bit; symbol |= bit; - bit = decode_bit( bm[model] ); model = (model << 1) + bit; symbol |= (bit << 1); bit = decode_bit( bm[model] ); model = (model << 1) + bit; symbol |= (bit << 2); @@ -171,7 +180,7 @@ public: { Bit_model * const bm1 = bm + 0x100; int symbol = 1; - for( int i = 7; i >= 0; --i ) + while( symbol < 0x100 ) { match_byte <<= 1; const int match_bit = match_byte & 0x100; @@ -184,18 +193,16 @@ public: break; } } - return symbol - 0x100; + return symbol & 0xFF; } int decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) - return decode_tree( lm.bm_low[pos_state], len_low_bits ); + return decode_tree3( lm.bm_low[pos_state] ); if( decode_bit( lm.choice2 ) == 0 ) - return len_low_symbols + - decode_tree( lm.bm_mid[pos_state], len_mid_bits ); - return len_low_symbols + len_mid_symbols + - decode_tree( lm.bm_high, len_high_bits ); + return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] ); + return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high ); } }; @@ -206,7 +213,7 @@ class LZ_decoder const unsigned long long outend; unsigned long long partial_data_pos; Range_decoder & rdec; - const int dictionary_size; + const unsigned dictionary_size; const int buffer_size; uint8_t * const buffer; // output buffer int pos; // current pos in buffer @@ -268,7 +275,7 @@ public: partial_data_pos( 0 ), rdec( rde ), dictionary_size( header.dictionary_size() ), - buffer_size( std::max( 65536, dictionary_size ) ), + buffer_size( std::max( 65536U, dictionary_size ) ), buffer( new uint8_t[buffer_size] ), pos( 0 ), stream_pos( 0 ), diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index cda6779..3a6300f 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,7 +1,7 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LZIPRECOVER "1" "September 2013" "Lziprecover 1.15" "User Commands" +.TH LZIPRECOVER "1" "April 2014" "lziprecover 1.16-pre1" "User Commands" .SH NAME -Lziprecover \- recovers data from damaged lzip files +lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS .B lziprecover [\fIoptions\fR] [\fIfiles\fR] @@ -69,20 +69,20 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2013 Antonio Diaz Diaz. +Copyright \(co 2014 Antonio Diaz Diaz. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. .SH "SEE ALSO" The full documentation for -.B Lziprecover +.B lziprecover is maintained as a Texinfo manual. If the .B info and -.B Lziprecover +.B lziprecover programs are properly installed at your site, the command .IP -.B info Lziprecover +.B info lziprecover .PP should give you access to the complete manual. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 5f8ad08..1248e6f 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -1,5 +1,5 @@ -This is lziprecover.info, produced by makeinfo version 4.13 from -lziprecover.texinfo. +This is lziprecover.info, produced by makeinfo version 4.13+ from +lziprecover.texi. INFO-DIR-SECTION Data Compression START-INFO-DIR-ENTRY @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.15, 14 September 2013). +This manual is for Lziprecover (version 1.16-pre1, 5 April 2014). * Menu: @@ -27,7 +27,7 @@ This manual is for Lziprecover (version 1.15, 14 September 2013). * Concept index:: Index of concepts - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -43,18 +43,29 @@ lzip compressed data format (.lz), able to repair slightly damaged files, recover badly damaged files from two or more copies, extract data from damaged files, decompress files and test integrity of files. - The lzip file format is designed for long-term data archiving. It is -clean, provides very safe 4 factor integrity checking, and is backed by -the recovery capabilities of lziprecover. + The lzip file format is designed for long-term data archiving, taking +into account both data integrity and decoder availability: + + * The lzip format provides very safe integrity checking and some data + recovery means. The lziprecover program can repair bit-flip errors + (one of the most common forms of data corruption) in lzip files, + and provides data recovery capabilities, including error-checked + merging of damaged copies of a file. + + * The lzip format is as simple as possible (but not simpler). The + lzip manual provides the code of a simple decompressor along with + a detailed explanation of how it works, so that with the only help + of the lzip manual it would be possible for a digital + archaeologist to extract the data from a lzip file long after + quantum computers eventually render LZMA obsolete. + + * Additionally lzip is copylefted, which guarantees that it will + remain free forever. Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. - Lziprecover makes lzip files resistant to bit-flip (one of the most -common forms of data corruption), and can safely merge multiple damaged -backup copies. - If the cause of file corruption is damaged media, the combination GNU ddrescue + lziprecover is the best option for recovering data from multiple damaged copies. *Note ddrescue-example::, for an example. @@ -96,113 +107,113 @@ The format for running lziprecover is: Lziprecover supports the following options: -`-h' -`--help' +'-h' +'--help' Print an informative help message describing the options and exit. -`-V' -`--version' +'-V' +'--version' Print the version number of lziprecover on the standard output and exit. -`-c' -`--stdout' +'-c' +'--stdout' Decompress to standard output. Needed when reading from a named pipe (fifo) or from a device. Use it to recover as much of the uncompressed data as possible when decompressing a corrupt file. -`-d' -`--decompress' +'-d' +'--decompress' Decompress. -`-D RANGE' -`--range-decompress=RANGE' +'-D RANGE' +'--range-decompress=RANGE' Decompress only a range of bytes starting at decompressed byte - position `BEGIN' and up to byte position `END - 1'. Three formats - of RANGE are recognized, `BEGIN', `BEGIN-END', and `BEGIN,SIZE'. + position 'BEGIN' and up to byte position 'END - 1'. Three formats + of RANGE are recognized, 'BEGIN', 'BEGIN-END', and 'BEGIN,SIZE'. If only BEGIN is specified, END is taken as the end of the file. The produced bytes are sent to standard output unless the - `--output' option is used. In order to guarantee the correctness + '--output' option is used. In order to guarantee the correctness of the data produced, all members containing any part of the desired data are decompressed and their integrity is verified. This operation is more efficient in multi-member files because it only decompresses the members containing the desired data. -`-f' -`--force' +'-f' +'--force' Force overwrite of output files. -`-i' -`--ignore-errors' - Make `--range-decompress' ignore data errors and continue +'-i' +'--ignore-errors' + Make '--range-decompress' ignore data errors and continue decompressing the remaining members in the file. For example, - `lziprecover -i -D0 file.lz > file' decompresses all the - recoverable data in all members of `file.lz' without having to + 'lziprecover -i -D0 file.lz > file' decompresses all the + recoverable data in all members of 'file.lz' without having to split it first. -`-k' -`--keep' +'-k' +'--keep' Keep (don't delete) input files during decompression. -`-l' -`--list' +'-l' +'--list' Print total file sizes and ratios. The values produced are correct - even for multi-member files. Use it together with `-v' to see + even for multi-member files. Use it together with '-v' to see information about the members in the file. -`-m' -`--merge' +'-m' +'--merge' Try to produce a correct file merging the good parts of two or more damaged copies. If successful, a repaired copy is written to the - file `FILE_fixed.lz'. The exit status is 0 if a correct file could - be produced, 2 otherwise. See the chapter `Merging files' (*note + file 'FILE_fixed.lz'. The exit status is 0 if a correct file could + be produced, 2 otherwise. See the chapter 'Merging files' (*note Merging files::) for a complete description of the merge mode. -`-o FILE' -`--output=FILE' - Place the output into `FILE' instead of into `FILE_fixed.lz'. If +'-o FILE' +'--output=FILE' + Place the output into 'FILE' instead of into 'FILE_fixed.lz'. If splitting, the names of the files produced are in the form - `rec01FILE', `rec02FILE', etc. If decompressing from standard - input and `--stdout' has not been specified, use `FILE' as the + 'rec01FILE', 'rec02FILE', etc. If decompressing from standard + input and '--stdout' has not been specified, use 'FILE' as the name of the decompressed file. -`-q' -`--quiet' +'-q' +'--quiet' Quiet operation. Suppress all messages. -`-R' -`--repair' +'-R' +'--repair' Try to repair a file with small errors (up to one byte error per member). If successful, a repaired copy is written to the file - `FILE_fixed.lz'. `FILE' is not modified at all. The exit status + 'FILE_fixed.lz'. 'FILE' is not modified at all. The exit status is 0 if the file could be repaired, 2 otherwise. See the chapter - `Repairing files' (*note Repairing files::) for a complete + 'Repairing files' (*note Repairing files::) for a complete description of the repair mode. -`-s' -`--split' - Search for members in `FILE' and write each member in its own - `.lz' file. You can then use `lziprecover -t' to test the +'-s' +'--split' + Search for members in 'FILE' and write each member in its own + '.lz' file. You can then use 'lziprecover -t' to test the integrity of the resulting files, decompress those which are undamaged, and try to repair or partially decompress those which are damaged. - The names of the files produced are in the form `rec01FILE.lz', - `rec02FILE.lz', etc, and are designed so that the use of wildcards + The names of the files produced are in the form 'rec01FILE.lz', + 'rec02FILE.lz', etc, and are designed so that the use of wildcards in subsequent processing, for example, - `lziprecover -cd rec*FILE.lz > recovered_data', processes the + 'lziprecover -cd rec*FILE.lz > recovered_data', processes the files in the correct order. The number of digits used in the names - varies depending on the number of members in `FILE'. + varies depending on the number of members in 'FILE'. -`-t' -`--test' +'-t' +'--test' Check integrity of the specified file(s), but don't decompress them. This really performs a trial decompression and throws away - the result. Use it together with `-v' to see information about + the result. Use it together with '-v' to see information about the file. -`-v' -`--verbose' +'-v' +'--verbose' Verbose mode. When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, compression ratio, dictionary @@ -211,7 +222,7 @@ The format for running lziprecover is: Numbers given as arguments to options may be followed by a multiplier -and an optional `B' for "byte". +and an optional 'B' for "byte". Table of SI and binary prefixes (unit multipliers): @@ -239,7 +250,7 @@ File: lziprecover.info, Node: Repairing files, Next: Merging files, Prev: Inv Lziprecover is able to repair files with small errors (up to one byte error per member). The error may be located anywhere in the file except -in the header (first 6 bytes of each member) or in the `Member size' +in the header (first 6 bytes of each member) or in the 'Member size' field of the trailer (last 8 bytes of each member). This makes lzip files resistant to bit-flip, one of the most common forms of data corruption. @@ -323,15 +334,15 @@ additional information before, between, or after them. All multibyte values are stored in little endian order. -`ID string' +'ID string' A four byte string, identifying the lzip format, with the value "LZIP" (0x4C, 0x5A, 0x49, 0x50). -`VN (version number, 1 byte)' +'VN (version number, 1 byte)' Just in case something needs to be modified in the future. 1 for now. -`DS (coded dictionary size, 1 byte)' +'DS (coded dictionary size, 1 byte)' Lzip divides the distance between any two powers of 2 into 8 equally spaced intervals, named "wedges". The dictionary size is calculated by taking a power of 2 (the base size) and substracting @@ -343,18 +354,18 @@ additional information before, between, or after them. Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB Valid values for dictionary size range from 4 KiB to 512 MiB. -`Lzma stream' +'Lzma stream' The lzma stream, finished by an end of stream marker. Uses default values for encoder properties. See the lzip manual for a full description. -`CRC32 (4 bytes)' +'CRC32 (4 bytes)' CRC of the uncompressed original data. -`Data size (8 bytes)' +'Data size (8 bytes)' Size of the uncompressed original data. -`Member size (8 bytes)' +'Member size (8 bytes)' Total size of the member, including header and trailer. This field acts as a distributed index, allows the verification of stream integrity, and facilitates safe recovery of undamaged members from @@ -368,30 +379,30 @@ File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: File format, U ******************************** Example 1: Restore a regular file from its compressed version -`file.lz'. If the operation is successful, `file.lz' is removed. +'file.lz'. If the operation is successful, 'file.lz' is removed. lziprecover -d file.lz -Example 2: Verify the integrity of the compressed file `file.lz' and +Example 2: Verify the integrity of the compressed file 'file.lz' and show status. lziprecover -tv file.lz -Example 3: Decompress `file.lz' partially until 10 KiB of decompressed +Example 3: Decompress 'file.lz' partially until 10 KiB of decompressed data are produced. lziprecover -D 0,10KiB file.lz -Example 4: Decompress `file.lz' partially from decompressed byte 10000 +Example 4: Decompress 'file.lz' partially from decompressed byte 10000 to decompressed byte 15000 (5000 bytes are produced). lziprecover -D 10000-15000 file.lz -Example 5: Repair small errors in the file `file.lz'. (Indented lines +Example 5: Repair small errors in the file 'file.lz'. (Indented lines are abridged diagnostic messages from lziprecover). lziprecover -v -R file.lz @@ -399,8 +410,8 @@ are abridged diagnostic messages from lziprecover). mv file_fixed.lz file.lz -Example 6: Split the multi-member file `file.lz' and write each member -in its own `recXXXfile.lz' file. Then use `lziprecover -t' to test the +Example 6: Split the multi-member file 'file.lz' and write each member +in its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the integrity of the resulting files. lziprecover -s file.lz @@ -424,10 +435,10 @@ error-checked merging of copies (*Note GNU ddrescue manual: Example 8: Recover the first volume of those created with the command -`lzip -b 32MiB -S 650MB big_db' from two copies, `big_db1_00001.lz' and -`big_db2_00001.lz', with member 07 damaged in the first copy, member 18 +'lzip -b 32MiB -S 650MB big_db' from two copies, 'big_db1_00001.lz' and +'big_db2_00001.lz', with member 07 damaged in the first copy, member 18 damaged in the second copy, and member 12 damaged in both copies. The -correct file produced is saved in `big_db_00001.lz'. +correct file produced is saved in 'big_db_00001.lz'. lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz Input files merged successfully @@ -440,7 +451,7 @@ File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: The lziprecover package also includes unzcrash, a program written to test robustness to decompression of corrupted data, inspired by -unzcrash.c from Julian Seward's bzip2. Type `make unzcrash' in the +unzcrash.c from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover source directory to build it. Unzcrash reads the specified file and then repeatedly decompresses @@ -461,40 +472,40 @@ programs with a suitable command line syntax. Unzcrash supports the following options: -`-h' -`--help' +'-h' +'--help' Print an informative help message describing the options and exit. -`-V' -`--version' +'-V' +'--version' Print the version number of unzcrash on the standard output and exit. -`-b RANGE' -`--bits=RANGE' +'-b RANGE' +'--bits=RANGE' Test N-bit errors only, instead of testing all the 255 wrong - values for each byte. `N-bit error' means any value differing from + values for each byte. 'N-bit error' means any value differing from the original value in N bit positions, not a value differing from the original value in the bit position N. The number of N-bit errors per byte (N = 1 to 8) is: 8 28 56 70 56 28 8 1 Examples of RANGE: 1 1,2,3 1-4 1,3-5,8 1-3,5-8 -`-p BYTES' -`--position=BYTES' +'-p BYTES' +'--position=BYTES' First byte position to test in the file. Defaults to 0. -`-q' -`--quiet' +'-q' +'--quiet' Quiet operation. Suppress all messages. -`-s BYTES' -`--size=BYTES' +'-s BYTES' +'--size=BYTES' Number of byte positions to test. If not specified, the whole file is tested. -`-v' -`--verbose' +'-v' +'--verbose' Verbose mode. @@ -516,7 +527,7 @@ for all eternity, if not longer. If you find a bug in lziprecover, please send electronic mail to <lzip-bug@nongnu.org>. Include the version number, which you can find -by running `lziprecover --version'. +by running 'lziprecover --version'. File: lziprecover.info, Node: Concept index, Prev: Problems, Up: Top @@ -540,17 +551,17 @@ Concept index Tag Table: -Node: Top228 -Node: Introduction1096 -Node: Invoking lziprecover3249 -Node: Repairing files8687 -Node: Merging files9406 -Node: File format11177 -Node: Examples13687 -Ref: ddrescue-example14888 -Node: Unzcrash15997 -Node: Problems18369 -Node: Concept index18919 +Node: Top226 +Node: Introduction1100 +Node: Invoking lziprecover3858 +Node: Repairing files9296 +Node: Merging files10015 +Node: File format11786 +Node: Examples14296 +Ref: ddrescue-example15497 +Node: Unzcrash16606 +Node: Problems18978 +Node: Concept index19528 End Tag Table diff --git a/doc/lziprecover.texinfo b/doc/lziprecover.texi index 3dbceb9..be4fc27 100644 --- a/doc/lziprecover.texinfo +++ b/doc/lziprecover.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 14 September 2013 -@set VERSION 1.15 +@set UPDATED 5 April 2014 +@set VERSION 1.16-pre1 @dircategory Data Compression @direntry @@ -47,7 +47,8 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright @copyright{} 2009, 2010, 2011, 2012, 2013, 2014 +Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -62,18 +63,34 @@ lzip compressed data format (.lz), able to repair slightly damaged files, recover badly damaged files from two or more copies, extract data from damaged files, decompress files and test integrity of files. -The lzip file format is designed for long-term data archiving. It is -clean, provides very safe 4 factor integrity checking, and is backed by -the recovery capabilities of lziprecover. +The lzip file format is designed for long-term data archiving, taking +into account both data integrity and decoder availability: + +@itemize @bullet +@item +The lzip format provides very safe integrity checking and some data +recovery means. The lziprecover program can repair bit-flip errors (one +of the most common forms of data corruption) in lzip files, and provides +data recovery capabilities, including error-checked merging of damaged +copies of a file. + +@item +The lzip format is as simple as possible (but not simpler). The lzip +manual provides the code of a simple decompressor along with a detailed +explanation of how it works, so that with the only help of the lzip +manual it would be possible for a digital archaeologist to extract the +data from a lzip file long after quantum computers eventually render +LZMA obsolete. + +@item +Additionally lzip is copylefted, which guarantees that it will remain +free forever. +@end itemize Lziprecover is able to recover or decompress files produced by any of the compressors in the lzip family; lzip, plzip, minilzip/lzlib, clzip and pdlzip. -Lziprecover makes lzip files resistant to bit-flip (one of the most -common forms of data corruption), and can safely merge multiple damaged -backup copies. - If the cause of file corruption is damaged media, the combination @w{GNU ddrescue + lziprecover} is the best option for recovering data from multiple damaged copies. @xref{ddrescue-example}, for an example. diff --git a/file_index.cc b/file_index.cc index b3d9629..cdb4031 100644 --- a/file_index.cc +++ b/file_index.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,33 +28,9 @@ #include "file_index.h" -const char * format_num( unsigned long long num, - unsigned long long limit, - const int set_prefix ) - { - const char * const si_prefix[8] = - { "k", "M", "G", "T", "P", "E", "Z", "Y" }; - const char * const binary_prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - static bool si = true; - static char buf[32]; - - if( set_prefix ) si = ( set_prefix > 0 ); - const unsigned factor = ( si ? 1000 : 1024 ); - const char * const * prefix = ( si ? si_prefix : binary_prefix ); - const char * p = ""; - bool exact = ( num % factor == 0 ); - - for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } - snprintf( buf, sizeof buf, "%llu %s", num, p ); - return buf; - } - - Block Block::split( const long long pos ) { - if( pos_ < pos && end() > pos ) + if( pos > pos_ && pos < end() ) { const Block b( pos_, pos - pos_ ); pos_ = pos; size_ -= b.size_; @@ -64,13 +40,27 @@ Block Block::split( const long long pos ) } +void File_index::set_errno_error( const char * const msg ) + { + error_ = msg; error_ += std::strerror( errno ); error_ += '.'; + retval_ = 1; + } + +void File_index::set_num_error( const char * const msg1, unsigned long long num, + const char * const msg2 ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu%s", msg1, num, msg2 ); + error_ = buf; + retval_ = 2; + } + + File_index::File_index( const int infd ) - : - isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) + : isize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ) { if( isize < 0 ) - { error_ = "Input file is not seekable :"; - error_ += std::strerror( errno ); retval_ = 1; return; } + { set_errno_error( "Input file is not seekable :" ); return; } if( isize < min_member_size ) { error_ = "Input file is too short."; retval_ = 2; return; } if( isize > INT64_MAX ) @@ -79,14 +69,13 @@ File_index::File_index( const int infd ) File_header header; if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) - { error_ = "Error reading member header :"; - error_ += std::strerror( errno ); retval_ = 1; return; } + { set_errno_error( "Error reading member header :" ); return; } if( !header.verify_magic() ) { error_ = "Bad magic number (file not in lzip format)."; retval_ = 2; return; } if( !header.verify_version() ) - { error_ = "Version "; error_ += format_num( header.version() ); - error_ += "member format not supported."; retval_ = 2; return; } + { set_num_error( "Version ", header.version(), + " member format not supported." ); return; } long long pos = isize; // always points to a header or to EOF while( pos >= min_member_size ) @@ -94,28 +83,26 @@ File_index::File_index( const int infd ) File_trailer trailer; if( seek_read( infd, trailer.data, File_trailer::size, pos - File_trailer::size ) != File_trailer::size ) - { error_ = "Error reading member trailer :"; - error_ += std::strerror( errno ); retval_ = 1; break; } + { set_errno_error( "Error reading member trailer :" ); break; } const long long member_size = trailer.member_size(); if( member_size < min_member_size || member_size > pos ) { - if( member_vector.size() == 0 ) // maybe trailing garbage + if( member_vector.empty() ) // maybe trailing garbage { --pos; continue; } - error_ = "Member size in trailer is corrupt at pos "; - error_ += format_num( pos - 8 ); retval_ = 2; break; + set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 ); + break; } if( seek_read( infd, header.data, File_header::size, pos - member_size ) != File_header::size ) - { error_ = "Error reading member header :"; - error_ += std::strerror( errno ); retval_ = 1; break; } + { set_errno_error( "Error reading member header :" ); break; } if( !header.verify_magic() || !header.verify_version() ) { - if( member_vector.size() == 0 ) // maybe trailing garbage + if( member_vector.empty() ) // maybe trailing garbage { --pos; continue; } - error_ = "Bad header at pos "; - error_ += format_num( pos - member_size ); retval_ = 2; break; + set_num_error( "Bad header at pos ", pos - member_size ); + break; } - if( member_vector.size() == 0 && isize - pos > File_header::size && + if( member_vector.empty() && isize - pos > File_header::size && seek_read( infd, header.data, File_header::size, pos ) == File_header::size && header.verify_magic() && header.verify_version() ) { @@ -126,7 +113,7 @@ File_index::File_index( const int infd ) member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size ) ); } - if( pos != 0 || member_vector.size() == 0 ) + if( pos != 0 || member_vector.empty() ) { member_vector.clear(); if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } @@ -150,12 +137,10 @@ File_index::File_index( const int infd ) // All files in 'infd_vector' must be at least 'fsize' bytes long. File_index::File_index( const std::vector< int > & infd_vector, const long long fsize ) - : - isize( fsize ), retval_( 0 ) + : isize( fsize ), retval_( 0 ) { if( isize < 0 ) - { error_ = "Input file is not seekable :"; - error_ += std::strerror( errno ); retval_ = 1; return; } + { set_errno_error( "Input file is not seekable :" ); return; } if( isize < min_member_size ) { error_ = "Input file is too short."; retval_ = 2; return; } if( isize > INT64_MAX ) @@ -169,8 +154,7 @@ File_index::File_index( const std::vector< int > & infd_vector, { const int infd = infd_vector[i]; if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) - { error_ = "Error reading member header :"; - error_ += std::strerror( errno ); retval_ = 1; return; } + { set_errno_error( "Error reading member header :" ); return; } if( header.verify_magic() && header.verify_version() ) done = true; } if( !done ) @@ -188,8 +172,7 @@ File_index::File_index( const std::vector< int > & infd_vector, const int tfd = infd_vector[it]; if( seek_read( tfd, trailer.data, File_trailer::size, pos - File_trailer::size ) != File_trailer::size ) - { error_ = "Error reading member trailer :"; - error_ += std::strerror( errno ); retval_ = 1; goto error; } + { set_errno_error( "Error reading member trailer :" ); goto error; } member_size = trailer.member_size(); if( member_size >= min_member_size && member_size <= pos ) for( int ih = 0; ih < files && !done; ++ih ) @@ -197,19 +180,18 @@ File_index::File_index( const std::vector< int > & infd_vector, const int hfd = infd_vector[ih]; if( seek_read( hfd, header.data, File_header::size, pos - member_size ) != File_header::size ) - { error_ = "Error reading member header :"; - error_ += std::strerror( errno ); retval_ = 1; goto error; } + { set_errno_error( "Error reading member header :" ); goto error; } if( header.verify_magic() && header.verify_version() ) done = true; } } if( !done ) { - if( member_vector.size() == 0 ) // maybe trailing garbage + if( member_vector.empty() ) // maybe trailing garbage { --pos; continue; } - error_ = "Member size in trailer may be corrupt at pos "; - error_ += format_num( pos - 8 ); retval_ = 2; break; + set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 ); + break; } - if( member_vector.size() == 0 && isize - pos > File_header::size ) + if( member_vector.empty() && isize - pos > File_header::size ) for( int i = 0; i < files; ++i ) { const int infd = infd_vector[i]; @@ -225,7 +207,7 @@ File_index::File_index( const std::vector< int > & infd_vector, pos, member_size ) ); } error: - if( pos != 0 || member_vector.size() == 0 ) + if( pos != 0 || member_vector.empty() ) { member_vector.clear(); if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } diff --git a/file_index.h b/file_index.h index fd2d488..8acb60f 100644 --- a/file_index.h +++ b/file_index.h @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,8 +25,7 @@ class Block long long pos_, size_; // pos + size <= INT64_MAX public: - Block( const long long p, const long long s ) - : pos_( p ), size_( s ) {} + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} long long pos() const { return pos_; } long long size() const { return size_; } @@ -68,6 +67,10 @@ class File_index long long isize; int retval_; + void set_errno_error( const char * const msg ); + void set_num_error( const char * const msg1, unsigned long long num, + const char * const msg2 = "." ); + public: File_index() : error_( "No index." ), isize( 0 ), retval_( 2 ) {} explicit File_index( const int infd ); @@ -104,8 +107,3 @@ public: const Block & mblock( const int i ) const { return member_vector[i].mblock; } }; - - -const char * format_num( unsigned long long num, - unsigned long long limit = -1ULL, - const int set_prefix = 0 ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -40,7 +40,7 @@ public: enum { min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, + min_dictionary_size = 1 << min_dictionary_bits, // >= modeled_distances max_dictionary_bits = 29, max_dictionary_size = 1 << max_dictionary_bits, min_member_size = 36, @@ -49,6 +49,7 @@ enum { pos_states = 1 << pos_state_bits, pos_state_mask = pos_states - 1, + len_states = 4, dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, @@ -66,12 +67,10 @@ enum { min_match_len = 2, // must be 2 max_match_len = min_match_len + max_len_symbols - 1, // 273 - min_match_len_limit = 5, + min_match_len_limit = 5 }; - dis_states = 4 }; - -inline int get_dis_state( const int len ) - { return std::min( len - min_match_len, dis_states - 1 ); } +inline int get_len_state( const int len ) + { return std::min( len - min_match_len, len_states - 1 ); } inline int get_lit_state( const uint8_t prev_byte ) { return ( prev_byte >> ( 8 - literal_context_bits ) ); } @@ -114,7 +113,7 @@ public: for( unsigned i = 0; i < filenames.size(); ++i ) { const std::string & s = filenames[i]; - const unsigned len = ( ( s == "-" ) ? stdin_name_len : s.size() ); + const unsigned len = ( s == "-" ) ? stdin_name_len : s.size(); if( len > longest_name ) longest_name = len; } if( longest_name == 0 ) longest_name = stdin_name_len; @@ -124,7 +123,7 @@ public: : stdin_name( "(stdin)" ), verbosity_( v ), first_post( false ) { const unsigned stdin_name_len = std::strlen( stdin_name ); - longest_name = ( ( filename == "-" ) ? stdin_name_len : filename.size() ); + longest_name = ( filename == "-" ) ? stdin_name_len : filename.size(); if( longest_name == 0 ) longest_name = stdin_name_len; set_name( filename ); } @@ -253,9 +252,7 @@ struct File_trailer } void data_size( unsigned long long sz ) - { - for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } + { for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } unsigned long long member_size() const { @@ -265,9 +262,7 @@ struct File_trailer } void member_size( unsigned long long sz ) - { - for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } + { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } }; @@ -288,12 +283,14 @@ int readblock( const int fd, uint8_t * const buf, const int size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); // defined in main.cc -int open_instream( const std::string & name, struct stat * const in_statsp, +int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only = false ); int open_outstream_rw( const std::string & output_filename, const bool force ); void show_header( const File_header & header ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); +void show_error2( const char * const msg1, const char * const name, + const char * const msg2 ); void internal_error( const char * const msg ); // defined in merge.cc @@ -303,7 +300,6 @@ bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); bool try_decompress_member( const int fd, const unsigned long long msize, long long * failure_posp = 0 ); -bool verify_header( const File_header & header, const int verbosity ); int merge_files( const std::vector< std::string > & filenames, const std::string & output_filename, const int verbosity, const bool force ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,6 +56,10 @@ #include "lzip.h" #include "decoder.h" +#ifndef O_BINARY +#define O_BINARY 0 +#endif + #if CHAR_BIT != 8 #error "Environments where CHAR_BIT != 8 are not supported." #endif @@ -65,22 +69,16 @@ namespace { const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; -const char * const program_year = "2013"; +const char * const program_year = "2014"; const char * invocation_name = 0; -#ifdef O_BINARY -const int o_binary = O_BINARY; -#else -const int o_binary = 0; -#endif - struct { const char * from; const char * to; } const known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { 0, 0 } }; -enum Mode { m_none, m_decompress, m_generate, m_list, m_merge, m_range, - m_recover, m_repair, m_split, m_test }; +enum Mode { m_none, m_decompress, m_list, m_merge, m_range, m_repair, + m_split, m_test }; std::string output_filename; int outfd = -1; @@ -102,14 +100,12 @@ void show_help() " -d, --decompress decompress\n" " -D, --range-decompress=<range> decompress only a range of bytes (N-M)\n" " -f, --force overwrite existing output files\n" -// " -g, --generate-recover-file generate a recover file\n" " -i, --ignore-errors make '--range-decompress' ignore data errors\n" " -k, --keep keep (don't delete) input files\n" " -l, --list print total file sizes and ratios\n" " -m, --merge correct errors in file using several copies\n" " -o, --output=<file> place the output into <file>\n" " -q, --quiet suppress all messages\n" -// " -r, --recover correct errors in file using a recover file\n" " -R, --repair try to repair a small error in file\n" " -s, --split split multi-member file in single-member files\n" " -t, --test test compressed file integrity\n" @@ -127,7 +123,7 @@ void show_help() void show_version() { - std::printf( "%s %s\n", Program_name, PROGVERSION ); + std::printf( "%s %s\n", program_name, PROGVERSION ); std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); std::printf( "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n" "This is free software: you are free to change and redistribute it.\n" @@ -189,15 +185,15 @@ int extension_index( const std::string & name ) } // end namespace -int open_instream( const std::string & name, struct stat * const in_statsp, +int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile, const bool reg_only ) { - int infd = open( name.c_str(), O_RDONLY | o_binary ); + int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n", - program_name, name.c_str(), std::strerror( errno ) ); + program_name, name, std::strerror( errno ) ); } else { @@ -210,7 +206,7 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name.c_str(), + program_name, name, ( can_read && !no_ofile ) ? " and '--stdout' was not specified" : "" ); close( infd ); @@ -243,7 +239,7 @@ void set_d_outname( const std::string & name, const int i ) bool open_outstream( const bool force ) { - int flags = O_CREAT | O_WRONLY | o_binary; + int flags = O_CREAT | O_WRONLY | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open( output_filename.c_str(), flags, outfd_mode ); @@ -293,10 +289,14 @@ void close_and_set_permissions( const struct stat * const in_statsp ) bool warning = false; if( in_statsp ) { + const mode_t mode = in_statsp->st_mode; // fchown will in many cases return with EPERM, which can be safely ignored. - if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && - errno != EPERM ) || - fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true; + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; } if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); outfd = -1; @@ -356,6 +356,7 @@ void show_trailing_garbage( const uint8_t * const data, const int size, garbage_msg += xdigit( data[i] & 0x0F ); } } + garbage_msg += '.'; pp( garbage_msg.c_str() ); } @@ -375,7 +376,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) if( rdec.finished() ) // End Of File { if( first_member ) - { pp( "File ends unexpectedly at member header" ); retval = 2; } + { pp( "File ends unexpectedly at member header." ); retval = 2; } else if( verbosity >= 4 && size > 0 ) show_trailing_garbage( header.data, size, pp, true ); break; @@ -383,7 +384,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) if( !header.verify_magic() ) { if( first_member ) - { pp( "Bad magic number (file not in lzip format)" ); retval = 2; } + { pp( "Bad magic number (file not in lzip format)." ); retval = 2; } else if( verbosity >= 4 ) show_trailing_garbage( header.data, size, pp, false ); break; @@ -398,7 +399,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) } if( header.dictionary_size() < min_dictionary_size || header.dictionary_size() > max_dictionary_size ) - { pp( "Invalid dictionary size in member header" ); retval = 2; break; } + { pp( "Invalid dictionary size in member header." ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) { pp(); if( verbosity >= 3 ) show_header( header ); } @@ -412,10 +413,10 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { pp(); if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %llu\n", + std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n", partial_file_pos ); else - std::fprintf( stderr, "Decoder error at pos %llu\n", + std::fprintf( stderr, "Decoder error at pos %llu.\n", partial_file_pos ); } retval = 2; break; @@ -424,11 +425,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { std::fprintf( stderr, testing ? "ok\n" : "done\n" ); pp.reset(); } } } - catch( std::bad_alloc ) - { - pp( "Not enough memory. Find a machine with more memory" ); - retval = 1; - } + catch( std::bad_alloc ) { pp( "Not enough memory." ); retval = 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } if( verbosity == 1 && retval == 0 ) std::fprintf( stderr, testing ? "ok\n" : "done\n" ); @@ -455,7 +452,7 @@ void set_signals() int open_outstream_rw( const std::string & output_filename, const bool force ) { - int flags = O_CREAT | O_RDWR | o_binary; + int flags = O_CREAT | O_RDWR | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; int outfd = open( output_filename.c_str(), flags, all_rw ); @@ -481,7 +478,7 @@ void show_error( const char * const msg, const int errcode, const bool help ) { std::fprintf( stderr, "%s: %s", program_name, msg ); if( errcode > 0 ) - std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + std::fprintf( stderr, ": %s.", std::strerror( errcode ) ); std::fprintf( stderr, "\n" ); } if( help ) @@ -491,10 +488,18 @@ void show_error( const char * const msg, const int errcode, const bool help ) } +void show_error2( const char * const msg1, const char * const name, + const char * const msg2 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s '%s' %s\n", program_name, msg1, name, msg2 ); + } + + void internal_error( const char * const msg ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); std::exit( 3 ); } @@ -564,7 +569,7 @@ int main( const int argc, const char * const argv[] ) case 't': set_mode( program_mode, m_test ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; - default : internal_error( "uncaught option" ); + default : internal_error( "uncaught option." ); } } // end process options @@ -588,9 +593,7 @@ int main( const int argc, const char * const argv[] ) switch( program_mode ) { - case m_generate: - case m_recover: - case m_none: internal_error( "invalid operation" ); break; + case m_none: internal_error( "invalid operation." ); break; case m_decompress: break; case m_list: if( filenames.size() < 1 ) @@ -599,7 +602,7 @@ int main( const int argc, const char * const argv[] ) case m_merge: if( filenames.size() < 2 ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } - if( !default_output_filename.size() ) + if( default_output_filename.empty() ) default_output_filename = insert_fixed( filenames[0] ); return merge_files( filenames, default_output_filename, verbosity, force ); case m_range: @@ -608,9 +611,10 @@ int main( const int argc, const char * const argv[] ) range_string, verbosity, force, ignore, to_stdout ); case m_repair: one_file( filenames.size() ); - if( !default_output_filename.size() ) + if( default_output_filename.empty() ) default_output_filename = insert_fixed( filenames[0] ); - return repair_file( filenames[0], default_output_filename, verbosity, force ); + return repair_file( filenames[0], default_output_filename, + verbosity, force ); case m_split: one_file( filenames.size() ); return split_file( filenames[0], default_output_filename, verbosity, force ); @@ -620,7 +624,7 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_test ) outfd = -1; else if( program_mode != m_decompress ) - internal_error( "invalid decompressor operation" ); + internal_error( "invalid decompressor operation." ); if( filenames.empty() ) filenames.push_back("-"); if( !to_stdout && program_mode != m_test && @@ -635,13 +639,13 @@ int main( const int argc, const char * const argv[] ) struct stat in_stats; output_filename.clear(); - if( !filenames[i].size() || filenames[i] == "-" ) + if( filenames[i].empty() || filenames[i] == "-" ) { input_filename.clear(); infd = STDIN_FILENO; if( program_mode != m_test ) { - if( to_stdout || !default_output_filename.size() ) + if( to_stdout || default_output_filename.empty() ) outfd = STDOUT_FILENO; else { @@ -659,7 +663,7 @@ int main( const int argc, const char * const argv[] ) else { input_filename = filenames[i]; - infd = open_instream( input_filename, &in_stats, + infd = open_instream( input_filename.c_str(), &in_stats, to_stdout || program_mode == m_test ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -167,7 +167,7 @@ int open_input_files( const std::vector< std::string > & filenames, struct stat in_stats; ino_t st_ino0 = 0; dev_t st_dev0 = 0; - infd_vector[i] = open_instream( filenames[i], &in_stats, true, true ); + infd_vector[i] = open_instream( filenames[i].c_str(), &in_stats, true, true ); if( infd_vector[i] < 0 ) return 1; if( i == 0 ) { st_ino0 = in_stats.st_ino; st_dev0 = in_stats.st_dev; } else if( st_ino0 == in_stats.st_ino && st_dev0 == in_stats.st_dev ) @@ -192,8 +192,7 @@ int open_input_files( const std::vector< std::string > & filenames, tmp = lseek( infd_vector[i], 0, SEEK_END ); if( tmp < 0 ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "File '%s' is not seekable.\n", filenames[i].c_str() ); + show_error2( "Input file", filenames[i].c_str(), "is not seekable." ); return 1; } } @@ -201,7 +200,8 @@ int open_input_files( const std::vector< std::string > & filenames, { isize = tmp; if( isize < min_member_size ) - { show_error( "Input file is too short." ); return 2; } + { show_error2( "Input file", filenames[i].c_str(), "is too short." ); + return 2; } } else if( isize != tmp ) { show_error( "Sizes of input files are different." ); return 2; } @@ -302,33 +302,12 @@ bool try_decompress_member( const int fd, const unsigned long long msize, } } catch( std::bad_alloc ) - { - show_error( "Not enough memory. Find a machine with more memory." ); - std::exit( 1 ); - } + { show_error( "Not enough memory." ); std::exit( 1 ); } catch( Error e ) {} return false; } -bool verify_header( const File_header & header, const int verbosity ) - { - if( !header.verify_magic() ) - { - show_error( "Bad magic number (file not in lzip format)." ); - return false; - } - if( !header.verify_version() ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "Version %d member format not supported.\n", - header.version() ); - return false; - } - return true; - } - - int merge_files( const std::vector< std::string > & filenames, const std::string & output_filename, const int verbosity, const bool force ) @@ -356,7 +335,7 @@ int merge_files( const std::vector< std::string > & filenames, !safe_seek( outfd, mpos ) ) cleanup_and_fail( output_filename, outfd, 1 ); - if( block_vector.size() == 0 ) + if( block_vector.empty() ) { if( file_index.members() > 1 && try_decompress_member( outfd, msize ) ) continue; diff --git a/range_dec.cc b/range_dec.cc index 5df48b2..2c6c342 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,30 @@ namespace { +const char * format_num( unsigned long long num, + unsigned long long limit = -1ULL, + const int set_prefix = 0 ) + { + const char * const si_prefix[8] = + { "k", "M", "G", "T", "P", "E", "Z", "Y" }; + const char * const binary_prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + static bool si = true; + static char buf[32]; + + if( set_prefix ) si = ( set_prefix > 0 ); + const unsigned factor = ( si ? 1000 : 1024 ); + const char * const * prefix = ( si ? si_prefix : binary_prefix ); + const char * p = ""; + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } + snprintf( buf, sizeof buf, "%llu %s", num, p ); + return buf; + } + + // Returns the number of chars read, or 0 if error. // int parse_long_long( const char * const ptr, long long & value ) @@ -112,9 +136,9 @@ int decompress_member( const int infd, const int outfd, File_header header; rdec.read_data( header.data, File_header::size ); if( rdec.finished() ) // End Of File - { pp( "File ends unexpectedly at member header" ); return 2; } + { pp( "File ends unexpectedly at member header." ); return 2; } if( !header.verify_magic() ) - { pp( "Bad magic number (file not in lzip format)" ); return 2; } + { pp( "Bad magic number (file not in lzip format)." ); return 2; } if( !header.verify_version() ) { if( pp.verbosity() >= 0 ) @@ -125,7 +149,7 @@ int decompress_member( const int infd, const int outfd, } if( header.dictionary_size() < min_dictionary_size || header.dictionary_size() > max_dictionary_size ) - { pp( "Invalid dictionary size in member header" ); return 2; } + { pp( "Invalid dictionary size in member header." ); return 2; } if( pp.verbosity() >= 2 ) { pp(); show_header( header ); } @@ -137,27 +161,23 @@ int decompress_member( const int infd, const int outfd, { pp(); if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %llu\n", + std::fprintf( stderr, "File ends unexpectedly at pos %llu.\n", mpos + rdec.member_position() ); else - std::fprintf( stderr, "Decoder error at pos %llu\n", + std::fprintf( stderr, "Decoder error at pos %llu.\n", mpos + rdec.member_position() ); } return 2; } if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" ); } - catch( std::bad_alloc ) - { - pp( "Not enough memory. Find a machine with more memory" ); - return 1; - } + catch( std::bad_alloc ) { pp( "Not enough memory." ); return 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } return 0; } -int list_file( const std::string & input_filename, const Pretty_print & pp ) +int list_file( const char * const input_filename, const Pretty_print & pp ) { struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); @@ -166,7 +186,7 @@ int list_file( const std::string & input_filename, const Pretty_print & pp ) const File_index file_index( infd ); close( infd ); if( file_index.retval() != 0 ) - { show_error( file_index.error().c_str() ); return file_index.retval(); } + { pp( file_index.error().c_str() ); return file_index.retval(); } if( pp.verbosity() >= 0 ) { @@ -216,7 +236,7 @@ int list_files( const std::vector< std::string > & filenames, for( unsigned i = 0; i < filenames.size(); ++i ) { pp.set_name( filenames[i] ); - const int tmp = list_file( filenames[i], pp ); + const int tmp = list_file( filenames[i].c_str(), pp ); if( tmp > retval ) retval = tmp; } return retval; @@ -231,17 +251,18 @@ int range_decompress( const std::string & input_filename, Block range( 0, 0 ); parse_range( range_string.c_str(), range ); struct stat in_stats; - const int infd = open_instream( input_filename, &in_stats, true, true ); + const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; + Pretty_print pp( input_filename, verbosity ); const File_index file_index( infd ); if( file_index.retval() != 0 ) - { show_error( file_index.error().c_str() ); return file_index.retval(); } + { pp( file_index.error().c_str() ); return file_index.retval(); } if( range.end() > file_index.data_end() ) range.size( std::max( 0LL, file_index.data_end() - range.pos() ) ); if( range.size() <= 0 ) - { if( verbosity >= 1 ) show_error( "Nothing to do." ); return 0; } + { if( verbosity >= 1 ) pp( "Nothing to do." ); return 0; } if( verbosity >= 1 ) { @@ -254,13 +275,12 @@ int range_decompress( const std::string & input_filename, } int outfd = -1; - if( to_stdout || !output_filename.size() ) + if( to_stdout || output_filename.empty() ) outfd = STDOUT_FILENO; else { outfd = open_outstream_rw( output_filename, force ); if( outfd < 0 ) return 1; } - Pretty_print pp( input_filename, verbosity ); int retval = 0; for( int i = 0; i < file_index.members(); ++i ) { @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -54,12 +54,13 @@ int repair_file( const std::string & input_filename, const bool force ) { struct stat in_stats; - const int infd = open_instream( input_filename, &in_stats, true, true ); + const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; + Pretty_print pp( input_filename, verbosity ); const File_index file_index( infd ); if( file_index.retval() != 0 ) - { show_error( file_index.error().c_str() ); return file_index.retval(); } + { pp( file_index.error().c_str() ); return file_index.retval(); } int outfd = -1; for( int i = 0; i < file_index.members(); ++i ) @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,6 +62,25 @@ bool next_filename( std::string & output_filename, const int max_digits ) } +bool verify_header( const File_header & header, const Pretty_print & pp ) + { + if( !header.verify_magic() ) + { + pp( "Bad magic number (file not in lzip format)." ); + return false; + } + if( !header.verify_version() ) + { + if( pp.verbosity() >= 0 ) + { pp(); + std::fprintf( stderr, "Version %d member format not supported.\n", + header.version() ); } + return false; + } + return true; + } + + // Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm) // Return pos of found string or 'pos+size' if not found. // @@ -97,21 +116,22 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, uint8_t * const buffer = base_buffer + tsize; struct stat in_stats; - const int infd = open_instream( input_filename, &in_stats, true, true ); + const int infd = open_instream( input_filename.c_str(), &in_stats, true, true ); if( infd < 0 ) return 1; - const File_index file_index( infd ); - if( file_index.retval() != 0 ) show_error( file_index.error().c_str() ); - const int max_members = ( file_index.retval() ? 999999 : file_index.members() ); - int max_digits = 1; - for( int i = max_members; i >= 10; i /= 10 ) ++max_digits; - + Pretty_print pp( input_filename, verbosity ); int size = seek_read( infd, buffer, buffer_size + hsize, 0 ) - hsize; bool at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) { show_error( "Read error", errno ); return 1; } if( size < min_member_size ) - { show_error( "Input file is too short." ); return 2; } - if( !verify_header( *(File_header *)buffer, verbosity ) ) return 2; + { pp( "Input file is too short." ); return 2; } + if( !verify_header( *(File_header *)buffer, pp ) ) return 2; + + const File_index file_index( infd ); + if( file_index.retval() != 0 ) pp( file_index.error().c_str() ); + const int max_members = file_index.retval() ? 999999 : file_index.members(); + int max_digits = 1; + for( int i = max_members; i >= 10; i /= 10 ) ++max_digits; std::string output_filename; first_filename( input_filename, default_output_filename, output_filename, diff --git a/testsuite/check.sh b/testsuite/check.sh index 314c19e..21da2a8 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for lzip files -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -13,7 +13,7 @@ LZIP="${objdir}"/lziprecover LZIPRECOVER="${LZIP}" framework_failure() { echo "failure in testing framework" ; exit 1 ; } -if [ ! -x "${LZIP}" ] ; then +if [ ! -f "${LZIP}" ] || [ ! -x "${LZIP}" ] ; then echo "${LZIP}: cannot execute" exit 1 fi @@ -22,7 +22,7 @@ if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp cd "${objdir}"/tmp -in="${testdir}"/test.txt +cat "${testdir}"/test.txt > in || framework_failure in_lz="${testdir}"/test.txt.lz inD="${testdir}"/test921-1921.txt fox5_lz="${testdir}"/fox5.lz @@ -57,30 +57,63 @@ fail=0 printf "testing lziprecover-%s..." "$2" -"${LZIPRECOVER}" -lq -if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIPRECOVER}" -mq "${bad1_lz}" -if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIPRECOVER}" -Rq -if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIPRECOVER}" -sq -if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -tq "${in}" -if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -tq < "${in}" -if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -cdq "${in}" -if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -cdq < "${in}" -if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +printf " in: Bad magic number (file not in lzip format).\n" > msg +"${LZIP}" -t in 2> out +if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi +printf " (stdin): Bad magic number (file not in lzip format).\n" > msg +"${LZIP}" -t < in 2> out +if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi +rm -f out msg +"${LZIP}" -cdq in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIP}" -cdq < in +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -lq +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -mq "${bad1_lz}" +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -Rq +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi +"${LZIPRECOVER}" -sq +if [ $? = 1 ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIP}" -t "${in_lz}" || fail=1 "${LZIP}" -cd "${in_lz}" > copy || fail=1 -cmp "${in}" copy || fail=1 +cmp in copy || fail=1 +printf . + +cat "${in_lz}" > copy.lz || framework_failure +printf "to be overwritten" > copy || framework_failure +"${LZIP}" -df copy.lz || fail=1 +cmp in copy || fail=1 +printf . + +printf "to be overwritten" > copy || framework_failure +"${LZIP}" -df -o copy < "${in_lz}" || fail=1 +cmp in copy || fail=1 +printf . + +cat "${in_lz}" > anyothername || framework_failure +"${LZIP}" -d anyothername || fail=1 +cmp in anyothername.out || fail=1 +printf . + +cat in in > in2 || framework_failure +cat "${in_lz}" "${in_lz}" > copy2.lz || framework_failure +"${LZIP}" -t copy2.lz || fail=1 +printf . +"${LZIP}" -cd copy2.lz > copy2 || fail=1 +cmp in2 copy2 || fail=1 +printf . + +printf "garbage" >> copy2.lz || framework_failure +printf "to be overwritten" > copy2 || framework_failure +"${LZIP}" -df copy2.lz || fail=1 +cmp in2 copy2 || fail=1 printf . "${LZIPRECOVER}" -D 921-1921 -fo copy "${in_lz}" || fail=1 @@ -89,23 +122,23 @@ cmp "${inD}" copy || fail=1 cmp "${inD}" copy || fail=1 printf . "${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" -fo copy -if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy -if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi rm -f copy.lz "${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}" -if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${fox5_lz}" -if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${bad1_lz}" "${bad2_lz}" "${bad1_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${f5b1_lz}" "${f5b5_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${f5b3_lz}" "${f5b5_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -m -o copy.lz "${bad3_lz}" "${bad4_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do "${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${i}" || fail=1 @@ -178,9 +211,9 @@ printf . rm -f copy.lz "${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1 -if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -R -o copy.lz "${bad2_lz}" -q -if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else fail=1 ; printf - ; fi +if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi "${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1 cmp "${fox5_lz}" copy.lz || fail=1 "${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1 @@ -192,20 +225,10 @@ printf "garbage" >> copy || fail=1 "${LZIPRECOVER}" -s -o copy.lz copy || fail=1 for i in 1 2 3 ; do "${LZIPRECOVER}" -cd rec${i}copy.lz > copy || fail=1 - cmp "${in}" copy || fail=1 + cmp in copy || fail=1 done printf . -cat "${in_lz}" > anyothername || framework_failure -"${LZIP}" -d anyothername || fail=1 -cmp "${in}" anyothername.out || fail=1 -printf . - -cat "${in}" "${in}" > in2 || framework_failure -cat "${in_lz}" "${in_lz}" | "${LZIP}" -d > copy2 || fail=1 -cmp in2 copy2 || fail=1 -printf . - echo if [ ${fail} = 0 ] ; then echo "tests completed successfully." diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc index b2cdc4e..0a4c4c4 100644 --- a/testsuite/unzcrash.cc +++ b/testsuite/unzcrash.cc @@ -1,6 +1,7 @@ /* Unzcrash - Tests robustness of decompressors to corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014 + Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,7 +45,7 @@ namespace { const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; -const char * const program_year = "2013"; +const char * const program_year = "2014"; const char * invocation_name = 0; int verbosity = 0; @@ -108,7 +109,7 @@ void show_error( const char * const msg, const int errcode = 0, void internal_error( const char * const msg ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); std::exit( 3 ); } @@ -277,7 +278,7 @@ int main( const int argc, const char * const argv[] ) case 's': max_size = getnum( arg, 1, buffer_size ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; - default : internal_error( "uncaught option" ); + default : internal_error( "uncaught option." ); } } // end process options |