diff options
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | INSTALL | 13 | ||||
-rw-r--r-- | Makefile.in | 40 | ||||
-rw-r--r-- | NEWS | 24 | ||||
-rw-r--r-- | README | 7 | ||||
-rw-r--r-- | arg_parser.cc | 8 | ||||
-rw-r--r-- | arg_parser.h | 2 | ||||
-rwxr-xr-x | configure | 39 | ||||
-rw-r--r-- | decoder.cc | 90 | ||||
-rw-r--r-- | decoder.h | 124 | ||||
-rw-r--r-- | doc/lziprecover.1 | 4 | ||||
-rw-r--r-- | doc/lziprecover.info | 51 | ||||
-rw-r--r-- | doc/lziprecover.texinfo | 35 | ||||
-rw-r--r-- | file_index.cc | 134 | ||||
-rw-r--r-- | file_index.h | 82 | ||||
-rw-r--r-- | lzip.h | 165 | ||||
-rw-r--r-- | main.cc | 218 | ||||
-rw-r--r-- | merge.cc | 77 | ||||
-rw-r--r-- | range_dec.cc | 245 | ||||
-rw-r--r-- | repair.cc | 44 | ||||
-rw-r--r-- | split.cc | 22 | ||||
-rwxr-xr-x | testsuite/check.sh | 11 | ||||
-rw-r--r-- | testsuite/unzcrash.cc | 31 |
23 files changed, 817 insertions, 660 deletions
@@ -1,3 +1,12 @@ +2013-02-27 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 1.14-rc1 released. + * Option '-l, --list' now accepts more than one file. + * Decompression time has been reduced by 12%. + * Makefile.in: Added new target 'install-as-lzip'. + * Makefile.in: Added new target 'install-bin'. + * main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2. + 2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.13 released. @@ -60,7 +69,7 @@ * testsuite/unzcrash.cc: Test all 1-byte errors. -Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.3.5 and 3.3.6, but the code should compile with any +I use gcc 4.7.2 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -30,9 +30,16 @@ from the main archive. 4. Optionally, type 'make check' to run the tests that come with lziprecover. -5. Type 'make install' to install the programs and any data files and +5. Type 'make install' to install the program and any data files and documentation. + You can install only the program, the info manual or the man page + typing 'make install-bin', 'make install-info' or 'make install-man' + respectively. + +5a. Type 'make install-as-lzip' to install the program and any data + files and documentation, and link the program to the name 'lzip'. + Another way ----------- @@ -51,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 08df8ab..4e619e4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,12 +6,13 @@ INSTALL_DATA = $(INSTALL) -p -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh -objs = arg_parser.o decoder.o merge.o range_dec.o repair.o split.o main.o +objs = arg_parser.o file_index.o merge.o range_dec.o repair.o split.o \ + decoder.o main.o unzobjs = arg_parser.o unzcrash.o -.PHONY : all install install-info install-man install-strip \ - uninstall uninstall-info uninstall-man \ +.PHONY : all install install-bin install-info install-man install-strip \ + install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \ doc info man check dist clean distclean all : $(progname) @@ -34,15 +35,16 @@ unzcrash.o : testsuite/unzcrash.cc %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< -$(objs) : Makefile -arg_parser.o : arg_parser.h -decoder.o : lzip.h decoder.h -main.o : arg_parser.h lzip.h decoder.h -merge.o : lzip.h decoder.h -range_dec.o : lzip.h decoder.h -repair.o : lzip.h -split.o : lzip.h -unzcrash.o : arg_parser.h Makefile +$(objs) : Makefile +arg_parser.o : arg_parser.h +decoder.o : lzip.h decoder.h +file_index.o : lzip.h file_index.h +main.o : arg_parser.h lzip.h decoder.h +merge.o : lzip.h decoder.h file_index.h +range_dec.o : lzip.h decoder.h file_index.h +repair.o : lzip.h +split.o : lzip.h +unzcrash.o : arg_parser.h Makefile doc : info man @@ -64,14 +66,16 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in check : all @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) -install : all install-info install-man +install : install-bin install-info install-man + +install-bin : all if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" install-info : if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" - -install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info + -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" install-man : if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi @@ -80,7 +84,13 @@ install-man : install-strip : all $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install -uninstall : uninstall-info uninstall-man +install-as-lzip : install + -rm -f "$(DESTDIR)$(bindir)/lzip" + cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip + +uninstall : uninstall-bin uninstall-info uninstall-man + +uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/$(progname)" uninstall-info : @@ -1,23 +1,9 @@ -Changes in version 1.13: +Changes in version 1.14: -Lziprecover is now distributed in its own package. Until version 1.12 it -was included in the lzip package. +Option "-l, --list" now accepts more than one file. -Decompressor options (-c, -d, -k, -t) have been implemented in -lziprecover so that a external decompressor is not needed for recovery -nor for "make check". +Decompression time has been reduced by 12%. -The new option "-D, --range-decompress" which extracts a range of bytes -decompressing only the members containing the desired data, has been -added. +The target "install-as-lzip" has been added to the Makefile. -The new option "-l, --list" which prints correct total file sizes and -ratios even for multi-member files, has been added. - -"--merge" and "--repair" now remove the output file if recovery fails. - -Quote characters in messages have been changed as advised by GNU Coding -Standards. - -Configure option "--datadir" has been renamed to "--datarootdir" to -follow GNU Standards. +The target "install-bin" has been added to the Makefile. @@ -40,8 +40,13 @@ If the cause of file corruption is damaged media, the combination GNU ddrescue + lziprecover is the best option for recovering data from multiple damaged copies. +This package also includes unzcrash, a program written to test +robustness to decompression of corrupted data, inspired by unzcrash.c +from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover +directory to build it. Then try 'unzcrash --help'. -Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + +Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/arg_parser.cc b/arg_parser.cc index b3fd48d..a28d2ba 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -36,7 +36,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, const Option options[], int & argind ) { - unsigned int len; + unsigned len; int index = -1; bool exact = false, ambig = false; @@ -44,7 +44,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a // Test all long options for either exact match or abbreviated matches. for( int i = 0; options[i].code != 0; ++i ) - if( options[i].name && !std::strncmp( options[i].name, &opt[2], len ) ) + if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 ) { if( std::strlen( options[i].name ) == len ) // Exact match found { index = i; exact = true; break; } @@ -178,7 +178,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[], if( error_.size() ) data.clear(); else { - for( unsigned int i = 0; i < non_options.size(); ++i ) + for( unsigned i = 0; i < non_options.size(); ++i ) { data.push_back( Record() ); data.back().argument.swap( non_options[i] ); } while( argind < argc ) { data.push_back( Record() ); data.back().argument = argv[argind++]; } diff --git a/arg_parser.h b/arg_parser.h index 4fbd1af..5248cb1 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -1,6 +1,6 @@ #! /bin/sh # configure script for Lziprecover - Data recovery tool for lzipped files -# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -8,9 +8,9 @@ args= no_create= pkgname=lziprecover -pkgversion=1.13 +pkgversion=1.14-rc1 progname=lziprecover -srctrigger=lzip.h +srctrigger=doc/lziprecover.texinfo # clear some things potentially inherited from environment. LC_ALL=C @@ -22,11 +22,19 @@ bindir='$(exec_prefix)/bin' datarootdir='$(prefix)/share' infodir='$(datarootdir)/info' mandir='$(datarootdir)/man' -CXX= +CXX=g++ CPPFLAGS= CXXFLAGS='-Wall -W -O2' LDFLAGS= +# checking whether we are using GNU C++. +if [ ! -x /bin/g++ ] && + [ ! -x /usr/bin/g++ ] && + [ ! -x /usr/local/bin/g++ ] ; then + CXX=c++ + CXXFLAGS='-W -O2' +fi + # Loop over all args while [ -n "$1" ] ; do @@ -91,14 +99,14 @@ done srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. - if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi - if [ ! -r ${srcdir}/${srctrigger} ] ; then + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then ## the sed command below emulates the dirname command srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` fi fi -if [ ! -r ${srcdir}/${srctrigger} ] ; then +if [ ! -r "${srcdir}/${srctrigger}" ] ; then exec 1>&2 echo echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" @@ -107,18 +115,7 @@ if [ ! -r ${srcdir}/${srctrigger} ] ; then fi # Set srcdir to . if that's what it is. -if [ "`pwd`" = "`cd ${srcdir} ; pwd`" ] ; then srcdir=. ; fi - -# checking whether we are using GNU C++. -if [ -z "${CXX}" ] ; then # Let the user override the test. - if [ -x /bin/g++ ] || - [ -x /usr/bin/g++ ] || - [ -x /usr/local/bin/g++ ] ; then - CXX="g++" - else - CXX="c++" - fi -fi +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi echo if [ -z "${no_create}" ] ; then @@ -152,7 +149,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lziprecover - Data recovery tool for lzipped files -# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission @@ -173,6 +170,6 @@ CPPFLAGS = ${CPPFLAGS} CXXFLAGS = ${CXXFLAGS} LDFLAGS = ${LDFLAGS} EOF -cat ${srcdir}/Makefile.in >> Makefile +cat "${srcdir}/Makefile.in" >> Makefile echo "OK. Now you can run make." @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,19 +34,19 @@ const CRC32 crc32; -void Pretty_print::operator()( const char * const msg ) const +void Pretty_print::operator()( const char * const msg, FILE * const f ) const { if( verbosity_ >= 0 ) { if( first_post ) { first_post = false; - std::fprintf( stderr, " %s: ", name_.c_str() ); - for( unsigned int i = 0; i < longest_name - name_.size(); ++i ) - std::fprintf( stderr, " " ); - if( !msg ) std::fflush( stderr ); + std::fprintf( f, " %s: ", name_.c_str() ); + for( unsigned i = 0; i < longest_name - name_.size(); ++i ) + std::fprintf( f, " " ); + if( !msg ) std::fflush( f ); } - if( msg ) std::fprintf( stderr, "%s.\n", msg ); + if( msg ) std::fprintf( f, "%s.\n", msg ); } } @@ -60,13 +60,13 @@ int readblock( const int fd, uint8_t * const buf, const int size ) errno = 0; while( rest > 0 ) { - errno = 0; const int n = read( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; - else if( n == 0 ) break; + else if( n == 0 ) break; // EOF else if( errno != EINTR && errno != EAGAIN ) break; + errno = 0; } - return ( rest > 0 ) ? size - rest : size; + return size - rest; } @@ -79,12 +79,12 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) errno = 0; while( rest > 0 ) { - errno = 0; const int n = write( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; + errno = 0; } - return ( rest > 0 ) ? size - rest : size; + return size - rest; } @@ -110,9 +110,10 @@ void LZ_decoder::flush_data() crc32.update( crc_, buffer + stream_pos, size ); if( outfd >= 0 ) { - const long long i = std::max( 0LL, outskip - stream_position() ); + const unsigned long long sp = stream_position(); + const long long i = positive_diff( outskip, sp ); const long long s = - std::min( outend - stream_position(), (long long)size ) - i; + std::min( positive_diff( outend, sp ), (unsigned long long)size ) - i; if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s ) throw Error( "Write error" ); } @@ -126,10 +127,11 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { File_trailer trailer; const int trailer_size = File_trailer::size( member_version ); - const long long member_size = range_decoder.member_position() + trailer_size; + const unsigned long long member_size = + range_decoder.member_position() + trailer_size; bool error = false; - const int size = range_decoder.read( trailer.data, trailer_size ); + int size = range_decoder.read_data( trailer.data, trailer_size ); if( size < trailer_size ) { error = true; @@ -139,9 +141,11 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const std::fprintf( stderr, "Trailer truncated at trailer position %d;" " some checks may fail.\n", size ); } - for( int i = size; i < trailer_size; ++i ) trailer.data[i] = 0; + while( size < trailer_size ) trailer.data[size++] = 0; } + if( member_version == 0 ) trailer.member_size( member_size ); + if( !range_decoder.code_is_zero() ) { error = true; @@ -154,7 +158,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const { pp(); std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n", - (unsigned int)trailer.data_crc(), (unsigned int)crc() ); + trailer.data_crc(), crc() ); } } if( trailer.data_size() != data_position() ) @@ -163,7 +167,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( pp.verbosity() >= 0 ) { pp(); - std::fprintf( stderr, "Data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n", + std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n", trailer.data_size(), data_position(), data_position() ); } } @@ -173,7 +177,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const if( pp.verbosity() >= 0 ) { pp(); - std::fprintf( stderr, "Member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n", + std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n", trailer.member_size(), member_size, member_size ); } } @@ -183,9 +187,8 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const ( 8.0 * member_size ) / data_position(), 100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) ); if( !error && pp.verbosity() >= 4 ) - std::fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ", - (unsigned int)trailer.data_crc(), trailer.data_size(), - trailer.member_size() ); + std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", + trailer.data_crc(), trailer.data_size(), trailer.member_size() ); return !error; } @@ -194,6 +197,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const // 3 = trailer error, 4 = unknown marker found. int LZ_decoder::decode_member( const Pretty_print & pp ) { + Bit_model bm_literal[1<<literal_context_bits][0x300]; Bit_model bm_match[State::states][pos_states]; Bit_model bm_rep[State::states]; Bit_model bm_rep0[State::states]; @@ -201,32 +205,30 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model+1]; + Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - - unsigned int rep0 = 0; // rep[0-3] latest four distances - unsigned int rep1 = 0; // used for efficient coding of - unsigned int rep2 = 0; // repeated distances - unsigned int rep3 = 0; - Len_decoder len_decoder; Len_decoder rep_match_len_decoder; - Literal_decoder literal_decoder; + + unsigned rep0 = 0; // rep[0-3] latest four distances + unsigned rep1 = 0; // used for efficient coding of + unsigned rep2 = 0; // repeated distances + unsigned rep3 = 0; + State state; range_decoder.load(); - while( true ) + while( !range_decoder.finished() ) { - if( range_decoder.finished() ) { flush_data(); return 2; } const int pos_state = data_position() & pos_state_mask; if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 ) { const uint8_t prev_byte = get_prev_byte(); if( state.is_char() ) - put_byte( literal_decoder.decode( range_decoder, prev_byte ) ); + put_byte( range_decoder.decode_tree( bm_literal[get_lit_state(prev_byte)], 8 ) ); else - put_byte( literal_decoder.decode_matched( range_decoder, prev_byte, - get_byte( rep0 ) ) ); + put_byte( range_decoder.decode_matched( bm_literal[get_lit_state(prev_byte)], + get_byte( rep0 ) ) ); state.set_char(); } else @@ -237,7 +239,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) len = 0; if( range_decoder.decode_bit( bm_rep0[state()] ) == 1 ) { - unsigned int distance; + unsigned distance; if( range_decoder.decode_bit( bm_rep1[state()] ) == 0 ) distance = rep1; else @@ -263,20 +265,20 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } else { - const unsigned int rep0_saved = rep0; + const unsigned rep0_saved = rep0; len = min_match_len + len_decoder.decode( range_decoder, pos_state ); - const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits ); + const int dis_slot = range_decoder.decode_tree6( bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += range_decoder.decode_tree_reversed( bm_dis + rep0 - dis_slot, direct_bits ); + rep0 += range_decoder.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, direct_bits ); else { rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); + rep0 += range_decoder.decode_tree_reversed4( bm_align ); if( rep0 == 0xFFFFFFFFU ) // Marker found { rep0 = rep0_saved; @@ -301,11 +303,13 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state.set_match(); - if( rep0 >= (unsigned int)dictionary_size || - ( rep0 >= (unsigned int)pos && !partial_data_pos ) ) + if( rep0 >= (unsigned)dictionary_size || + ( rep0 >= (unsigned)pos && !partial_data_pos ) ) { flush_data(); return 1; } } copy_block( rep0, len ); } } + flush_data(); + return 2; } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ class Range_decoder { enum { buffer_size = 16384 }; - long long partial_member_pos; + unsigned long long partial_member_pos; uint8_t * const buffer; // input buffer int pos; // current pos in buffer int stream_pos; // when reached, a new block must be read @@ -42,22 +42,23 @@ public: code( 0 ), range( 0xFFFFFFFFU ), infd( ifd ), - at_stream_end( false ) {} + at_stream_end( false ) + {} ~Range_decoder() { delete[] buffer; } bool code_is_zero() const { return ( code == 0 ); } bool finished() { return pos >= stream_pos && !read_block(); } - long long member_position() const { return partial_member_pos + pos; } + unsigned long long member_position() const { return partial_member_pos + pos; } void reset_member_position() { partial_member_pos = -pos; } uint8_t get_byte() { - if( finished() ) return 0x55; // make code != 0 + if( finished() ) return 0xAA; // make code != 0 return buffer[pos++]; } - int read( uint8_t * const outbuf, const int size ) + int read_data( uint8_t * const outbuf, const int size ) { int rest = size; while( rest > 0 && !finished() ) @@ -67,14 +68,14 @@ public: pos += rd; rest -= rd; } - return ( rest > 0 ) ? size - rest : size; + return size - rest; } void load() { code = 0; - range = 0xFFFFFFFFU; for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + range = 0xFFFFFFFFU; } void normalize() @@ -88,17 +89,14 @@ public: int symbol = 0; for( int i = num_bits; i > 0; --i ) { - symbol <<= 1; - if( range <= 0x00FFFFFFU ) - { - range <<= 7; code = (code << 8) | get_byte(); - if( code >= range ) { code -= range; symbol |= 1; } - } - else - { - range >>= 1; - if( code >= range ) { code -= range; symbol |= 1; } - } + normalize(); + range >>= 1; +// symbol <<= 1; +// if( code >= range ) { code -= range; symbol |= 1; } + const uint32_t mask = 0U - (code < range); + code -= range; + code += range & mask; + symbol = (symbol << 1) + (mask + 1); } return symbol; } @@ -130,36 +128,63 @@ public: return model - (1 << num_bits); } + int decode_tree6( Bit_model bm[] ) + { + int model = 1; + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + return model - (1 << 6); + } + int decode_tree_reversed( Bit_model bm[], const int num_bits ) { int model = 1; int symbol = 0; for( int i = 0; i < num_bits; ++i ) { - const int bit = decode_bit( bm[model] ); + const bool bit = decode_bit( bm[model] ); model <<= 1; - if( bit ) { model |= 1; symbol |= (1 << i); } + if( bit ) { ++model; symbol |= (1 << i); } } return symbol; } - int decode_matched( Bit_model bm[], const int match_byte ) + int decode_tree_reversed4( Bit_model bm[] ) + { + int model = 1; + int symbol = 0; + int bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= bit; + bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 1); + bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 2); + if( decode_bit( bm[model] ) ) symbol |= 8; + return symbol; + } + + int decode_matched( Bit_model bm[], int match_byte ) { Bit_model * const bm1 = bm + 0x100; int symbol = 1; for( int i = 7; i >= 0; --i ) { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); - symbol = ( symbol << 1 ) | bit; - if( match_bit != bit ) + match_byte <<= 1; + const int match_bit = match_byte & 0x100; + const int bit = decode_bit( bm1[match_bit+symbol] ); + symbol = ( symbol << 1 ) + bit; + if( match_bit != bit << 8 ) { - while( --i >= 0 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) + decode_bit( bm[symbol] ); break; } } - return symbol & 0xFF; + return symbol - 0x100; } }; @@ -186,29 +211,12 @@ public: }; -class Literal_decoder - { - Bit_model bm_literal[1<<literal_context_bits][0x300]; - - int lstate( const uint8_t prev_byte ) const - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - -public: - uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte ) - { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); } - - uint8_t decode_matched( Range_decoder & range_decoder, - const uint8_t prev_byte, const uint8_t match_byte ) - { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], - match_byte ); } - }; - - class LZ_decoder { - const long long outskip; - const long long outend; - long long partial_data_pos; + const unsigned long long outskip; + const unsigned long long outend; + unsigned long long partial_data_pos; + Range_decoder & range_decoder; const int dictionary_size; const int buffer_size; uint8_t * const buffer; // output buffer @@ -217,9 +225,8 @@ class LZ_decoder uint32_t crc_; const int outfd; // output file descriptor const int member_version; - Range_decoder & range_decoder; - long long stream_position() const { return partial_data_pos + stream_pos; } + unsigned long long stream_position() const { return partial_data_pos + stream_pos; } void flush_data(); bool verify_trailer( const Pretty_print & pp ) const; @@ -248,7 +255,7 @@ class LZ_decoder if( i < 0 ) i += buffer_size; if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) ) { - std::memcpy( buffer + pos, buffer + i, len ); + std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap pos += len; } else for( ; len > 0; --len ) @@ -264,11 +271,13 @@ class LZ_decoder public: LZ_decoder( const File_header & header, Range_decoder & rdec, const int ofd, - const long long oskip = 0, const long long oend = LLONG_MAX ) + const unsigned long long oskip = 0, + const unsigned long long oend = -1ULL ) : outskip( oskip ), outend( oend ), partial_data_pos( 0 ), + range_decoder( rdec ), dictionary_size( header.dictionary_size() ), buffer_size( std::max( 65536, dictionary_size ) ), buffer( new uint8_t[buffer_size] ), @@ -276,15 +285,14 @@ public: stream_pos( 0 ), crc_( 0xFFFFFFFFU ), outfd( ofd ), - member_version( header.version() ), - range_decoder( rdec ) + member_version( header.version() ) { buffer[buffer_size-1] = 0; } // prev_byte of first_byte ~LZ_decoder() { delete[] buffer; } - uint32_t crc() const { return crc_ ^ 0xFFFFFFFFU; } + unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } - long long data_position() const { return partial_data_pos + pos; } + unsigned long long data_position() const { return partial_data_pos + pos; } int decode_member( const Pretty_print & pp ); }; diff --git a/doc/lziprecover.1 b/doc/lziprecover.1 index 862cbe0..d63f6ab 100644 --- a/doc/lziprecover.1 +++ b/doc/lziprecover.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LZIPRECOVER "1" "February 2012" "Lziprecover 1.13" "User Commands" +.TH LZIPRECOVER "1" "February 2013" "Lziprecover 1.14-rc1" "User Commands" .SH NAME Lziprecover \- recovers data from damaged lzip files .SH SYNOPSIS @@ -61,7 +61,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html .SH COPYRIGHT -Copyright \(co 2012 Antonio Diaz Diaz. +Copyright \(co 2013 Antonio Diaz Diaz. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/lziprecover.info b/doc/lziprecover.info index 73830cf..7b24dcb 100644 --- a/doc/lziprecover.info +++ b/doc/lziprecover.info @@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir) Lziprecover Manual ****************** -This manual is for Lziprecover (version 1.13, 24 February 2012). +This manual is for Lziprecover (version 1.14-rc1, 27 February 2013). * Menu: @@ -24,7 +24,7 @@ This manual is for Lziprecover (version 1.13, 24 February 2012). * Concept Index:: Index of concepts - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -132,7 +132,8 @@ The format for running lziprecover is: `-l' `--list' Print total file sizes and ratios. The values produced are correct - even for multi-member files. + even for multi-member files. Use it together with `-v' to see + information about the members in the file. `-m' `--merge' @@ -221,7 +222,12 @@ File: lziprecover.info, Node: File Format, Next: Examples, Prev: Invoking Lzi 3 File Format ************* -In the diagram below, a box like this: +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away. +-- Antoine de Saint-Exupery + + + In the diagram below, a box like this: +---+ | | <-- the vertical bars might be missing +---+ @@ -250,15 +256,18 @@ additional information before, between, or after them. "LZIP". `VN (version number, 1 byte)' - Just in case something needs to be modified in the future. Valid - values are 0 and 1. Version 0 files are deprecated. They can - contain only one member and lack the `Member size' field. + Just in case something needs to be modified in the future. 1 for + now. `DS (coded dictionary size, 1 byte)' - Bits 4-0 contain the base 2 logarithm of the base dictionary size. - Bits 7-5 contain the number of "wedges" to substract from the base - dictionary size to obtain the dictionary size. The size of a wedge - is (base dictionary size / 16). + Lzip divides the distance between any two powers of 2 into 8 + equally spaced intervals, named "wedges". The dictionary size is + calculated by taking a power of 2 (the base size) and substracting + from it a number of wedges between 0 and 7. The size of a wedge is + (base_size / 16). + Bits 4-0 contain the base 2 logarithm of the base size (12 to 29). + Bits 7-5 contain the number of wedges (0 to 7) to substract from + the base size to obtain the dictionary size. Valid values for dictionary size range from 4KiB to 512MiB. `Lzma stream' @@ -272,9 +281,9 @@ additional information before, between, or after them. Size of the uncompressed original data. `Member size (8 bytes)' - Total size of the member, including header and trailer. This - facilitates safe recovery of undamaged members from multi-member - files. + Total size of the member, including header and trailer. This field + acts as a distributed index, and facilitates safe recovery of + undamaged members from multi-member files. @@ -399,13 +408,13 @@ Concept Index Tag Table: Node: Top231 -Node: Introduction900 -Node: Invoking Lziprecover2937 -Node: File Format7982 -Node: Examples9989 -Ref: ddrescue-example11207 -Node: Problems13038 -Node: Concept Index13588 +Node: Introduction910 +Node: Invoking Lziprecover2947 +Node: File Format8073 +Node: Examples10394 +Ref: ddrescue-example11612 +Node: Problems13443 +Node: Concept Index13993 End Tag Table diff --git a/doc/lziprecover.texinfo b/doc/lziprecover.texinfo index 22eea8a..872abb4 100644 --- a/doc/lziprecover.texinfo +++ b/doc/lziprecover.texinfo @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 24 February 2012 -@set VERSION 1.13 +@set UPDATED 27 February 2013 +@set VERSION 1.14-rc1 @dircategory Data Compression @direntry @@ -44,7 +44,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -155,7 +155,9 @@ Keep (don't delete) input files during decompression. @item -l @itemx --list Print total file sizes and ratios. The values produced are correct even -for multi-member files. +for multi-member files. Use it together with @samp{-v} to see +information about the members in the file. + @item -m @itemx --merge @@ -245,6 +247,11 @@ Table of SI and binary prefixes (unit multipliers): @chapter File Format @cindex file format +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away.@* +--- Antoine de Saint-Exupery + +@sp 1 In the diagram below, a box like this: @verbatim +---+ @@ -280,15 +287,16 @@ All multibyte values are stored in little endian order. A four byte string, identifying the lzip format, with the value "LZIP". @item VN (version number, 1 byte) -Just in case something needs to be modified in the future. Valid values -are 0 and 1. Version 0 files are deprecated. They can contain only one -member and lack the @samp{Member size} field. +Just in case something needs to be modified in the future. 1 for now. @item DS (coded dictionary size, 1 byte) -Bits 4-0 contain the base 2 logarithm of the base dictionary size.@* -Bits 7-5 contain the number of "wedges" to substract from the base -dictionary size to obtain the dictionary size. The size of a wedge is -(base dictionary size / 16).@* +Lzip divides the distance between any two powers of 2 into 8 equally +spaced intervals, named "wedges". The dictionary size is calculated by +taking a power of 2 (the base size) and substracting from it a number of +wedges between 0 and 7. The size of a wedge is (base_size / 16).@* +Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* +Bits 7-5 contain the number of wedges (0 to 7) to substract from the +base size to obtain the dictionary size.@* Valid values for dictionary size range from 4KiB to 512MiB. @item Lzma stream @@ -302,8 +310,9 @@ CRC of the uncompressed original data. Size of the uncompressed original data. @item Member size (8 bytes) -Total size of the member, including header and trailer. This facilitates -safe recovery of undamaged members from multi-member files. +Total size of the member, including header and trailer. This field acts +as a distributed index, and facilitates safe recovery of undamaged +members from multi-member files. @end table diff --git a/file_index.cc b/file_index.cc new file mode 100644 index 0000000..41bee41 --- /dev/null +++ b/file_index.cc @@ -0,0 +1,134 @@ +/* Lziprecover - Data recovery tool for lzipped files + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> + +#include "lzip.h" +#include "file_index.h" + + +const char * format_num( unsigned long long num, + unsigned long long limit, + const int set_prefix ) + { + const char * const si_prefix[8] = + { "k", "M", "G", "T", "P", "E", "Z", "Y" }; + const char * const binary_prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + static bool si = true; + static char buf[32]; + + if( set_prefix ) si = ( set_prefix > 0 ); + const unsigned factor = ( si ? 1000 : 1024 ); + const char * const * prefix = ( si ? si_prefix : binary_prefix ); + const char * p = ""; + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } + snprintf( buf, sizeof buf, "%llu %s", num, p ); + return buf; + } + + +File_index::File_index( const int infd ) : retval_( 0 ) + { + const long long isize = lseek( infd, 0, SEEK_END ); + if( isize < 0 ) + { error_ = "Input file is not seekable :"; + error_ += std::strerror( errno ); retval_ = 1; return; } + if( isize > INT64_MAX ) + { error_ = "Input file is too long (2^63 bytes or more)."; + retval_ = 2; return; } + long long pos = isize; // always points to a header or EOF + File_header header; + File_trailer trailer; + + if( isize < min_member_size ) + { error_ = "Input file is too short."; retval_ = 2; return; } + if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) + { error_ = "Error reading member header :"; + error_ += std::strerror( errno ); retval_ = 1; return; } + if( !header.verify_magic() ) + { error_ = "Bad magic number (file not in lzip format)."; + retval_ = 2; return; } + if( !header.verify_version() ) + { error_ = "Version "; error_ += format_num( header.version() ); + error_ += "member format not supported."; retval_ = 2; return; } + + while( pos >= min_member_size ) + { + if( seek_read( infd, trailer.data, File_trailer::size(), + pos - File_trailer::size() ) != File_trailer::size() ) + { error_ = "Error reading member trailer :"; + error_ += std::strerror( errno ); retval_ = 1; break; } + const long long member_size = trailer.member_size(); + if( member_size < min_member_size || member_size > pos ) + { + if( member_vector.size() == 0 ) // maybe trailing garbage + { --pos; continue; } + error_ = "Member size in trailer is corrupt at pos "; + error_ += format_num( pos - 8 ); retval_ = 2; break; + } + if( seek_read( infd, header.data, File_header::size, + pos - member_size ) != File_header::size ) + { error_ = "Error reading member header :"; + error_ += std::strerror( errno ); retval_ = 1; break; } + if( !header.verify_magic() || !header.verify_version() ) + { + if( member_vector.size() == 0 ) // maybe trailing garbage + { --pos; continue; } + error_ = "Bad header at pos "; + error_ += format_num( pos - member_size ); retval_ = 2; break; + } + if( member_vector.size() == 0 && isize - pos > File_header::size && + seek_read( infd, header.data, File_header::size, pos ) == File_header::size && + header.verify_magic() && header.verify_version() ) + { // last trailer is corrupt + error_ = "Member size in trailer is corrupt at pos "; + error_ += format_num( isize - 8 ); retval_ = 2; break; + } + pos -= member_size; + member_vector.push_back( Member( 0, trailer.data_size(), + pos, member_size ) ); + } + if( pos != 0 || member_vector.size() == 0 ) + { + member_vector.clear(); + if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } + return; + } + std::reverse( member_vector.begin(), member_vector.end() ); + for( unsigned i = 0; i < member_vector.size() - 1; ++i ) + { + const long long end = member_vector[i].dblock.end(); + if( end < 0 || end > INT64_MAX ) + { + member_vector.clear(); + error_ = "Data in input file is too long (2^63 bytes or more)."; + retval_ = 2; return; + } + member_vector[i+1].dblock.pos( end ); + } + } diff --git a/file_index.h b/file_index.h new file mode 100644 index 0000000..2f055b1 --- /dev/null +++ b/file_index.h @@ -0,0 +1,82 @@ +/* Lziprecover - Data recovery tool for lzipped files + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +class Block + { + long long pos_, size_; // pos + size <= INT64_MAX + +public: + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + + long long pos() const { return pos_; } + long long size() const { return size_; } + long long end() const { return pos_ + size_; } + + void pos( const long long p ) { pos_ = p; } + void size( const long long s ) { size_ = s; } + + bool overlaps( const Block & b ) const + { return ( pos_ < b.end() && b.pos_ < end() ); } + void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } + }; + + +class File_index + { + struct Member + { + Block dblock, mblock; // data block, member block + + Member( const long long dp, const long long ds, + const long long mp, const long long ms ) + : dblock( dp, ds ), mblock( mp, ms ) {} + }; + + std::vector< Member > member_vector; + std::string error_; + int retval_; + +public: + File_index( const int infd ); + + const std::string & error() const { return error_; } + int retval() const { return retval_; } + + long long data_end() const + { if( member_vector.size() ) return member_vector.back().dblock.end(); + else return 0; } + + long long file_end() const + { if( member_vector.size() ) return member_vector.back().mblock.end(); + else return 0; } + + const Block & dblock( const int i ) const + { return member_vector[i].dblock; } + const Block & mblock( const int i ) const + { return member_vector[i].mblock; } + int members() const { return (int)member_vector.size(); } + }; + + +const char * format_num( unsigned long long num, + unsigned long long limit = -1ULL, + const int set_prefix = 0 ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,41 +17,23 @@ class State { - unsigned char st; + int st; public: enum { states = 12 }; State() : st( 0 ) {} - unsigned char operator()() const { return st; } + int operator()() const { return st; } bool is_char() const { return st < 7; } void set_char() { - static const unsigned char next[states] = - { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; st = next[st]; } - void set_match() - { - static const unsigned char next[states] = - { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 }; - st = next[st]; - } - - void set_rep() - { - static const unsigned char next[states] = - { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 }; - st = next[st]; - } - - void set_short_rep() - { - static const unsigned char next[states] = - { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 }; - st = next[st]; - } + void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); } + void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); } + void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); } }; @@ -69,7 +51,7 @@ enum { dis_slot_bits = 6, start_dis_model = 4, end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), + modeled_distances = 1 << (end_dis_model / 2), // 128 dis_align_bits = 4, dis_align_size = 1 << dis_align_bits, @@ -87,12 +69,11 @@ enum { max_dis_states = 4 }; -inline int get_dis_state( int len ) - { - len -= min_match_len; - if( len >= max_dis_states ) len = max_dis_states - 1; - return len; - } +inline int get_dis_state( const int len ) + { return std::min( len - min_match_len, max_dis_states - 1 ); } + +inline int get_lit_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } enum { bit_model_move_bits = 5, @@ -101,17 +82,17 @@ enum { bit_model_move_bits = 5, struct Bit_model { - unsigned int probability; + int probability; Bit_model() : probability( bit_model_total / 2 ) {} }; class Pretty_print { + std::string name_; const char * const stdin_name; - unsigned int longest_name; + unsigned longest_name; const int verbosity_; - std::string name_; mutable bool first_post; public: @@ -119,11 +100,11 @@ public: : stdin_name( "(stdin)" ), longest_name( 0 ), verbosity_( v ), first_post( false ) { - const unsigned int stdin_name_len = std::strlen( stdin_name ); - for( unsigned int i = 0; i < filenames.size(); ++i ) + const unsigned stdin_name_len = std::strlen( stdin_name ); + for( unsigned i = 0; i < filenames.size(); ++i ) { const std::string & s = filenames[i]; - const unsigned int len = ( ( s == "-" ) ? stdin_name_len : s.size() ); + const unsigned len = ( ( s == "-" ) ? stdin_name_len : s.size() ); if( len > longest_name ) longest_name = len; } if( longest_name == 0 ) longest_name = stdin_name_len; @@ -132,7 +113,7 @@ public: Pretty_print( const std::string & filename, const int v ) : stdin_name( "(stdin)" ), verbosity_( v ), first_post( false ) { - const unsigned int stdin_name_len = std::strlen( stdin_name ); + const unsigned stdin_name_len = std::strlen( stdin_name ); longest_name = ( ( filename == "-" ) ? stdin_name_len : filename.size() ); if( longest_name == 0 ) longest_name = stdin_name_len; set_name( filename ); @@ -148,7 +129,7 @@ public: void reset() const { if( name_.size() ) first_post = true; } const char * name() const { return name_.c_str(); } int verbosity() const { return verbosity_; } - void operator()( const char * const msg = 0 ) const; + void operator()( const char * const msg = 0, FILE * const f = stderr ) const; }; @@ -159,9 +140,9 @@ class CRC32 public: CRC32() { - for( unsigned int n = 0; n < 256; ++n ) + for( unsigned n = 0; n < 256; ++n ) { - unsigned int c = n; + unsigned c = n; for( int k = 0; k < 8; ++k ) { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } data[n] = c; @@ -169,8 +150,10 @@ public: } uint32_t operator[]( const uint8_t byte ) const { return data[byte]; } + void update( uint32_t & crc, const uint8_t byte ) const { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const { for( int i = 0; i < size; ++i ) @@ -181,16 +164,15 @@ public: extern const CRC32 crc32; -inline int real_bits( const unsigned int value ) +inline int real_bits( unsigned value ) { - int bits = 0, i = 1; - unsigned int mask = 1; - for( ; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i; + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } return bits; } -const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; +const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" struct File_header { @@ -206,11 +188,11 @@ struct File_header uint8_t version() const { return data[4]; } bool verify_version() const { return ( data[4] <= 1 ); } - int dictionary_size() const + unsigned dictionary_size() const { - int sz = ( 1 << ( data[5] & 0x1F ) ); - if( sz > min_dictionary_size && sz <= max_dictionary_size ) - sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 ); + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); return sz; } @@ -243,36 +225,36 @@ struct File_trailer static int size( const int version = 1 ) { return ( ( version >= 1 ) ? 20 : 12 ); } - uint32_t data_crc() const + unsigned data_crc() const { - uint32_t tmp = 0; + unsigned tmp = 0; for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void data_crc( uint32_t crc ) + void data_crc( unsigned crc ) { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } - long long data_size() const + unsigned long long data_size() const { - long long tmp = 0; + unsigned long long tmp = 0; for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void data_size( long long sz ) + void data_size( unsigned long long sz ) { for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } - long long member_size() const + unsigned long long member_size() const { - long long tmp = 0; + unsigned long long tmp = 0; for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } - void member_size( long long sz ) + void member_size( unsigned long long sz ) { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } @@ -286,35 +268,9 @@ struct Error }; -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - - -class Block - { - long long pos_, size_; // pos + size <= LLONG_MAX - -public: - Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} - - long long pos() const { return pos_; } - long long size() const { return size_; } - long long end() const { return pos_ + size_; } - - void pos( const long long p ) { pos_ = p; } - void size( const long long s ) { size_ = s; } - - bool overlaps( const Block & b ) const - { return ( pos_ < b.end() && b.pos_ < end() ); } - void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } - }; +inline unsigned long long positive_diff( const unsigned long long x, + const unsigned long long y ) + { return ( ( x > y ) ? x - y : 0 ); } // defined in decoder.cc @@ -322,13 +278,11 @@ int readblock( const int fd, uint8_t * const buf, const int size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); // defined in main.cc -extern int verbosity; -const char * format_num( long long num, long long limit = LLONG_MAX, - const int set_prefix = 0 ); int open_instream( const std::string & name, struct stat * const in_statsp, const bool to_stdout, const bool reg_only = false ); int open_outstream_rw( const std::string & output_filename, const bool force ); +void show_header( const File_header & header ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); void internal_error( const char * const msg ); @@ -337,25 +291,32 @@ void internal_error( const char * const msg ); void cleanup_and_fail( const std::string & output_filename, const int outfd, const int retval ); bool copy_file( const int infd, const int outfd, - const long long size = LLONG_MAX ); -bool try_decompress( const int fd, const long long file_size, + const long long max_size = -1 ); +bool try_decompress( const int fd, const unsigned long long file_size, long long * failure_posp = 0 ); -bool verify_header( const File_header & header ); -bool verify_single_member( const int fd, const long long file_size ); +bool verify_header( const File_header & header, const int verbosity ); +bool verify_single_member( const int fd, const long long file_size, + const int verbosity ); int merge_files( const std::vector< std::string > & filenames, - const std::string & output_filename, const bool force ); + const std::string & output_filename, const int verbosity, + const bool force ); // defined in range_dec.cc -int list_file( const std::string & input_filename ); +int list_files( const std::vector< std::string > & filenames, + const int verbosity ); int range_decompress( const std::string & input_filename, const std::string & default_output_filename, - const std::string & range_string, - const bool to_stdout, const bool force ); + const std::string & range_string, const int verbosity, + const bool force, const bool to_stdout ); // defined in repair.cc +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ); int repair_file( const std::string & input_filename, - const std::string & output_filename, const bool force ); + const std::string & output_filename, const int verbosity, + const bool force ); // defined in split.cc int split_file( const std::string & input_filename, - const std::string & default_output_filename, const bool force ); + const std::string & default_output_filename, + const int verbosity, const bool force ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,6 +38,7 @@ #include <utime.h> #include <sys/stat.h> #if defined(__MSVCRT__) +#include <io.h> #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define SIGHUP SIGTERM @@ -47,6 +48,9 @@ #define S_IROTH 0 #define S_IWOTH 0 #endif +#if defined(__OS2__) +#include <io.h> +#endif #include "arg_parser.h" #include "lzip.h" @@ -61,7 +65,7 @@ namespace { const char * const Program_name = "Lziprecover"; const char * const program_name = "lziprecover"; -const char * const program_year = "2012"; +const char * const program_year = "2013"; const char * invocation_name = 0; #ifdef O_BINARY @@ -80,6 +84,7 @@ enum Mode { m_none, m_decompress, m_generate, m_list, m_merge, m_range, std::string output_filename; int outfd = -1; +int verbosity = 0; const mode_t usr_rw = S_IRUSR | S_IWUSR; const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; mode_t outfd_mode = usr_rw; @@ -125,10 +130,30 @@ void show_version() "There is NO WARRANTY, to the extent permitted by law.\n" ); } +} // end namespace + +void show_header( const File_header & header ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = header.dictionary_size(); + bool exact = ( num % factor == 0 ); + + for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + std::fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", + header.version(), np, num, p ); + } + +namespace { -void one_file( const int argind, const int arguments ) +void one_file( const int files ) { - if( argind + 1 != arguments ) + if( files != 1 ) { show_error( "You must specify exactly 1 file.", 0, true ); std::exit( 1 ); @@ -159,6 +184,40 @@ int extension_index( const std::string & name ) return -1; } +} // end namespace + +int open_instream( const std::string & name, struct stat * const in_statsp, + const bool to_stdout, const bool reg_only ) + { + int infd = open( name.c_str(), O_RDONLY | o_binary ); + if( infd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n", + program_name, name.c_str(), std::strerror( errno ) ); + } + else + { + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + const bool can_read = ( i == 0 && !reg_only && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + program_name, name.c_str(), + ( can_read && !to_stdout ) ? + " and '--stdout' was not specified" : "" ); + close( infd ); + infd = -1; + } + } + return infd; + } + +namespace { void set_d_outname( const std::string & name, const int i ) { @@ -300,16 +359,17 @@ void show_trailing_garbage( const uint8_t * const data, const int size, int decompress( const int infd, const Pretty_print & pp, const bool testing ) { + const char * const ok_msg = ( testing ? "ok\n" : "done\n" ); int retval = 0; try { + unsigned long long partial_file_pos = 0; Range_decoder rdec( infd ); - long long partial_file_pos = 0; - for( bool first_member = true; ; first_member = false, pp.reset() ) + for( bool first_member = true; ; first_member = false ) { File_header header; rdec.reset_member_position(); - const int size = rdec.read( header.data, File_header::size ); + const int size = rdec.read_data( header.data, File_header::size ); if( rdec.finished() ) // End Of File { if( first_member ) @@ -339,13 +399,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { pp( "Invalid dictionary size in member header" ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { - pp(); - if( verbosity >= 2 ) - std::fprintf( stderr, "version %d, dictionary size %7sB. ", - header.version(), - format_num( header.dictionary_size(), 9999, -1 ) ); - } + { pp(); if( verbosity >= 2 ) show_header( header ); } LZ_decoder decoder( header, rdec, outfd ); const int result = decoder.decode_member( pp ); @@ -356,17 +410,15 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) { pp(); if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %lld\n", + std::fprintf( stderr, "File ends unexpectedly at pos %llu\n", partial_file_pos ); else - std::fprintf( stderr, "Decoder error at pos %lld\n", + std::fprintf( stderr, "Decoder error at pos %llu\n", partial_file_pos ); } retval = 2; break; } - if( verbosity >= 2 ) - { if( testing ) std::fprintf( stderr, "ok\n" ); - else std::fprintf( stderr, "done\n" ); } + if( verbosity >= 2 ) { std::fprintf( stderr, ok_msg ); pp.reset(); } } } catch( std::bad_alloc ) @@ -375,9 +427,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing ) retval = 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } - if( verbosity == 1 && retval == 0 ) - { if( testing ) std::fprintf( stderr, "ok\n" ); - else std::fprintf( stderr, "done\n" ); } + if( verbosity == 1 && retval == 0 ) std::fprintf( stderr, ok_msg ); return retval; } @@ -399,65 +449,6 @@ void set_signals() } // end namespace -int verbosity = 0; - - -const char * format_num( long long num, long long limit, - const int set_prefix ) - { - const char * const si_prefix[8] = - { "k", "M", "G", "T", "P", "E", "Z", "Y" }; - const char * const binary_prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - static bool si = true; - static char buf[32]; - - if( set_prefix ) si = ( set_prefix > 0 ); - const int factor = ( si ? 1000 : 1024 ); - const char * const * prefix = ( si ? si_prefix : binary_prefix ); - const char * p = ""; - bool exact = ( num % factor == 0 ); - - for( int i = 0; i < 8 && ( llabs( num ) > limit || - ( exact && llabs( num ) >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } - snprintf( buf, sizeof buf, "%lld %s", num, p ); - return buf; - } - - -int open_instream( const std::string & name, struct stat * const in_statsp, - const bool to_stdout, const bool reg_only ) - { - int infd = open( name.c_str(), O_RDONLY | o_binary ); - if( infd < 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n", - program_name, name.c_str(), std::strerror( errno ) ); - } - else - { - const int i = fstat( infd, in_statsp ); - const mode_t & mode = in_statsp->st_mode; - const bool can_read = ( i == 0 && !reg_only && - ( S_ISBLK( mode ) || S_ISCHR( mode ) || - S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name.c_str(), - ( can_read && !to_stdout ) ? - " and '--stdout' was not specified" : "" ); - close( infd ); - infd = -1; - } - } - return infd; - } - - int open_outstream_rw( const std::string & output_filename, const bool force ) { @@ -490,7 +481,7 @@ void show_error( const char * const msg, const int errcode, const bool help ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); std::fprintf( stderr, "\n" ); } - if( help && invocation_name && invocation_name[0] ) + if( help ) std::fprintf( stderr, "Try '%s --help' for more information.\n", invocation_name ); } @@ -507,15 +498,15 @@ void internal_error( const char * const msg ) int main( const int argc, const char * const argv[] ) { + std::string input_filename; + std::string default_output_filename; + std::string range_string; + std::vector< std::string > filenames; int infd = -1; Mode program_mode = m_none; bool force = false; bool keep_input_files = false; bool to_stdout = false; - std::string input_filename; - std::string default_output_filename; - std::string range_string; - std::vector< std::string > filenames; invocation_name = argv[0]; const Arg_parser::Option options[] = @@ -546,7 +537,7 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options - const std::string & arg = parser.argument( argind ).c_str(); + const std::string & arg = parser.argument( argind ); switch( code ) { case 'c': to_stdout = true; break; @@ -570,8 +561,8 @@ int main( const int argc, const char * const argv[] ) } // end process options #if defined(__MSVCRT__) || defined(__OS2__) - _fsetmode( stdin, "b" ); - _fsetmode( stdout, "b" ); + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); #endif if( program_mode == m_none ) @@ -580,6 +571,13 @@ int main( const int argc, const char * const argv[] ) return 1; } + bool filenames_given = false; + for( ; argind < parser.arguments(); ++argind ) + { + filenames.push_back( parser.argument( argind ) ); + if( filenames.back() != "-" ) filenames_given = true; + } + switch( program_mode ) { case m_generate: @@ -588,29 +586,27 @@ int main( const int argc, const char * const argv[] ) case m_none: internal_error( "invalid operation" ); break; case m_decompress: break; case m_list: - one_file( argind, parser.arguments() ); - return list_file( parser.argument( argind ) ); + if( filenames.size() < 1 ) + { show_error( "You must specify at least 1 file.", 0, true ); return 1; } + return list_files( filenames, verbosity ); case m_merge: - for( ; argind < parser.arguments(); ++argind ) - filenames.push_back( parser.argument( argind ) ); if( filenames.size() < 2 ) { show_error( "You must specify at least 2 files.", 0, true ); return 1; } if( !default_output_filename.size() ) default_output_filename = insert_fixed( filenames[0] ); - return merge_files( filenames, default_output_filename, force ); + return merge_files( filenames, default_output_filename, verbosity, force ); case m_range: - one_file( argind, parser.arguments() ); - return range_decompress( parser.argument( argind ), - default_output_filename, range_string, - to_stdout, force ); + one_file( filenames.size() ); + return range_decompress( filenames[0], default_output_filename, + range_string, verbosity, force, to_stdout ); case m_repair: - one_file( argind, parser.arguments() ); + one_file( filenames.size() ); if( !default_output_filename.size() ) - default_output_filename = insert_fixed( parser.argument( argind ) ); - return repair_file( parser.argument( argind ), default_output_filename, force ); + default_output_filename = insert_fixed( filenames[0] ); + return repair_file( filenames[0], default_output_filename, verbosity, force ); case m_split: - one_file( argind, parser.arguments() ); - return split_file( parser.argument( argind ), default_output_filename, force ); + one_file( filenames.size() ); + return split_file( filenames[0], default_output_filename, verbosity, force ); case m_test: break; } @@ -619,13 +615,6 @@ int main( const int argc, const char * const argv[] ) else if( program_mode != m_decompress ) internal_error( "invalid decompressor operation" ); - bool filenames_given = false; - for( ; argind < parser.arguments(); ++argind ) - { - if( parser.argument( argind ) != "-" ) filenames_given = true; - filenames.push_back( parser.argument( argind ) ); - } - if( filenames.empty() ) filenames.push_back("-"); if( !to_stdout && program_mode != m_test && ( filenames_given || default_output_filename.size() ) ) @@ -634,7 +623,7 @@ int main( const int argc, const char * const argv[] ) Pretty_print pp( filenames, verbosity ); int retval = 0; - for( unsigned int i = 0; i < filenames.size(); ++i ) + for( unsigned i = 0; i < filenames.size(); ++i ) { struct stat in_stats; output_filename.clear(); @@ -653,7 +642,7 @@ int main( const int argc, const char * const argv[] ) outfd_mode = all_rw; if( !open_outstream( force ) ) { - if( outfd == -1 && retval < 1 ) retval = 1; + if( retval < 1 ) retval = 1; close( infd ); infd = -1; continue; } @@ -663,7 +652,6 @@ int main( const int argc, const char * const argv[] ) else { input_filename = filenames[i]; - const int eindex = extension_index( input_filename ); infd = open_instream( input_filename, &in_stats, to_stdout ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( program_mode != m_test ) @@ -671,11 +659,11 @@ int main( const int argc, const char * const argv[] ) if( to_stdout ) outfd = STDOUT_FILENO; else { - set_d_outname( input_filename, eindex ); + set_d_outname( input_filename, extension_index( input_filename ) ); outfd_mode = usr_rw; if( !open_outstream( force ) ) { - if( outfd == -1 && retval < 1 ) retval = 1; + if( retval < 1 ) retval = 1; close( infd ); infd = -1; continue; } @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include "lzip.h" #include "decoder.h" +#include "file_index.h" namespace { @@ -39,7 +40,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector, { const int buffer_size = 65536; std::vector< uint8_t * > buffer_vector( infd_vector.size() ); - for( unsigned int i = 0; i < infd_vector.size(); ++i ) + for( unsigned i = 0; i < infd_vector.size(); ++i ) buffer_vector[i] = new uint8_t[buffer_size]; Block b( 0, 0 ); long long partial_pos = 0; @@ -53,7 +54,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector, { show_error( "Error reading input file", errno ); error = true; break; } if( rd > 0 ) { - for( unsigned int i = 1; i < infd_vector.size(); ++i ) + for( unsigned i = 1; i < infd_vector.size(); ++i ) if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd ) { show_error( "Error reading input file", errno ); error = true; break; } @@ -66,7 +67,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector, { while( i < rd && b.pos() == 0 ) { - for( unsigned int j = 1; j < infd_vector.size(); ++j ) + for( unsigned j = 1; j < infd_vector.size(); ++j ) if( buffer_vector[0][i] != buffer_vector[j][i] ) { b.pos( partial_pos + i ); break; } // begin block ++i; @@ -74,7 +75,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector, while( i < rd && b.pos() > 0 ) { ++equal_bytes; - for( unsigned int j = 1; j < infd_vector.size(); ++j ) + for( unsigned j = 1; j < infd_vector.size(); ++j ) if( buffer_vector[0][i] != buffer_vector[j][i] ) { equal_bytes = 0; break; } if( equal_bytes >= 2 ) // end block @@ -96,18 +97,18 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector, b.size( partial_pos - b.pos() ); block_vector.push_back( b ); } - for( unsigned int i = 0; i < infd_vector.size(); ++i ) + for( unsigned i = 0; i < infd_vector.size(); ++i ) delete[] buffer_vector[i]; return !error; } -int ipow( const unsigned int base, const unsigned int exponent ) +int ipow( const unsigned base, const unsigned exponent ) { - int result = 1; - for( unsigned int i = 0; i < exponent; ++i ) + unsigned result = 1; + for( unsigned i = 0; i < exponent; ++i ) { - if( INT_MAX / base >= (unsigned int)result ) result *= base; + if( INT_MAX / base >= result ) result *= base; else { result = INT_MAX; break; } } return result; @@ -115,14 +116,15 @@ int ipow( const unsigned int base, const unsigned int exponent ) int open_input_files( const std::vector< std::string > & filenames, - std::vector< int > & infd_vector, long long & isize ) + std::vector< int > & infd_vector, long long & isize, + const int verbosity ) { bool identical = false; - for( unsigned int i = 1; i < filenames.size(); ++i ) + for( unsigned i = 1; i < filenames.size(); ++i ) if( filenames[0] == filenames[i] ) { identical = true; break; } if( !identical ) - for( unsigned int i = 0; i < filenames.size(); ++i ) + for( unsigned i = 0; i < filenames.size(); ++i ) { struct stat in_stats; ino_t st_ino0 = 0; @@ -136,7 +138,7 @@ int open_input_files( const std::vector< std::string > & filenames, if( identical ) { show_error( "Two input files are the same." ); return 1; } isize = 0; - for( unsigned int i = 0; i < filenames.size(); ++i ) + for( unsigned i = 0; i < filenames.size(); ++i ) { const long long tmp = lseek( infd_vector[i], 0, SEEK_END ); if( tmp < 0 ) @@ -155,11 +157,11 @@ int open_input_files( const std::vector< std::string > & filenames, { show_error( "Sizes of input files are different." ); return 1; } } - for( unsigned int i = 0; i < filenames.size(); ++i ) - if( !verify_single_member( infd_vector[i], isize ) ) + for( unsigned i = 0; i < filenames.size(); ++i ) + if( !verify_single_member( infd_vector[i], isize, verbosity ) ) return 2; - for( unsigned int i = 0; i < filenames.size(); ++i ) + for( unsigned i = 0; i < filenames.size(); ++i ) { if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 ) { show_error( "Seek error in input file", errno ); return 1; } @@ -189,19 +191,21 @@ void cleanup_and_fail( const std::string & output_filename, } -bool copy_file( const int infd, const int outfd, const long long size ) +// max_size < 0 means no size limit. +bool copy_file( const int infd, const int outfd, const long long max_size ) { - long long rest = size; const int buffer_size = 65536; + // remaining number of bytes to copy + long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); uint8_t * const buffer = new uint8_t[buffer_size]; bool error = false; - while( true ) + while( rest > 0 ) { - const int block_size = std::min( (long long)buffer_size, rest ); - if( block_size <= 0 ) break; - const int rd = readblock( infd, buffer, block_size ); - if( rd != block_size && errno ) + const int size = std::min( (long long)buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) { show_error( "Error reading input file", errno ); error = true; break; } if( rd > 0 ) { @@ -209,24 +213,21 @@ bool copy_file( const int infd, const int outfd, const long long size ) if( wr != rd ) { show_error( "Error writing output file", errno ); error = true; break; } - rest -= rd; } - if( rd < block_size ) break; // EOF + if( rd < size ) break; // EOF } delete[] buffer; return !error; } -bool try_decompress( const int fd, const long long file_size, +bool try_decompress( const int fd, const unsigned long long file_size, long long * failure_posp ) { try { Range_decoder rdec( fd ); File_header header; - rdec.reset_member_position(); - for( int i = 0; i < File_header::size; ++i ) - header.data[i] = rdec.get_byte(); + rdec.read_data( header.data, File_header::size ); if( !rdec.finished() && // End Of File header.verify_magic() && header.version() == 1 && @@ -251,7 +252,7 @@ bool try_decompress( const int fd, const long long file_size, } -bool verify_header( const File_header & header ) +bool verify_header( const File_header & header, const int verbosity ) { if( !header.verify_magic() ) { @@ -274,13 +275,14 @@ bool verify_header( const File_header & header ) } -bool verify_single_member( const int fd, const long long file_size ) +bool verify_single_member( const int fd, const long long file_size, + const int verbosity ) { File_header header; if( lseek( fd, 0, SEEK_SET ) < 0 || readblock( fd, header.data, File_header::size ) != File_header::size ) { show_error( "Error reading member header", errno ); return false; } - if( !verify_header( header ) ) return false; + if( !verify_header( header, verbosity ) ) return false; File_trailer trailer; if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 || @@ -292,7 +294,7 @@ bool verify_single_member( const int fd, const long long file_size ) if( member_size < file_size && lseek( fd, -member_size, SEEK_END ) > 0 && readblock( fd, header.data, File_header::size ) == File_header::size && - verify_header( header ) ) + verify_header( header, verbosity ) ) show_error( "Input file has more than 1 member. Split it first." ); else show_error( "Member size in input file trailer is corrupt." ); @@ -303,11 +305,12 @@ bool verify_single_member( const int fd, const long long file_size ) int merge_files( const std::vector< std::string > & filenames, - const std::string & output_filename, const bool force ) + const std::string & output_filename, const int verbosity, + const bool force ) { std::vector< int > infd_vector( filenames.size() ); long long isize = 0; - const int retval = open_input_files( filenames, infd_vector, isize ); + const int retval = open_input_files( filenames, infd_vector, isize, verbosity ); if( retval >= 0 ) return retval; const int outfd = open_outstream_rw( output_filename, force ); @@ -353,7 +356,7 @@ int merge_files( const std::vector< std::string > & filenames, std::fflush( stdout ); } int tmp = var; - for( unsigned int i = 0; i < block_vector.size(); ++i ) + for( unsigned i = 0; i < block_vector.size(); ++i ) { const int infd = infd_vector[tmp % filenames.size()]; tmp /= filenames.size(); diff --git a/range_dec.cc b/range_dec.cc index d8e171a..d056271 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,6 @@ #include <algorithm> #include <cerrno> -#include <climits> #include <cstdio> #include <cstdlib> #include <cstring> @@ -31,83 +30,11 @@ #include "lzip.h" #include "decoder.h" +#include "file_index.h" namespace { -class Member - { - Block dblock_, mblock_; // data block, member block - -public: - Member( const long long dp, const long long ds, - const long long mp, const long long ms ) - : dblock_( dp, ds ), mblock_( mp, ms ) {} - - const Block & dblock() const { return dblock_; } - Block & dblock() { return dblock_; } - const Block & mblock() const { return mblock_; } - Block & mblock() { return mblock_; } - }; - - -int seek_read( const int fd, uint8_t * const buf, const int size, - const long long pos ) - { - if( lseek( fd, pos, SEEK_SET ) == pos ) - return readblock( fd, buf, size ); - return 0; - } - - -class Member_index - { - std::vector< Member > member_vector; - -public: - Member_index( const int infd, const long long isize ) - { - long long pos = isize; // always points to a header or EOF - File_header header; - File_trailer trailer; - while( pos >= min_member_size ) - { - if( seek_read( infd, trailer.data, File_trailer::size(), - pos - File_trailer::size() ) != File_trailer::size() ) - { show_error( "Read error", errno ); std::exit( 1 ); } - const long long member_size = trailer.member_size(); - if( member_size < min_member_size || pos < member_size ) break; - if( seek_read( infd, header.data, File_header::size, - pos - member_size ) != File_header::size ) - { show_error( "Read error", errno ); std::exit( 1 ); } - if( !header.verify_magic() || !header.verify_version() ) break; - pos -= member_size; - member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size ) ); - } - if( pos != 0 || member_vector.size() == 0 ) - { - show_error( "Member size in input file trailer is corrupt." ); - std::exit( 1 ); - } - std::reverse( member_vector.begin(), member_vector.end() ); - for( unsigned int i = 0; i < member_vector.size() - 1; ++i ) - member_vector[i+1].dblock().pos( member_vector[i].dblock().end() ); - } - - long long data_end() const - { if( member_vector.size() ) return member_vector.back().dblock().end(); - else return 0; } - - const Member & member( const int i ) const { return member_vector[i]; } - const Block & dblock( const int i ) const - { return member_vector[i].dblock(); } - const Block & mblock( const int i ) const - { return member_vector[i].mblock(); } - int members() const { return (int)member_vector.size(); } - }; - - // Returns the number of chars read, or 0 if error. // int parse_long_long( const char * const ptr, long long & value ) @@ -115,7 +42,7 @@ int parse_long_long( const char * const ptr, long long & value ) char * tail; errno = 0; value = strtoll( ptr, &tail, 0 ); - if( tail == ptr || errno ) return 0; + if( tail == ptr || errno || value < 0 ) return 0; int c = tail - ptr; if( ptr[c] ) @@ -141,7 +68,7 @@ int parse_long_long( const char * const ptr, long long & value ) if( ptr[c] == 'B' ) ++c; for( int i = 0; i < exponent; ++i ) { - if( LLONG_MAX / factor >= llabs( value ) ) value *= factor; + if( INT64_MAX / factor >= value ) value *= factor; else return 0; } } @@ -156,17 +83,17 @@ void parse_range( const char * const ptr, Block & range ) { long long value = 0; int c = parse_long_long( ptr, value ); // pos - if( c && value >= 0 && value < LLONG_MAX && + if( c && value >= 0 && value < INT64_MAX && ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) { range.pos( value ); - if( ptr[c] == 0 ) { range.size( LLONG_MAX - value ); return; } + if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; } const bool issize = ( ptr[c] == ',' ); c = parse_long_long( ptr + c + 1, value ); // size if( c && value > 0 && ( issize || value > range.pos() ) ) { if( !issize ) value -= range.pos(); - if( LLONG_MAX - range.pos() >= value ) { range.size( value ); return; } + if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; } } } show_error( "Bad decompression range.", 0, true ); @@ -182,132 +109,138 @@ bool safe_seek( const int fd, const long long pos ) int decompress_member( const int infd, const int outfd, - const Pretty_print & pp, const Member & member, - const long long outskip, const long long outend ) + const Pretty_print & pp, + const unsigned long long mpos, + const unsigned long long outskip, + const unsigned long long outend ) { - int retval = 0; - try { Range_decoder rdec( infd ); File_header header; - int size; - for( size = 0; size < File_header::size && !rdec.finished(); ++size ) - header.data[size] = rdec.get_byte(); + rdec.read_data( header.data, File_header::size ); if( rdec.finished() ) // End Of File - { pp( "Error reading member header" ); retval = 1; } + { pp( "Error reading member header" ); return 1; } if( !header.verify_magic() ) - { pp( "Bad magic number (file not in lzip format)" ); retval = 2; } + { pp( "Bad magic number (file not in lzip format)" ); return 2; } if( !header.verify_version() ) { - if( verbosity >= 0 ) + if( pp.verbosity() >= 0 ) { pp(); std::fprintf( stderr, "Version %d member format not supported.\n", header.version() ); } - retval = 2; + return 2; } if( header.dictionary_size() < min_dictionary_size || header.dictionary_size() > max_dictionary_size ) - { pp( "Invalid dictionary size in member header" ); retval = 2; } + { pp( "Invalid dictionary size in member header" ); return 2; } - if( pp.verbosity() >= 2 ) - { - pp(); - std::fprintf( stderr, "version %d, dictionary size %7sB. ", - header.version(), - format_num( header.dictionary_size(), 9999, -1 ) ); - } - LZ_decoder decoder( header, rdec, outfd, outskip, outend ); + if( pp.verbosity() >= 2 ) { pp(); show_header( header ); } + LZ_decoder decoder( header, rdec, outfd, outskip, outend ); const int result = decoder.decode_member( pp ); if( result != 0 ) { - if( verbosity >= 0 && result <= 2 ) + if( pp.verbosity() >= 0 && result <= 2 ) { pp(); if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %lld\n", - member.mblock().pos() + rdec.member_position() ); + std::fprintf( stderr, "File ends unexpectedly at pos %llu\n", + mpos + rdec.member_position() ); else - std::fprintf( stderr, "Decoder error at pos %lld\n", - member.mblock().pos() + rdec.member_position() ); + std::fprintf( stderr, "Decoder error at pos %llu\n", + mpos + rdec.member_position() ); } - retval = 2; + return 2; } if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" ); } catch( std::bad_alloc ) { pp( "Not enough memory. Find a machine with more memory" ); - retval = 1; + return 1; } - catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } - return retval; + catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } + return 0; } -} // end namespace - -int list_file( const std::string & input_filename ) +int list_file( const std::string & input_filename, const Pretty_print & pp ) { struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - const long long isize = lseek( infd, 0, SEEK_END ); - if( isize < 0 ) - { show_error( "Input file is not seekable", errno ); return 1; } - if( isize < min_member_size ) - { show_error( "Input file is too short." ); return 2; } - Member_index member_index( infd, isize ); + File_index file_index( infd ); + close( infd ); + if( file_index.retval() != 0 ) + { show_error( file_index.error().c_str() ); return file_index.retval(); } - if( verbosity >= 0 ) + if( pp.verbosity() >= 0 ) { - if( verbosity >= 1 ) + const unsigned long long data_size = file_index.data_end(); + const unsigned long long file_size = file_index.file_end(); + pp( 0, stdout ); + if( data_size > 0 && file_size > 0 ) + std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + (double)data_size / file_size, + ( 8.0 * file_size ) / data_size, + 100.0 * ( 1.0 - ( (double)file_size / data_size ) ) ); + std::printf( "decompressed size %9llu, compressed size %8llu.\n", + data_size, file_size ); + + if( pp.verbosity() >= 1 && file_index.members() > 1 ) { - std::printf( "Total members in file = %d.\n", member_index.members() ); - for( int i = 0; i < member_index.members(); ++i ) - { - const Block & db = member_index.dblock( i ); - const Block & mb = member_index.mblock( i ); - std::printf( "Member %3d data pos %9lld data size %7lld " - "member pos %9lld member size %7lld.\n", i, - db.pos(), db.size(), mb.pos(), mb.size() ); - } + std::printf( "Total members in file = %d.\n", file_index.members() ); + if( pp.verbosity() >= 2 ) + for( int i = 0; i < file_index.members(); ++i ) + { + const Block & db = file_index.dblock( i ); + const Block & mb = file_index.mblock( i ); + std::printf( "Member %3d data pos %9llu data size %7llu " + "member pos %9llu member size %7llu.\n", i + 1, + db.pos(), db.size(), mb.pos(), mb.size() ); + } std::printf( "\n" ); } - - const long long data_size = member_index.data_end(); - if( data_size > 0 && isize > 0 ) - std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.\n", - (double)data_size / isize, - ( 8.0 * isize ) / data_size, - 100.0 * ( 1.0 - ( (double)isize / data_size ) ) ); - std::printf( "decompressed size %9lld, compressed size %8lld.\n", - data_size, isize ); } return 0; } +} // end namespace + + +int list_files( const std::vector< std::string > & filenames, + const int verbosity ) + { + Pretty_print pp( filenames, verbosity ); + int retval = 0; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + pp.set_name( filenames[i] ); + const int tmp = list_file( filenames[i], pp ); + if( tmp > retval ) retval = tmp; + } + return retval; + } + int range_decompress( const std::string & input_filename, const std::string & output_filename, - const std::string & range_string, - const bool to_stdout, const bool force ) + const std::string & range_string, const int verbosity, + const bool force, const bool to_stdout ) { Block range( 0, 0 ); parse_range( range_string.c_str(), range ); struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - const long long isize = lseek( infd, 0, SEEK_END ); - if( isize < 0 ) - { show_error( "Input file is not seekable", errno ); return 1; } - if( isize < min_member_size ) - { show_error( "Input file is too short." ); return 2; } - - Member_index member_index( infd, isize ); - if( range.end() > member_index.data_end() ) - range.size( std::max( 0LL, member_index.data_end() - range.pos() ) ); + + File_index file_index( infd ); + if( file_index.retval() != 0 ) + { show_error( file_index.error().c_str() ); return file_index.retval(); } + + if( range.end() > file_index.data_end() ) + range.size( std::max( 0LL, file_index.data_end() - range.pos() ) ); if( range.size() <= 0 ) { if( verbosity >= 1 ) show_error( "Nothing to do." ); return 0; } @@ -315,7 +248,7 @@ int range_decompress( const std::string & input_filename, { if( verbosity >= 2 ) std::fprintf( stderr, "Decompressed file size = %sB\n", - format_num( member_index.data_end() ) ); + format_num( file_index.data_end() ) ); std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) ); std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) ); std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) ); @@ -329,23 +262,23 @@ int range_decompress( const std::string & input_filename, if( outfd < 0 ) return 1; } Pretty_print pp( input_filename, 0 ); int retval = 0; - for( int i = 0; i < member_index.members(); ++i ) + for( int i = 0; i < file_index.members(); ++i ) { - const Block & db = member_index.dblock( i ); + const Block & db = file_index.dblock( i ); if( range.overlaps( db ) ) { if( verbosity >= 3 ) std::fprintf( stderr, "Decompressing member %3d\n", i ); const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.end(), range.end() - db.pos() ); - if( !safe_seek( infd, member_index.mblock( i ).pos() ) ) - { retval = 1; break; } - retval = decompress_member( infd, outfd, pp, member_index.member( i ), - outskip, outend ); + const long long mpos = file_index.mblock( i ).pos(); + if( !safe_seek( infd, mpos ) ) { retval = 1; break; } + retval = decompress_member( infd, outfd, pp, mpos, outskip, outend ); if( retval ) cleanup_and_fail( output_filename, outfd, retval ); pp.reset(); } } + close( infd ); if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,8 +30,27 @@ #include "lzip.h" +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + +int seek_write( const int fd, const uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return writeblock( fd, buf, size ); + return 0; + } + + int repair_file( const std::string & input_filename, - const std::string & output_filename, const bool force ) + const std::string & output_filename, const int verbosity, + const bool force ) { struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); @@ -41,7 +60,7 @@ int repair_file( const std::string & input_filename, { show_error( "Input file is not seekable", errno ); return 1; } if( isize < min_member_size ) { show_error( "Input file is too short." ); return 2; } - if( !verify_single_member( infd, isize ) ) return 2; + if( !verify_single_member( infd, isize, verbosity ) ) return 2; if( lseek( infd, 0, SEEK_SET ) < 0 ) { show_error( "Seek error in input file", errno ); return 1; } @@ -67,35 +86,28 @@ int repair_file( const std::string & input_filename, const long long min_pos = std::max( (long long)File_header::size, failure_pos - 1000 ); bool done = false; - for( long long pos = failure_pos; pos >= min_pos; --pos ) + for( long long pos = failure_pos; pos >= min_pos && !done ; --pos ) { if( verbosity >= 1 ) { - std::printf( "Trying position %lld \r", pos ); + std::printf( "Trying position %llu \r", pos ); std::fflush( stdout ); } uint8_t byte; - if( lseek( outfd, pos, SEEK_SET ) < 0 || - readblock( outfd, &byte, 1 ) != 1 ) + if( seek_read( outfd, &byte, 1, pos ) != 1 ) { show_error( "Error reading output file", errno ); cleanup_and_fail( output_filename, outfd, 1 ); } - for( int i = 0; i < 255; ++i ) + for( int i = 0; i < 256; ++i ) { ++byte; - if( lseek( outfd, pos, SEEK_SET ) < 0 || - writeblock( outfd, &byte, 1 ) != 1 || + if( seek_write( outfd, &byte, 1, pos ) != 1 || lseek( outfd, 0, SEEK_SET ) < 0 ) { show_error( "Error writing output file", errno ); cleanup_and_fail( output_filename, outfd, 1 ); } + if( i == 255 ) break; if( try_decompress( outfd, isize ) ) { done = true; break; } } - if( done ) break; - ++byte; - if( lseek( outfd, pos, SEEK_SET ) < 0 || - writeblock( outfd, &byte, 1 ) != 1 ) - { show_error( "Error writing output file", errno ); - cleanup_and_fail( output_filename, outfd, 1 ); } } if( verbosity >= 1 ) std::printf( "\n" ); @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -83,7 +83,8 @@ int find_magic( const uint8_t * const buffer, const int pos, const int size ) int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, - const std::string & default_output_filename, const bool force ) + const std::string & default_output_filename, + const int verbosity, const bool force ) { const int hsize = File_header::size; const int tsize = File_trailer::size(); @@ -99,16 +100,16 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, bool at_stream_end = ( size < buffer_size ); if( size != buffer_size && errno ) { show_error( "Read error", errno ); return 1; } - if( size <= tsize ) + if( size < min_member_size ) { show_error( "Input file is too short." ); return 2; } - if( !verify_header( *(File_header *)buffer ) ) return 2; + if( !verify_header( *(File_header *)buffer, verbosity ) ) return 2; std::string output_filename; first_filename( input_filename, default_output_filename, output_filename ); int outfd = open_outstream_rw( output_filename, force ); if( outfd < 0 ) { close( infd ); return 1; } - long long partial_member_size = 0; + unsigned long long partial_member_size = 0; while( true ) { int pos = 0; @@ -117,10 +118,8 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, newpos = find_magic( buffer, newpos, size + 4 - newpos ); if( newpos <= size ) { - long long member_size = 0; - for( int i = 1; i <= 8; ++i ) - { member_size <<= 8; member_size += base_buffer[tsize+newpos-i]; } - if( partial_member_size + newpos - pos == member_size ) + const File_trailer & trailer = *(File_trailer *)(base_buffer + newpos); + if( partial_member_size + newpos - pos == trailer.member_size() ) { // header found const int wr = writeblock( outfd, buffer + pos, newpos - pos ); if( wr != newpos - pos ) @@ -167,11 +166,12 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer, int split_file( const std::string & input_filename, - const std::string & default_output_filename, const bool force ) + const std::string & default_output_filename, + const int verbosity, const bool force ) { uint8_t * base_buffer; const int retval = do_split_file( input_filename, base_buffer, - default_output_filename, force ); + default_output_filename, verbosity, force ); delete[] base_buffer; return retval; } diff --git a/testsuite/check.sh b/testsuite/check.sh index 92dcfdd..bd77f02 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lziprecover - Data recovery tool for lzipped files -# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -53,6 +53,15 @@ printf . cmp ${in} copy || fail=1 printf . +"${LZIPRECOVER}" -lq +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIPRECOVER}" -mq ${bad1_lz} +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIPRECOVER}" -Rq +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +"${LZIPRECOVER}" -sq +if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi + "${LZIPRECOVER}" -D 921-1921 -fo copy ${in_lz} || fail=1 cmp ${inD} copy || fail=1 printf . diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc index 4c49035..abf61bb 100644 --- a/testsuite/unzcrash.cc +++ b/testsuite/unzcrash.cc @@ -1,7 +1,7 @@ /* Unzcrash - A test program written to test robustness to decompression of corrupted data. Inspired by unzcrash.c from Julian Seward's bzip2. - Copyright (C) 2008, 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,22 +34,12 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif -#ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL -#endif -#ifndef LLONG_MIN -#define LLONG_MIN (-LLONG_MAX - 1LL) -#endif -#ifndef ULLONG_MAX -#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL -#endif - namespace { const char * const Program_name = "Unzcrash"; const char * const program_name = "unzcrash"; -const char * const program_year = "2012"; +const char * const program_year = "2013"; const char * invocation_name = 0; int verbosity = 0; @@ -67,11 +57,12 @@ void show_help() "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" - " -b, --bits=<n>[,<n>]... test <n>-bit errors instead of full byte\n" + " -b, --bits=<range> test N-bit errors instead of full byte\n" " -p, --position=<bytes> first byte position to test\n" " -q, --quiet suppress all messages\n" " -s, --size=<bytes> number of byte positions to test\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" + "Examples of <range>: 1 1,2,3 1-4 1,3-5,8\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" ); } @@ -99,7 +90,7 @@ void show_error( const char * const msg, const int errcode = 0, std::fprintf( stderr, ": %s", std::strerror( errcode ) ); std::fprintf( stderr, "\n" ); } - if( help && invocation_name && invocation_name[0] ) + if( help ) std::fprintf( stderr, "Try '%s --help' for more information.\n", invocation_name ); } @@ -114,13 +105,13 @@ void internal_error( const char * const msg ) } -long long getnum( const char * const ptr, - const long long llimit = LLONG_MIN + 1, - const long long ulimit = LLONG_MAX ) +unsigned long long getnum( const char * const ptr, + const unsigned long long llimit, + const unsigned long long ulimit ) { errno = 0; - char *tail; - long long result = strtoll( ptr, &tail, 0 ); + char * tail; + unsigned long long result = strtoull( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); @@ -155,7 +146,7 @@ long long getnum( const char * const ptr, } for( int i = 0; i < exponent; ++i ) { - if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + if( ulimit / factor >= result ) result *= factor; else { errno = ERANGE; break; } } } |