/* Lziprecover - Data recovery tool for lzipped files Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #include #include #include #include #include "lzip.h" #include "decoder.h" namespace { class Member { Block dblock_, mblock_; // data block, member block public: Member( const long long dp, const long long ds, const long long mp, const long long ms ) : dblock_( dp, ds ), mblock_( mp, ms ) {} const Block & dblock() const throw() { return dblock_; } Block & dblock() throw() { return dblock_; } const Block & mblock() const throw() { return mblock_; } Block & mblock() throw() { return mblock_; } }; int seek_read( const int fd, uint8_t * const buf, const int size, const long long pos ) throw() { if( lseek( fd, pos, SEEK_SET ) == pos ) return readblock( fd, buf, size ); return 0; } class Member_index { std::vector< Member > member_vector; public: Member_index( const int infd, const long long isize ) { long long pos = isize; // always points to a header or EOF File_header header; File_trailer trailer; while( pos >= min_member_size ) { if( seek_read( infd, trailer.data, File_trailer::size(), pos - File_trailer::size() ) != File_trailer::size() ) { show_error( "Read error", errno ); std::exit( 1 ); } const long long member_size = trailer.member_size(); if( member_size < min_member_size || pos < member_size ) break; if( seek_read( infd, header.data, File_header::size, pos - member_size ) != File_header::size ) { show_error( "Read error", errno ); std::exit( 1 ); } if( !header.verify_magic() || !header.verify_version() ) break; pos -= member_size; member_vector.push_back( Member( 0, trailer.data_size(), pos, member_size ) ); } if( pos != 0 || member_vector.size() == 0 ) { show_error( "Member size in input file trailer is corrupt." ); std::exit( 1 ); } std::reverse( member_vector.begin(), member_vector.end() ); for( unsigned int i = 0; i < member_vector.size() - 1; ++i ) member_vector[i+1].dblock().pos( member_vector[i].dblock().end() ); } long long data_end() const throw() { if( member_vector.size() ) return member_vector.back().dblock().end(); else return 0; } const Member & member( const int i ) const throw() { return member_vector[i]; } const Block & dblock( const int i ) const throw() { return member_vector[i].dblock(); } const Block & mblock( const int i ) const throw() { return member_vector[i].mblock(); } int members() const throw() { return (int)member_vector.size(); } }; // Returns the number of chars read, or 0 if error. // int parse_long_long( const char * const ptr, long long & value ) throw() { char * tail; int c = 0; errno = 0; value = strtoll( ptr, &tail, 0 ); if( tail == ptr || errno ) return 0; c = tail - ptr; if( ptr[c] ) { const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000; int exponent = 0; switch( ptr[c] ) { case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; case 'P': exponent = 5; break; case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; case 'K': if( factor == 1024 ) exponent = 1; break; case 'k': if( factor == 1000 ) exponent = 1; break; } if( exponent > 0 ) { ++c; if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); } if( ptr[c] == 'B' ) ++c; for( int i = 0; i < exponent; ++i ) { if( LLONG_MAX / factor >= llabs( value ) ) value *= factor; else return 0; } } } return c; } // Recognized formats: - , // void parse_range( const char * const ptr, Block & range ) throw() { long long value = 0; int c = parse_long_long( ptr, value ); // pos if( c && value >= 0 && value < LLONG_MAX && ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) { range.pos( value ); if( ptr[c] == 0 ) { range.size( LLONG_MAX - value ); return; } const bool issize = ( ptr[c] == ',' ); c = parse_long_long( ptr + c + 1, value ); // size if( c && value > 0 && ( issize || value > range.pos() ) ) { if( !issize ) value -= range.pos(); if( LLONG_MAX - range.pos() >= value ) { range.size( value ); return; } } } show_error( "Bad decompression range.", 0, true ); std::exit( 1 ); } bool safe_seek( const int fd, const long long pos ) throw() { if( lseek( fd, pos, SEEK_SET ) == pos ) return true; show_error( "Seek error", errno ); return false; } int decompress_member( const int infd, const int outfd, const Pretty_print & pp, const Member & member, const long long outskip, const long long outend ) { int retval = 0; try { Range_decoder rdec( infd ); File_header header; int size; for( size = 0; size < File_header::size && !rdec.finished(); ++size ) header.data[size] = rdec.get_byte(); if( rdec.finished() ) // End Of File { pp( "Error reading member header" ); retval = 1; } if( !header.verify_magic() ) { pp( "Bad magic number (file not in lzip format)" ); retval = 2; } if( !header.verify_version() ) { if( verbosity >= 0 ) { pp(); std::fprintf( stderr, "Version %d member format not supported.\n", header.version() ); } retval = 2; } if( header.dictionary_size() < min_dictionary_size || header.dictionary_size() > max_dictionary_size ) { pp( "Invalid dictionary size in member header" ); retval = 2; } if( pp.verbosity() >= 2 ) { pp(); std::fprintf( stderr, "version %d, dictionary size %7sB. ", header.version(), format_num( header.dictionary_size(), 9999, -1 ) ); } LZ_decoder decoder( header, rdec, outfd, outskip, outend ); const int result = decoder.decode_member( pp ); if( result != 0 ) { if( verbosity >= 0 && result <= 2 ) { pp(); if( result == 2 ) std::fprintf( stderr, "File ends unexpectedly at pos %lld\n", member.mblock().pos() + rdec.member_position() ); else std::fprintf( stderr, "Decoder error at pos %lld\n", member.mblock().pos() + rdec.member_position() ); } retval = 2; } if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" ); } catch( std::bad_alloc ) { pp( "Not enough memory. Find a machine with more memory" ); retval = 1; } catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } return retval; } } // end namespace int list_file( const std::string & input_filename ) { struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; const long long isize = lseek( infd, 0, SEEK_END ); if( isize < 0 ) { show_error( "Input file is not seekable", errno ); return 1; } if( isize < min_member_size ) { show_error( "Input file is too short." ); return 2; } Member_index member_index( infd, isize ); if( verbosity >= 0 ) { if( verbosity >= 1 ) { std::printf( "Total members in file = %d.\n", member_index.members() ); for( int i = 0; i < member_index.members(); ++i ) { const Block & db = member_index.dblock( i ); const Block & mb = member_index.mblock( i ); std::printf( "Member %3d data pos %9lld data size %7lld " "member pos %9lld member size %7lld.\n", i, db.pos(), db.size(), mb.pos(), mb.size() ); } std::printf( "\n" ); } const long long data_size = member_index.data_end(); if( data_size > 0 && isize > 0 ) std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.\n", (double)data_size / isize, ( 8.0 * isize ) / data_size, 100.0 * ( 1.0 - ( (double)isize / data_size ) ) ); std::printf( "decompressed size %9lld, compressed size %8lld.\n", data_size, isize ); } return 0; } int range_decompress( const std::string & input_filename, const std::string & output_filename, const std::string & range_string, const bool to_stdout, const bool force ) { Block range( 0, 0 ); parse_range( range_string.c_str(), range ); struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; const long long isize = lseek( infd, 0, SEEK_END ); if( isize < 0 ) { show_error( "Input file is not seekable", errno ); return 1; } if( isize < min_member_size ) { show_error( "Input file is too short." ); return 2; } Member_index member_index( infd, isize ); if( range.end() > member_index.data_end() ) range.size( std::max( 0LL, member_index.data_end() - range.pos() ) ); if( range.size() <= 0 ) { if( verbosity >= 1 ) show_error( "Nothing to do." ); return 0; } if( verbosity >= 1 ) { if( verbosity >= 2 ) std::fprintf( stderr, "Decompressed file size = %sB\n", format_num( member_index.data_end() ) ); std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) ); std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) ); std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) ); } int outfd = -1; if( to_stdout || !output_filename.size() ) outfd = STDOUT_FILENO; else { outfd = open_outstream_rw( output_filename, force ); if( outfd < 0 ) return 1; } Pretty_print pp( input_filename, 0 ); int retval = 0; for( int i = 0; i < member_index.members(); ++i ) { const Block & db = member_index.dblock( i ); if( range.overlaps( db ) ) { if( verbosity >= 3 ) std::fprintf( stderr, "Decompressing member %3d\n", i ); const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.end(), range.end() - db.pos() ); if( !safe_seek( infd, member_index.mblock( i ).pos() ) ) { retval = 1; break; } retval = decompress_member( infd, outfd, pp, member_index.member( i ), outskip, outend ); if( retval ) cleanup_and_fail( output_filename, outfd, retval ); pp.reset(); } } if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); cleanup_and_fail( output_filename, -1, 1 ); } if( verbosity >= 2 ) std::fprintf( stderr, "Byte range decompressed successfully.\n" ); return retval; }