diff options
Diffstat (limited to 'range_dec.cc')
-rw-r--r-- | range_dec.cc | 245 |
1 files changed, 89 insertions, 156 deletions
diff --git a/range_dec.cc b/range_dec.cc index d8e171a..d056271 100644 --- a/range_dec.cc +++ b/range_dec.cc @@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,6 @@ #include <algorithm> #include <cerrno> -#include <climits> #include <cstdio> #include <cstdlib> #include <cstring> @@ -31,83 +30,11 @@ #include "lzip.h" #include "decoder.h" +#include "file_index.h" namespace { -class Member - { - Block dblock_, mblock_; // data block, member block - -public: - Member( const long long dp, const long long ds, - const long long mp, const long long ms ) - : dblock_( dp, ds ), mblock_( mp, ms ) {} - - const Block & dblock() const { return dblock_; } - Block & dblock() { return dblock_; } - const Block & mblock() const { return mblock_; } - Block & mblock() { return mblock_; } - }; - - -int seek_read( const int fd, uint8_t * const buf, const int size, - const long long pos ) - { - if( lseek( fd, pos, SEEK_SET ) == pos ) - return readblock( fd, buf, size ); - return 0; - } - - -class Member_index - { - std::vector< Member > member_vector; - -public: - Member_index( const int infd, const long long isize ) - { - long long pos = isize; // always points to a header or EOF - File_header header; - File_trailer trailer; - while( pos >= min_member_size ) - { - if( seek_read( infd, trailer.data, File_trailer::size(), - pos - File_trailer::size() ) != File_trailer::size() ) - { show_error( "Read error", errno ); std::exit( 1 ); } - const long long member_size = trailer.member_size(); - if( member_size < min_member_size || pos < member_size ) break; - if( seek_read( infd, header.data, File_header::size, - pos - member_size ) != File_header::size ) - { show_error( "Read error", errno ); std::exit( 1 ); } - if( !header.verify_magic() || !header.verify_version() ) break; - pos -= member_size; - member_vector.push_back( Member( 0, trailer.data_size(), - pos, member_size ) ); - } - if( pos != 0 || member_vector.size() == 0 ) - { - show_error( "Member size in input file trailer is corrupt." ); - std::exit( 1 ); - } - std::reverse( member_vector.begin(), member_vector.end() ); - for( unsigned int i = 0; i < member_vector.size() - 1; ++i ) - member_vector[i+1].dblock().pos( member_vector[i].dblock().end() ); - } - - long long data_end() const - { if( member_vector.size() ) return member_vector.back().dblock().end(); - else return 0; } - - const Member & member( const int i ) const { return member_vector[i]; } - const Block & dblock( const int i ) const - { return member_vector[i].dblock(); } - const Block & mblock( const int i ) const - { return member_vector[i].mblock(); } - int members() const { return (int)member_vector.size(); } - }; - - // Returns the number of chars read, or 0 if error. // int parse_long_long( const char * const ptr, long long & value ) @@ -115,7 +42,7 @@ int parse_long_long( const char * const ptr, long long & value ) char * tail; errno = 0; value = strtoll( ptr, &tail, 0 ); - if( tail == ptr || errno ) return 0; + if( tail == ptr || errno || value < 0 ) return 0; int c = tail - ptr; if( ptr[c] ) @@ -141,7 +68,7 @@ int parse_long_long( const char * const ptr, long long & value ) if( ptr[c] == 'B' ) ++c; for( int i = 0; i < exponent; ++i ) { - if( LLONG_MAX / factor >= llabs( value ) ) value *= factor; + if( INT64_MAX / factor >= value ) value *= factor; else return 0; } } @@ -156,17 +83,17 @@ void parse_range( const char * const ptr, Block & range ) { long long value = 0; int c = parse_long_long( ptr, value ); // pos - if( c && value >= 0 && value < LLONG_MAX && + if( c && value >= 0 && value < INT64_MAX && ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) { range.pos( value ); - if( ptr[c] == 0 ) { range.size( LLONG_MAX - value ); return; } + if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; } const bool issize = ( ptr[c] == ',' ); c = parse_long_long( ptr + c + 1, value ); // size if( c && value > 0 && ( issize || value > range.pos() ) ) { if( !issize ) value -= range.pos(); - if( LLONG_MAX - range.pos() >= value ) { range.size( value ); return; } + if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; } } } show_error( "Bad decompression range.", 0, true ); @@ -182,132 +109,138 @@ bool safe_seek( const int fd, const long long pos ) int decompress_member( const int infd, const int outfd, - const Pretty_print & pp, const Member & member, - const long long outskip, const long long outend ) + const Pretty_print & pp, + const unsigned long long mpos, + const unsigned long long outskip, + const unsigned long long outend ) { - int retval = 0; - try { Range_decoder rdec( infd ); File_header header; - int size; - for( size = 0; size < File_header::size && !rdec.finished(); ++size ) - header.data[size] = rdec.get_byte(); + rdec.read_data( header.data, File_header::size ); if( rdec.finished() ) // End Of File - { pp( "Error reading member header" ); retval = 1; } + { pp( "Error reading member header" ); return 1; } if( !header.verify_magic() ) - { pp( "Bad magic number (file not in lzip format)" ); retval = 2; } + { pp( "Bad magic number (file not in lzip format)" ); return 2; } if( !header.verify_version() ) { - if( verbosity >= 0 ) + if( pp.verbosity() >= 0 ) { pp(); std::fprintf( stderr, "Version %d member format not supported.\n", header.version() ); } - retval = 2; + return 2; } if( header.dictionary_size() < min_dictionary_size || header.dictionary_size() > max_dictionary_size ) - { pp( "Invalid dictionary size in member header" ); retval = 2; } + { pp( "Invalid dictionary size in member header" ); return 2; } - if( pp.verbosity() >= 2 ) - { - pp(); - std::fprintf( stderr, "version %d, dictionary size %7sB. ", - header.version(), - format_num( header.dictionary_size(), 9999, -1 ) ); - } - LZ_decoder decoder( header, rdec, outfd, outskip, outend ); + if( pp.verbosity() >= 2 ) { pp(); show_header( header ); } + LZ_decoder decoder( header, rdec, outfd, outskip, outend ); const int result = decoder.decode_member( pp ); if( result != 0 ) { - if( verbosity >= 0 && result <= 2 ) + if( pp.verbosity() >= 0 && result <= 2 ) { pp(); if( result == 2 ) - std::fprintf( stderr, "File ends unexpectedly at pos %lld\n", - member.mblock().pos() + rdec.member_position() ); + std::fprintf( stderr, "File ends unexpectedly at pos %llu\n", + mpos + rdec.member_position() ); else - std::fprintf( stderr, "Decoder error at pos %lld\n", - member.mblock().pos() + rdec.member_position() ); + std::fprintf( stderr, "Decoder error at pos %llu\n", + mpos + rdec.member_position() ); } - retval = 2; + return 2; } if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" ); } catch( std::bad_alloc ) { pp( "Not enough memory. Find a machine with more memory" ); - retval = 1; + return 1; } - catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } - return retval; + catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; } + return 0; } -} // end namespace - -int list_file( const std::string & input_filename ) +int list_file( const std::string & input_filename, const Pretty_print & pp ) { struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - const long long isize = lseek( infd, 0, SEEK_END ); - if( isize < 0 ) - { show_error( "Input file is not seekable", errno ); return 1; } - if( isize < min_member_size ) - { show_error( "Input file is too short." ); return 2; } - Member_index member_index( infd, isize ); + File_index file_index( infd ); + close( infd ); + if( file_index.retval() != 0 ) + { show_error( file_index.error().c_str() ); return file_index.retval(); } - if( verbosity >= 0 ) + if( pp.verbosity() >= 0 ) { - if( verbosity >= 1 ) + const unsigned long long data_size = file_index.data_end(); + const unsigned long long file_size = file_index.file_end(); + pp( 0, stdout ); + if( data_size > 0 && file_size > 0 ) + std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + (double)data_size / file_size, + ( 8.0 * file_size ) / data_size, + 100.0 * ( 1.0 - ( (double)file_size / data_size ) ) ); + std::printf( "decompressed size %9llu, compressed size %8llu.\n", + data_size, file_size ); + + if( pp.verbosity() >= 1 && file_index.members() > 1 ) { - std::printf( "Total members in file = %d.\n", member_index.members() ); - for( int i = 0; i < member_index.members(); ++i ) - { - const Block & db = member_index.dblock( i ); - const Block & mb = member_index.mblock( i ); - std::printf( "Member %3d data pos %9lld data size %7lld " - "member pos %9lld member size %7lld.\n", i, - db.pos(), db.size(), mb.pos(), mb.size() ); - } + std::printf( "Total members in file = %d.\n", file_index.members() ); + if( pp.verbosity() >= 2 ) + for( int i = 0; i < file_index.members(); ++i ) + { + const Block & db = file_index.dblock( i ); + const Block & mb = file_index.mblock( i ); + std::printf( "Member %3d data pos %9llu data size %7llu " + "member pos %9llu member size %7llu.\n", i + 1, + db.pos(), db.size(), mb.pos(), mb.size() ); + } std::printf( "\n" ); } - - const long long data_size = member_index.data_end(); - if( data_size > 0 && isize > 0 ) - std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.\n", - (double)data_size / isize, - ( 8.0 * isize ) / data_size, - 100.0 * ( 1.0 - ( (double)isize / data_size ) ) ); - std::printf( "decompressed size %9lld, compressed size %8lld.\n", - data_size, isize ); } return 0; } +} // end namespace + + +int list_files( const std::vector< std::string > & filenames, + const int verbosity ) + { + Pretty_print pp( filenames, verbosity ); + int retval = 0; + for( unsigned i = 0; i < filenames.size(); ++i ) + { + pp.set_name( filenames[i] ); + const int tmp = list_file( filenames[i], pp ); + if( tmp > retval ) retval = tmp; + } + return retval; + } + int range_decompress( const std::string & input_filename, const std::string & output_filename, - const std::string & range_string, - const bool to_stdout, const bool force ) + const std::string & range_string, const int verbosity, + const bool force, const bool to_stdout ) { Block range( 0, 0 ); parse_range( range_string.c_str(), range ); struct stat in_stats; const int infd = open_instream( input_filename, &in_stats, true, true ); if( infd < 0 ) return 1; - const long long isize = lseek( infd, 0, SEEK_END ); - if( isize < 0 ) - { show_error( "Input file is not seekable", errno ); return 1; } - if( isize < min_member_size ) - { show_error( "Input file is too short." ); return 2; } - - Member_index member_index( infd, isize ); - if( range.end() > member_index.data_end() ) - range.size( std::max( 0LL, member_index.data_end() - range.pos() ) ); + + File_index file_index( infd ); + if( file_index.retval() != 0 ) + { show_error( file_index.error().c_str() ); return file_index.retval(); } + + if( range.end() > file_index.data_end() ) + range.size( std::max( 0LL, file_index.data_end() - range.pos() ) ); if( range.size() <= 0 ) { if( verbosity >= 1 ) show_error( "Nothing to do." ); return 0; } @@ -315,7 +248,7 @@ int range_decompress( const std::string & input_filename, { if( verbosity >= 2 ) std::fprintf( stderr, "Decompressed file size = %sB\n", - format_num( member_index.data_end() ) ); + format_num( file_index.data_end() ) ); std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) ); std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) ); std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) ); @@ -329,23 +262,23 @@ int range_decompress( const std::string & input_filename, if( outfd < 0 ) return 1; } Pretty_print pp( input_filename, 0 ); int retval = 0; - for( int i = 0; i < member_index.members(); ++i ) + for( int i = 0; i < file_index.members(); ++i ) { - const Block & db = member_index.dblock( i ); + const Block & db = file_index.dblock( i ); if( range.overlaps( db ) ) { if( verbosity >= 3 ) std::fprintf( stderr, "Decompressing member %3d\n", i ); const long long outskip = std::max( 0LL, range.pos() - db.pos() ); const long long outend = std::min( db.end(), range.end() - db.pos() ); - if( !safe_seek( infd, member_index.mblock( i ).pos() ) ) - { retval = 1; break; } - retval = decompress_member( infd, outfd, pp, member_index.member( i ), - outskip, outend ); + const long long mpos = file_index.mblock( i ).pos(); + if( !safe_seek( infd, mpos ) ) { retval = 1; break; } + retval = decompress_member( infd, outfd, pp, mpos, outskip, outend ); if( retval ) cleanup_and_fail( output_filename, outfd, retval ); pp.reset(); } } + close( infd ); if( close( outfd ) != 0 ) { show_error( "Error closing output file", errno ); |