diff options
Diffstat (limited to 'range_dec.cc')
-rw-r--r-- | range_dec.cc | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/range_dec.cc b/range_dec.cc new file mode 100644 index 0000000..650d81a --- /dev/null +++ b/range_dec.cc @@ -0,0 +1,359 @@ +/* Lziprecover - Data recovery tool for lzipped files + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "lzip.h" +#include "decoder.h" + + +namespace { + +class Member + { + Block dblock_, mblock_; // data block, member block + +public: + Member( const long long dp, const long long ds, + const long long mp, const long long ms ) + : dblock_( dp, ds ), mblock_( mp, ms ) {} + + const Block & dblock() const throw() { return dblock_; } + Block & dblock() throw() { return dblock_; } + const Block & mblock() const throw() { return mblock_; } + Block & mblock() throw() { return mblock_; } + }; + + +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) throw() + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + + +class Member_index + { + std::vector< Member > member_vector; + +public: + Member_index( const int infd, const long long isize ) + { + long long pos = isize; // always points to a header or EOF + File_header header; + File_trailer trailer; + while( pos >= min_member_size ) + { + if( seek_read( infd, trailer.data, File_trailer::size(), + pos - File_trailer::size() ) != File_trailer::size() ) + { show_error( "Read error", errno ); std::exit( 1 ); } + const long long member_size = trailer.member_size(); + if( member_size < min_member_size || pos < member_size ) break; + if( seek_read( infd, header.data, File_header::size, + pos - member_size ) != File_header::size ) + { show_error( "Read error", errno ); std::exit( 1 ); } + if( !header.verify_magic() || !header.verify_version() ) break; + pos -= member_size; + member_vector.push_back( Member( 0, trailer.data_size(), + pos, member_size ) ); + } + if( pos != 0 || member_vector.size() == 0 ) + { + show_error( "Member size in input file trailer is corrupt." ); + std::exit( 1 ); + } + std::reverse( member_vector.begin(), member_vector.end() ); + for( unsigned int i = 0; i < member_vector.size() - 1; ++i ) + member_vector[i+1].dblock().pos( member_vector[i].dblock().end() ); + } + + long long data_end() const throw() + { if( member_vector.size() ) return member_vector.back().dblock().end(); + else return 0; } + + const Member & member( const int i ) const throw() + { return member_vector[i]; } + const Block & dblock( const int i ) const throw() + { return member_vector[i].dblock(); } + const Block & mblock( const int i ) const throw() + { return member_vector[i].mblock(); } + int members() const throw() { return (int)member_vector.size(); } + }; + + +// Returns the number of chars read, or 0 if error. +// +int parse_long_long( const char * const ptr, long long & value ) throw() + { + char * tail; + int c = 0; + errno = 0; + value = strtoll( ptr, &tail, 0 ); + if( tail == ptr || errno ) return 0; + c = tail - ptr; + + if( ptr[c] ) + { + const int factor = ( ptr[c+1] == 'i' ) ? 1024 : 1000; + int exponent = 0; + switch( ptr[c] ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + } + if( exponent > 0 ) + { + ++c; + if( ptr[c] == 'i' ) { ++c; if( value ) format_num( 0, 0, -1 ); } + if( ptr[c] == 'B' ) ++c; + for( int i = 0; i < exponent; ++i ) + { + if( LLONG_MAX / factor >= llabs( value ) ) value *= factor; + else return 0; + } + } + } + return c; + } + + +// Recognized formats: <begin> <begin>-<end> <begin>,<size> +// +void parse_range( const char * const ptr, Block & range ) throw() + { + long long value = 0; + int c = parse_long_long( ptr, value ); // pos + if( c && value >= 0 && value < LLONG_MAX && + ( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) ) + { + range.pos( value ); + if( ptr[c] == 0 ) { range.size( LLONG_MAX - value ); return; } + const bool issize = ( ptr[c] == ',' ); + c = parse_long_long( ptr + c + 1, value ); // size + if( c && value > 0 && ( issize || value > range.pos() ) ) + { + if( !issize ) value -= range.pos(); + if( LLONG_MAX - range.pos() >= value ) { range.size( value ); return; } + } + } + show_error( "Bad decompression range.", 0, true ); + std::exit( 1 ); + } + + +bool safe_seek( const int fd, const long long pos ) throw() + { + if( lseek( fd, pos, SEEK_SET ) == pos ) return true; + show_error( "Seek error", errno ); return false; + } + + +int decompress_member( const int infd, const int outfd, + const Pretty_print & pp, const Member & member, + const long long outskip, const long long outend ) + { + int retval = 0; + + try { + Range_decoder rdec( infd ); + File_header header; + int size; + for( size = 0; size < File_header::size && !rdec.finished(); ++size ) + header.data[size] = rdec.get_byte(); + if( rdec.finished() ) // End Of File + { pp( "Error reading member header" ); retval = 1; } + if( !header.verify_magic() ) + { pp( "Bad magic number (file not in lzip format)" ); retval = 2; } + if( !header.verify_version() ) + { + if( verbosity >= 0 ) + { pp(); + std::fprintf( stderr, "Version %d member format not supported.\n", + header.version() ); } + retval = 2; + } + if( header.dictionary_size() < min_dictionary_size || + header.dictionary_size() > max_dictionary_size ) + { pp( "Invalid dictionary size in member header" ); retval = 2; } + + if( pp.verbosity() >= 2 ) + { + pp(); + std::fprintf( stderr, "version %d, dictionary size %7sB. ", + header.version(), + format_num( header.dictionary_size(), 9999, -1 ) ); + } + LZ_decoder decoder( header, rdec, outfd, outskip, outend ); + + const int result = decoder.decode_member( pp ); + if( result != 0 ) + { + if( verbosity >= 0 && result <= 2 ) + { + pp(); + if( result == 2 ) + std::fprintf( stderr, "File ends unexpectedly at pos %lld\n", + member.mblock().pos() + rdec.member_position() ); + else + std::fprintf( stderr, "Decoder error at pos %lld\n", + member.mblock().pos() + rdec.member_position() ); + } + retval = 2; + } + if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" ); + } + catch( std::bad_alloc ) + { + pp( "Not enough memory. Find a machine with more memory" ); + retval = 1; + } + catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; } + return retval; + } + +} // end namespace + + +int list_file( const std::string & input_filename ) + { + struct stat in_stats; + const int infd = open_instream( input_filename, &in_stats, true, true ); + if( infd < 0 ) return 1; + const long long isize = lseek( infd, 0, SEEK_END ); + if( isize < 0 ) + { show_error( "Input file is not seekable", errno ); return 1; } + if( isize < min_member_size ) + { show_error( "Input file is too short." ); return 2; } + + Member_index member_index( infd, isize ); + + if( verbosity >= 0 ) + { + if( verbosity >= 1 ) + { + std::printf( "Total members in file = %d.\n", member_index.members() ); + for( int i = 0; i < member_index.members(); ++i ) + { + const Block & db = member_index.dblock( i ); + const Block & mb = member_index.mblock( i ); + std::printf( "Member %3d data pos %9lld data size %7lld " + "member pos %9lld member size %7lld.\n", i, + db.pos(), db.size(), mb.pos(), mb.size() ); + } + std::printf( "\n" ); + } + + const long long data_size = member_index.data_end(); + if( data_size > 0 && isize > 0 ) + std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.\n", + (double)data_size / isize, + ( 8.0 * isize ) / data_size, + 100.0 * ( 1.0 - ( (double)isize / data_size ) ) ); + std::printf( "decompressed size %9lld, compressed size %8lld.\n", + data_size, isize ); + } + return 0; + } + + +int range_decompress( const std::string & input_filename, + const std::string & output_filename, + const std::string & range_string, + const bool to_stdout, const bool force ) + { + Block range( 0, 0 ); + parse_range( range_string.c_str(), range ); + struct stat in_stats; + const int infd = open_instream( input_filename, &in_stats, true, true ); + if( infd < 0 ) return 1; + const long long isize = lseek( infd, 0, SEEK_END ); + if( isize < 0 ) + { show_error( "Input file is not seekable", errno ); return 1; } + if( isize < min_member_size ) + { show_error( "Input file is too short." ); return 2; } + + Member_index member_index( infd, isize ); + if( range.end() > member_index.data_end() ) + range.size( std::max( 0LL, member_index.data_end() - range.pos() ) ); + if( range.size() <= 0 ) + { if( verbosity >= 1 ) show_error( "Nothing to do." ); return 0; } + + if( verbosity >= 1 ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "Decompressed file size = %sB\n", + format_num( member_index.data_end() ) ); + std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) ); + std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) ); + std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) ); + } + + int outfd = -1; + if( to_stdout || !output_filename.size() ) + outfd = STDOUT_FILENO; + else + { outfd = open_outstream_rw( output_filename, force ); + if( outfd < 0 ) return 1; } + Pretty_print pp( input_filename, 0 ); + int retval = 0; + for( int i = 0; i < member_index.members(); ++i ) + { + const Block & db = member_index.dblock( i ); + if( range.overlaps( db ) ) + { + if( verbosity >= 3 ) + std::fprintf( stderr, "Decompressing member %3d\n", i ); + const long long outskip = std::max( 0LL, range.pos() - db.pos() ); + const long long outend = std::min( db.end(), range.end() - db.pos() ); + if( !safe_seek( infd, member_index.mblock( i ).pos() ) ) + { retval = 1; break; } + retval = decompress_member( infd, outfd, pp, member_index.member( i ), + outskip, outend ); + if( retval ) cleanup_and_fail( output_filename, outfd, retval ); + pp.reset(); + } + } + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno ); + cleanup_and_fail( output_filename, -1, 1 ); + } + if( verbosity >= 2 ) + std::fprintf( stderr, "Byte range decompressed successfully.\n" ); + return retval; + } |