diff options
Diffstat (limited to 'decoder.h')
-rw-r--r-- | decoder.h | 124 |
1 files changed, 66 insertions, 58 deletions
@@ -1,5 +1,5 @@ /* Lziprecover - Data recovery tool for lzipped files - Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ class Range_decoder { enum { buffer_size = 16384 }; - long long partial_member_pos; + unsigned long long partial_member_pos; uint8_t * const buffer; // input buffer int pos; // current pos in buffer int stream_pos; // when reached, a new block must be read @@ -42,22 +42,23 @@ public: code( 0 ), range( 0xFFFFFFFFU ), infd( ifd ), - at_stream_end( false ) {} + at_stream_end( false ) + {} ~Range_decoder() { delete[] buffer; } bool code_is_zero() const { return ( code == 0 ); } bool finished() { return pos >= stream_pos && !read_block(); } - long long member_position() const { return partial_member_pos + pos; } + unsigned long long member_position() const { return partial_member_pos + pos; } void reset_member_position() { partial_member_pos = -pos; } uint8_t get_byte() { - if( finished() ) return 0x55; // make code != 0 + if( finished() ) return 0xAA; // make code != 0 return buffer[pos++]; } - int read( uint8_t * const outbuf, const int size ) + int read_data( uint8_t * const outbuf, const int size ) { int rest = size; while( rest > 0 && !finished() ) @@ -67,14 +68,14 @@ public: pos += rd; rest -= rd; } - return ( rest > 0 ) ? size - rest : size; + return size - rest; } void load() { code = 0; - range = 0xFFFFFFFFU; for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + range = 0xFFFFFFFFU; } void normalize() @@ -88,17 +89,14 @@ public: int symbol = 0; for( int i = num_bits; i > 0; --i ) { - symbol <<= 1; - if( range <= 0x00FFFFFFU ) - { - range <<= 7; code = (code << 8) | get_byte(); - if( code >= range ) { code -= range; symbol |= 1; } - } - else - { - range >>= 1; - if( code >= range ) { code -= range; symbol |= 1; } - } + normalize(); + range >>= 1; +// symbol <<= 1; +// if( code >= range ) { code -= range; symbol |= 1; } + const uint32_t mask = 0U - (code < range); + code -= range; + code += range & mask; + symbol = (symbol << 1) + (mask + 1); } return symbol; } @@ -130,36 +128,63 @@ public: return model - (1 << num_bits); } + int decode_tree6( Bit_model bm[] ) + { + int model = 1; + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + model = ( model << 1 ) | decode_bit( bm[model] ); + return model - (1 << 6); + } + int decode_tree_reversed( Bit_model bm[], const int num_bits ) { int model = 1; int symbol = 0; for( int i = 0; i < num_bits; ++i ) { - const int bit = decode_bit( bm[model] ); + const bool bit = decode_bit( bm[model] ); model <<= 1; - if( bit ) { model |= 1; symbol |= (1 << i); } + if( bit ) { ++model; symbol |= (1 << i); } } return symbol; } - int decode_matched( Bit_model bm[], const int match_byte ) + int decode_tree_reversed4( Bit_model bm[] ) + { + int model = 1; + int symbol = 0; + int bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= bit; + bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 1); + bit = decode_bit( bm[model] ); + model = (model << 1) + bit; symbol |= (bit << 2); + if( decode_bit( bm[model] ) ) symbol |= 8; + return symbol; + } + + int decode_matched( Bit_model bm[], int match_byte ) { Bit_model * const bm1 = bm + 0x100; int symbol = 1; for( int i = 7; i >= 0; --i ) { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); - symbol = ( symbol << 1 ) | bit; - if( match_bit != bit ) + match_byte <<= 1; + const int match_bit = match_byte & 0x100; + const int bit = decode_bit( bm1[match_bit+symbol] ); + symbol = ( symbol << 1 ) + bit; + if( match_bit != bit << 8 ) { - while( --i >= 0 ) - symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) + decode_bit( bm[symbol] ); break; } } - return symbol & 0xFF; + return symbol - 0x100; } }; @@ -186,29 +211,12 @@ public: }; -class Literal_decoder - { - Bit_model bm_literal[1<<literal_context_bits][0x300]; - - int lstate( const uint8_t prev_byte ) const - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - -public: - uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte ) - { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); } - - uint8_t decode_matched( Range_decoder & range_decoder, - const uint8_t prev_byte, const uint8_t match_byte ) - { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], - match_byte ); } - }; - - class LZ_decoder { - const long long outskip; - const long long outend; - long long partial_data_pos; + const unsigned long long outskip; + const unsigned long long outend; + unsigned long long partial_data_pos; + Range_decoder & range_decoder; const int dictionary_size; const int buffer_size; uint8_t * const buffer; // output buffer @@ -217,9 +225,8 @@ class LZ_decoder uint32_t crc_; const int outfd; // output file descriptor const int member_version; - Range_decoder & range_decoder; - long long stream_position() const { return partial_data_pos + stream_pos; } + unsigned long long stream_position() const { return partial_data_pos + stream_pos; } void flush_data(); bool verify_trailer( const Pretty_print & pp ) const; @@ -248,7 +255,7 @@ class LZ_decoder if( i < 0 ) i += buffer_size; if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) ) { - std::memcpy( buffer + pos, buffer + i, len ); + std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap pos += len; } else for( ; len > 0; --len ) @@ -264,11 +271,13 @@ class LZ_decoder public: LZ_decoder( const File_header & header, Range_decoder & rdec, const int ofd, - const long long oskip = 0, const long long oend = LLONG_MAX ) + const unsigned long long oskip = 0, + const unsigned long long oend = -1ULL ) : outskip( oskip ), outend( oend ), partial_data_pos( 0 ), + range_decoder( rdec ), dictionary_size( header.dictionary_size() ), buffer_size( std::max( 65536, dictionary_size ) ), buffer( new uint8_t[buffer_size] ), @@ -276,15 +285,14 @@ public: stream_pos( 0 ), crc_( 0xFFFFFFFFU ), outfd( ofd ), - member_version( header.version() ), - range_decoder( rdec ) + member_version( header.version() ) { buffer[buffer_size-1] = 0; } // prev_byte of first_byte ~LZ_decoder() { delete[] buffer; } - uint32_t crc() const { return crc_ ^ 0xFFFFFFFFU; } + unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; } - long long data_position() const { return partial_data_pos + pos; } + unsigned long long data_position() const { return partial_data_pos + pos; } int decode_member( const Pretty_print & pp ); }; |