diff options
Diffstat (limited to '')
-rw-r--r-- | decoder.h | 282 |
1 files changed, 282 insertions, 0 deletions
diff --git a/decoder.h b/decoder.h new file mode 100644 index 0000000..785f310 --- /dev/null +++ b/decoder.h @@ -0,0 +1,282 @@ +/* Lzlib - A compression library for lzip files + Copyright (C) 2009 Antonio Diaz Diaz. + + This library is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + As a special exception, you may use this file as part of a free + software library without restriction. Specifically, if other files + instantiate templates or use macros or inline functions from this + file, or you compile this file and link it with other files to + produce an executable, this file does not by itself cause the + resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. +*/ + +const int min_available_bytes = 8 + sizeof( File_trailer ); + +class Input_buffer : public Circular_buffer + { + bool at_stream_end_; + +public: + Input_buffer() + : + Circular_buffer( 65536 + min_available_bytes ), + at_stream_end_( false ) {} + + bool at_stream_end() const throw() { return at_stream_end_; } + void finish() throw() { at_stream_end_ = true; } + bool finished() const throw() { return at_stream_end_ && !used_bytes(); } + void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); } + + int write_data( uint8_t * const in_buffer, const int in_size ) throw() + { + if( at_stream_end_ || in_size <= 0 ) return 0; + return Circular_buffer::write_data( in_buffer, in_size ); + } + }; + + +class Range_decoder + { + mutable long long member_pos; + uint32_t code; + uint32_t range; + Input_buffer & ibuf; + +public: + Range_decoder( const int header_size, Input_buffer & buf ) + : + member_pos( header_size ), + code( 0 ), + range( 0xFFFFFFFF ), + ibuf( buf ) + { for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); } + + uint8_t get_byte() const + { + ++member_pos; + return ibuf.get_byte(); + } + + bool at_stream_end() const throw() { return ibuf.at_stream_end(); } + int available_bytes() const throw() { return ibuf.used_bytes(); } + bool finished() const throw() { return ibuf.finished(); } + long long member_position() const throw() { return member_pos; } + + int decode( const int num_bits ) + { + int symbol = 0; + for( int i = num_bits - 1; i >= 0; --i ) + { + range >>= 1; + symbol <<= 1; + if( code >= range ) + { code -= range; symbol |= 1; } + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | get_byte(); } + } + return symbol; + } + + int decode_bit( Bit_model & bm ) + { + int symbol; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; + symbol = 0; + } + else + { + range -= bound; + code -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + symbol = 1; + } + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | get_byte(); } + return symbol; + } + + int decode_tree( Bit_model bm[], const int num_bits ) + { + int model = 1; + for( int i = num_bits; i > 0; --i ) + model = ( model << 1 ) | decode_bit( bm[model-1] ); + return model - (1 << num_bits); + } + + int decode_tree_reversed( Bit_model bm[], const int num_bits ) + { + int model = 1; + int symbol = 0; + for( int i = 1; i < (1 << num_bits); i <<= 1 ) + { + const int bit = decode_bit( bm[model-1] ); + model = ( model << 1 ) | bit; + if( bit ) symbol |= i; + } + return symbol; + } + + int decode_matched( Bit_model bm[], const int match_byte ) + { + int symbol = 1; + for( int i = 7; i >= 0; --i ) + { + const int match_bit = ( match_byte >> i ) & 1; + const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] ); + symbol = ( symbol << 1 ) | bit; + if( match_bit != bit ) break; + } + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] ); + return symbol & 0xFF; + } + }; + + +class Len_decoder + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + +public: + int decode( Range_decoder & range_decoder, const int pos_state ) + { + if( range_decoder.decode_bit( choice1 ) == 0 ) + return range_decoder.decode_tree( bm_low[pos_state], len_low_bits ); + if( range_decoder.decode_bit( choice2 ) == 0 ) + return len_low_symbols + + range_decoder.decode_tree( bm_mid[pos_state], len_mid_bits ); + return len_low_symbols + len_mid_symbols + + range_decoder.decode_tree( bm_high, len_high_bits ); + } + }; + + +class Literal_decoder + { + Bit_model bm_literal[1<<literal_context_bits][0x300]; + + int state( const int prev_byte ) const throw() + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + +public: + uint8_t decode( Range_decoder & range_decoder, const int prev_byte ) + { return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); } + + uint8_t decode_matched( Range_decoder & range_decoder, + const int prev_byte, const int match_byte ) + { return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); } + }; + + +class LZ_decoder : public Circular_buffer + { + long long partial_data_pos; + const int format_version; + const int dictionary_size; + uint32_t crc_; + bool member_finished_; + unsigned int rep0; + unsigned int rep1; + unsigned int rep2; + unsigned int rep3; + State state; + uint8_t prev_byte; + + Bit_model bm_match[State::states][pos_states]; + Bit_model bm_rep[State::states]; + Bit_model bm_rep0[State::states]; + Bit_model bm_rep1[State::states]; + Bit_model bm_rep2[State::states]; + Bit_model bm_len[State::states][pos_states]; + Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits]; + Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_align[dis_align_size]; + + Range_decoder range_decoder; + Len_decoder len_decoder; + Len_decoder rep_match_len_decoder; + Literal_decoder literal_decoder; + +// using Circular_buffer::get_byte; + uint8_t get_byte( const int distance ) const throw() + { + int i = put - distance - 1; + if( i < 0 ) i += buffer_size; + return buffer[i]; + } + + void put_byte( const uint8_t b ) + { + crc32.update( crc_, b ); + buffer[put] = b; + if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; } + } + + bool copy_block( const int distance, int len ) + { + if( distance < 0 || distance >= dictionary_size || + len <= 0 || len > max_match_len ) return false; + int i = put - distance - 1; + if( i < 0 ) i += buffer_size; + for( ; len > 0 ; --len ) + { + crc32.update( crc_, buffer[i] ); + buffer[put] = buffer[i]; + if( ++put >= buffer_size ) { partial_data_pos += put; put = 0; } + if( ++i >= buffer_size ) i = 0; + } + return true; + } + + bool verify_trailer(); + +public: + LZ_decoder( const File_header & header, Input_buffer & ibuf ) + : + Circular_buffer( std::max( 65536, header.dictionary_size() ) + max_match_len ), + partial_data_pos( 0 ), + format_version( header.version ), + dictionary_size( header.dictionary_size() ), + crc_( 0xFFFFFFFF ), + member_finished_( false ), + rep0( 0 ), + rep1( 0 ), + rep2( 0 ), + rep3( 0 ), + prev_byte( 0 ), + range_decoder( sizeof header, ibuf ), + literal_decoder() {} + + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } + int decode_member(); + bool member_finished() const throw() + { return ( member_finished_ && !used_bytes() ); } + + long long member_position() const throw() + { return range_decoder.member_position(); } + long long data_position() const throw() + { return partial_data_pos + put; } + }; |