diff options
Diffstat (limited to '')
-rw-r--r-- | decoder.h | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/decoder.h b/decoder.h new file mode 100644 index 0000000..ab921dd --- /dev/null +++ b/decoder.h @@ -0,0 +1,272 @@ +/* Lzip - A data compressor based on the LZMA algorithm + Copyright (C) 2008, 2009 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +class Input_buffer + { + enum { buffer_size = 65536 }; + + uint8_t * const buffer; + int pos; + int stream_pos; // when reached, a new block must be read + const int ides_; + bool at_stream_end; + + bool read_block(); + +public: + Input_buffer( const int ides ) + : + buffer( new uint8_t[buffer_size] ), + pos( 0 ), + stream_pos( 0 ), + ides_( ides ), + at_stream_end( false ) {} + + ~Input_buffer() { delete[] buffer; } + + bool finished() const throw() { return at_stream_end && pos >= stream_pos; } + + uint8_t read_byte() + { + if( pos >= stream_pos && !read_block() ) return 0; + return buffer[pos++]; + } + }; + + +class Range_decoder + { + mutable long long member_pos; + uint32_t code; + uint32_t range; + Input_buffer & ibuf; + +public: + Range_decoder( const int header_size, Input_buffer & buf ) + : + member_pos( header_size ), + code( 0 ), + range( 0xFFFFFFFF ), + ibuf( buf ) + { for( int i = 0; i < 5; ++i ) code = (code << 8) | read_byte(); } + + uint8_t read_byte() const + { + ++member_pos; + return ibuf.read_byte(); + } + + long long member_position() const throw() { return member_pos; } + bool finished() const throw() { return ibuf.finished(); } + + int decode( const int num_bits ) + { + int symbol = 0; + for( int i = num_bits - 1; i >= 0; --i ) + { + range >>= 1; + symbol <<= 1; + if( code >= range ) + { code -= range; symbol |= 1; } + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | read_byte(); } + } + return symbol; + } + + int decode_bit( Bit_model & bm ) + { + int symbol; + const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; + if( code < bound ) + { + range = bound; + bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits; + symbol = 0; + } + else + { + range -= bound; + code -= bound; + bm.probability -= bm.probability >> bit_model_move_bits; + symbol = 1; + } + if( range <= 0x00FFFFFF ) + { range <<= 8; code = (code << 8) | read_byte(); } + return symbol; + } + + int decode_tree( Bit_model bm[], const int num_bits ) + { + int model = 1; + for( int i = num_bits; i > 0; --i ) + model = ( model << 1 ) | decode_bit( bm[model-1] ); + return model - (1 << num_bits); + } + + int decode_tree_reversed( Bit_model bm[], const int num_bits ) + { + int model = 1; + int symbol = 0; + for( int i = 1; i < (1 << num_bits); i <<= 1 ) + { + const int bit = decode_bit( bm[model-1] ); + model = ( model << 1 ) | bit; + if( bit ) symbol |= i; + } + return symbol; + } + + int decode_matched( Bit_model bm[], const int match_byte ) + { + int symbol = 1; + for( int i = 7; i >= 0; --i ) + { + const int match_bit = ( match_byte >> i ) & 1; + const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] ); + symbol = ( symbol << 1 ) | bit; + if( match_bit != bit ) break; + } + while( symbol < 0x100 ) + symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] ); + return symbol & 0xFF; + } + }; + + +class Len_decoder + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + +public: + int decode( Range_decoder & range_decoder, const int pos_state ) + { + if( range_decoder.decode_bit( choice1 ) == 0 ) + return range_decoder.decode_tree( bm_low[pos_state], len_low_bits ); + if( range_decoder.decode_bit( choice2 ) == 0 ) + return len_low_symbols + + range_decoder.decode_tree( bm_mid[pos_state], len_mid_bits ); + return len_low_symbols + len_mid_symbols + + range_decoder.decode_tree( bm_high, len_high_bits ); + } + }; + + +class Literal_decoder + { + Bit_model bm_literal[1<<literal_context_bits][0x300]; + + int state( const int prev_byte ) const throw() + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + +public: + uint8_t decode( Range_decoder & range_decoder, const int prev_byte ) + { return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); } + + uint8_t decode_matched( Range_decoder & range_decoder, + const int prev_byte, const int match_byte ) + { return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); } + }; + + +class LZ_decoder + { + long long partial_data_pos; + const int format_version; + const int buffer_size; + uint8_t * const buffer; + int pos; + uint32_t crc_; + const int odes_; + bool member_finished; + + Bit_model bm_match[State::states][pos_states]; + Bit_model bm_rep[State::states]; + Bit_model bm_rep0[State::states]; + Bit_model bm_rep1[State::states]; + Bit_model bm_rep2[State::states]; + Bit_model bm_len[State::states][pos_states]; + Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits]; + Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_align[dis_align_size]; + + Range_decoder range_decoder; + Len_decoder len_decoder; + Len_decoder rep_match_len_decoder; + Literal_decoder literal_decoder; + + uint8_t get_byte( const int distance ) const throw() + { + int newpos = pos - distance - 1; + if( newpos < 0 ) newpos += buffer_size; + return buffer[newpos]; + } + + void put_byte( const uint8_t b ) + { + crc32.update( crc_, b ); + buffer[pos] = b; + if( ++pos >= buffer_size ) flush_data(); + } + + bool copy_block( const int distance, int len ) + { + if( distance < 0 || distance >= buffer_size || + len <= 0 || len > max_match_len ) return false; + int newpos = pos - distance - 1; + if( newpos < 0 ) newpos += buffer_size; + for( ; len > 0 ; --len ) + { + crc32.update( crc_, buffer[newpos] ); + buffer[pos] = buffer[newpos]; + if( ++pos >= buffer_size ) flush_data(); + if( ++newpos >= buffer_size ) newpos = 0; + } + return true; + } + + void flush_data(); + bool verify_trailer( const Pretty_print & pp ) const; + +public: + LZ_decoder( const File_header & header, Input_buffer & ibuf, const int odes ) + : + partial_data_pos( 0 ), + format_version( header.version ), + buffer_size( header.dictionary_size() ), + buffer( new uint8_t[buffer_size] ), + pos( 0 ), + crc_( 0xFFFFFFFF ), + odes_( odes ), + member_finished( false ), + range_decoder( sizeof header, ibuf ), + literal_decoder() {} + + ~LZ_decoder() { delete[] buffer; } + + uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; } + int decode_member( const Pretty_print & pp ); + + long long member_position() const throw() + { return range_decoder.member_position(); } + long long data_position() const throw() + { return partial_data_pos + pos; } + }; |