summaryrefslogtreecommitdiffstats
path: root/decoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'decoder.h')
-rw-r--r--decoder.h272
1 files changed, 272 insertions, 0 deletions
diff --git a/decoder.h b/decoder.h
new file mode 100644
index 0000000..ab921dd
--- /dev/null
+++ b/decoder.h
@@ -0,0 +1,272 @@
+/* Lzip - A data compressor based on the LZMA algorithm
+ Copyright (C) 2008, 2009 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+class Input_buffer
+ {
+ enum { buffer_size = 65536 };
+
+ uint8_t * const buffer;
+ int pos;
+ int stream_pos; // when reached, a new block must be read
+ const int ides_;
+ bool at_stream_end;
+
+ bool read_block();
+
+public:
+ Input_buffer( const int ides )
+ :
+ buffer( new uint8_t[buffer_size] ),
+ pos( 0 ),
+ stream_pos( 0 ),
+ ides_( ides ),
+ at_stream_end( false ) {}
+
+ ~Input_buffer() { delete[] buffer; }
+
+ bool finished() const throw() { return at_stream_end && pos >= stream_pos; }
+
+ uint8_t read_byte()
+ {
+ if( pos >= stream_pos && !read_block() ) return 0;
+ return buffer[pos++];
+ }
+ };
+
+
+class Range_decoder
+ {
+ mutable long long member_pos;
+ uint32_t code;
+ uint32_t range;
+ Input_buffer & ibuf;
+
+public:
+ Range_decoder( const int header_size, Input_buffer & buf )
+ :
+ member_pos( header_size ),
+ code( 0 ),
+ range( 0xFFFFFFFF ),
+ ibuf( buf )
+ { for( int i = 0; i < 5; ++i ) code = (code << 8) | read_byte(); }
+
+ uint8_t read_byte() const
+ {
+ ++member_pos;
+ return ibuf.read_byte();
+ }
+
+ long long member_position() const throw() { return member_pos; }
+ bool finished() const throw() { return ibuf.finished(); }
+
+ int decode( const int num_bits )
+ {
+ int symbol = 0;
+ for( int i = num_bits - 1; i >= 0; --i )
+ {
+ range >>= 1;
+ symbol <<= 1;
+ if( code >= range )
+ { code -= range; symbol |= 1; }
+ if( range <= 0x00FFFFFF )
+ { range <<= 8; code = (code << 8) | read_byte(); }
+ }
+ return symbol;
+ }
+
+ int decode_bit( Bit_model & bm )
+ {
+ int symbol;
+ const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
+ if( code < bound )
+ {
+ range = bound;
+ bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
+ symbol = 0;
+ }
+ else
+ {
+ range -= bound;
+ code -= bound;
+ bm.probability -= bm.probability >> bit_model_move_bits;
+ symbol = 1;
+ }
+ if( range <= 0x00FFFFFF )
+ { range <<= 8; code = (code << 8) | read_byte(); }
+ return symbol;
+ }
+
+ int decode_tree( Bit_model bm[], const int num_bits )
+ {
+ int model = 1;
+ for( int i = num_bits; i > 0; --i )
+ model = ( model << 1 ) | decode_bit( bm[model-1] );
+ return model - (1 << num_bits);
+ }
+
+ int decode_tree_reversed( Bit_model bm[], const int num_bits )
+ {
+ int model = 1;
+ int symbol = 0;
+ for( int i = 1; i < (1 << num_bits); i <<= 1 )
+ {
+ const int bit = decode_bit( bm[model-1] );
+ model = ( model << 1 ) | bit;
+ if( bit ) symbol |= i;
+ }
+ return symbol;
+ }
+
+ int decode_matched( Bit_model bm[], const int match_byte )
+ {
+ int symbol = 1;
+ for( int i = 7; i >= 0; --i )
+ {
+ const int match_bit = ( match_byte >> i ) & 1;
+ const int bit = decode_bit( bm[(match_bit<<8)+symbol+0xFF] );
+ symbol = ( symbol << 1 ) | bit;
+ if( match_bit != bit ) break;
+ }
+ while( symbol < 0x100 )
+ symbol = ( symbol << 1 ) | decode_bit( bm[symbol-1] );
+ return symbol & 0xFF;
+ }
+ };
+
+
+class Len_decoder
+ {
+ Bit_model choice1;
+ Bit_model choice2;
+ Bit_model bm_low[pos_states][len_low_symbols];
+ Bit_model bm_mid[pos_states][len_mid_symbols];
+ Bit_model bm_high[len_high_symbols];
+
+public:
+ int decode( Range_decoder & range_decoder, const int pos_state )
+ {
+ if( range_decoder.decode_bit( choice1 ) == 0 )
+ return range_decoder.decode_tree( bm_low[pos_state], len_low_bits );
+ if( range_decoder.decode_bit( choice2 ) == 0 )
+ return len_low_symbols +
+ range_decoder.decode_tree( bm_mid[pos_state], len_mid_bits );
+ return len_low_symbols + len_mid_symbols +
+ range_decoder.decode_tree( bm_high, len_high_bits );
+ }
+ };
+
+
+class Literal_decoder
+ {
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
+
+ int state( const int prev_byte ) const throw()
+ { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
+
+public:
+ uint8_t decode( Range_decoder & range_decoder, const int prev_byte )
+ { return range_decoder.decode_tree( bm_literal[state(prev_byte)], 8 ); }
+
+ uint8_t decode_matched( Range_decoder & range_decoder,
+ const int prev_byte, const int match_byte )
+ { return range_decoder.decode_matched( bm_literal[state(prev_byte)], match_byte ); }
+ };
+
+
+class LZ_decoder
+ {
+ long long partial_data_pos;
+ const int format_version;
+ const int buffer_size;
+ uint8_t * const buffer;
+ int pos;
+ uint32_t crc_;
+ const int odes_;
+ bool member_finished;
+
+ Bit_model bm_match[State::states][pos_states];
+ Bit_model bm_rep[State::states];
+ Bit_model bm_rep0[State::states];
+ Bit_model bm_rep1[State::states];
+ Bit_model bm_rep2[State::states];
+ Bit_model bm_len[State::states][pos_states];
+ Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
+ Bit_model bm_dis[modeled_distances-end_dis_model];
+ Bit_model bm_align[dis_align_size];
+
+ Range_decoder range_decoder;
+ Len_decoder len_decoder;
+ Len_decoder rep_match_len_decoder;
+ Literal_decoder literal_decoder;
+
+ uint8_t get_byte( const int distance ) const throw()
+ {
+ int newpos = pos - distance - 1;
+ if( newpos < 0 ) newpos += buffer_size;
+ return buffer[newpos];
+ }
+
+ void put_byte( const uint8_t b )
+ {
+ crc32.update( crc_, b );
+ buffer[pos] = b;
+ if( ++pos >= buffer_size ) flush_data();
+ }
+
+ bool copy_block( const int distance, int len )
+ {
+ if( distance < 0 || distance >= buffer_size ||
+ len <= 0 || len > max_match_len ) return false;
+ int newpos = pos - distance - 1;
+ if( newpos < 0 ) newpos += buffer_size;
+ for( ; len > 0 ; --len )
+ {
+ crc32.update( crc_, buffer[newpos] );
+ buffer[pos] = buffer[newpos];
+ if( ++pos >= buffer_size ) flush_data();
+ if( ++newpos >= buffer_size ) newpos = 0;
+ }
+ return true;
+ }
+
+ void flush_data();
+ bool verify_trailer( const Pretty_print & pp ) const;
+
+public:
+ LZ_decoder( const File_header & header, Input_buffer & ibuf, const int odes )
+ :
+ partial_data_pos( 0 ),
+ format_version( header.version ),
+ buffer_size( header.dictionary_size() ),
+ buffer( new uint8_t[buffer_size] ),
+ pos( 0 ),
+ crc_( 0xFFFFFFFF ),
+ odes_( odes ),
+ member_finished( false ),
+ range_decoder( sizeof header, ibuf ),
+ literal_decoder() {}
+
+ ~LZ_decoder() { delete[] buffer; }
+
+ uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFF; }
+ int decode_member( const Pretty_print & pp );
+
+ long long member_position() const throw()
+ { return range_decoder.member_position(); }
+ long long data_position() const throw()
+ { return partial_data_pos + pos; }
+ };