diff options
Diffstat (limited to '')
-rw-r--r-- | lzd.cc | 78 |
1 files changed, 38 insertions, 40 deletions
@@ -1,5 +1,5 @@ /* Lzd - Educational decompressor for the lzip format - Copyright (C) 2013-2016 Antonio Diaz Diaz. + Copyright (C) 2013-2017 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -150,10 +150,10 @@ public: uint8_t get_byte() { return std::getc( stdin ); } - int decode( const int num_bits ) + unsigned decode( const int num_bits ) { - int symbol = 0; - for( int i = 0; i < num_bits; ++i ) + unsigned symbol = 0; + for( int i = num_bits; i > 0; --i ) { range >>= 1; symbol <<= 1; @@ -164,9 +164,9 @@ public: return symbol; } - int decode_bit( Bit_model & bm ) + unsigned decode_bit( Bit_model & bm ) { - int symbol; + unsigned symbol; const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { @@ -186,18 +186,18 @@ public: return symbol; } - int decode_tree( Bit_model bm[], const int num_bits ) + unsigned decode_tree( Bit_model bm[], const int num_bits ) { - int symbol = 1; + unsigned symbol = 1; for( int i = 0; i < num_bits; ++i ) symbol = ( symbol << 1 ) | decode_bit( bm[symbol] ); return symbol - (1 << num_bits); } - int decode_tree_reversed( Bit_model bm[], const int num_bits ) + unsigned decode_tree_reversed( Bit_model bm[], const int num_bits ) { - int symbol = decode_tree( bm, num_bits ); - int reversed_symbol = 0; + unsigned symbol = decode_tree( bm, num_bits ); + unsigned reversed_symbol = 0; for( int i = 0; i < num_bits; ++i ) { reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 ); @@ -206,14 +206,13 @@ public: return reversed_symbol; } - int decode_matched( Bit_model bm[], const int match_byte ) + unsigned decode_matched( Bit_model bm[], const unsigned match_byte ) { - Bit_model * const bm1 = bm + 0x100; - int symbol = 1; + unsigned symbol = 1; for( int i = 7; i >= 0; --i ) { - const int match_bit = ( match_byte >> i ) & 1; - const int bit = decode_bit( bm1[(match_bit<<8)+symbol] ); + const unsigned match_bit = ( match_byte >> i ) & 1; + const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] ); symbol = ( symbol << 1 ) | bit; if( match_bit != bit ) { @@ -225,7 +224,7 @@ public: return symbol & 0xFF; } - int decode_len( Len_model & lm, const int pos_state ) + unsigned decode_len( Len_model & lm, const int pos_state ) { if( decode_bit( lm.choice1 ) == 0 ) return decode_tree( lm.bm_low[pos_state], len_low_bits ); @@ -253,9 +252,9 @@ class LZ_decoder uint8_t peek( const unsigned distance ) const { - unsigned i = pos - distance - 1; - if( pos <= distance ) i += dictionary_size; - return buffer[i]; + if( pos > distance ) return buffer[pos - distance - 1]; + if( pos_wrapped ) return buffer[dictionary_size + pos - distance - 1]; + return 0; // prev_byte of first byte } void put_byte( const uint8_t b ) @@ -274,7 +273,7 @@ public: stream_pos( 0 ), crc_( 0xFFFFFFFFU ), pos_wrapped( false ) - { buffer[dictionary_size-1] = 0; } // prev_byte of first byte + {} ~LZ_decoder() { delete[] buffer; } @@ -312,13 +311,13 @@ bool LZ_decoder::decode_member() // Returns false if error Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[len_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; Bit_model bm_align[dis_align_size]; Len_model match_len_model; Len_model rep_len_model; - unsigned rep0 = 0; // rep[0-3] latest four distances - unsigned rep1 = 0; // used for efficient coding of - unsigned rep2 = 0; // repeated distances + unsigned rep0 = 0; // rep[0-3] latest four distances + unsigned rep1 = 0; // used for efficient coding of + unsigned rep2 = 0; // repeated distances unsigned rep3 = 0; State state; @@ -341,7 +340,12 @@ bool LZ_decoder::decode_member() // Returns false if error int len; if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit { - if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit + if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit + { + if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit + { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } + } + else { unsigned distance; if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit @@ -357,11 +361,6 @@ bool LZ_decoder::decode_member() // Returns false if error rep1 = rep0; rep0 = distance; } - else - { - if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit - { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; } - } state.set_rep(); len = min_match_len + rdec.decode_len( rep_len_model, pos_state ); } @@ -370,15 +369,14 @@ bool LZ_decoder::decode_member() // Returns false if error rep3 = rep2; rep2 = rep1; rep1 = rep0; len = min_match_len + rdec.decode_len( match_len_model, pos_state ); const int len_state = std::min( len - min_match_len, len_states - 1 ); - const int dis_slot = - rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits ); - if( dis_slot < start_dis_model ) rep0 = dis_slot; - else + rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits ); + if( rep0 >= start_dis_model ) { + const unsigned dis_slot = rep0; const int direct_bits = ( dis_slot >> 1 ) - 1; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, + rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ), direct_bits ); else { @@ -414,7 +412,7 @@ int main( const int argc, const char * const argv[] ) "It is not safe to use lzd for any real work.\n" "\nUsage: %s < file.lz > file\n", argv[0] ); std::printf( "Lzd decompresses from standard input to standard output.\n" - "\nCopyright (C) 2016 Antonio Diaz Diaz.\n" + "\nCopyright (C) 2017 Antonio Diaz Diaz.\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" "Report bugs to lzip-bug@nongnu.org\n" @@ -429,7 +427,7 @@ int main( const int argc, const char * const argv[] ) for( bool first_member = true; ; first_member = false ) { - File_header header; + File_header header; // verify header for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin ); if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 ) { @@ -444,11 +442,11 @@ int main( const int argc, const char * const argv[] ) { std::fputs( "Invalid dictionary size in member header.\n", stderr ); return 2; } - LZ_decoder decoder( dict_size ); + LZ_decoder decoder( dict_size ); // decode LZMA stream if( !decoder.decode_member() ) { std::fputs( "Data error\n", stderr ); return 2; } - File_trailer trailer; + File_trailer trailer; // verify trailer for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin ); unsigned crc = 0; for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; } |