summary refs log tree commit diff stats
path: root/lzd.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- lzd.cc 78
1 files changed, 38 insertions, 40 deletions
diff --git a/lzd.cc b/lzd.cc
index 56dce37..6fd536a 100644
--- a/lzd.cc
+++ b/lzd.cc
@@ -1,5 +1,5 @@
/* Lzd - Educational decompressor for the lzip format
- Copyright (C) 2013-2016 Antonio Diaz Diaz.
+ Copyright (C) 2013-2017 Antonio Diaz Diaz.
This program is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -150,10 +150,10 @@ public:
uint8_t get_byte() { return std::getc( stdin ); }
- int decode( const int num_bits )
+ unsigned decode( const int num_bits )
{
- int symbol = 0;
- for( int i = 0; i < num_bits; ++i )
+ unsigned symbol = 0;
+ for( int i = num_bits; i > 0; --i )
{
range >>= 1;
symbol <<= 1;
@@ -164,9 +164,9 @@ public:
return symbol;
}
- int decode_bit( Bit_model & bm )
+ unsigned decode_bit( Bit_model & bm )
{
- int symbol;
+ unsigned symbol;
const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
if( code < bound )
{
@@ -186,18 +186,18 @@ public:
return symbol;
}
- int decode_tree( Bit_model bm[], const int num_bits )
+ unsigned decode_tree( Bit_model bm[], const int num_bits )
{
- int symbol = 1;
+ unsigned symbol = 1;
for( int i = 0; i < num_bits; ++i )
symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
return symbol - (1 << num_bits);
}
- int decode_tree_reversed( Bit_model bm[], const int num_bits )
+ unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
{
- int symbol = decode_tree( bm, num_bits );
- int reversed_symbol = 0;
+ unsigned symbol = decode_tree( bm, num_bits );
+ unsigned reversed_symbol = 0;
for( int i = 0; i < num_bits; ++i )
{
reversed_symbol = ( reversed_symbol << 1 ) | ( symbol & 1 );
@@ -206,14 +206,13 @@ public:
return reversed_symbol;
}
- int decode_matched( Bit_model bm[], const int match_byte )
+ unsigned decode_matched( Bit_model bm[], const unsigned match_byte )
{
- Bit_model * const bm1 = bm + 0x100;
- int symbol = 1;
+ unsigned symbol = 1;
for( int i = 7; i >= 0; --i )
{
- const int match_bit = ( match_byte >> i ) & 1;
- const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
+ const unsigned match_bit = ( match_byte >> i ) & 1;
+ const unsigned bit = decode_bit( bm[symbol+(match_bit<<8)+0x100] );
symbol = ( symbol << 1 ) | bit;
if( match_bit != bit )
{
@@ -225,7 +224,7 @@ public:
return symbol & 0xFF;
}
- int decode_len( Len_model & lm, const int pos_state )
+ unsigned decode_len( Len_model & lm, const int pos_state )
{
if( decode_bit( lm.choice1 ) == 0 )
return decode_tree( lm.bm_low[pos_state], len_low_bits );
@@ -253,9 +252,9 @@ class LZ_decoder
uint8_t peek( const unsigned distance ) const
{
- unsigned i = pos - distance - 1;
- if( pos <= distance ) i += dictionary_size;
- return buffer[i];
+ if( pos > distance ) return buffer[pos - distance - 1];
+ if( pos_wrapped ) return buffer[dictionary_size + pos - distance - 1];
+ return 0; // prev_byte of first byte
}
void put_byte( const uint8_t b )
@@ -274,7 +273,7 @@ public:
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
pos_wrapped( false )
- { buffer[dictionary_size-1] = 0; } // prev_byte of first byte
+ {}
~LZ_decoder() { delete[] buffer; }
@@ -312,13 +311,13 @@ bool LZ_decoder::decode_member() // Returns false if error
Bit_model bm_rep2[State::states];
Bit_model bm_len[State::states][pos_states];
Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
- Bit_model bm_dis[modeled_distances-end_dis_model];
+ Bit_model bm_dis[modeled_distances-end_dis_model+1];
Bit_model bm_align[dis_align_size];
Len_model match_len_model;
Len_model rep_len_model;
- unsigned rep0 = 0; // rep[0-3] latest four distances
- unsigned rep1 = 0; // used for efficient coding of
- unsigned rep2 = 0; // repeated distances
+ unsigned rep0 = 0; // rep[0-3] latest four distances
+ unsigned rep1 = 0; // used for efficient coding of
+ unsigned rep2 = 0; // repeated distances
unsigned rep3 = 0;
State state;
@@ -341,7 +340,12 @@ bool LZ_decoder::decode_member() // Returns false if error
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
{
- if( rdec.decode_bit( bm_rep0[state()] ) != 0 ) // 3rd bit
+ if( rdec.decode_bit( bm_rep0[state()] ) == 0 ) // 3rd bit
+ {
+ if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
+ { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
+ }
+ else
{
unsigned distance;
if( rdec.decode_bit( bm_rep1[state()] ) == 0 ) // 4th bit
@@ -357,11 +361,6 @@ bool LZ_decoder::decode_member() // Returns false if error
rep1 = rep0;
rep0 = distance;
}
- else
- {
- if( rdec.decode_bit( bm_len[state()][pos_state] ) == 0 ) // 4th bit
- { state.set_short_rep(); put_byte( peek( rep0 ) ); continue; }
- }
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
}
@@ -370,15 +369,14 @@ bool LZ_decoder::decode_member() // Returns false if error
rep3 = rep2; rep2 = rep1; rep1 = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
const int len_state = std::min( len - min_match_len, len_states - 1 );
- const int dis_slot =
- rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
- if( dis_slot < start_dis_model ) rep0 = dis_slot;
- else
+ rep0 = rdec.decode_tree( bm_dis_slot[len_state], dis_slot_bits );
+ if( rep0 >= start_dis_model )
{
+ const unsigned dis_slot = rep0;
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- rep0 += rdec.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1,
+ rep0 += rdec.decode_tree_reversed( bm_dis + ( rep0 - dis_slot ),
direct_bits );
else
{
@@ -414,7 +412,7 @@ int main( const int argc, const char * const argv[] )
"It is not safe to use lzd for any real work.\n"
"\nUsage: %s < file.lz > file\n", argv[0] );
std::printf( "Lzd decompresses from standard input to standard output.\n"
- "\nCopyright (C) 2016 Antonio Diaz Diaz.\n"
+ "\nCopyright (C) 2017 Antonio Diaz Diaz.\n"
"This is free software: you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
"Report bugs to lzip-bug@nongnu.org\n"
@@ -429,7 +427,7 @@ int main( const int argc, const char * const argv[] )
for( bool first_member = true; ; first_member = false )
{
- File_header header;
+ File_header header; // verify header
for( int i = 0; i < 6; ++i ) header[i] = std::getc( stdin );
if( std::feof( stdin ) || std::memcmp( header, "LZIP\x01", 5 ) != 0 )
{
@@ -444,11 +442,11 @@ int main( const int argc, const char * const argv[] )
{ std::fputs( "Invalid dictionary size in member header.\n", stderr );
return 2; }
- LZ_decoder decoder( dict_size );
+ LZ_decoder decoder( dict_size ); // decode LZMA stream
if( !decoder.decode_member() )
{ std::fputs( "Data error\n", stderr ); return 2; }
- File_trailer trailer;
+ File_trailer trailer; // verify trailer
for( int i = 0; i < 20; ++i ) trailer[i] = std::getc( stdin );
unsigned crc = 0;
for( int i = 3; i >= 0; --i ) { crc <<= 8; crc += trailer[i]; }