summaryrefslogtreecommitdiffstats
path: root/encoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'encoder.cc')
-rw-r--r--encoder.cc181
1 files changed, 84 insertions, 97 deletions
diff --git a/encoder.cc b/encoder.cc
index 563fd64..fc8c114 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -1,5 +1,6 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014
+ Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -60,9 +61,9 @@ void Matchfinder_base::normalize_pos()
pos -= offset;
stream_pos -= offset;
for( int i = 0; i < num_prev_positions; ++i )
- if( prev_positions[i] >= 0 ) prev_positions[i] -= offset;
+ prev_positions[i] -= std::min( prev_positions[i], offset );
for( int i = 0; i < pos_array_size; ++i )
- if( pos_array[i] >= 0 ) pos_array[i] -= offset;
+ pos_array[i] -= std::min( pos_array[i], offset );
read_block();
}
}
@@ -70,12 +71,11 @@ void Matchfinder_base::normalize_pos()
Matchfinder_base::Matchfinder_base( const int before, const int dict_size,
const int after_size, const int dict_factor,
- const int match_len_limit, const int num_prev_positions23,
+ const int num_prev_positions23,
const int pos_array_factor, const int ifd )
:
partial_data_pos( 0 ),
before_size( before ),
- match_len_limit_( match_len_limit ),
pos( 0 ),
cyclic_pos( 0 ),
stream_pos( 0 ),
@@ -113,19 +113,19 @@ Matchfinder_base::Matchfinder_base( const int before, const int dict_size,
prev_positions = new( std::nothrow ) int32_t[size];
if( !prev_positions ) { std::free( buffer ); throw std::bad_alloc(); }
pos_array = prev_positions + num_prev_positions;
- for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
+ for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = 0;
}
void Matchfinder_base::reset()
{
- const int size = stream_pos - pos;
- if( size > 0 ) std::memmove( buffer, buffer + pos, size );
+ if( stream_pos > pos )
+ std::memmove( buffer, buffer + pos, stream_pos - pos );
partial_data_pos = 0;
stream_pos -= pos;
pos = 0;
cyclic_pos = 0;
- for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1;
+ for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = 0;
read_block();
}
@@ -139,14 +139,15 @@ int Matchfinder::get_match_pairs( struct Pair * pairs )
if( len_limit < 4 ) return 0;
}
- int maxlen = min_match_len - 1;
+ int maxlen = 0;
int num_pairs = 0;
+ const int pos1 = pos + 1;
const int min_pos = ( pos > dictionary_size_ ) ? pos - dictionary_size_ : 0;
const uint8_t * const data = buffer + pos;
unsigned tmp = crc32[data[0]] ^ data[1];
const int key2 = tmp & ( num_prev_positions2 - 1 );
- tmp ^= (uint32_t)data[2] << 8;
+ tmp ^= (unsigned)data[2] << 8;
const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
const int key4 = num_prev_positions2 + num_prev_positions3 +
( ( tmp ^ ( crc32[data[3]] << 5 ) ) & key4_mask );
@@ -155,34 +156,34 @@ int Matchfinder::get_match_pairs( struct Pair * pairs )
{
int np2 = prev_positions[key2];
int np3 = prev_positions[key3];
- if( np2 >= min_pos && buffer[np2] == data[0] )
+ if( np2 > min_pos && buffer[np2-1] == data[0] )
{
- pairs[0].dis = pos - np2 - 1;
+ pairs[0].dis = pos - np2;
pairs[0].len = maxlen = 2;
num_pairs = 1;
}
- if( np2 != np3 && np3 >= min_pos && buffer[np3] == data[0] )
+ if( np2 != np3 && np3 > min_pos && buffer[np3-1] == data[0] )
{
maxlen = 3;
- pairs[num_pairs].dis = pos - np3 - 1;
- ++num_pairs;
np2 = np3;
+ pairs[num_pairs].dis = pos - np2;
+ ++num_pairs;
}
if( num_pairs > 0 )
{
- const int delta = pos - np2;
+ const int delta = pos1 - np2;
while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
++maxlen;
pairs[num_pairs-1].len = maxlen;
- if( maxlen >= len_limit ) pairs = 0;
+ if( maxlen >= len_limit ) pairs = 0; // done. now just skip
}
if( maxlen < 3 ) maxlen = 3;
}
- prev_positions[key2] = pos;
- prev_positions[key3] = pos;
+ prev_positions[key2] = pos1;
+ prev_positions[key3] = pos1;
int newpos = prev_positions[key4];
- prev_positions[key4] = pos;
+ prev_positions[key4] = pos1;
int32_t * ptr0 = pos_array + ( cyclic_pos << 1 );
int32_t * ptr1 = ptr0 + 1;
@@ -190,9 +191,9 @@ int Matchfinder::get_match_pairs( struct Pair * pairs )
for( int count = cycles; ; )
{
- if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; }
+ if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
- const int delta = pos - newpos;
+ const int delta = pos1 - newpos;
int32_t * const newptr = pos_array +
( ( cyclic_pos - delta +
( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ + 1 ) ) << 1 );
@@ -251,7 +252,7 @@ void Len_encoder::encode( Range_encoder & renc, int symbol,
if( symbol < len_low_symbols )
{
renc.encode_bit( choice1, 0 );
- renc.encode_tree( bm_low[pos_state], symbol, len_low_bits );
+ renc.encode_tree3( bm_low[pos_state], symbol );
}
else
{
@@ -259,14 +260,12 @@ void Len_encoder::encode( Range_encoder & renc, int symbol,
if( symbol < len_low_symbols + len_mid_symbols )
{
renc.encode_bit( choice2, 0 );
- renc.encode_tree( bm_mid[pos_state], symbol - len_low_symbols,
- len_mid_bits );
+ renc.encode_tree3( bm_mid[pos_state], symbol - len_low_symbols );
}
else
{
renc.encode_bit( choice2, 1 );
- renc.encode_tree( bm_high, symbol - len_low_symbols - len_mid_symbols,
- len_high_bits );
+ renc.encode_tree8( bm_high, symbol - len_low_symbols - len_mid_symbols );
}
}
if( --counters[pos_state] <= 0 ) update_prices( pos_state );
@@ -318,10 +317,10 @@ void LZ_encoder::fill_distance_prices()
int * const dsp = dis_slot_prices[len_state];
const Bit_model * const bmds = bm_dis_slot[len_state];
int slot = 0;
- for( ; slot < end_dis_model && slot < num_dis_slots; ++slot )
- dsp[slot] = price_symbol( bmds, slot, dis_slot_bits );
+ for( ; slot < end_dis_model; ++slot )
+ dsp[slot] = price_symbol6( bmds, slot );
for( ; slot < num_dis_slots; ++slot )
- dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) +
+ dsp[slot] = price_symbol6( bmds, slot ) +
(((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
int * const dp = dis_prices[len_state];
@@ -336,7 +335,7 @@ void LZ_encoder::fill_distance_prices()
/* Return value == number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
- ( trials[0].dis == -1 && trials[0].price == 1 ) means literal.
+ ( trials[0].dis == -1 ) means literal.
*/
int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const State state )
@@ -376,12 +375,12 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
}
const int pos_state = matchfinder.data_position() & pos_state_mask;
- const uint8_t prev_byte = matchfinder[-1];
+ const uint8_t prev_byte = matchfinder[1];
const uint8_t cur_byte = matchfinder[0];
- const uint8_t match_byte = matchfinder[-reps[0]-1];
+ const uint8_t match_byte = matchfinder[reps[0]+1];
trials[0].state = state;
- trials[1].dis = -1;
+ trials[1].dis = -1; // literal
trials[1].price = price0( bm_match[state()][pos_state] );
if( state.is_char() )
trials[1].price += price_literal( prev_byte, cur_byte );
@@ -392,7 +391,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const int rep_match_price = match_price + price1( bm_rep[state()] );
if( match_byte == cur_byte )
- trials[1].update( rep_match_price + price_rep_len1( state, pos_state ), 0, 0 );
+ trials[1].update( rep_match_price + price_shortrep( state, pos_state ), 0, 0 );
num_trials = std::max( main_len, replens[rep_index] );
@@ -457,60 +456,50 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
// give final values to current trial
Trial & cur_trial = trials[cur];
+ State cur_state;
+ {
+ int dis = cur_trial.dis;
int prev_index = cur_trial.prev_index;
const int prev_index2 = cur_trial.prev_index2;
- State cur_state;
- if( prev_index2 != single_step_trial )
+ if( prev_index2 == single_step_trial )
{
- --prev_index;
- if( prev_index2 >= 0 )
+ cur_state = trials[prev_index].state;
+ if( prev_index + 1 == cur ) // len == 1
{
- cur_state = trials[prev_index2].state;
- if( cur_trial.dis2 < num_rep_distances )
- cur_state.set_rep();
- else
- cur_state.set_match();
+ if( dis == 0 ) cur_state.set_short_rep();
+ else cur_state.set_char(); // literal
}
- else
- cur_state = trials[prev_index].state;
- cur_state.set_char();
+ else if( dis < num_rep_distances ) cur_state.set_rep();
+ else cur_state.set_match();
}
- else
- cur_state = trials[prev_index].state;
-
- if( prev_index == cur - 1 )
+ else if( prev_index2 == dual_step_trial ) // dis == 0
{
- if( cur_trial.dis == 0 ) cur_state.set_short_rep();
- else cur_state.set_char();
- for( int i = 0; i < num_rep_distances; ++i )
- cur_trial.reps[i] = trials[prev_index].reps[i];
+ --prev_index;
+ cur_state = trials[prev_index].state;
+ cur_state.set_char();
+ cur_state.set_rep();
}
- else
+ else // if( prev_index2 >= 0 )
{
- int dis;
- if( prev_index2 >= 0 )
- {
- dis = cur_trial.dis2;
- prev_index = prev_index2;
- cur_state.set_rep();
- }
- else
- {
- dis = cur_trial.dis;
- if( dis < num_rep_distances ) cur_state.set_rep();
- else cur_state.set_match();
- }
- for( int i = 0; i < num_rep_distances; ++i )
- cur_trial.reps[i] = trials[prev_index].reps[i];
- mtf_reps( dis, cur_trial.reps );
+ prev_index = prev_index2;
+ cur_state = trials[prev_index].state;
+ if( dis < num_rep_distances ) cur_state.set_rep();
+ else cur_state.set_match();
+ cur_state.set_char();
+ cur_state.set_rep();
}
cur_trial.state = cur_state;
+ for( int i = 0; i < num_rep_distances; ++i )
+ cur_trial.reps[i] = trials[prev_index].reps[i];
+ mtf_reps( dis, cur_trial.reps );
+ }
const int pos_state = matchfinder.data_position() & pos_state_mask;
- const uint8_t prev_byte = matchfinder[-1];
+ const uint8_t prev_byte = matchfinder[1];
const uint8_t cur_byte = matchfinder[0];
- const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1];
+ const uint8_t match_byte = matchfinder[cur_trial.reps[0]+1];
+ matchfinder.move_pos();
int next_price = cur_trial.price +
price0( bm_match[cur_state()][pos_state] );
@@ -518,26 +507,25 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
next_price += price_literal( prev_byte, cur_byte );
else
next_price += price_matched( prev_byte, cur_byte, match_byte );
- matchfinder.move_pos();
// try last updates to next trial
Trial & next_trial = trials[cur+1];
- next_trial.update( next_price, -1, cur );
+ next_trial.update( next_price, -1, cur ); // literal
const int match_price = cur_trial.price + price1( bm_match[cur_state()][pos_state] );
const int rep_match_price = match_price + price1( bm_rep[cur_state()] );
- if( match_byte == cur_byte && next_trial.dis != 0 )
+ if( match_byte == cur_byte && next_trial.dis != 0 &&
+ next_trial.prev_index2 == single_step_trial )
{
const int price = rep_match_price +
- price_rep_len1( cur_state, pos_state );
+ price_shortrep( cur_state, pos_state );
if( price <= next_trial.price )
{
next_trial.price = price;
next_trial.dis = 0;
next_trial.prev_index = cur;
- next_trial.prev_index2 = single_step_trial;
}
}
@@ -567,7 +555,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
price_rep0_len( len, state2, pos_state2 );
while( num_trials < cur + 1 + len )
trials[++num_trials].price = infinite_price;
- trials[cur+1+len].update2( price, 0, cur + 1 );
+ trials[cur+1+len].update2( price, cur + 1 );
}
}
@@ -580,7 +568,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
int len;
const int dis = cur_trial.reps[rep] + 1;
- if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue;
+ if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
for( len = min_match_len; len < len_limit; ++len )
if( data[len-dis] != data[len] ) break;
while( num_trials < cur + len )
@@ -612,7 +600,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
price_rep0_len( len2, state2, pos_state2 );
while( num_trials < cur + len + 1 + len2 )
trials[++num_trials].price = infinite_price;
- trials[cur+len+1+len2].update3( price, 0, cur + len + 1, rep, cur );
+ trials[cur+len+1+len2].update3( price, rep, cur + len + 1, cur );
}
// try matches
@@ -657,8 +645,8 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
while( num_trials < cur + len + 1 + len2 )
trials[++num_trials].price = infinite_price;
- trials[cur+len+1+len2].update3( price, 0, cur + len + 1,
- dis + num_rep_distances, cur );
+ trials[cur+len+1+len2].update3( price, dis + num_rep_distances,
+ cur + len + 1, cur );
}
if( ++i >= num_pairs ) break;
dis = pairs[i].dis;
@@ -675,9 +663,9 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
member_size - File_trailer::size() - max_marker_size;
const int fill_count = ( matchfinder.match_len_limit() > 12 ) ? 128 : 512;
int fill_counter = 0;
- int rep_distances[num_rep_distances];
+ int reps[num_rep_distances];
State state;
- for( int i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0;
+ for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
if( matchfinder.data_position() != 0 ||
renc.member_position() != File_header::size )
@@ -703,28 +691,28 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
if( align_price_count <= 0 ) fill_align_prices();
}
- int ahead = sequence_optimizer( rep_distances, state );
+ int ahead = sequence_optimizer( reps, state );
if( ahead <= 0 ) return false; // can't happen
- for( int i = 0; ; )
+ for( int i = 0; ahead > 0; )
{
const int pos_state =
( matchfinder.data_position() - ahead ) & pos_state_mask;
const int dis = trials[i].dis;
const int len = trials[i].price;
- bool bit = ( dis < 0 && len == 1 );
+ bool bit = ( dis < 0 );
renc.encode_bit( bm_match[state()][pos_state], !bit );
if( bit ) // literal byte
{
- const uint8_t prev_byte = matchfinder[-ahead-1];
- const uint8_t cur_byte = matchfinder[-ahead];
+ const uint8_t prev_byte = matchfinder[ahead+1];
+ const uint8_t cur_byte = matchfinder[ahead];
crc32.update_byte( crc_, cur_byte );
if( state.is_char() )
encode_literal( prev_byte, cur_byte );
else
{
- const uint8_t match_byte = matchfinder[-ahead-rep_distances[0]-1];
+ const uint8_t match_byte = matchfinder[ahead+reps[0]+1];
encode_matched( prev_byte, cur_byte, match_byte );
}
state.set_char();
@@ -732,10 +720,10 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
else // match or repeated match
{
crc32.update_buf( crc_, matchfinder.ptr_to_current_pos() - ahead, len );
- mtf_reps( dis, rep_distances );
+ mtf_reps( dis, reps );
bit = ( dis < num_rep_distances );
renc.encode_bit( bm_rep[state()], bit );
- if( bit )
+ if( bit ) // repeated match
{
bit = ( dis == 0 );
renc.encode_bit( bm_rep0[state()], !bit );
@@ -754,7 +742,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
state.set_rep();
}
}
- else
+ else // match
{
encode_pair( dis - num_rep_distances, len, pos_state );
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
@@ -770,7 +758,6 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
full_flush( matchfinder.data_position(), state );
return true;
}
- if( ahead <= 0 ) break;
}
}
full_flush( matchfinder.data_position(), state );