summaryrefslogtreecommitdiffstats
path: root/encoder.c
diff options
context:
space:
mode:
authorDaniel Baumann <mail@daniel-baumann.ch>2015-11-06 12:50:02 +0000
committerDaniel Baumann <mail@daniel-baumann.ch>2015-11-06 12:50:02 +0000
commitaaed943b403d90335816ae7a8e486ffa834cd383 (patch)
tree1262948f006f2cef62c6be9fc6a9face0b27baa8 /encoder.c
parentAdding debian version 1.6~pre1-2. (diff)
downloadclzip-aaed943b403d90335816ae7a8e486ffa834cd383.tar.xz
clzip-aaed943b403d90335816ae7a8e486ffa834cd383.zip
Merging upstream version 1.6~pre2.
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
Diffstat (limited to 'encoder.c')
-rw-r--r--encoder.c105
1 files changed, 42 insertions, 63 deletions
diff --git a/encoder.c b/encoder.c
index fa896ef..2989065 100644
--- a/encoder.c
+++ b/encoder.c
@@ -50,7 +50,7 @@ bool Mf_read_block( struct Matchfinder * const mf )
void Mf_normalize_pos( struct Matchfinder * const mf )
{
if( mf->pos > mf->stream_pos )
- internal_error( "pos > stream_pos in Mf_normalize_pos" );
+ internal_error( "pos > stream_pos in Mf_normalize_pos." );
if( !mf->at_stream_end )
{
int i;
@@ -256,36 +256,6 @@ void Re_flush_data( struct Range_encoder * const renc )
}
-void Lee_encode( struct Len_encoder * const le,
- struct Range_encoder * const renc,
- int symbol, const int pos_state )
- {
- symbol -= min_match_len;
- if( symbol < len_low_symbols )
- {
- Re_encode_bit( renc, &le->lm.choice1, 0 );
- Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits );
- }
- else
- {
- Re_encode_bit( renc, &le->lm.choice1, 1 );
- if( symbol < len_low_symbols + len_mid_symbols )
- {
- Re_encode_bit( renc, &le->lm.choice2, 0 );
- Re_encode_tree( renc, le->lm.bm_mid[pos_state],
- symbol - len_low_symbols, len_mid_bits );
- }
- else
- {
- Re_encode_bit( renc, &le->lm.choice2, 1 );
- Re_encode_tree( renc, le->lm.bm_high,
- symbol - len_low_symbols - len_mid_symbols, len_high_bits );
- }
- }
- if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state );
- }
-
-
/* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
static void LZe_full_flush( struct LZ_encoder * const e, const State state )
{
@@ -305,16 +275,7 @@ static void LZe_full_flush( struct LZ_encoder * const e, const State state )
}
-static void LZe_fill_align_prices( struct LZ_encoder * const e )
- {
- int i;
- for( i = 0; i < dis_align_size; ++i )
- e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
- e->align_price_count = dis_align_size;
- }
-
-
-static void LZe_fill_distance_prices( struct LZ_encoder * const e )
+static void LZe_update_distance_prices( struct LZ_encoder * const e )
{
int dis, len_state;
for( dis = start_dis_model; dis < modeled_distances; ++dis )
@@ -368,9 +329,10 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
e->matchfinder = mf;
if( !Re_init( &e->renc, outfd ) ) return false;
- Lee_init( &e->match_len_encoder, mf->match_len_limit );
- Lee_init( &e->rep_len_encoder, mf->match_len_limit );
- e->align_price_count = 0;
+ Lm_init( &e->match_len_model );
+ Lm_init( &e->rep_len_model );
+ Lp_init( &e->match_len_prices, &e->match_len_model, mf->match_len_limit );
+ Lp_init( &e->rep_len_prices, &e->rep_len_model, mf->match_len_limit );
e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 );
for( i = 0; i < Fh_size; ++i )
@@ -382,6 +344,7 @@ bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf,
/* Return value == number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis == -1 ) means literal.
+ A match/rep longer or equal than match_len_limit finishes the sequence.
*/
static int LZe_sequence_optimizer( struct LZ_encoder * const e,
const int reps[num_rep_distances],
@@ -468,7 +431,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
for( len = min_match_len; len <= replens[rep]; ++len )
Tr_update( &e->trials[len], price +
- Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 );
+ Lp_price( &e->rep_len_prices, len, pos_state ), rep, 0 );
}
if( main_len > replens[0] )
@@ -487,8 +450,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
- Mf_move_pos( e->matchfinder );
-
while( true ) /* price optimization loop */
{
struct Trial *cur_trial, *next_trial;
@@ -498,6 +459,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
State cur_state;
uint8_t prev_byte, cur_byte, match_byte;
+ Mf_move_pos( e->matchfinder );
if( ++cur >= num_trials ) /* no more initialized trials */
{
LZe_backward( e, cur );
@@ -557,7 +519,6 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
prev_byte = Mf_peek( e->matchfinder, 1 );
cur_byte = Mf_peek( e->matchfinder, 0 );
match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 );
- Mf_move_pos( e->matchfinder );
next_price = cur_trial->price +
price0( e->bm_match[cur_state][pos_state] );
@@ -587,7 +548,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
}
}
- available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1,
+ available_bytes = min( Mf_available_bytes( e->matchfinder ),
max_num_trials - 1 - cur );
if( available_bytes < min_match_len ) continue;
@@ -596,7 +557,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try literal + rep0 */
if( match_byte != cur_byte && next_trial->prev_index != cur )
{
- const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
+ const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
const int dis = cur_trial->reps[0] + 1;
const int limit = min( e->matchfinder->match_len_limit + 1,
available_bytes );
@@ -619,7 +580,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try rep distances */
for( rep = 0; rep < num_rep_distances; ++rep )
{
- const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
+ const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
int price;
const int dis = cur_trial->reps[rep] + 1;
@@ -631,7 +592,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state );
for( i = min_match_len; i <= len; ++i )
Tr_update( &e->trials[cur+i], price +
- Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur );
+ Lp_price( &e->rep_len_prices, i, pos_state ), rep, cur );
if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
@@ -647,7 +608,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
pos_state2 = ( pos_state + len ) & pos_state_mask;
state2 = St_set_rep( cur_state );
- price += Lee_price( &e->rep_len_encoder, len, pos_state ) +
+ price += Lp_price( &e->rep_len_prices, len, pos_state ) +
price0( e->bm_match[state2][pos_state2] ) +
LZe_price_matched( e, data[len-1], data[len], data[len-dis] );
pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
@@ -683,7 +644,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const e,
/* try match + literal + rep0 */
if( len == e->pairs[i].len )
{
- const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1;
+ const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder );
const int dis2 = dis + 1;
int len2 = len + 1;
const int limit = min( e->matchfinder->match_len_limit + len2,
@@ -721,8 +682,13 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{
const unsigned long long member_size_limit =
member_size - Ft_size - max_marker_size;
- const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512;
- int fill_counter = 0;
+ const bool best = ( e->matchfinder->match_len_limit > 12 );
+ const int dis_price_count = best ? 1 : 512;
+ const int align_price_count = best ? 1 : dis_align_size;
+ const int price_count = ( e->matchfinder->match_len_limit > 36 ) ? 1013 : 4093;
+ int price_counter = 0;
+ int dis_price_counter = 0;
+ int align_price_counter = 0;
int ahead, i;
int reps[num_rep_distances];
State state = 0;
@@ -736,24 +702,33 @@ bool LZe_encode_member( struct LZ_encoder * const e,
{
const uint8_t prev_byte = 0;
const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 );
- CRC32_update_byte( &e->crc, cur_byte );
Re_encode_bit( &e->renc, &e->bm_match[state][0], 0 );
LZe_encode_literal( e, prev_byte, cur_byte );
+ CRC32_update_byte( &e->crc, cur_byte );
Mf_get_match_pairs( e->matchfinder, 0 );
Mf_move_pos( e->matchfinder );
}
while( !Mf_finished( e->matchfinder ) )
{
- if( e->pending_num_pairs == 0 )
+ if( price_counter <= 0 && e->pending_num_pairs == 0 )
{
- if( fill_counter <= 0 )
- { LZe_fill_distance_prices( e ); fill_counter = fill_count; }
- if( e->align_price_count <= 0 ) LZe_fill_align_prices( e );
+ price_counter = price_count; /* recalculate prices every these bytes */
+ if( dis_price_counter <= 0 )
+ { dis_price_counter = dis_price_count; LZe_update_distance_prices( e ); }
+ if( align_price_counter <= 0 )
+ {
+ align_price_counter = align_price_count;
+ for( i = 0; i < dis_align_size; ++i )
+ e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits );
+ }
+ Lp_update_prices( &e->match_len_prices );
+ Lp_update_prices( &e->rep_len_prices );
}
ahead = LZe_sequence_optimizer( e, reps, state );
if( ahead <= 0 ) return false; /* can't happen */
+ price_counter -= ahead;
for( i = 0; ahead > 0; )
{
@@ -800,14 +775,18 @@ bool LZe_encode_member( struct LZ_encoder * const e,
if( len == 1 ) state = St_set_short_rep( state );
else
{
- Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state );
+ Re_encode_len( &e->renc, &e->rep_len_model, len, pos_state );
+ Lp_decrement_counter( &e->rep_len_prices, pos_state );
state = St_set_rep( state );
}
}
else /* match */
{
LZe_encode_pair( e, dis - num_rep_distances, len, pos_state );
- --fill_counter;
+ if( get_slot( dis - num_rep_distances ) >= end_dis_model )
+ --align_price_counter;
+ --dis_price_counter;
+ Lp_decrement_counter( &e->match_len_prices, pos_state );
state = St_set_match( state );
}
}