summaryrefslogtreecommitdiffstats
path: root/encoder.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--encoder.h368
1 files changed, 201 insertions, 167 deletions
diff --git a/encoder.h b/encoder.h
index 13df958..e39d7c4 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
/* Clzip - Data compressor based on the LZMA algorithm
- Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,17 +16,19 @@
*/
enum { max_num_trials = 1 << 12,
- price_shift = 6 };
+ price_shift_bits = 6,
+ price_step_bits = 2,
+ price_step = 1 << price_step_bits };
-typedef unsigned char Dis_slots[1<<12];
+typedef uint8_t Dis_slots[1<<10];
extern Dis_slots dis_slots;
-static inline void Dis_slots_init()
+static inline void Dis_slots_init( void )
{
int i, size, slot;
for( slot = 0; slot < 4; ++slot ) dis_slots[slot] = slot;
- for( i = 4, size = 2, slot = 4; slot < 24; slot += 2 )
+ for( i = 4, size = 2, slot = 4; slot < 20; slot += 2 )
{
memset( &dis_slots[i], slot, size );
memset( &dis_slots[i+size], slot + 1, size );
@@ -35,40 +37,47 @@ static inline void Dis_slots_init()
}
}
-static inline int get_slot( const uint32_t dis )
+static inline uint8_t get_slot( const uint32_t dis )
{
- if( dis < (1 << 12) ) return dis_slots[dis];
- if( dis < (1 << 23) ) return dis_slots[dis>>11] + 22;
- return dis_slots[dis>>22] + 44;
+ if( dis < (1 << 10) ) return dis_slots[dis];
+ if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
+ if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36;
+ return dis_slots[dis>>27] + 54;
}
-typedef int Prob_prices[bit_model_total >> 2];
+typedef short Prob_prices[bit_model_total >> price_step_bits];
extern Prob_prices prob_prices;
-static inline void Prob_prices_init()
+static inline void Prob_prices_init( void )
{
- const int num_bits = ( bit_model_total_bits - 2 );
- int i, j = 1, end = 2;
- prob_prices[0] = bit_model_total_bits << price_shift;
- for( i = num_bits - 1; i >= 0; --i, end <<= 1 )
+ int i, j;
+ for( i = price_step / 2; i < bit_model_total; i += price_step )
{
- for( ; j < end; ++j )
- prob_prices[j] = ( i << price_shift ) +
- ( ((end - j) << price_shift) >> (num_bits - i - 1) );
+ unsigned val = i;
+ int bits = 0; /* base 2 logarithm of val */
+ for( j = 0; j < price_shift_bits; ++j )
+ {
+ val = val * val;
+ bits <<= 1;
+ while( val >= 1 << 16 ) { val >>= 1; ++bits; }
+ }
+ bits += 15; /* remaining bits in val */
+ prob_prices[i >> price_step_bits] =
+ ( bit_model_total_bits << price_shift_bits ) - bits;
}
}
static inline int get_price( const int probability )
- { return prob_prices[probability >> 2]; }
+ { return prob_prices[probability >> price_step_bits]; }
static inline int price0( const Bit_model probability )
{ return get_price( probability ); }
static inline int price1( const Bit_model probability )
- { return get_price( bit_model_total-probability ); }
+ { return get_price( bit_model_total - probability ); }
static inline int price_bit( const Bit_model bm, const int bit )
{ if( bit ) return price1( bm ); else return price0( bm ); }
@@ -106,58 +115,57 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol,
}
-static inline int price_matched( const Bit_model bm[], const int symbol,
- const int match_byte )
+static inline int price_matched( const Bit_model bm[], unsigned symbol,
+ unsigned match_byte )
{
int price = 0;
- int model = 1;
- int i;
-
- for( i = 7; i >= 0; --i )
- {
- const int match_bit = ( match_byte >> i ) & 1;
- int bit = ( symbol >> i ) & 1;
- price += price_bit( bm[(match_bit<<8)+model+0x100], bit );
- model = ( model << 1 ) | bit;
- if( match_bit != bit )
- {
- while( --i >= 0 )
- {
- bit = ( symbol >> i ) & 1;
- price += price_bit( bm[model], bit );
- model = ( model << 1 ) | bit;
- }
- break;
- }
+ unsigned mask = 0x100;
+ symbol |= 0x100;
+
+ do {
+ unsigned bit, match_bit;
+ match_byte <<= 1;
+ match_bit = match_byte & mask;
+ symbol <<= 1;
+ bit = symbol & 0x100;
+ price += price_bit( bm[match_bit+(symbol>>9)+mask], bit );
+ mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
}
+ while( symbol < 0x10000 );
return price;
}
+struct Pair /* distance-length pair */
+ {
+ int dis;
+ int len;
+ };
+
+
enum { /* bytes to keep in buffer before dictionary */
before_size = max_num_trials + 1,
/* bytes to keep in buffer after pos */
- after_size = max_match_len,
- num_prev_positions4 = 1 << 20,
- num_prev_positions3 = 1 << 18,
- num_prev_positions2 = 1 << 16,
- num_prev_positions = num_prev_positions4 + num_prev_positions3 +
- num_prev_positions2 };
+ after_size = ( 2 * max_match_len ) + 1,
+ num_prev_positions3 = 1 << 16,
+ num_prev_positions2 = 1 << 10 };
struct Matchfinder
{
- long long partial_data_pos;
+ unsigned long long partial_data_pos;
uint8_t * buffer; /* input buffer */
int32_t * prev_positions; /* last seen position of key */
- int32_t * prev_pos_tree;
- int dictionary_size; /* bytes to keep in buffer before pos */
+ int32_t * prev_pos_tree; /* previous positions of key */
+ int match_len_limit;
int buffer_size;
+ int dictionary_size; /* bytes to keep in buffer before pos */
int pos; /* current pos in buffer */
- int cyclic_pos; /* current pos in dictionary */
- int stream_pos; /* first byte not yet read from file */
+ int cyclic_pos; /* cycles through [0, dictionary_size] */
int pos_limit; /* when reached, a new block must be read */
- int match_len_limit;
+ int stream_pos; /* first byte not yet read from file */
int cycles;
+ unsigned key4_mask;
+ int num_prev_positions; /* size of prev_positions */
int infd; /* input file descriptor */
bool at_stream_end; /* stream_pos shows real end of file */
};
@@ -166,13 +174,12 @@ bool Mf_read_block( struct Matchfinder * const mf );
void Mf_normalize_pos( struct Matchfinder * const mf );
bool Mf_init( struct Matchfinder * const mf,
- const int dict_size, const int len_limit, const int ifd );
+ const int dict_size, const int match_len_limit, const int ifd );
static inline void Mf_free( struct Matchfinder * const mf )
{
- free( mf->prev_pos_tree ); mf->prev_pos_tree = 0;
- free( mf->buffer ); mf->buffer = 0;
- free( mf->prev_positions ); mf->prev_positions = 0;
+ free( mf->prev_positions );
+ free( mf->buffer );
}
static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i )
@@ -181,13 +188,15 @@ static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i
static inline int Mf_available_bytes( const struct Matchfinder * const mf )
{ return mf->stream_pos - mf->pos; }
-static inline long long Mf_data_position( const struct Matchfinder * const mf )
+static inline unsigned long long
+Mf_data_position( const struct Matchfinder * const mf )
{ return mf->partial_data_pos + mf->pos; }
static inline bool Mf_finished( const struct Matchfinder * const mf )
{ return mf->at_stream_end && mf->pos >= mf->stream_pos; }
-static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
+static inline const uint8_t *
+Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
{ return mf->buffer + mf->pos; }
static inline bool Mf_dec_pos( struct Matchfinder * const mf,
@@ -196,7 +205,7 @@ static inline bool Mf_dec_pos( struct Matchfinder * const mf,
if( ahead < 0 || mf->pos < ahead ) return false;
mf->pos -= ahead;
mf->cyclic_pos -= ahead;
- if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size;
+ if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size + 1;
return true;
}
@@ -215,12 +224,12 @@ static inline int Mf_true_match_len( const struct Matchfinder * const mf,
static inline void Mf_move_pos( struct Matchfinder * const mf )
{
- if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0;
+ if( ++mf->cyclic_pos > mf->dictionary_size ) mf->cyclic_pos = 0;
if( ++mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf );
}
void Mf_reset( struct Matchfinder * const mf );
-int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances );
+int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs );
enum { re_buffer_size = 65536 };
@@ -228,7 +237,7 @@ enum { re_buffer_size = 65536 };
struct Range_encoder
{
uint64_t low;
- long long partial_member_pos;
+ unsigned long long partial_member_pos;
uint8_t * buffer; /* output buffer */
int pos; /* current pos in buffer */
uint32_t range;
@@ -277,7 +286,8 @@ static inline bool Re_init( struct Range_encoder * const renc, const int ofd )
static inline void Re_free( struct Range_encoder * const renc )
{ free( renc->buffer ); }
-static inline long long Re_member_position( const struct Range_encoder * const renc )
+static inline unsigned long long
+Re_member_position( const struct Range_encoder * const renc )
{ return renc->partial_member_pos + renc->pos + renc->ff_count; }
static inline void Re_flush( struct Range_encoder * const renc )
@@ -345,27 +355,22 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
}
static inline void Re_encode_matched( struct Range_encoder * const renc,
- Bit_model bm[], int symbol, int match_byte )
- {
- int model = 1;
- int i;
- for( i = 7; i >= 0; --i )
- {
- const int match_bit = ( match_byte >> i ) & 1;
- int bit = ( symbol >> i ) & 1;
- Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit );
- model = ( model << 1 ) | bit;
- if( match_bit != bit )
- {
- while( --i >= 0 )
- {
- bit = ( symbol >> i ) & 1;
- Re_encode_bit( renc, &bm[model], bit );
- model = ( model << 1 ) | bit;
- }
- break;
- }
+ Bit_model bm[], unsigned symbol,
+ unsigned match_byte )
+ {
+ unsigned mask = 0x100;
+ symbol |= 0x100;
+
+ do {
+ unsigned bit, match_bit;
+ match_byte <<= 1;
+ match_bit = match_byte & mask;
+ symbol <<= 1;
+ bit = symbol & 0x100;
+ Re_encode_bit( renc, &bm[match_bit+(symbol>>9)+mask], bit );
+ mask &= ~(match_byte ^ symbol); /* if( match_bit != bit ) mask = 0; */
}
+ while( symbol < 0x10000 );
}
@@ -404,20 +409,15 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
}
static inline void Lee_init( struct Len_encoder * const len_encoder,
- const int len_limit )
+ const int match_len_limit )
{
- int i, j;
+ int i;
Bm_init( &len_encoder->choice1 );
Bm_init( &len_encoder->choice2 );
- for( i = 0; i < pos_states; ++i )
- for( j = 0; j < len_low_symbols; ++j )
- Bm_init( &len_encoder->bm_low[i][j] );
- for( i = 0; i < pos_states; ++i )
- for( j = 0; j < len_mid_symbols; ++j )
- Bm_init( &len_encoder->bm_mid[i][j] );
- for( i = 0; i < len_high_symbols; ++i )
- Bm_init( &len_encoder->bm_high[i] );
- len_encoder->len_symbols = len_limit + 1 - min_match_len;
+ Bm_array_init( len_encoder->bm_low[0], pos_states * len_low_symbols );
+ Bm_array_init( len_encoder->bm_mid[0], pos_states * len_mid_symbols );
+ Bm_array_init( len_encoder->bm_high, len_high_symbols );
+ len_encoder->len_symbols = match_len_limit + 1 - min_match_len;
for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i );
}
@@ -430,71 +430,66 @@ static inline int Lee_price( const struct Len_encoder * const len_encoder,
{ return len_encoder->prices[pos_state][symbol - min_match_len]; }
-struct Literal_encoder
- {
- Bit_model bm_literal[1<<literal_context_bits][0x300];
- };
-
-static inline void Lie_init( struct Literal_encoder * const lienc )
- {
- int i, j;
- for( i = 0; i < 1<<literal_context_bits; ++i )
- for( j = 0; j < 0x300; ++j )
- Bm_init( &lienc->bm_literal[i][j] );
- }
-
-static inline int Lie_state( const uint8_t prev_byte )
- { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
-
-static inline void Lie_encode( struct Literal_encoder * const lienc,
- struct Range_encoder * const renc,
- uint8_t prev_byte, uint8_t symbol )
- { Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
-
-static inline void Lie_encode_matched( struct Literal_encoder * const lienc,
- struct Range_encoder * const renc,
- uint8_t prev_byte, uint8_t symbol,
- uint8_t match_byte )
- { Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)],
- symbol, match_byte ); }
-
-static inline int Lie_price_symbol( const struct Literal_encoder * const lienc,
- uint8_t prev_byte, uint8_t symbol )
- { return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
-
-static inline int Lie_price_matched( const struct Literal_encoder * const lienc,
- uint8_t prev_byte, uint8_t symbol,
- uint8_t match_byte )
- { return price_matched( lienc->bm_literal[Lie_state(prev_byte)],
- symbol, match_byte ); }
-
-
enum { infinite_price = 0x0FFFFFFF,
max_marker_size = 16,
- num_rep_distances = 4 }; /* must be 4 */
+ num_rep_distances = 4, /* must be 4 */
+ single_step_trial = -2,
+ dual_step_trial = -1 };
struct Trial
{
State state;
- int dis;
- int prev_index; /* index of prev trial in trials[] */
int price; /* dual use var; cumulative price, match length */
+ int dis; /* rep index or match distance */
+ int prev_index; /* index of prev trial in trials[] */
+ int dis2;
+ int prev_index2; /* -2 trial is single step */
+ /* -1 literal + rep0 */
+ /* >= 0 rep or match + literal + rep0 */
int reps[num_rep_distances];
};
-static inline void Tr_update( struct Trial * const trial,
- const int d, const int p_i, const int pr )
+static inline void Tr_update( struct Trial * const trial, const int pr,
+ const int d, const int p_i )
{
if( pr < trial->price )
- { trial->dis = d; trial->prev_index = p_i; trial->price = pr; }
+ {
+ trial->price = pr;
+ trial->dis = d; trial->prev_index = p_i;
+ trial->prev_index2 = single_step_trial;
+ }
+ }
+
+static inline void Tr_update2( struct Trial * const trial, const int pr,
+ const int d, const int p_i )
+ {
+ if( pr < trial->price )
+ {
+ trial->price = pr;
+ trial->dis = d; trial->prev_index = p_i;
+ trial->prev_index2 = dual_step_trial;
+ }
+ }
+
+static inline void Tr_update3( struct Trial * const trial, const int pr,
+ const int d, const int p_i,
+ const int d2, const int p_i2 )
+ {
+ if( pr < trial->price )
+ {
+ trial->price = pr;
+ trial->dis = d; trial->prev_index = p_i;
+ trial->dis2 = d2; trial->prev_index2 = p_i2;
+ }
}
struct LZ_encoder
{
- int longest_match_found;
+ int pending_num_pairs;
uint32_t crc;
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[states][pos_states];
Bit_model bm_rep[states];
Bit_model bm_rep0[states];
@@ -502,17 +497,16 @@ struct LZ_encoder
Bit_model bm_rep2[states];
Bit_model bm_len[states][pos_states];
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
- Bit_model bm_dis[modeled_distances-end_dis_model+1];
+ Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
struct Matchfinder * matchfinder;
struct Range_encoder range_encoder;
struct Len_encoder len_encoder;
struct Len_encoder rep_match_len_encoder;
- struct Literal_encoder literal_encoder;
int num_dis_slots;
- int match_distances[max_match_len+1];
+ struct Pair pairs[max_match_len+1];
struct Trial trials[max_num_trials];
int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
@@ -521,11 +515,6 @@ struct LZ_encoder
int align_price_count;
};
-void LZe_full_flush( struct LZ_encoder * const encoder, const State state );
-
-void LZe_fill_align_prices( struct LZ_encoder * const encoder );
-void LZe_fill_distance_prices( struct LZ_encoder * const encoder );
-
bool LZe_init( struct LZ_encoder * const encoder,
struct Matchfinder * const mf,
const File_header header, const int outfd );
@@ -533,7 +522,7 @@ bool LZe_init( struct LZ_encoder * const encoder,
static inline void LZe_free( struct LZ_encoder * const encoder )
{ Re_free( &encoder->range_encoder ); }
-static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder )
+static inline unsigned LZe_crc( const struct LZ_encoder * const encoder )
{ return encoder->crc ^ 0xFFFFFFFFU; }
/* move-to-front dis in/into reps */
@@ -578,6 +567,14 @@ static inline int LZe_price_rep( const struct LZ_encoder * const encoder,
return price;
}
+static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder,
+ const int len,
+ const State state, const int pos_state )
+ {
+ return LZe_price_rep( encoder, 0, state, pos_state ) +
+ Lee_price( &encoder->rep_match_len_encoder, len, pos_state );
+ }
+
static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
const int dis, const int dis_state )
{
@@ -592,12 +589,32 @@ static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
const int dis, const int len,
const int pos_state )
{
- if( len <= min_match_len && dis >= modeled_distances )
- return infinite_price;
return Lee_price( &encoder->len_encoder, len, pos_state ) +
LZe_price_dis( encoder, dis, get_dis_state( len ) );
}
+static inline int LZe_price_literal( const struct LZ_encoder * const encoder,
+ uint8_t prev_byte, uint8_t symbol )
+ { return price_symbol( encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
+
+static inline int LZe_price_matched( const struct LZ_encoder * const encoder,
+ uint8_t prev_byte, uint8_t symbol,
+ uint8_t match_byte )
+ { return price_matched( encoder->bm_literal[get_lit_state(prev_byte)],
+ symbol, match_byte ); }
+
+static inline void LZe_encode_literal( struct LZ_encoder * const encoder,
+ uint8_t prev_byte, uint8_t symbol )
+ { Re_encode_tree( &encoder->range_encoder,
+ encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); }
+
+static inline void LZe_encode_matched( struct LZ_encoder * const encoder,
+ uint8_t prev_byte, uint8_t symbol,
+ uint8_t match_byte )
+ { Re_encode_matched( &encoder->range_encoder,
+ encoder->bm_literal[get_lit_state(prev_byte)],
+ symbol, match_byte ); }
+
static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
const uint32_t dis, const int len,
const int pos_state )
@@ -616,7 +633,7 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
if( dis_slot < end_dis_model )
Re_encode_tree_reversed( &encoder->range_encoder,
- encoder->bm_dis + base - dis_slot,
+ encoder->bm_dis + base - dis_slot - 1,
direct_dis, direct_bits );
else
{
@@ -624,20 +641,27 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
direct_bits - dis_align_bits );
Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align,
direct_dis, dis_align_bits );
- if( --encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder );
+ --encoder->align_price_count;
}
}
}
static inline int LZe_read_match_distances( struct LZ_encoder * const encoder )
{
- int len = Mf_longest_match_len( encoder->matchfinder,
- encoder->match_distances );
- if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
- len += Mf_true_match_len( encoder->matchfinder, len,
- encoder->match_distances[len] + 1,
- max_match_len - len );
- return len;
+ const int num_pairs =
+ Mf_get_match_pairs( encoder->matchfinder, encoder->pairs );
+ if( num_pairs > 0 )
+ {
+ int len = encoder->pairs[num_pairs-1].len;
+ if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
+ {
+ len += Mf_true_match_len( encoder->matchfinder, len,
+ encoder->pairs[num_pairs-1].dis + 1,
+ max_match_len - len );
+ encoder->pairs[num_pairs-1].len = len;
+ }
+ }
+ return num_pairs;
}
static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
@@ -645,7 +669,7 @@ static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
if( --n >= 0 ) Mf_move_pos( encoder->matchfinder );
while( --n >= 0 )
{
- Mf_longest_match_len( encoder->matchfinder, 0 );
+ Mf_get_match_pairs( encoder->matchfinder, 0 );
Mf_move_pos( encoder->matchfinder );
}
}
@@ -657,15 +681,25 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
{
const int prev_index = encoder->trials[cur].prev_index;
struct Trial * const prev_trial = &encoder->trials[prev_index];
+
+ if( encoder->trials[cur].prev_index2 != single_step_trial )
+ {
+ prev_trial->dis = -1;
+ prev_trial->prev_index = prev_index - 1;
+ prev_trial->prev_index2 = single_step_trial;
+ if( encoder->trials[cur].prev_index2 >= 0 )
+ {
+ struct Trial * const prev_trial2 = &encoder->trials[prev_index-1];
+ prev_trial2->dis = encoder->trials[cur].dis2;
+ prev_trial2->prev_index = encoder->trials[cur].prev_index2;
+ prev_trial2->prev_index2 = single_step_trial;
+ }
+ }
prev_trial->price = cur - prev_index; /* len */
cur = *dis; *dis = prev_trial->dis; prev_trial->dis = cur;
cur = prev_index;
}
}
-int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
- const int reps[num_rep_distances],
- const State state );
-
bool LZe_encode_member( struct LZ_encoder * const encoder,
- const long long member_size );
+ const unsigned long long member_size );