summaryrefslogtreecommitdiffstats
path: root/encoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'encoder.h')
-rw-r--r--encoder.h277
1 files changed, 144 insertions, 133 deletions
diff --git a/encoder.h b/encoder.h
index e7a6481..13df958 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
/* Clzip - Data compressor based on the LZMA algorithm
- Copyright (C) 2010, 2011 Antonio Diaz Diaz.
+ Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -142,7 +142,7 @@ enum { /* bytes to keep in buffer before dictionary */
num_prev_positions3 = 1 << 18,
num_prev_positions2 = 1 << 16,
num_prev_positions = num_prev_positions4 + num_prev_positions3 +
- num_prev_positions2 };
+ num_prev_positions2 };
struct Matchfinder
{
@@ -150,43 +150,44 @@ struct Matchfinder
uint8_t * buffer; /* input buffer */
int32_t * prev_positions; /* last seen position of key */
int32_t * prev_pos_tree;
- int dictionary_size_; /* bytes to keep in buffer before pos */
+ int dictionary_size; /* bytes to keep in buffer before pos */
int buffer_size;
int pos; /* current pos in buffer */
int cyclic_pos; /* current pos in dictionary */
int stream_pos; /* first byte not yet read from file */
int pos_limit; /* when reached, a new block must be read */
- int match_len_limit_;
+ int match_len_limit;
int cycles;
int infd; /* input file descriptor */
bool at_stream_end; /* stream_pos shows real end of file */
};
bool Mf_read_block( struct Matchfinder * const mf );
+void Mf_normalize_pos( struct Matchfinder * const mf );
-void Mf_init( struct Matchfinder * const mf,
+bool Mf_init( struct Matchfinder * const mf,
const int dict_size, const int len_limit, const int ifd );
static inline void Mf_free( struct Matchfinder * const mf )
{
free( mf->prev_pos_tree ); mf->prev_pos_tree = 0;
- free( mf->prev_positions ); mf->prev_positions = 0;
free( mf->buffer ); mf->buffer = 0;
+ free( mf->prev_positions ); mf->prev_positions = 0;
}
-static inline uint8_t Mf_peek( struct Matchfinder * const mf, const int i )
+static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i )
{ return mf->buffer[mf->pos+i]; }
-static inline int Mf_available_bytes( struct Matchfinder * const mf )
+
+static inline int Mf_available_bytes( const struct Matchfinder * const mf )
{ return mf->stream_pos - mf->pos; }
-static inline long long Mf_data_position( struct Matchfinder * const mf )
+
+static inline long long Mf_data_position( const struct Matchfinder * const mf )
{ return mf->partial_data_pos + mf->pos; }
-static inline int Mf_dictionary_size( struct Matchfinder * const mf )
- { return mf->dictionary_size_; }
-static inline bool Mf_finished( struct Matchfinder * const mf )
+
+static inline bool Mf_finished( const struct Matchfinder * const mf )
{ return mf->at_stream_end && mf->pos >= mf->stream_pos; }
-static inline int Mf_match_len_limit( struct Matchfinder * const mf )
- { return mf->match_len_limit_; }
-static inline const uint8_t * Mf_ptr_to_current_pos( struct Matchfinder * const mf )
+
+static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf )
{ return mf->buffer + mf->pos; }
static inline bool Mf_dec_pos( struct Matchfinder * const mf,
@@ -195,25 +196,30 @@ static inline bool Mf_dec_pos( struct Matchfinder * const mf,
if( ahead < 0 || mf->pos < ahead ) return false;
mf->pos -= ahead;
mf->cyclic_pos -= ahead;
- if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size_;
+ if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size;
return true;
}
-static inline int Mf_true_match_len( struct Matchfinder * const mf,
+static inline int Mf_true_match_len( const struct Matchfinder * const mf,
const int index, const int distance,
int len_limit )
{
- const uint8_t * const data = mf->buffer + mf->pos + index - distance;
+ const uint8_t * const data = mf->buffer + mf->pos + index;
int i = 0;
if( index + len_limit > Mf_available_bytes( mf ) )
len_limit = Mf_available_bytes( mf ) - index;
- while( i < len_limit && data[i] == data[i+distance] ) ++i;
+ while( i < len_limit && data[i-distance] == data[i] ) ++i;
return i;
}
+static inline void Mf_move_pos( struct Matchfinder * const mf )
+ {
+ if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0;
+ if( ++mf->pos >= mf->pos_limit ) Mf_normalize_pos( mf );
+ }
+
void Mf_reset( struct Matchfinder * const mf );
-void Mf_move_pos( struct Matchfinder * const mf );
int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances );
@@ -231,89 +237,85 @@ struct Range_encoder
uint8_t cache;
};
-void Re_flush_data( struct Range_encoder * const range_encoder );
+void Re_flush_data( struct Range_encoder * const renc );
-static inline void Re_put_byte( struct Range_encoder * const range_encoder,
+static inline void Re_put_byte( struct Range_encoder * const renc,
const uint8_t b )
{
- range_encoder->buffer[range_encoder->pos] = b;
- if( ++range_encoder->pos >= re_buffer_size ) Re_flush_data( range_encoder );
+ renc->buffer[renc->pos] = b;
+ if( ++renc->pos >= re_buffer_size ) Re_flush_data( renc );
}
-static inline void Re_shift_low( struct Range_encoder * const range_encoder )
+static inline void Re_shift_low( struct Range_encoder * const renc )
{
- const uint32_t carry = range_encoder->low >> 32;
- if( range_encoder->low < 0xFF000000U || carry == 1 )
+ const bool carry = ( renc->low > 0xFFFFFFFFU );
+ if( carry || renc->low < 0xFF000000U )
{
- Re_put_byte( range_encoder, range_encoder->cache + carry );
- for( ; range_encoder->ff_count > 0; --range_encoder->ff_count )
- Re_put_byte( range_encoder, 0xFF + carry );
- range_encoder->cache = range_encoder->low >> 24;
+ Re_put_byte( renc, renc->cache + carry );
+ for( ; renc->ff_count > 0; --renc->ff_count )
+ Re_put_byte( renc, 0xFF + carry );
+ renc->cache = renc->low >> 24;
}
- else ++range_encoder->ff_count;
- range_encoder->low = ( range_encoder->low & 0x00FFFFFFU ) << 8;
+ else ++renc->ff_count;
+ renc->low = ( renc->low & 0x00FFFFFFU ) << 8;
}
-static inline void Re_init( struct Range_encoder * const range_encoder,
- const int ofd )
- {
- range_encoder->low = 0;
- range_encoder->partial_member_pos = 0;
- range_encoder->buffer = (uint8_t *)malloc( re_buffer_size );
- if( !range_encoder->buffer )
- {
- show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
- cleanup_and_fail( 1 );
- }
- range_encoder->pos = 0;
- range_encoder->range = 0xFFFFFFFFU;
- range_encoder->ff_count = 0;
- range_encoder->outfd = ofd;
- range_encoder->cache = 0;
+static inline bool Re_init( struct Range_encoder * const renc, const int ofd )
+ {
+ renc->low = 0;
+ renc->partial_member_pos = 0;
+ renc->buffer = (uint8_t *)malloc( re_buffer_size );
+ if( !renc->buffer ) return false;
+ renc->pos = 0;
+ renc->range = 0xFFFFFFFFU;
+ renc->ff_count = 0;
+ renc->outfd = ofd;
+ renc->cache = 0;
+ return true;
}
-static inline void Re_free( struct Range_encoder * const range_encoder )
- { free( range_encoder->buffer ); range_encoder->buffer = 0; }
+static inline void Re_free( struct Range_encoder * const renc )
+ { free( renc->buffer ); }
-static inline long long Re_member_position( struct Range_encoder * const range_encoder )
- { return range_encoder->partial_member_pos + range_encoder->pos + range_encoder->ff_count; }
+static inline long long Re_member_position( const struct Range_encoder * const renc )
+ { return renc->partial_member_pos + renc->pos + renc->ff_count; }
-static inline void Re_flush( struct Range_encoder * const range_encoder )
- { int i; for( i = 0; i < 5; ++i ) Re_shift_low( range_encoder ); }
+static inline void Re_flush( struct Range_encoder * const renc )
+ { int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); }
-static inline void Re_encode( struct Range_encoder * const range_encoder,
+static inline void Re_encode( struct Range_encoder * const renc,
const int symbol, const int num_bits )
{
int i;
for( i = num_bits - 1; i >= 0; --i )
{
- range_encoder->range >>= 1;
- if( (symbol >> i) & 1 ) range_encoder->low += range_encoder->range;
- if( range_encoder->range <= 0x00FFFFFFU )
- { range_encoder->range <<= 8; Re_shift_low( range_encoder ); }
+ renc->range >>= 1;
+ if( (symbol >> i) & 1 ) renc->low += renc->range;
+ if( renc->range <= 0x00FFFFFFU )
+ { renc->range <<= 8; Re_shift_low( renc ); }
}
}
-static inline void Re_encode_bit( struct Range_encoder * const range_encoder,
+static inline void Re_encode_bit( struct Range_encoder * const renc,
Bit_model * const probability, const int bit )
{
- const uint32_t bound = ( range_encoder->range >> bit_model_total_bits ) * *probability;
+ const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability;
if( !bit )
{
- range_encoder->range = bound;
+ renc->range = bound;
*probability += (bit_model_total - *probability) >> bit_model_move_bits;
}
else
{
- range_encoder->low += bound;
- range_encoder->range -= bound;
+ renc->low += bound;
+ renc->range -= bound;
*probability -= *probability >> bit_model_move_bits;
}
- if( range_encoder->range <= 0x00FFFFFFU )
- { range_encoder->range <<= 8; Re_shift_low( range_encoder ); }
+ if( renc->range <= 0x00FFFFFFU )
+ { renc->range <<= 8; Re_shift_low( renc ); }
}
-static inline void Re_encode_tree( struct Range_encoder * const range_encoder,
+static inline void Re_encode_tree( struct Range_encoder * const renc,
Bit_model bm[], const int symbol, const int num_bits )
{
int mask = ( 1 << ( num_bits - 1 ) );
@@ -322,13 +324,13 @@ static inline void Re_encode_tree( struct Range_encoder * const range_encoder,
for( i = num_bits; i > 0; --i, mask >>= 1 )
{
const int bit = ( symbol & mask );
- Re_encode_bit( range_encoder, &bm[model], bit );
+ Re_encode_bit( renc, &bm[model], bit );
model <<= 1;
if( bit ) model |= 1;
}
}
-static inline void Re_encode_tree_reversed( struct Range_encoder * const range_encoder,
+static inline void Re_encode_tree_reversed( struct Range_encoder * const renc,
Bit_model bm[], int symbol, const int num_bits )
{
int model = 1;
@@ -336,13 +338,13 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const range_e
for( i = num_bits; i > 0; --i )
{
const int bit = symbol & 1;
- Re_encode_bit( range_encoder, &bm[model], bit );
+ Re_encode_bit( renc, &bm[model], bit );
model = ( model << 1 ) | bit;
symbol >>= 1;
}
}
-static inline void Re_encode_matched( struct Range_encoder * const range_encoder,
+static inline void Re_encode_matched( struct Range_encoder * const renc,
Bit_model bm[], int symbol, int match_byte )
{
int model = 1;
@@ -351,14 +353,14 @@ static inline void Re_encode_matched( struct Range_encoder * const range_encoder
{
const int match_bit = ( match_byte >> i ) & 1;
int bit = ( symbol >> i ) & 1;
- Re_encode_bit( range_encoder, &bm[(match_bit<<8)+model+0x100], bit );
+ Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit );
model = ( model << 1 ) | bit;
if( match_bit != bit )
{
while( --i >= 0 )
{
bit = ( symbol >> i ) & 1;
- Re_encode_bit( range_encoder, &bm[model], bit );
+ Re_encode_bit( renc, &bm[model], bit );
model = ( model << 1 ) | bit;
}
break;
@@ -393,11 +395,11 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder,
pps[len] = tmp + price0( len_encoder->choice2 ) +
price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits );
for( ; len < len_encoder->len_symbols; ++len )
- /* using 4 slots per value makes "Lee_price" faster */
- len_encoder->prices[3][len] = len_encoder->prices[2][len] =
- len_encoder->prices[1][len] = len_encoder->prices[0][len] =
- tmp + price1( len_encoder->choice2 ) +
- price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
+ /* using 4 slots per value makes "Lee_price" faster */
+ len_encoder->prices[3][len] = len_encoder->prices[2][len] =
+ len_encoder->prices[1][len] = len_encoder->prices[0][len] =
+ tmp + price1( len_encoder->choice2 ) +
+ price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits );
len_encoder->counters[pos_state] = len_encoder->len_symbols;
}
@@ -420,10 +422,10 @@ static inline void Lee_init( struct Len_encoder * const len_encoder,
}
void Lee_encode( struct Len_encoder * const len_encoder,
- struct Range_encoder * const range_encoder, int symbol,
- const int pos_state );
+ struct Range_encoder * const renc,
+ int symbol, const int pos_state );
-static inline int Lee_price( struct Len_encoder * const len_encoder,
+static inline int Lee_price( const struct Len_encoder * const len_encoder,
const int symbol, const int pos_state )
{ return len_encoder->prices[pos_state][symbol - min_match_len]; }
@@ -433,34 +435,38 @@ struct Literal_encoder
Bit_model bm_literal[1<<literal_context_bits][0x300];
};
-static inline int Lie_state( const uint8_t prev_byte )
- { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
-
-static inline void Lie_init( struct Literal_encoder * const literal_encoder )
+static inline void Lie_init( struct Literal_encoder * const lienc )
{
int i, j;
for( i = 0; i < 1<<literal_context_bits; ++i )
for( j = 0; j < 0x300; ++j )
- Bm_init( &literal_encoder->bm_literal[i][j] );
+ Bm_init( &lienc->bm_literal[i][j] );
}
-static inline void Lie_encode( struct Literal_encoder * const literal_encoder,
- struct Range_encoder * const range_encoder,
+static inline int Lie_state( const uint8_t prev_byte )
+ { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
+
+static inline void Lie_encode( struct Literal_encoder * const lienc,
+ struct Range_encoder * const renc,
uint8_t prev_byte, uint8_t symbol )
- { Re_encode_tree( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
+ { Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
-static inline void Lie_encode_matched( struct Literal_encoder * const literal_encoder,
- struct Range_encoder * const range_encoder,
- uint8_t prev_byte, uint8_t symbol, uint8_t match_byte )
- { Re_encode_matched( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); }
+static inline void Lie_encode_matched( struct Literal_encoder * const lienc,
+ struct Range_encoder * const renc,
+ uint8_t prev_byte, uint8_t symbol,
+ uint8_t match_byte )
+ { Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)],
+ symbol, match_byte ); }
-static inline int Lie_price_symbol( struct Literal_encoder * const literal_encoder,
+static inline int Lie_price_symbol( const struct Literal_encoder * const lienc,
uint8_t prev_byte, uint8_t symbol )
- { return price_symbol( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
+ { return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); }
-static inline int Lie_price_matched( struct Literal_encoder * const literal_encoder,
- uint8_t prev_byte, uint8_t symbol, uint8_t match_byte )
- { return price_matched( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); }
+static inline int Lie_price_matched( const struct Literal_encoder * const lienc,
+ uint8_t prev_byte, uint8_t symbol,
+ uint8_t match_byte )
+ { return price_matched( lienc->bm_literal[Lie_state(prev_byte)],
+ symbol, match_byte ); }
enum { infinite_price = 0x0FFFFFFF,
@@ -487,7 +493,7 @@ static inline void Tr_update( struct Trial * const trial,
struct LZ_encoder
{
int longest_match_found;
- uint32_t crc_;
+ uint32_t crc;
Bit_model bm_match[states][pos_states];
Bit_model bm_rep[states];
@@ -515,11 +521,20 @@ struct LZ_encoder
int align_price_count;
};
+void LZe_full_flush( struct LZ_encoder * const encoder, const State state );
+
void LZe_fill_align_prices( struct LZ_encoder * const encoder );
void LZe_fill_distance_prices( struct LZ_encoder * const encoder );
-static inline uint32_t LZe_crc( struct LZ_encoder * const encoder )
- { return encoder->crc_ ^ 0xFFFFFFFFU; }
+bool LZe_init( struct LZ_encoder * const encoder,
+ struct Matchfinder * const mf,
+ const File_header header, const int outfd );
+
+static inline void LZe_free( struct LZ_encoder * const encoder )
+ { Re_free( &encoder->range_encoder ); }
+
+static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder )
+ { return encoder->crc ^ 0xFFFFFFFFU; }
/* move-to-front dis in/into reps */
static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] )
@@ -538,13 +553,15 @@ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] )
}
}
-static inline int LZe_price_rep_len1( struct LZ_encoder * const encoder,
+static inline int LZe_price_rep_len1( const struct LZ_encoder * const encoder,
const State state, const int pos_state )
{
- return price0( encoder->bm_rep0[state] ) + price0( encoder->bm_len[state][pos_state] );
+ return price0( encoder->bm_rep0[state] ) +
+ price0( encoder->bm_len[state][pos_state] );
}
-static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int rep,
+static inline int LZe_price_rep( const struct LZ_encoder * const encoder,
+ const int rep,
const State state, const int pos_state )
{
int price;
@@ -561,7 +578,7 @@ static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int re
return price;
}
-static inline int LZe_price_dis( struct LZ_encoder * const encoder,
+static inline int LZe_price_dis( const struct LZ_encoder * const encoder,
const int dis, const int dis_state )
{
if( dis < modeled_distances )
@@ -571,7 +588,7 @@ static inline int LZe_price_dis( struct LZ_encoder * const encoder,
encoder->align_prices[dis & (dis_align_size - 1)];
}
-static inline int LZe_price_pair( struct LZ_encoder * const encoder,
+static inline int LZe_price_pair( const struct LZ_encoder * const encoder,
const int dis, const int len,
const int pos_state )
{
@@ -598,12 +615,15 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
const uint32_t direct_dis = dis - base;
if( dis_slot < end_dis_model )
- Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_dis + base - dis_slot,
- direct_dis, direct_bits );
+ Re_encode_tree_reversed( &encoder->range_encoder,
+ encoder->bm_dis + base - dis_slot,
+ direct_dis, direct_bits );
else
{
- Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
- Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, direct_dis, dis_align_bits );
+ Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits,
+ direct_bits - dis_align_bits );
+ Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align,
+ direct_dis, dis_align_bits );
if( --encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder );
}
}
@@ -611,19 +631,21 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder,
static inline int LZe_read_match_distances( struct LZ_encoder * const encoder )
{
- int len = Mf_longest_match_len( encoder->matchfinder, encoder->match_distances );
- if( len == Mf_match_len_limit( encoder->matchfinder ) )
- len += Mf_true_match_len( encoder->matchfinder, len, encoder->match_distances[len] + 1, max_match_len - len );
+ int len = Mf_longest_match_len( encoder->matchfinder,
+ encoder->match_distances );
+ if( len == encoder->matchfinder->match_len_limit && len < max_match_len )
+ len += Mf_true_match_len( encoder->matchfinder, len,
+ encoder->match_distances[len] + 1,
+ max_match_len - len );
return len;
}
-static inline void LZe_move_pos( struct LZ_encoder * const encoder,
- int n, bool skip )
+static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n )
{
+ if( --n >= 0 ) Mf_move_pos( encoder->matchfinder );
while( --n >= 0 )
{
- if( skip ) skip = false;
- else Mf_longest_match_len( encoder->matchfinder, 0 );
+ Mf_longest_match_len( encoder->matchfinder, 0 );
Mf_move_pos( encoder->matchfinder );
}
}
@@ -642,19 +664,8 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur )
}
int LZe_sequence_optimizer( struct LZ_encoder * const encoder,
- const int reps[num_rep_distances], const State state );
-
-void LZe_full_flush( struct LZ_encoder * const encoder, const State state );
-
-void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf,
- const File_header header, const int outfd );
-
-static inline void LZe_free( struct LZ_encoder * const encoder )
- {
- Re_free( &encoder->range_encoder );
- }
-
-bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size );
+ const int reps[num_rep_distances],
+ const State state );
-static inline long long LZe_member_position( struct LZ_encoder * const encoder )
- { return Re_member_position( &encoder->range_encoder ); }
+bool LZe_encode_member( struct LZ_encoder * const encoder,
+ const long long member_size );