summaryrefslogtreecommitdiffstats
path: root/encoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'encoder.h')
-rw-r--r--encoder.h249
1 files changed, 150 insertions, 99 deletions
diff --git a/encoder.h b/encoder.h
index 86c23c2..21c683d 100644
--- a/encoder.h
+++ b/encoder.h
@@ -1,5 +1,5 @@
/* Lzip - Data compressor based on the LZMA algorithm
- Copyright (C) 2008, 2009, 2010, 2011 Antonio Diaz Diaz.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -140,41 +140,43 @@ inline int price_matched( const Bit_model bm[], const int symbol,
}
-class Matchfinder
+class Matchfinder_base
{
- enum { // bytes to keep in buffer before dictionary
- before_size = max_num_trials + 1,
- // bytes to keep in buffer after pos
- after_size = max_match_len,
- num_prev_positions4 = 1 << 20,
- num_prev_positions3 = 1 << 18,
- num_prev_positions2 = 1 << 16,
- num_prev_positions = num_prev_positions4 + num_prev_positions3 +
- num_prev_positions2 };
+ Matchfinder_base( const Matchfinder_base & ); // declared as private
+ void operator=( const Matchfinder_base & ); // declared as private
+
+ bool read_block();
+ void normalize_pos();
+
+protected:
+ enum { after_size = max_match_len }; // bytes to keep in buffer after pos
long long partial_data_pos;
- uint8_t * buffer; // input buffer
int32_t * const prev_positions; // last seen position of key
- int32_t * prev_pos_tree;
- int dictionary_size_; // bytes to keep in buffer before pos
+ uint8_t * buffer; // input buffer
+ int32_t * pos_array; // may be tree or chain
+ const int num_prev_positions;
+ const int before_size; // bytes to keep in buffer before dictionary
+ const int match_len_limit_;
+ const int infd; // input file descriptor
int buffer_size;
+ int dictionary_size_; // bytes to keep in buffer before pos
int pos; // current pos in buffer
int cyclic_pos; // current pos in dictionary
- int stream_pos; // first byte not yet read from file
int pos_limit; // when reached, a new block must be read
- const int match_len_limit_;
- const int cycles;
- const int infd; // input file descriptor
+ int stream_pos; // first byte not yet read from file
+ int pos_array_size;
bool at_stream_end; // stream_pos shows real end of file
- bool read_block();
-
-public:
- Matchfinder( const int dict_size, const int len_limit, const int ifd );
+ Matchfinder_base( const int before, const int dict_size,
+ const int dict_factor, const int len_limit,
+ const int num_prev_pos, const int ifd,
+ const int pos_array_factor );
- ~Matchfinder()
- { delete[] prev_pos_tree; std::free( buffer ); delete[] prev_positions; }
+ ~Matchfinder_base()
+ { delete[] pos_array; std::free( buffer ); delete[] prev_positions; }
+public:
uint8_t operator[]( const int i ) const throw() { return buffer[pos+i]; }
int available_bytes() const throw() { return stream_pos - pos; }
long long data_position() const throw() { return partial_data_pos + pos; }
@@ -183,27 +185,47 @@ public:
int match_len_limit() const throw() { return match_len_limit_; }
const uint8_t * ptr_to_current_pos() const throw() { return buffer + pos; }
- bool dec_pos( const int ahead ) throw()
- {
- if( ahead < 0 || pos < ahead ) return false;
- pos -= ahead;
- cyclic_pos -= ahead;
- if( cyclic_pos < 0 ) cyclic_pos += dictionary_size_;
- return true;
- }
-
int true_match_len( const int index, const int distance, int len_limit ) const throw()
{
if( index + len_limit > available_bytes() )
len_limit = available_bytes() - index;
- const uint8_t * const data = buffer + pos + index - distance;
+ const uint8_t * const data = buffer + pos + index;
int i = 0;
- while( i < len_limit && data[i] == data[i+distance] ) ++i;
+ while( i < len_limit && data[i-distance] == data[i] ) ++i;
return i;
}
void reset();
- void move_pos();
+ void move_pos()
+ {
+ if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0;
+ if( ++pos >= pos_limit ) normalize_pos();
+ }
+ };
+
+
+class Matchfinder : public Matchfinder_base
+ {
+ enum { before = max_num_trials + 1,
+ dict_factor = 2,
+ num_prev_positions4 = 1 << 20,
+ num_prev_positions3 = 1 << 18,
+ num_prev_positions2 = 1 << 16,
+ num_prev_pos = num_prev_positions4 + num_prev_positions3 +
+ num_prev_positions2,
+ pos_array_factor = 2 };
+
+ const int cycles;
+
+public:
+ Matchfinder( const int dict_size, const int len_limit, const int ifd )
+ :
+ Matchfinder_base( before, dict_size, dict_factor, len_limit,
+ num_prev_pos, ifd, pos_array_factor ),
+ cycles( ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256 )
+ {}
+
+ bool dec_pos( const int ahead ) throw();
int longest_match_len( int * const distances = 0 ) throw();
};
@@ -233,8 +255,11 @@ class Range_encoder
low = ( low & 0x00FFFFFFU ) << 8;
}
+ Range_encoder( const Range_encoder & );
+ void operator=( const Range_encoder & );
+
public:
- Range_encoder( const int ofd )
+ explicit Range_encoder( const int ofd )
:
low( 0 ),
partial_member_pos( 0 ),
@@ -367,7 +392,7 @@ class Len_encoder
}
public:
- Len_encoder( const int len_limit )
+ explicit Len_encoder( const int len_limit )
: len_symbols( len_limit + 1 - min_match_len )
{
for( int i = 0; i < pos_states; ++i ) update_prices( i );
@@ -408,24 +433,12 @@ public:
};
-class LZ_encoder
+class LZ_encoder_base
{
- enum { infinite_price = 0x0FFFFFFF,
- max_marker_size = 16,
+protected:
+ enum { max_marker_size = 16,
num_rep_distances = 4 }; // must be 4
- struct Trial
- {
- State state;
- int dis;
- int prev_index; // index of prev trial in trials[]
- int price; // dual use var; cumulative price, match length
- int reps[num_rep_distances];
- void update( const int d, const int p_i, const int pr ) throw()
- { if( pr < price ) { dis = d; prev_index = p_i; price = pr; } }
- };
-
- int longest_match_found;
uint32_t crc_;
Bit_model bm_match[State::states][pos_states];
@@ -438,26 +451,28 @@ class LZ_encoder
Bit_model bm_dis[modeled_distances-end_dis_model+1];
Bit_model bm_align[dis_align_size];
- Matchfinder & matchfinder;
Range_encoder range_encoder;
Len_encoder len_encoder;
Len_encoder rep_match_len_encoder;
Literal_encoder literal_encoder;
const int num_dis_slots;
- int match_distances[max_match_len+1];
- Trial trials[max_num_trials];
-
- int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
- int dis_prices[max_dis_states][modeled_distances];
- int align_prices[dis_align_size];
- int align_price_count;
-
- void fill_align_prices() throw();
- void fill_distance_prices() throw();
uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; }
+ LZ_encoder_base( const File_header & header, const int dictionary_size,
+ const int match_len_limit, const int outfd )
+ :
+ crc_( 0xFFFFFFFFU ),
+ range_encoder( outfd ),
+ len_encoder( match_len_limit ),
+ rep_match_len_encoder( match_len_limit ),
+ num_dis_slots( 2 * real_bits( dictionary_size - 1 ) )
+ {
+ for( int i = 0; i < File_header::size; ++i )
+ range_encoder.put_byte( header.data[i] );
+ }
+
// move-to-front dis in/into reps
void mtf_reps( const int dis, int reps[num_rep_distances] ) throw()
{
@@ -474,6 +489,65 @@ class LZ_encoder
}
}
+ void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw()
+ {
+ len_encoder.encode( range_encoder, len, pos_state );
+ const int dis_slot = dis_slots[dis];
+ range_encoder.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits );
+
+ if( dis_slot >= start_dis_model )
+ {
+ const int direct_bits = ( dis_slot >> 1 ) - 1;
+ const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
+ const uint32_t direct_dis = dis - base;
+
+ if( dis_slot < end_dis_model )
+ range_encoder.encode_tree_reversed( bm_dis + base - dis_slot,
+ direct_dis, direct_bits );
+ else
+ {
+ range_encoder.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
+ range_encoder.encode_tree_reversed( bm_align, direct_dis, dis_align_bits );
+ }
+ }
+ }
+
+ void full_flush( const long long data_position, const State & state );
+
+public:
+ long long member_position() const throw()
+ { return range_encoder.member_position(); }
+ };
+
+
+class LZ_encoder : public LZ_encoder_base
+ {
+ enum { infinite_price = 0x0FFFFFFF };
+
+ struct Trial
+ {
+ State state;
+ int dis;
+ int prev_index; // index of prev trial in trials[]
+ int price; // dual use var; cumulative price, match length
+ int reps[num_rep_distances];
+ void update( const int d, const int p_i, const int pr ) throw()
+ { if( pr < price ) { dis = d; prev_index = p_i; price = pr; } }
+ };
+
+ Matchfinder & matchfinder;
+ int longest_match_found;
+ int match_distances[max_match_len+1];
+ Trial trials[max_num_trials];
+
+ int dis_slot_prices[max_dis_states][2*max_dictionary_bits];
+ int dis_prices[max_dis_states][modeled_distances];
+ int align_prices[dis_align_size];
+ int align_price_count;
+
+ void fill_align_prices() throw();
+ void fill_distance_prices() throw();
+
int price_rep_len1( const State & state, const int pos_state ) const throw()
{
return price0( bm_rep0[state()] ) + price0( bm_len[state()][pos_state] );
@@ -512,44 +586,21 @@ class LZ_encoder
price_dis( dis, get_dis_state( len ) );
}
- void encode_pair( const uint32_t dis, const int len, const int pos_state ) throw()
- {
- len_encoder.encode( range_encoder, len, pos_state );
- const int dis_slot = dis_slots[dis];
- range_encoder.encode_tree( bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits );
-
- if( dis_slot >= start_dis_model )
- {
- const int direct_bits = ( dis_slot >> 1 ) - 1;
- const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
- const uint32_t direct_dis = dis - base;
-
- if( dis_slot < end_dis_model )
- range_encoder.encode_tree_reversed( bm_dis + base - dis_slot,
- direct_dis, direct_bits );
- else
- {
- range_encoder.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
- range_encoder.encode_tree_reversed( bm_align, direct_dis, dis_align_bits );
- if( --align_price_count <= 0 ) fill_align_prices();
- }
- }
- }
-
int read_match_distances() throw()
{
int len = matchfinder.longest_match_len( match_distances );
- if( len == matchfinder.match_len_limit() )
- len += matchfinder.true_match_len( len, match_distances[len] + 1, max_match_len - len );
+ if( len == matchfinder.match_len_limit() && len < max_match_len )
+ len += matchfinder.true_match_len( len, match_distances[len] + 1,
+ max_match_len - len );
return len;
}
- void move_pos( int n, bool skip = false )
+ void move_pos( int n )
{
+ if( --n >= 0 ) matchfinder.move_pos();
while( --n >= 0 )
{
- if( skip ) skip = false;
- else matchfinder.longest_match_len();
+ matchfinder.longest_match_len();
matchfinder.move_pos();
}
}
@@ -570,13 +621,13 @@ class LZ_encoder
int sequence_optimizer( const int reps[num_rep_distances],
const State & state );
- void full_flush( const State & state );
-
public:
- LZ_encoder( Matchfinder & mf, const File_header & header, const int outfd );
+ LZ_encoder( Matchfinder & mf, const File_header & header, const int outfd )
+ :
+ LZ_encoder_base( header, mf.dictionary_size(), mf.match_len_limit(), outfd ),
+ matchfinder( mf ),
+ longest_match_found( 0 )
+ { fill_align_prices(); }
bool encode_member( const long long member_size );
-
- long long member_position() const throw()
- { return range_encoder.member_position(); }
};