summaryrefslogtreecommitdiffstats
path: root/encoder.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--encoder.cc282
1 files changed, 72 insertions, 210 deletions
diff --git a/encoder.cc b/encoder.cc
index 0d34ad1..4833dd4 100644
--- a/encoder.cc
+++ b/encoder.cc
@@ -1,5 +1,5 @@
/* Lzip - LZMA lossless data compressor
- Copyright (C) 2008-2014 Antonio Diaz Diaz.
+ Copyright (C) 2008-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,112 +26,13 @@
#include <stdint.h>
#include "lzip.h"
+#include "encoder_base.h"
#include "encoder.h"
-Dis_slots dis_slots;
-Prob_prices prob_prices;
-
-
-bool Matchfinder_base::read_block()
- {
- if( !at_stream_end && stream_pos < buffer_size )
- {
- const int size = buffer_size - stream_pos;
- const int rd = readblock( infd, buffer + stream_pos, size );
- stream_pos += rd;
- if( rd != size && errno ) throw Error( "Read error" );
- if( rd < size ) { at_stream_end = true; pos_limit = buffer_size; }
- }
- return pos < stream_pos;
- }
-
-
-void Matchfinder_base::normalize_pos()
- {
- if( pos > stream_pos )
- internal_error( "pos > stream_pos in Matchfinder_base::normalize_pos." );
- if( !at_stream_end )
- {
- const int offset = pos - dictionary_size_ - before_size;
- const int size = stream_pos - offset;
- std::memmove( buffer, buffer + offset, size );
- partial_data_pos += offset;
- pos -= offset;
- stream_pos -= offset;
- for( int i = 0; i < num_prev_positions; ++i )
- prev_positions[i] -= std::min( prev_positions[i], offset );
- for( int i = 0; i < pos_array_size; ++i )
- pos_array[i] -= std::min( pos_array[i], offset );
- read_block();
- }
- }
-
-
-Matchfinder_base::Matchfinder_base( const int before, const int dict_size,
- const int after_size, const int dict_factor,
- const int num_prev_positions23,
- const int pos_array_factor, const int ifd )
- :
- partial_data_pos( 0 ),
- before_size( before ),
- pos( 0 ),
- cyclic_pos( 0 ),
- stream_pos( 0 ),
- infd( ifd ),
- at_stream_end( false )
+int LZ_encoder::get_match_pairs( Pair * pairs )
{
- const int buffer_size_limit =
- ( dict_factor * dict_size ) + before_size + after_size;
- buffer_size = std::max( 65536, dict_size );
- buffer = (uint8_t *)std::malloc( buffer_size );
- if( !buffer ) throw std::bad_alloc();
- if( read_block() && !at_stream_end && buffer_size < buffer_size_limit )
- {
- buffer_size = buffer_size_limit;
- uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
- if( !tmp ) { std::free( buffer ); throw std::bad_alloc(); }
- buffer = tmp;
- read_block();
- }
- if( at_stream_end && stream_pos < dict_size )
- dictionary_size_ = std::max( (int)min_dictionary_size, stream_pos );
- else
- dictionary_size_ = dict_size;
- pos_limit = buffer_size;
- if( !at_stream_end ) pos_limit -= after_size;
- unsigned size = 1 << std::max( 16, real_bits( dictionary_size_ - 1 ) - 2 );
- if( dictionary_size_ > 1 << 26 ) // 64 MiB
- size >>= 1;
- key4_mask = size - 1;
- size += num_prev_positions23;
-
- num_prev_positions = size;
- pos_array_size = pos_array_factor * ( dictionary_size_ + 1 );
- size += pos_array_size;
- prev_positions = new( std::nothrow ) int32_t[size];
- if( !prev_positions ) { std::free( buffer ); throw std::bad_alloc(); }
- pos_array = prev_positions + num_prev_positions;
- for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = 0;
- }
-
-
-void Matchfinder_base::reset()
- {
- if( stream_pos > pos )
- std::memmove( buffer, buffer + pos, stream_pos - pos );
- partial_data_pos = 0;
- stream_pos -= pos;
- pos = 0;
- cyclic_pos = 0;
- for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = 0;
- read_block();
- }
-
-
-int Matchfinder::get_match_pairs( Pair * pairs )
- {
- int len_limit = match_len_limit_;
+ int len_limit = match_len_limit;
if( len_limit > available_bytes() )
{
len_limit = available_bytes();
@@ -141,8 +42,8 @@ int Matchfinder::get_match_pairs( Pair * pairs )
int maxlen = 0;
int num_pairs = 0;
const int pos1 = pos + 1;
- const int min_pos = ( pos > dictionary_size_ ) ? pos - dictionary_size_ : 0;
- const uint8_t * const data = buffer + pos;
+ const int min_pos = ( pos > dictionary_size ) ? pos - dictionary_size : 0;
+ const uint8_t * const data = ptr_to_current_pos();
unsigned tmp = crc32[data[0]] ^ data[1];
const int key2 = tmp & ( num_prev_positions2 - 1 );
@@ -174,7 +75,7 @@ int Matchfinder::get_match_pairs( Pair * pairs )
while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
++maxlen;
pairs[num_pairs-1].len = maxlen;
- if( maxlen >= len_limit ) pairs = 0; // done. now just skip
+ if( maxlen >= len_limit ) pairs = 0; /* done. now just skip */
}
if( maxlen < 3 ) maxlen = 3;
}
@@ -195,7 +96,7 @@ int Matchfinder::get_match_pairs( Pair * pairs )
const int delta = pos1 - newpos;
int32_t * const newptr = pos_array +
( ( cyclic_pos - delta +
- ( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ + 1 ) ) << 1 );
+ ( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) ) << 1 );
if( data[len-delta] == data[len] )
{
while( ++len < len_limit && data[len-delta] == data[len] ) {}
@@ -231,38 +132,6 @@ int Matchfinder::get_match_pairs( Pair * pairs )
}
-void Range_encoder::flush_data()
- {
- if( pos > 0 )
- {
- if( outfd >= 0 && writeblock( outfd, buffer, pos ) != pos )
- throw Error( "Write error" );
- partial_member_pos += pos;
- pos = 0;
- if( verbosity >= 2 ) show_progress();
- }
- }
-
-
- // End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len)
-void LZ_encoder_base::full_flush( const unsigned long long data_position,
- const State state )
- {
- const int pos_state = data_position & pos_state_mask;
- renc.encode_bit( bm_match[state()][pos_state], 1 );
- renc.encode_bit( bm_rep[state()], 0 );
- encode_pair( 0xFFFFFFFFU, min_match_len, pos_state );
- renc.flush();
- File_trailer trailer;
- trailer.data_crc( crc() );
- trailer.data_size( data_position );
- trailer.member_size( renc.member_position() + File_trailer::size() );
- for( int i = 0; i < File_trailer::size(); ++i )
- renc.put_byte( trailer.data[i] );
- renc.flush_data();
- }
-
-
void LZ_encoder::update_distance_prices()
{
for( int dis = start_dis_model; dis < modeled_distances; ++dis )
@@ -297,7 +166,7 @@ void LZ_encoder::update_distance_prices()
}
-/* Return value == number of bytes advanced (ahead).
+/* Returns the number of bytes advanced (ahead).
trials[0]..trials[ahead-1] contain the steps to encode.
( trials[0].dis == -1 ) means literal.
A match/rep longer or equal than match_len_limit finishes the sequence.
@@ -320,29 +189,29 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
int rep_index = 0;
for( int i = 0; i < num_rep_distances; ++i )
{
- replens[i] = matchfinder.true_match_len( 0, reps[i] + 1, max_match_len );
+ replens[i] = true_match_len( 0, reps[i] + 1, max_match_len );
if( replens[i] > replens[rep_index] ) rep_index = i;
}
- if( replens[rep_index] >= matchfinder.match_len_limit() )
+ if( replens[rep_index] >= match_len_limit )
{
trials[0].dis = rep_index;
trials[0].price = replens[rep_index];
- move_pos( replens[rep_index] );
+ move_and_update( replens[rep_index] );
return replens[rep_index];
}
- if( main_len >= matchfinder.match_len_limit() )
+ if( main_len >= match_len_limit )
{
trials[0].dis = pairs[num_pairs-1].dis + num_rep_distances;
trials[0].price = main_len;
- move_pos( main_len );
+ move_and_update( main_len );
return main_len;
}
- const int pos_state = matchfinder.data_position() & pos_state_mask;
- const uint8_t prev_byte = matchfinder[1];
- const uint8_t cur_byte = matchfinder[0];
- const uint8_t match_byte = matchfinder[reps[0]+1];
+ const int pos_state = data_position() & pos_state_mask;
+ const uint8_t prev_byte = peek( 1 );
+ const uint8_t cur_byte = peek( 0 );
+ const uint8_t match_byte = peek( reps[0] + 1 );
trials[0].state = state;
trials[1].dis = -1; // literal
@@ -364,7 +233,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
{
trials[0].dis = trials[1].dis;
trials[0].price = 1;
- matchfinder.move_pos();
+ move_pos();
return 1;
}
@@ -400,10 +269,10 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
}
int cur = 0;
- while( true ) // price optimization loop
+ while( true ) /* price optimization loop */
{
- matchfinder.move_pos();
- if( ++cur >= num_trials ) // no more initialized trials
+ move_pos();
+ if( ++cur >= num_trials ) /* no more initialized trials */
{
backward( cur );
return cur;
@@ -411,14 +280,14 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
const int num_pairs = read_match_distances();
const int newlen = ( num_pairs > 0 ) ? pairs[num_pairs-1].len : 0;
- if( newlen >= matchfinder.match_len_limit() )
+ if( newlen >= match_len_limit )
{
pending_num_pairs = num_pairs;
backward( cur );
return cur;
}
- // give final values to current trial
+ /* give final values to current trial */
Trial & cur_trial = trials[cur];
State cur_state;
{
@@ -429,7 +298,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
if( prev_index2 == single_step_trial )
{
cur_state = trials[prev_index].state;
- if( prev_index + 1 == cur ) // len == 1
+ if( prev_index + 1 == cur ) /* len == 1 */
{
if( dis == 0 ) cur_state.set_short_rep();
else cur_state.set_char(); // literal
@@ -437,14 +306,14 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
else if( dis < num_rep_distances ) cur_state.set_rep();
else cur_state.set_match();
}
- else if( prev_index2 == dual_step_trial ) // dis == 0
+ else if( prev_index2 == dual_step_trial ) /* dis == 0 */
{
--prev_index;
cur_state = trials[prev_index].state;
cur_state.set_char();
cur_state.set_rep();
}
- else // if( prev_index2 >= 0 )
+ else /* if( prev_index2 >= 0 ) */
{
prev_index = prev_index2;
cur_state = trials[prev_index].state;
@@ -459,10 +328,10 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
mtf_reps( dis, cur_trial.reps );
}
- const int pos_state = matchfinder.data_position() & pos_state_mask;
- const uint8_t prev_byte = matchfinder[1];
- const uint8_t cur_byte = matchfinder[0];
- const uint8_t match_byte = matchfinder[cur_trial.reps[0]+1];
+ const int pos_state = data_position() & pos_state_mask;
+ const uint8_t prev_byte = peek( 1 );
+ const uint8_t cur_byte = peek( 0 );
+ const uint8_t match_byte = peek( cur_trial.reps[0] + 1 );
int next_price = cur_trial.price +
price0( bm_match[cur_state()][pos_state] );
@@ -471,7 +340,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
else
next_price += price_matched( prev_byte, cur_byte, match_byte );
- // try last updates to next trial
+ /* try last updates to next trial */
Trial & next_trial = trials[cur+1];
next_trial.update( next_price, -1, cur ); // literal
@@ -482,8 +351,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
if( match_byte == cur_byte && next_trial.dis != 0 &&
next_trial.prev_index2 == single_step_trial )
{
- const int price = rep_match_price +
- price_shortrep( cur_state, pos_state );
+ const int price = rep_match_price + price_shortrep( cur_state, pos_state );
if( price <= next_trial.price )
{
next_trial.price = price;
@@ -492,20 +360,18 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
}
}
- const int available_bytes = std::min( matchfinder.available_bytes(),
- max_num_trials - 1 - cur );
- if( available_bytes < min_match_len ) continue;
+ const int triable_bytes =
+ std::min( available_bytes(), max_num_trials - 1 - cur );
+ if( triable_bytes < min_match_len ) continue;
- const int len_limit = std::min( matchfinder.match_len_limit(),
- available_bytes );
+ const int len_limit = std::min( match_len_limit, triable_bytes );
- // try literal + rep0
+ /* try literal + rep0 */
if( match_byte != cur_byte && next_trial.prev_index != cur )
{
- const uint8_t * const data = matchfinder.ptr_to_current_pos();
+ const uint8_t * const data = ptr_to_current_pos();
const int dis = cur_trial.reps[0] + 1;
- const int limit = std::min( matchfinder.match_len_limit() + 1,
- available_bytes );
+ const int limit = std::min( match_len_limit + 1, triable_bytes );
int len = 1;
while( len < limit && data[len-dis] == data[len] ) ++len;
if( --len >= min_match_len )
@@ -524,10 +390,10 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
int start_len = min_match_len;
- // try rep distances
+ /* try rep distances */
for( int rep = 0; rep < num_rep_distances; ++rep )
{
- const uint8_t * const data = matchfinder.ptr_to_current_pos();
+ const uint8_t * const data = ptr_to_current_pos();
int len;
const int dis = cur_trial.reps[rep] + 1;
@@ -541,12 +407,11 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
trials[cur+i].update( price + rep_len_prices.price( i, pos_state ),
rep, cur );
- if( rep == 0 ) start_len = len + 1; // discard shorter matches
+ if( rep == 0 ) start_len = len + 1; /* discard shorter matches */
- // try rep + literal + rep0
+ /* try rep + literal + rep0 */
int len2 = len + 1;
- const int limit = std::min( matchfinder.match_len_limit() + len2,
- available_bytes );
+ const int limit = std::min( match_len_limit + len2, triable_bytes );
while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
len2 -= len + 1;
if( len2 < min_match_len ) continue;
@@ -566,7 +431,7 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
trials[cur+len+1+len2].update3( price, rep, cur + len + 1, cur );
}
- // try matches
+ /* try matches */
if( newlen >= start_len && newlen <= len_limit )
{
const int normal_match_price = match_price +
@@ -584,14 +449,13 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
trials[cur+len].update( price, dis + num_rep_distances, cur );
- // try match + literal + rep0
+ /* try match + literal + rep0 */
if( len == pairs[i].len )
{
- const uint8_t * const data = matchfinder.ptr_to_current_pos();
+ const uint8_t * const data = ptr_to_current_pos();
const int dis2 = dis + 1;
int len2 = len + 1;
- const int limit = std::min( matchfinder.match_len_limit() + len2,
- available_bytes );
+ const int limit = std::min( match_len_limit + len2, triable_bytes );
while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
len2 -= len + 1;
if( len2 >= min_match_len )
@@ -624,10 +488,10 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
{
const unsigned long long member_size_limit =
member_size - File_trailer::size() - max_marker_size;
- const bool best = ( matchfinder.match_len_limit() > 12 );
+ const bool best = ( match_len_limit > 12 );
const int dis_price_count = best ? 1 : 512;
const int align_price_count = best ? 1 : dis_align_size;
- const int price_count = ( matchfinder.match_len_limit() > 36 ) ? 1013 : 4093;
+ const int price_count = ( match_len_limit > 36 ) ? 1013 : 4093;
int price_counter = 0;
int dis_price_counter = 0;
int align_price_counter = 0;
@@ -635,26 +499,25 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
State state;
for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
- if( matchfinder.data_position() != 0 ||
- renc.member_position() != File_header::size )
- return false; // can be called only once
+ if( data_position() != 0 || renc.member_position() != File_header::size )
+ return false; /* can be called only once */
- if( !matchfinder.finished() ) // encode first byte
+ if( !data_finished() ) // encode first byte
{
const uint8_t prev_byte = 0;
- const uint8_t cur_byte = matchfinder[0];
+ const uint8_t cur_byte = peek( 0 );
renc.encode_bit( bm_match[state()][0], 0 );
encode_literal( prev_byte, cur_byte );
crc32.update_byte( crc_, cur_byte );
- matchfinder.get_match_pairs();
- matchfinder.move_pos();
+ get_match_pairs();
+ move_pos();
}
- while( !matchfinder.finished() )
+ while( !data_finished() )
{
if( price_counter <= 0 && pending_num_pairs == 0 )
{
- price_counter = price_count; // recalculate prices every these bytes
+ price_counter = price_count; /* recalculate prices every these bytes */
if( dis_price_counter <= 0 )
{ dis_price_counter = dis_price_count; update_distance_prices(); }
if( align_price_counter <= 0 )
@@ -668,39 +531,38 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
}
int ahead = sequence_optimizer( reps, state );
- if( ahead <= 0 ) return false; // can't happen
+ if( ahead <= 0 ) return false; /* can't happen */
price_counter -= ahead;
for( int i = 0; ahead > 0; )
{
- const int pos_state =
- ( matchfinder.data_position() - ahead ) & pos_state_mask;
+ const int pos_state = ( data_position() - ahead ) & pos_state_mask;
const int dis = trials[i].dis;
const int len = trials[i].price;
bool bit = ( dis < 0 );
renc.encode_bit( bm_match[state()][pos_state], !bit );
- if( bit ) // literal byte
+ if( bit ) /* literal byte */
{
- const uint8_t prev_byte = matchfinder[ahead+1];
- const uint8_t cur_byte = matchfinder[ahead];
+ const uint8_t prev_byte = peek( ahead + 1 );
+ const uint8_t cur_byte = peek( ahead );
crc32.update_byte( crc_, cur_byte );
if( state.is_char() )
encode_literal( prev_byte, cur_byte );
else
{
- const uint8_t match_byte = matchfinder[ahead+reps[0]+1];
+ const uint8_t match_byte = peek( ahead + reps[0] + 1 );
encode_matched( prev_byte, cur_byte, match_byte );
}
state.set_char();
}
- else // match or repeated match
+ else /* match or repeated match */
{
- crc32.update_buf( crc_, matchfinder.ptr_to_current_pos() - ahead, len );
+ crc32.update_buf( crc_, ptr_to_current_pos() - ahead, len );
mtf_reps( dis, reps );
bit = ( dis < num_rep_distances );
renc.encode_bit( bm_rep[state()], bit );
- if( bit ) // repeated match
+ if( bit ) /* repeated match */
{
bit = ( dis == 0 );
renc.encode_bit( bm_rep0[state()], !bit );
@@ -720,7 +582,7 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
state.set_rep();
}
}
- else // match
+ else /* match */
{
encode_pair( dis - num_rep_distances, len, pos_state );
if( get_slot( dis - num_rep_distances ) >= end_dis_model )
@@ -733,12 +595,12 @@ bool LZ_encoder::encode_member( const unsigned long long member_size )
ahead -= len; i += len;
if( renc.member_position() >= member_size_limit )
{
- if( !matchfinder.dec_pos( ahead ) ) return false;
- full_flush( matchfinder.data_position(), state );
+ if( !dec_pos( ahead ) ) return false;
+ full_flush( state );
return true;
}
}
}
- full_flush( matchfinder.data_position(), state );
+ full_flush( state );
return true;
}