summary refs log tree commit diff stats
path: root/main.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- main.c 185
1 files changed, 106 insertions, 79 deletions
diff --git a/main.c b/main.c
index fdecac2..bd8e8fb 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,5 @@
/* Clzip - LZMA lossless data compressor
- Copyright (C) 2010-2014 Antonio Diaz Diaz.
+ Copyright (C) 2010-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -54,7 +54,9 @@
#include "carg_parser.h"
#include "lzip.h"
#include "decoder.h"
+#include "encoder_base.h"
#include "encoder.h"
+#include "fast_encoder.h"
#ifndef O_BINARY
#define O_BINARY 0
@@ -67,7 +69,7 @@
const char * const Program_name = "Clzip";
const char * const program_name = "clzip";
-const char * const program_year = "2014";
+const char * const program_year = "2015";
const char * invocation_name = 0;
struct { const char * from; const char * to; } const known_extensions[] = {
@@ -112,8 +114,8 @@ static void show_help( void )
" -S, --volume-size=<bytes> set volume size limit in bytes\n"
" -t, --test test compressed file integrity\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
- " -1 .. -9 set compression level [default 6]\n"
- " --fast alias for -1\n"
+ " -0 .. -9 set compression level [default 6]\n"
+ " --fast alias for -0\n"
" --best alias for -9\n"
"If no file names are given, clzip compresses or decompresses\n"
"from standard input to standard output.\n"
@@ -122,8 +124,7 @@ static void show_help( void )
"The bidimensional parameter space of LZMA can't be mapped to a linear\n"
"scale optimal for all files. If your files are large, very repetitive,\n"
"etc, you may need to use the --match-length and --dictionary-size\n"
- "options directly to achieve optimal performance. For example, -9m64\n"
- "usually compresses executables more (and faster) than -9.\n"
+ "options directly to achieve optimal performance.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
"invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@@ -145,18 +146,21 @@ static void show_version( void )
static void show_header( const unsigned dictionary_size )
{
- const char * const prefix[8] =
- { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
- enum { factor = 1024 };
- const char * p = "";
- const char * np = " ";
- unsigned num = dictionary_size, i;
- bool exact = ( num % factor == 0 );
-
- for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
- { num /= factor; if( num % factor != 0 ) exact = false;
- p = prefix[i]; np = ""; }
- fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
+ if( verbosity >= 3 )
+ {
+ const char * const prefix[8] =
+ { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+ enum { factor = 1024 };
+ const char * p = "";
+ const char * np = " ";
+ unsigned num = dictionary_size, i;
+ bool exact = ( num % factor == 0 );
+
+ for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
+ { num /= factor; if( num % factor != 0 ) exact = false;
+ p = prefix[i]; np = ""; }
+ fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p );
+ }
}
@@ -233,8 +237,10 @@ static int extension_index( const char * const name )
for( i = 0; known_extensions[i].from; ++i )
{
const char * const ext = known_extensions[i].from;
- if( strlen( name ) > strlen( ext ) &&
- strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
+ const unsigned name_len = strlen( name );
+ const unsigned ext_len = strlen( ext );
+ if( name_len > ext_len &&
+ strncmp( name + name_len - ext_len, ext, ext_len ) == 0 )
return i;
}
return -1;
@@ -311,20 +317,21 @@ static void set_c_outname( const char * const name, const bool multifile )
static void set_d_outname( const char * const name, const int i )
{
+ const unsigned name_len = strlen( name );
if( i >= 0 )
{
const char * const from = known_extensions[i].from;
- if( strlen( name ) > strlen( from ) )
+ const unsigned from_len = strlen( from );
+ if( name_len > from_len )
{
- output_filename = resize_buffer( output_filename, strlen( name ) +
+ output_filename = resize_buffer( output_filename, name_len +
strlen( known_extensions[0].to ) + 1 );
strcpy( output_filename, name );
- strcpy( output_filename + strlen( name ) - strlen( from ),
- known_extensions[i].to );
+ strcpy( output_filename + name_len - from_len, known_extensions[i].to );
return;
}
}
- output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 );
+ output_filename = resize_buffer( output_filename, name_len + 4 + 1 );
strcpy( output_filename, name );
strcat( output_filename, ".out" );
if( verbosity >= 1 )
@@ -354,7 +361,7 @@ static bool open_outstream( const bool force )
static bool check_tty( const int infd, const enum Mode program_mode )
{
- if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
+ if( program_mode == m_compress && isatty( outfd ) )
{
show_error( "I won't write compressed data to a terminal.", 0, true );
return false;
@@ -417,11 +424,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
static bool next_filename( void )
{
- const unsigned len = strlen( known_extensions[0].from );
+ const unsigned name_len = strlen( output_filename );
+ const unsigned ext_len = strlen( known_extensions[0].from );
int i, j;
-
- if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */
- for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j )
+ if( name_len >= ext_len + 5 ) /* "*00001.lz" */
+ for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
{
if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
else output_filename[i] = '0';
@@ -430,55 +437,69 @@ static bool next_filename( void )
}
+struct Poly_encoder
+ {
+ struct LZ_encoder_base * eb;
+ struct LZ_encoder * e;
+ struct FLZ_encoder * fe;
+ };
+
+
static int compress( const unsigned long long member_size,
- const unsigned long long volume_size,
+ const unsigned long long volume_size, const int infd,
const struct Lzma_options * const encoder_options,
- const int infd, struct Pretty_print * const pp,
- const struct stat * const in_statsp )
+ struct Pretty_print * const pp,
+ const struct stat * const in_statsp, const bool zero )
{
const unsigned long long cfile_size =
(in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0;
unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
int retval = 0;
- struct Matchfinder matchfinder;
- File_header header;
- Fh_set_magic( header );
-
+ struct Poly_encoder encoder = { 0, 0, 0 }; /* polymorphic encoder */
if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
- if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
- encoder_options->match_len_limit < min_match_len_limit ||
- encoder_options->match_len_limit > max_match_len )
- internal_error( "invalid argument to encoder." );
- if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
- encoder_options->match_len_limit, infd ) )
+ {
+ bool error = false;
+ if( zero )
+ {
+ encoder.fe = (struct FLZ_encoder *)malloc( sizeof (struct FLZ_encoder) );
+ if( !encoder.fe || !FLZe_init( encoder.fe, infd, outfd ) ) error = true;
+ else encoder.eb = &encoder.fe->eb;
+ }
+ else
{
- Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
- return 1;
+ File_header header;
+ if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
+ encoder_options->match_len_limit < min_match_len_limit ||
+ encoder_options->match_len_limit > max_match_len )
+ internal_error( "invalid argument to encoder." );
+ encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
+ if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
+ encoder_options->match_len_limit, infd, outfd ) )
+ error = true;
+ else encoder.eb = &encoder.e->eb;
+ }
+ if( error )
+ {
+ show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
+ cleanup_and_fail( 1 );
}
- Fh_set_dictionary_size( header, matchfinder.dictionary_size );
+ }
while( true ) /* encode one member per iteration */
{
- struct LZ_encoder encoder;
const unsigned long long size = ( volume_size > 0 ) ?
min( member_size, volume_size - partial_volume_size ) : member_size;
- if( !LZe_init( &encoder, &matchfinder, header, outfd ) )
- {
- show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
- cleanup_and_fail( 1 );
- }
- if( verbosity >= 2 )
- show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */
- if( !LZe_encode_member( &encoder, size ) )
+ show_progress( in_size, &encoder.eb->mb, pp, cfile_size ); /* init */
+ if( ( zero && !FLZe_encode_member( encoder.fe, size ) ) ||
+ ( !zero && !LZe_encode_member( encoder.e, size ) ) )
{ Pp_show_msg( pp, "Encoder error." ); retval = 1; break; }
- in_size += Mf_data_position( &matchfinder );
- out_size += Re_member_position( &encoder.renc );
- LZe_free( &encoder );
- if( Mf_finished( &matchfinder ) ) break;
+ in_size += Mb_data_position( &encoder.eb->mb );
+ out_size += Re_member_position( &encoder.eb->renc );
+ if( Mb_data_finished( &encoder.eb->mb ) ) break;
if( volume_size > 0 )
{
- partial_volume_size += Re_member_position( &encoder.renc );
+ partial_volume_size += Re_member_position( &encoder.eb->renc );
if( partial_volume_size >= volume_size - min_dictionary_size )
{
partial_volume_size = 0;
@@ -492,7 +513,7 @@ static int compress( const unsigned long long member_size,
}
}
}
- Mf_reset( &matchfinder );
+ if( zero ) FLZe_reset( encoder.fe ); else LZe_reset( encoder.e );
}
if( retval == 0 && verbosity >= 1 )
@@ -507,7 +528,8 @@ static int compress( const unsigned long long member_size,
100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
in_size, out_size );
}
- Mf_free( &matchfinder );
+ LZeb_free( encoder.eb );
+ if( zero ) free( encoder.fe ); else free( encoder.e );
return retval;
}
@@ -561,8 +583,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
- { Pp_show_msg( pp, 0 );
- if( verbosity >= 3 ) show_header( dictionary_size ); }
+ { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }
if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
{
@@ -637,24 +658,27 @@ void internal_error( const char * const msg )
void show_progress( const unsigned long long partial_size,
- const struct Matchfinder * const m,
+ const struct Matchfinder_base * const m,
struct Pretty_print * const p,
const unsigned long long cfile_size )
{
static unsigned long long csize = 0; /* file_size / 100 */
static unsigned long long psize = 0;
- static const struct Matchfinder * mf = 0;
+ static const struct Matchfinder_base * mb = 0;
static struct Pretty_print * pp = 0;
- if( m ) /* initialize static vars */
- { csize = cfile_size; psize = partial_size; mf = m; pp = p; }
- if( mf && pp )
+ if( verbosity >= 2 )
{
- const unsigned long long pos = psize + Mf_data_position( mf );
- if( csize > 0 )
- fprintf( stderr, "%4llu%%", pos / csize );
- fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
- Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
+ if( m ) /* initialize static vars */
+ { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
+ if( mb && pp )
+ {
+ const unsigned long long pos = psize + Mb_data_position( mb );
+ if( csize > 0 )
+ fprintf( stderr, "%4llu%%", pos / csize );
+ fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
+ Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */
+ }
}
}
@@ -665,7 +689,7 @@ int main( const int argc, const char * const argv[] )
to the corresponding LZMA compression modes. */
const struct Lzma_options option_mapping[] =
{
- { 1 << 20, 5 }, /* -0 */
+ { 1 << 16, 16 }, /* -0 entry values not used */
{ 1 << 20, 5 }, /* -1 */
{ 3 << 19, 6 }, /* -2 */
{ 1 << 21, 8 }, /* -3 */
@@ -694,6 +718,7 @@ int main( const int argc, const char * const argv[] )
bool keep_input_files = false;
bool recompress = false;
bool to_stdout = false;
+ bool zero = false;
struct Pretty_print pp;
const struct ap_Option options[] =
@@ -745,6 +770,7 @@ int main( const int argc, const char * const argv[] )
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
+ zero = ( code == '0' );
encoder_options = option_mapping[code-'0']; break;
case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
case 'c': to_stdout = true; break;
@@ -754,12 +780,13 @@ int main( const int argc, const char * const argv[] )
case 'h': show_help(); return 0;
case 'k': keep_input_files = true; break;
case 'm': encoder_options.match_len_limit =
- getnum( arg, min_match_len_limit, max_match_len ); break;
+ getnum( arg, min_match_len_limit, max_match_len );
+ zero = false; break;
case 'n': break;
case 'o': default_output_filename = arg; break;
case 'q': verbosity = -1; break;
case 's': encoder_options.dictionary_size = get_dict_size( arg );
- break;
+ zero = false; break;
case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
case 't': program_mode = m_test; break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
@@ -866,8 +893,8 @@ int main( const int argc, const char * const argv[] )
in_statsp = input_filename[0] ? &in_stats : 0;
Pp_set_name( &pp, input_filename );
if( program_mode == m_compress )
- tmp = compress( member_size, volume_size, &encoder_options, infd,
- &pp, in_statsp );
+ tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
+ in_statsp, zero );
else
tmp = decompress( infd, &pp, program_mode == m_test );
if( tmp > retval ) retval = tmp;