diff options
Diffstat (limited to '')
-rw-r--r-- | main.c | 185 |
1 files changed, 106 insertions, 79 deletions
@@ -1,5 +1,5 @@ /* Clzip - LZMA lossless data compressor - Copyright (C) 2010-2014 Antonio Diaz Diaz. + Copyright (C) 2010-2015 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -54,7 +54,9 @@ #include "carg_parser.h" #include "lzip.h" #include "decoder.h" +#include "encoder_base.h" #include "encoder.h" +#include "fast_encoder.h" #ifndef O_BINARY #define O_BINARY 0 @@ -67,7 +69,7 @@ const char * const Program_name = "Clzip"; const char * const program_name = "clzip"; -const char * const program_year = "2014"; +const char * const program_year = "2015"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -112,8 +114,8 @@ static void show_help( void ) " -S, --volume-size=<bytes> set volume size limit in bytes\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" - " -1 .. -9 set compression level [default 6]\n" - " --fast alias for -1\n" + " -0 .. -9 set compression level [default 6]\n" + " --fast alias for -0\n" " --best alias for -9\n" "If no file names are given, clzip compresses or decompresses\n" "from standard input to standard output.\n" @@ -122,8 +124,7 @@ static void show_help( void ) "The bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" - "options directly to achieve optimal performance. For example, -9m64\n" - "usually compresses executables more (and faster) than -9.\n" + "options directly to achieve optimal performance.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" "invalid input file, 3 for an internal consistency error (eg, bug) which\n" @@ -145,18 +146,21 @@ static void show_version( void ) static void show_header( const unsigned dictionary_size ) { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + if( verbosity >= 3 ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size, i; + bool exact = ( num % factor == 0 ); + + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + } } @@ -233,8 +237,10 @@ static int extension_index( const char * const name ) for( i = 0; known_extensions[i].from; ++i ) { const char * const ext = known_extensions[i].from; - if( strlen( name ) > strlen( ext ) && - strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) + const unsigned name_len = strlen( name ); + const unsigned ext_len = strlen( ext ); + if( name_len > ext_len && + strncmp( name + name_len - ext_len, ext, ext_len ) == 0 ) return i; } return -1; @@ -311,20 +317,21 @@ static void set_c_outname( const char * const name, const bool multifile ) static void set_d_outname( const char * const name, const int i ) { + const unsigned name_len = strlen( name ); if( i >= 0 ) { const char * const from = known_extensions[i].from; - if( strlen( name ) > strlen( from ) ) + const unsigned from_len = strlen( from ); + if( name_len > from_len ) { - output_filename = resize_buffer( output_filename, strlen( name ) + + output_filename = resize_buffer( output_filename, name_len + strlen( known_extensions[0].to ) + 1 ); strcpy( output_filename, name ); - strcpy( output_filename + strlen( name ) - strlen( from ), - known_extensions[i].to ); + strcpy( output_filename + name_len - from_len, known_extensions[i].to ); return; } } - output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); + output_filename = resize_buffer( output_filename, name_len + 4 + 1 ); strcpy( output_filename, name ); strcat( output_filename, ".out" ); if( verbosity >= 1 ) @@ -354,7 +361,7 @@ static bool open_outstream( const bool force ) static bool check_tty( const int infd, const enum Mode program_mode ) { - if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) ) + if( program_mode == m_compress && isatty( outfd ) ) { show_error( "I won't write compressed data to a terminal.", 0, true ); return false; @@ -417,11 +424,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) static bool next_filename( void ) { - const unsigned len = strlen( known_extensions[0].from ); + const unsigned name_len = strlen( output_filename ); + const unsigned ext_len = strlen( known_extensions[0].from ); int i, j; - - if( strlen( output_filename ) >= len + 5 ) /* "*00001.lz" */ - for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j ) + if( name_len >= ext_len + 5 ) /* "*00001.lz" */ + for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j ) { if( output_filename[i] < '9' ) { ++output_filename[i]; return true; } else output_filename[i] = '0'; @@ -430,55 +437,69 @@ static bool next_filename( void ) } +struct Poly_encoder + { + struct LZ_encoder_base * eb; + struct LZ_encoder * e; + struct FLZ_encoder * fe; + }; + + static int compress( const unsigned long long member_size, - const unsigned long long volume_size, + const unsigned long long volume_size, const int infd, const struct Lzma_options * const encoder_options, - const int infd, struct Pretty_print * const pp, - const struct stat * const in_statsp ) + struct Pretty_print * const pp, + const struct stat * const in_statsp, const bool zero ) { const unsigned long long cfile_size = (in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0; unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0; int retval = 0; - struct Matchfinder matchfinder; - File_header header; - Fh_set_magic( header ); - + struct Poly_encoder encoder = { 0, 0, 0 }; /* polymorphic encoder */ if( verbosity >= 1 ) Pp_show_msg( pp, 0 ); - if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || - encoder_options->match_len_limit < min_match_len_limit || - encoder_options->match_len_limit > max_match_len ) - internal_error( "invalid argument to encoder." ); - if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ), - encoder_options->match_len_limit, infd ) ) + { + bool error = false; + if( zero ) + { + encoder.fe = (struct FLZ_encoder *)malloc( sizeof (struct FLZ_encoder) ); + if( !encoder.fe || !FLZe_init( encoder.fe, infd, outfd ) ) error = true; + else encoder.eb = &encoder.fe->eb; + } + else { - Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." ); - return 1; + File_header header; + if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || + encoder_options->match_len_limit < min_match_len_limit || + encoder_options->match_len_limit > max_match_len ) + internal_error( "invalid argument to encoder." ); + encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); + if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ), + encoder_options->match_len_limit, infd, outfd ) ) + error = true; + else encoder.eb = &encoder.e->eb; + } + if( error ) + { + show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); + cleanup_and_fail( 1 ); } - Fh_set_dictionary_size( header, matchfinder.dictionary_size ); + } while( true ) /* encode one member per iteration */ { - struct LZ_encoder encoder; const unsigned long long size = ( volume_size > 0 ) ? min( member_size, volume_size - partial_volume_size ) : member_size; - if( !LZe_init( &encoder, &matchfinder, header, outfd ) ) - { - show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } - if( verbosity >= 2 ) - show_progress( in_size, &matchfinder, pp, cfile_size ); /* init */ - if( !LZe_encode_member( &encoder, size ) ) + show_progress( in_size, &encoder.eb->mb, pp, cfile_size ); /* init */ + if( ( zero && !FLZe_encode_member( encoder.fe, size ) ) || + ( !zero && !LZe_encode_member( encoder.e, size ) ) ) { Pp_show_msg( pp, "Encoder error." ); retval = 1; break; } - in_size += Mf_data_position( &matchfinder ); - out_size += Re_member_position( &encoder.renc ); - LZe_free( &encoder ); - if( Mf_finished( &matchfinder ) ) break; + in_size += Mb_data_position( &encoder.eb->mb ); + out_size += Re_member_position( &encoder.eb->renc ); + if( Mb_data_finished( &encoder.eb->mb ) ) break; if( volume_size > 0 ) { - partial_volume_size += Re_member_position( &encoder.renc ); + partial_volume_size += Re_member_position( &encoder.eb->renc ); if( partial_volume_size >= volume_size - min_dictionary_size ) { partial_volume_size = 0; @@ -492,7 +513,7 @@ static int compress( const unsigned long long member_size, } } } - Mf_reset( &matchfinder ); + if( zero ) FLZe_reset( encoder.fe ); else LZe_reset( encoder.e ); } if( retval == 0 && verbosity >= 1 ) @@ -507,7 +528,8 @@ static int compress( const unsigned long long member_size, 100.0 * ( 1.0 - ( (double)out_size / in_size ) ), in_size, out_size ); } - Mf_free( &matchfinder ); + LZeb_free( encoder.eb ); + if( zero ) free( encoder.fe ); else free( encoder.e ); return retval; } @@ -561,8 +583,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); - if( verbosity >= 3 ) show_header( dictionary_size ); } + { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) ) { @@ -637,24 +658,27 @@ void internal_error( const char * const msg ) void show_progress( const unsigned long long partial_size, - const struct Matchfinder * const m, + const struct Matchfinder_base * const m, struct Pretty_print * const p, const unsigned long long cfile_size ) { static unsigned long long csize = 0; /* file_size / 100 */ static unsigned long long psize = 0; - static const struct Matchfinder * mf = 0; + static const struct Matchfinder_base * mb = 0; static struct Pretty_print * pp = 0; - if( m ) /* initialize static vars */ - { csize = cfile_size; psize = partial_size; mf = m; pp = p; } - if( mf && pp ) + if( verbosity >= 2 ) { - const unsigned long long pos = psize + Mf_data_position( mf ); - if( csize > 0 ) - fprintf( stderr, "%4llu%%", pos / csize ); - fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); - Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ + if( m ) /* initialize static vars */ + { csize = cfile_size; psize = partial_size; mb = m; pp = p; } + if( mb && pp ) + { + const unsigned long long pos = psize + Mb_data_position( mb ); + if( csize > 0 ) + fprintf( stderr, "%4llu%%", pos / csize ); + fprintf( stderr, " %.1f MB\r", pos / 1000000.0 ); + Pp_reset( pp ); Pp_show_msg( pp, 0 ); /* restore cursor position */ + } } } @@ -665,7 +689,7 @@ int main( const int argc, const char * const argv[] ) to the corresponding LZMA compression modes. */ const struct Lzma_options option_mapping[] = { - { 1 << 20, 5 }, /* -0 */ + { 1 << 16, 16 }, /* -0 entry values not used */ { 1 << 20, 5 }, /* -1 */ { 3 << 19, 6 }, /* -2 */ { 1 << 21, 8 }, /* -3 */ @@ -694,6 +718,7 @@ int main( const int argc, const char * const argv[] ) bool keep_input_files = false; bool recompress = false; bool to_stdout = false; + bool zero = false; struct Pretty_print pp; const struct ap_Option options[] = @@ -745,6 +770,7 @@ int main( const int argc, const char * const argv[] ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': + zero = ( code == '0' ); encoder_options = option_mapping[code-'0']; break; case 'b': member_size = getnum( arg, 100000, max_member_size ); break; case 'c': to_stdout = true; break; @@ -754,12 +780,13 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = - getnum( arg, min_match_len_limit, max_match_len ); break; + getnum( arg, min_match_len_limit, max_match_len ); + zero = false; break; case 'n': break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); - break; + zero = false; break; case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break; case 't': program_mode = m_test; break; case 'v': if( verbosity < 4 ) ++verbosity; break; @@ -866,8 +893,8 @@ int main( const int argc, const char * const argv[] ) in_statsp = input_filename[0] ? &in_stats : 0; Pp_set_name( &pp, input_filename ); if( program_mode == m_compress ) - tmp = compress( member_size, volume_size, &encoder_options, infd, - &pp, in_statsp ); + tmp = compress( member_size, volume_size, infd, &encoder_options, &pp, + in_statsp, zero ); else tmp = decompress( infd, &pp, program_mode == m_test ); if( tmp > retval ) retval = tmp; |