1 files changed, 106 insertions, 79 deletions
diff --git a/main.c b/main.c
index fdecac2..bd8e8fb 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,5 @@
 /*  Clzip - LZMA lossless data compressor
-    Copyright (C) 2010-2014 Antonio Diaz Diaz.
+    Copyright (C) 2010-2015 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -54,7 +54,9 @@
 #include "carg_parser.h"
 #include "lzip.h"
 #include "decoder.h"
+#include "encoder_base.h"
 #include "encoder.h"
+#include "fast_encoder.h"
 
 #ifndef O_BINARY
 #define O_BINARY 0
@@ -67,7 +69,7 @@
 
 const char * const Program_name = "Clzip";
 const char * const program_name = "clzip";
-const char * const program_year = "2014";
+const char * const program_year = "2015";
 const char * invocation_name = 0;
 
 struct { const char * from; const char * to; } const known_extensions[] = {
@@ -112,8 +114,8 @@ static void show_help( void )
           "  -S, --volume-size=<bytes>      set volume size limit in bytes\n"
           "  -t, --test                     test compressed file integrity\n"
           "  -v, --verbose                  be verbose (a 2nd -v gives more)\n"
-          "  -1 .. -9                       set compression level [default 6]\n"
-          "      --fast                     alias for -1\n"
+          "  -0 .. -9                       set compression level [default 6]\n"
+          "      --fast                     alias for -0\n"
           "      --best                     alias for -9\n"
           "If no file names are given, clzip compresses or decompresses\n"
           "from standard input to standard output.\n"
@@ -122,8 +124,7 @@ static void show_help( void )
           "The bidimensional parameter space of LZMA can't be mapped to a linear\n"
           "scale optimal for all files. If your files are large, very repetitive,\n"
           "etc, you may need to use the --match-length and --dictionary-size\n"
-          "options directly to achieve optimal performance. For example, -9m64\n"
-          "usually compresses executables more (and faster) than -9.\n"
+          "options directly to achieve optimal performance.\n"
           "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
           "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
           "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
@@ -145,18 +146,21 @@ static void show_version( void )
 
 static void show_header( const unsigned dictionary_size )
   {
-  const char * const prefix[8] =
-    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
-  enum { factor = 1024 };
-  const char * p = "";
-  const char * np = "  ";
-  unsigned num = dictionary_size, i;
-  bool exact = ( num % factor == 0 );
-
-  for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )
-    { num /= factor; if( num % factor != 0 ) exact = false;
-      p = prefix[i]; np = ""; }
-  fprintf( stderr, "dictionary size %s%4u %sB.  ", np, num, p );
+  if( verbosity >= 3 )		/* print the dictionary size to stderr only at verbosity 3 or more */
+    {
+    const char * const prefix[8] =
+      { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+    enum { factor = 1024 };
+    const char * p = "";		/* binary prefix; stays empty if num is never scaled */
+    const char * np = "  ";		/* two spaces printed in place of a missing prefix */
+    unsigned num = dictionary_size, i;
+    bool exact = ( num % factor == 0 );	/* true while num is still a multiple of factor */
+
+    for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i )	/* scale down while num exceeds 4 digits or still divides exactly */
+      { num /= factor; if( num % factor != 0 ) exact = false;
+        p = prefix[i]; np = ""; }	/* remember the prefix for this scale and drop the padding */
+    fprintf( stderr, "dictionary size %s%4u %sB.  ", np, num, p );
+    }
   }
 
 
@@ -233,8 +237,10 @@ static int extension_index( const char * const name )
   for( i = 0; known_extensions[i].from; ++i )
     {
     const char * const ext = known_extensions[i].from;
-    if( strlen( name ) > strlen( ext ) &&
-        strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 )
+    const unsigned name_len = strlen( name );
+    const unsigned ext_len = strlen( ext );
+    if( name_len > ext_len &&
+        strncmp( name + name_len - ext_len, ext, ext_len ) == 0 )
       return i;
     }
   return -1;
@@ -311,20 +317,21 @@ static void set_c_outname( const char * const name, const bool multifile )
 
 static void set_d_outname( const char * const name, const int i )
   {
+  const unsigned name_len = strlen( name );
   if( i >= 0 )
     {
     const char * const from = known_extensions[i].from;
-    if( strlen( name ) > strlen( from ) )
+    const unsigned from_len = strlen( from );
+    if( name_len > from_len )
       {
-      output_filename = resize_buffer( output_filename, strlen( name ) +
+      output_filename = resize_buffer( output_filename, name_len +
                                        strlen( known_extensions[0].to ) + 1 );
       strcpy( output_filename, name );
-      strcpy( output_filename + strlen( name ) - strlen( from ),
-              known_extensions[i].to );
+      strcpy( output_filename + name_len - from_len, known_extensions[i].to );
       return;
       }
     }
-  output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 );
+  output_filename = resize_buffer( output_filename, name_len + 4 + 1 );
   strcpy( output_filename, name );
   strcat( output_filename, ".out" );
   if( verbosity >= 1 )
@@ -354,7 +361,7 @@ static bool open_outstream( const bool force )
 
 static bool check_tty( const int infd, const enum Mode program_mode )
   {
-  if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) )
+  if( program_mode == m_compress && isatty( outfd ) )
     {
     show_error( "I won't write compressed data to a terminal.", 0, true );
     return false;
@@ -417,11 +424,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp )
 
 static bool next_filename( void )
   {
-  const unsigned len = strlen( known_extensions[0].from );
+  const unsigned name_len = strlen( output_filename );
+  const unsigned ext_len = strlen( known_extensions[0].from );
   int i, j;
-
-  if( strlen( output_filename ) >= len + 5 )		/* "*00001.lz" */
-    for( i = strlen( output_filename ) - len - 1, j = 0; j < 5; --i, ++j )
+  if( name_len >= ext_len + 5 )				/* "*00001.lz" */
+    for( i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
       {
       if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
       else output_filename[i] = '0';
@@ -430,55 +437,69 @@ static bool next_filename( void )
   }
 
 
+struct Poly_encoder		/* holds whichever encoder compress() selected at run time */
+  {
+  struct LZ_encoder_base * eb;	/* points at the 'eb' member of the encoder in use */
+  struct LZ_encoder * e;	/* allocated by compress() when 'zero' is false, else 0 */
+  struct FLZ_encoder * fe;	/* allocated by compress() when 'zero' is true, else 0 */
+  };
+
+
 static int compress( const unsigned long long member_size,
-                     const unsigned long long volume_size,
+                     const unsigned long long volume_size, const int infd,
                      const struct Lzma_options * const encoder_options,
-                     const int infd, struct Pretty_print * const pp,
-                     const struct stat * const in_statsp )
+                     struct Pretty_print * const pp,
+                     const struct stat * const in_statsp, const bool zero )
   {
   const unsigned long long cfile_size =
     (in_statsp && S_ISREG( in_statsp->st_mode )) ? in_statsp->st_size / 100 : 0;
   unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
   int retval = 0;
-  struct Matchfinder matchfinder;
-  File_header header;
-  Fh_set_magic( header );
-
+  struct Poly_encoder encoder = { 0, 0, 0 };	/* polymorphic encoder */
   if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
-  if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
-      encoder_options->match_len_limit < min_match_len_limit ||
-      encoder_options->match_len_limit > max_match_len )
-    internal_error( "invalid argument to encoder." );
 
-  if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ),
-                encoder_options->match_len_limit, infd ) )
+  {
+  bool error = false;
+  if( zero )
+    {
+    encoder.fe = (struct FLZ_encoder *)malloc( sizeof (struct FLZ_encoder) );
+    if( !encoder.fe || !FLZe_init( encoder.fe, infd, outfd ) ) error = true;
+    else encoder.eb = &encoder.fe->eb;
+    }
+  else
     {
-    Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size." );
-    return 1;
+    File_header header;
+    if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
+        encoder_options->match_len_limit < min_match_len_limit ||
+        encoder_options->match_len_limit > max_match_len )
+      internal_error( "invalid argument to encoder." );
+    encoder.e = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) );
+    if( !encoder.e || !LZe_init( encoder.e, Fh_get_dictionary_size( header ),
+                                 encoder_options->match_len_limit, infd, outfd ) )
+      error = true;
+    else encoder.eb = &encoder.e->eb;
+    }
+  if( error )
+    {
+    show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
+    cleanup_and_fail( 1 );
     }
-  Fh_set_dictionary_size( header, matchfinder.dictionary_size );
+  }
 
   while( true )			/* encode one member per iteration */
     {
-    struct LZ_encoder encoder;
     const unsigned long long size = ( volume_size > 0 ) ?
       min( member_size, volume_size - partial_volume_size ) : member_size;
-    if( !LZe_init( &encoder, &matchfinder, header, outfd ) )
-      {
-      show_error( "Not enough memory. Try a smaller dictionary size.", 0, false );
-      cleanup_and_fail( 1 );
-      }
-    if( verbosity >= 2 )
-      show_progress( in_size, &matchfinder, pp, cfile_size );	/* init */
-    if( !LZe_encode_member( &encoder, size ) )
+    show_progress( in_size, &encoder.eb->mb, pp, cfile_size );	/* init */
+    if( ( zero && !FLZe_encode_member( encoder.fe, size ) ) ||
+        ( !zero && !LZe_encode_member( encoder.e, size ) ) )
       { Pp_show_msg( pp, "Encoder error." ); retval = 1; break; }
-    in_size += Mf_data_position( &matchfinder );
-    out_size += Re_member_position( &encoder.renc );
-    LZe_free( &encoder );
-    if( Mf_finished( &matchfinder ) ) break;
+    in_size += Mb_data_position( &encoder.eb->mb );
+    out_size += Re_member_position( &encoder.eb->renc );
+    if( Mb_data_finished( &encoder.eb->mb ) ) break;
     if( volume_size > 0 )
       {
-      partial_volume_size += Re_member_position( &encoder.renc );
+      partial_volume_size += Re_member_position( &encoder.eb->renc );
       if( partial_volume_size >= volume_size - min_dictionary_size )
         {
         partial_volume_size = 0;
@@ -492,7 +513,7 @@ static int compress( const unsigned long long member_size,
           }
         }
       }
-    Mf_reset( &matchfinder );
+    if( zero ) FLZe_reset( encoder.fe ); else LZe_reset( encoder.e );
     }
 
   if( retval == 0 && verbosity >= 1 )
@@ -507,7 +528,8 @@ static int compress( const unsigned long long member_size,
                100.0 * ( 1.0 - ( (double)out_size / in_size ) ),
                in_size, out_size );
     }
-  Mf_free( &matchfinder );
+  LZeb_free( encoder.eb );
+  if( zero ) free( encoder.fe ); else free( encoder.e );
   return retval;
   }
 
@@ -561,8 +583,7 @@ static int decompress( const int infd, struct Pretty_print * const pp,
         retval = 2; break; }
 
     if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
-      { Pp_show_msg( pp, 0 );
-        if( verbosity >= 3 ) show_header( dictionary_size ); }
+      { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); }
 
     if( !LZd_init( &decoder, &rdec, dictionary_size, outfd ) )
       {
@@ -637,24 +658,27 @@ void internal_error( const char * const msg )
 
 
 void show_progress( const unsigned long long partial_size,
-                    const struct Matchfinder * const m,
+                    const struct Matchfinder_base * const m,	/* non-null m (re)initializes the saved state */
                     struct Pretty_print * const p,
                     const unsigned long long cfile_size )
   {
   static unsigned long long csize = 0;		/* file_size / 100 */
   static unsigned long long psize = 0;
-  static const struct Matchfinder * mf = 0;
+  static const struct Matchfinder_base * mb = 0;	/* kept from the last call made with a non-null m */
   static struct Pretty_print * pp = 0;
 
-  if( m )					/* initialize static vars */
-    { csize = cfile_size; psize = partial_size; mf = m; pp = p; }
-  if( mf && pp )
+  if( verbosity >= 2 )				/* below verbosity 2 nothing is saved or printed */
     {
-    const unsigned long long pos = psize + Mf_data_position( mf );
-    if( csize > 0 )
-      fprintf( stderr, "%4llu%%", pos / csize );
-    fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
-    Pp_reset( pp ); Pp_show_msg( pp, 0 );	/* restore cursor position */
+    if( m )					/* initialize static vars */
+      { csize = cfile_size; psize = partial_size; mb = m; pp = p; }
+    if( mb && pp )				/* requires state saved by this or an earlier call */
+      {
+      const unsigned long long pos = psize + Mb_data_position( mb );	/* saved partial size plus the position reported for mb */
+      if( csize > 0 )				/* percentage is printed only when csize is nonzero */
+        fprintf( stderr, "%4llu%%", pos / csize );
+      fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
+      Pp_reset( pp ); Pp_show_msg( pp, 0 );	/* restore cursor position */
+      }
     }
   }
 
@@ -665,7 +689,7 @@ int main( const int argc, const char * const argv[] )
      to the corresponding LZMA compression modes. */
   const struct Lzma_options option_mapping[] =
     {
-    { 1 << 20,   5 },		/* -0 */
+    { 1 << 16,  16 },		/* -0 entry values not used */
     { 1 << 20,   5 },		/* -1 */
     { 3 << 19,   6 },		/* -2 */
     { 1 << 21,   8 },		/* -3 */
@@ -694,6 +718,7 @@ int main( const int argc, const char * const argv[] )
   bool keep_input_files = false;
   bool recompress = false;
   bool to_stdout = false;
+  bool zero = false;
   struct Pretty_print pp;
 
   const struct ap_Option options[] =
@@ -745,6 +770,7 @@ int main( const int argc, const char * const argv[] )
       {
       case '0': case '1': case '2': case '3': case '4':
       case '5': case '6': case '7': case '8': case '9':
+                zero = ( code == '0' );
                 encoder_options = option_mapping[code-'0']; break;
       case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
       case 'c': to_stdout = true; break;
@@ -754,12 +780,13 @@ int main( const int argc, const char * const argv[] )
       case 'h': show_help(); return 0;
       case 'k': keep_input_files = true; break;
       case 'm': encoder_options.match_len_limit =
-                  getnum( arg, min_match_len_limit, max_match_len ); break;
+                  getnum( arg, min_match_len_limit, max_match_len );
+                zero = false; break;
       case 'n': break;
       case 'o': default_output_filename = arg; break;
       case 'q': verbosity = -1; break;
       case 's': encoder_options.dictionary_size = get_dict_size( arg );
-                break;
+                zero = false; break;
       case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
       case 't': program_mode = m_test; break;
       case 'v': if( verbosity < 4 ) ++verbosity; break;
@@ -866,8 +893,8 @@ int main( const int argc, const char * const argv[] )
     in_statsp = input_filename[0] ? &in_stats : 0;
     Pp_set_name( &pp, input_filename );
     if( program_mode == m_compress )
-      tmp = compress( member_size, volume_size, &encoder_options, infd,
-                      &pp, in_statsp );
+      tmp = compress( member_size, volume_size, infd, &encoder_options, &pp,
+                      in_statsp, zero );
     else
       tmp = decompress( infd, &pp, program_mode == m_test );
     if( tmp > retval ) retval = tmp;