diff options
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 616 |
1 files changed, 616 insertions, 0 deletions
@@ -0,0 +1,616 @@ +/* Pdlzip - A data compressor based on the LZMA algorithm
+ 2009-08-14 : Igor Pavlov : Public domain
+ Copyright (C) 2010 Antonio Diaz Diaz.
+
+ This program is free software: you have unlimited permission
+ to copy, distribute and modify it.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "carg_parser.h"
+#include "pdlzip.h"
+#include "Alloc.h"
+#include "7zFile.h"
+#include "LzmaDec.h"
+#include "LzmaEnc.h"
+
+
+/* Allocator callbacks stored in g_Alloc. The first argument is not used;
+   requests are forwarded unchanged to MyAlloc / MyFree. */
+static void *SzAlloc(void *p, size_t size)
+  {
+  (void)p;
+  return MyAlloc(size);
+  }
+
+static void SzFree(void *p, void *address)
+  {
+  (void)p;
+  MyFree(address);
+  }
+
+static ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+/* Name the program was started with; main stores argv[0] here. */
+const char * invocation_name = NULL;
+/* Program names and copyright year used in help, version and error text. */
+const char * const Program_name = "Pdlzip";
+const char * const program_name = "pdlzip";
+const char * const program_year = "2010";
+
+
+/* Suffixes recognized on compressed file names (`from') and the text that
+   replaces each one when the decompressed name is derived (`to').
+   An entry whose `from' is null terminates the table. */
+struct { const char * from; const char * to; } const known_extensions[] =
+  {
+    { ".lz",  ""     },
+    { ".tlz", ".tar" },
+    { NULL,   NULL   }
+  };
+
+/* Encoder parameters selected by the compression level or by -s / -m. */
+struct Lzma_options
+  {
+  int dictionary_size;		/* 4KiB..512MiB */
+  int match_len_limit;		/* 5..273 */
+  };
+
+/* Operation requested on the command line (compression is the default). */
+enum Mode { m_compress = 0, m_decompress = 1, m_test = 2 };
+/* Name of the output file; assigned by main, set_c_outname or set_d_outname. */
+char * output_filename = NULL;
+
+
+// Return a buffer of at least `min_size' bytes.
+// If `buf' is null a new buffer is allocated; otherwise `buf' is resized
+// with realloc, which keeps its contents up to the smaller of the old and
+// new sizes. The result is never null: when the allocation fails an error
+// is reported and the program exits with status 1, because the callers in
+// this file write through the returned pointer without checking it.
+// The function is deliberately not declared plain `inline': in C99 such a
+// definition provides no external definition, so the program fails to link
+// whenever the compiler chooses not to inline a call.
+void * resize_buffer( void * buf, const int min_size )
+  {
+  // realloc behaves like malloc when given a null pointer
+  void * const new_buf = realloc( buf, min_size );
+  if( !new_buf )
+    {
+    show_error( "not enough memory", 0, false );
+    exit( 1 );
+    }
+  return new_buf;
+  }
+
+
+/* Print the usage summary and the option list to standard output.
+   The help lines for -f/--force and -k/--keep and the home page line are
+   intentionally not printed. */
+static void show_help()
+  {
+  printf( "%s - A data compressor based on the LZMA algorithm.\n", Program_name );
+  printf( "\nUsage: %s [options] [file]\n", invocation_name );
+  fputs( "\nOptions:\n"
+         " -h, --help display this help and exit\n"
+         " -V, --version output version information and exit\n"
+         " -c, --stdout send output to standard output\n"
+         " -d, --decompress decompress\n"
+         " -m, --match-length=<n> set match length limit in bytes [80]\n"
+         " -q, --quiet suppress all messages\n"
+         " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n"
+         " -t, --test test compressed file integrity\n"
+         " -v, --verbose be verbose (a 2nd -v gives more)\n"
+         " -1 .. -9 set compression level [default 6]\n"
+         " --fast alias for -1\n"
+         " --best alias for -9\n", stdout );
+  printf( "If no file name is given, %s compresses or decompresses\n", program_name );
+  fputs( "from standard input to standard output.\n"
+         "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
+         "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
+         "\nReport bugs to lzip-bug@nongnu.org\n", stdout );
+  }
+
+
+/* Print the program name, version and licensing notice to standard output. */
+static void show_version()
+  {
+  printf( "%s %s\n"
+          "Public Domain 2009 Igor Pavlov.\n"
+          "Copyright (C) %s Antonio Diaz Diaz.\n",
+          Program_name, PROGVERSION, program_year );
+  fputs( "This is free software: you are free to change and redistribute it.\n"
+         "There is NO WARRANTY, to the extent permitted by law.\n", stdout );
+  }
+
+
+/* Parse the numerical option argument `ptr' and return its value.
+   The number (any base accepted by strtoll with base 0) may be followed by
+   a multiplier letter: k (10^3), K only as Ki (2^10), M, G, T, P, E, Z, Y
+   (powers of 1000, or of 1024 when followed by `i'), or b, which multiplies
+   by the block size `bs' and is accepted only when `bs' > 0.
+   The scaled value must lie in [llimit, ulimit].
+   On any error a message is shown and the program exits with status 1. */
+static long long getnum( const char * const ptr, const int bs,
+                         const long long llimit, const long long ulimit )
+  {
+  char * end;
+  errno = 0;
+  long long value = strtoll( ptr, &end, 0 );
+  if( end == ptr )
+    {
+    show_error( "bad or missing numerical argument", 0, true );
+    exit( 1 );
+    }
+
+  if( errno == 0 && end[0] != 0 )
+    {
+    const bool binary = ( end[1] == 'i' );	// "Ki", "Mi", ... use 1024
+    int base = binary ? 1024 : 1000;
+    int power = 0;
+    bool valid = true;
+    switch( end[0] )
+      {
+      case ' ': break;
+      case 'b': if( bs > 0 ) { base = bs; power = 1; } else valid = false;
+                break;
+      case 'Y': power = 8; break;
+      case 'Z': power = 7; break;
+      case 'E': power = 6; break;
+      case 'P': power = 5; break;
+      case 'T': power = 4; break;
+      case 'G': power = 3; break;
+      case 'M': power = 2; break;
+      case 'K': if( binary ) power = 1; else valid = false;
+                break;
+      case 'k': if( !binary ) power = 1; else valid = false;
+                break;
+      default : valid = false;
+      }
+    if( !valid )
+      {
+      show_error( "bad multiplier in numerical argument", 0, true );
+      exit( 1 );
+      }
+    // multiply step by step so that overflow is detected before it happens
+    while( power-- > 0 )
+      {
+      if( LLONG_MAX / base < llabs( value ) ) { errno = ERANGE; break; }
+      value *= base;
+      }
+    }
+  if( errno == 0 && ( value < llimit || value > ulimit ) ) errno = ERANGE;
+  if( errno != 0 )
+    {
+    show_error( "numerical argument out of limits", 0, false );
+    exit( 1 );
+    }
+  return value;
+  }
+
+
+/* Convert the argument of -s into a dictionary size in bytes.
+   A plain number within [min_dictionary_bits, max_dictionary_bits] is taken
+   as a power-of-two exponent; anything else is parsed by getnum as a size
+   limited to [min_dictionary_size, max_dictionary_size]. */
+static int get_dict_size( const char * const arg )
+  {
+  char * end;
+  const int bits = strtol( arg, &end, 0 );
+  const bool is_bit_count = ( *end == 0 && bits >= min_dictionary_bits &&
+                              bits <= max_dictionary_bits );
+  if( !is_bit_count )
+    return getnum( arg, 0, min_dictionary_size, max_dictionary_size );
+  return ( 1 << bits );
+  }
+
+
+/* When verbosity is at least 1, print the file name being processed to
+   stderr; a null or empty name is shown as "(stdin)". */
+static void show_name( const char * const name )
+  {
+  if( verbosity < 1 ) return;
+  const char * const shown = ( name && name[0] ) ? name : "(stdin)";
+  fprintf( stderr, " %s: ", shown );
+  }
+
+
+#define IN_BUF_SIZE (1 << 16)	/* capacity in bytes of the input buffer refilled by read_inbuf */
+#define OUT_BUF_SIZE (1 << 16)	/* capacity in bytes of the output buffer local to Decode2 */
+
+/* Refill the input buffer.
+   Bytes already consumed (those before *inPos) are discarded, any unread
+   bytes are moved to the start of `inBuf', *inPos is reset to 0, and the
+   free space up to IN_BUF_SIZE is filled from `inStream'. *inSize is
+   updated to the number of valid bytes.
+   Returns false (after reporting the error) if the stream read fails. */
+static bool read_inbuf( ISeqInStream * const inStream, Byte inBuf[],
+                        size_t * const inPos, size_t * const inSize )
+  {
+  if( *inPos < *inSize )
+    {
+    if( *inPos > 0 )			// slide the unread tail to the front
+      {
+      const size_t unread = *inSize - *inPos;
+      memmove( inBuf, inBuf + *inPos, unread );
+      *inSize = unread;
+      }
+    }
+  else *inSize = 0;			// everything was consumed
+  *inPos = 0;
+
+  size_t room = IN_BUF_SIZE - *inSize;
+  if( room == 0 ) return true;
+  // Read receives the free space in `room' and stores the byte count read
+  if( inStream->Read( inStream, inBuf + *inSize, &room ) != 0 )
+    { show_error( "read error", errno, false ); return false; }
+  *inSize += room;
+  return true;
+  }
+
+/* Decode the LZMA stream and trailer of one member whose header has already
+   been consumed by the caller.
+   `state' is an allocated decoder; `inBuf', `*inPos' and `*inSize' describe
+   the shared input buffer and are updated as data is consumed, so that the
+   caller can continue with the next member. Decoded data is passed to
+   `outStream' when it is not null, and is always fed to the CRC.
+   `version' selects the trailer layout; `testing' only changes the final
+   verbose message ("ok" instead of "done").
+   Returns 0 on success, 1 on read error, write error, corrupt data or
+   premature end of input, and 2 if the trailer does not match the decoded
+   data (CRC, data size or member size). */
+static int Decode2( CLzmaDec *state, ISeqOutStream *outStream,
+                    ISeqInStream *inStream, Byte inBuf[], size_t * const inPos,
+                    size_t * const inSize, const int version, const bool testing )
+  {
+  long long total_in = sizeof (File_header), total_out = 0;
+  Byte outBuf[OUT_BUF_SIZE];
+  size_t outPos = 0;
+  uint32_t crc = 0xFFFFFFFF;
+  LzmaDec_Init(state);
+  for (;;)
+    {
+    if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) )
+      return 1;
+    if( *inPos == *inSize )
+      {
+      // The read succeeded but delivered no data. errno was not set by it,
+      // so it must not be attached to the message.
+      show_error( "unexpected EOF", 0, false );
+      return 1;
+      }
+
+    SizeT inProcessed = *inSize - *inPos;
+    SizeT outProcessed = OUT_BUF_SIZE - outPos;
+    ELzmaFinishMode finishMode = LZMA_FINISH_ANY;
+    ELzmaStatus status;
+
+    if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed,
+                             inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 )
+      { show_error( "data error", 0, false ); return 1; }
+    *inPos += inProcessed;
+    total_in += inProcessed;
+    outPos += outProcessed;
+
+    if (outStream)
+      if (outStream->Write(outStream, outBuf, outPos) != outPos)
+        { show_error( "can not write output file", errno, false ); return 1; }
+
+    CRC32_update_buf( &crc, outBuf, outPos );
+    total_out += outPos;
+    outPos = 0;
+
+    // No progress on either side: the LZMA stream has ended.
+    if (inProcessed == 0 && outProcessed == 0)
+      {
+      if( status != LZMA_STATUS_FINISHED_WITH_MARK )
+        { show_error( "data error", 0, false ); return 1; }
+      bool error = false;
+      File_trailer trailer;
+      const size_t trailer_size = Ft_size( version );
+      if( *inSize - *inPos < trailer_size &&
+          !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1;
+      if( *inSize - *inPos < trailer_size )
+        {
+        error = true;
+        if( verbosity >= 0 )
+          fprintf( stderr, "trailer truncated at trailer position %u;"
+                           " some checks may fail.\n", (unsigned int)(*inSize - *inPos) );
+        // pad the missing trailer bytes with zeros
+        for( size_t i = *inSize - *inPos; i < trailer_size; ++i )
+          inBuf[*inPos+i] = 0;
+        }
+      for( size_t i = 0; i < trailer_size; ++i )
+        trailer[i] = inBuf[(*inPos)++];
+      total_in += trailer_size;
+      // for version 0 the member size is filled in from the bytes consumed
+      if( version == 0 ) Ft_set_member_size( trailer, total_in );
+      if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFF ) )
+        {
+        error = true;
+        if( verbosity >= 0 )
+          fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n",
+                   (unsigned int)Ft_get_data_crc( trailer ),
+                   (unsigned int)( crc ^ 0xFFFFFFFF ) );
+        }
+      if( Ft_get_data_size( trailer ) != total_out )
+        {
+        error = true;
+        if( verbosity >= 0 )
+          fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n",
+                   Ft_get_data_size( trailer ), total_out );
+        }
+      if( Ft_get_member_size( trailer ) != total_in )
+        {
+        error = true;
+        if( verbosity >= 0 )
+          fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n",
+                   Ft_get_member_size( trailer ), total_in );
+        }
+      if( !error && verbosity >= 3 )
+        fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ",
+                 (unsigned int)Ft_get_data_crc( trailer ),
+                 Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) );
+      if( !error && verbosity >= 1 )
+        { if( testing ) fprintf( stderr, "ok\n" );
+          else fprintf( stderr, "done\n" ); }
+      if( error ) return 2;
+      return 0;
+      }
+    }
+  }
+
+/* Decompress every member found in `inStream', writing the data to
+   `outStream'. `name' is the input file name shown before each member after
+   the first one; `testing' is passed on to Decode2.
+   Data following the last member that does not start with a valid magic
+   number, or that is shorter than a header, is ignored.
+   Returns 0 on success, 1 on read or memory errors, 2 if the first header
+   is not in lzip format or a header has an unsupported version or an invalid
+   dictionary size; otherwise the nonzero result of Decode2 for the member
+   that failed. */
+static int Decode( ISeqOutStream *outStream, ISeqInStream *inStream,
+                   const char * const name, const bool testing )
+  {
+  CLzmaDec state;
+  File_header header;
+  Byte inBuf[IN_BUF_SIZE];
+  size_t inPos = 0, inSize = 0;
+  int retval = 0;
+
+  for( bool first_member = true; ; first_member = false )
+    {
+    // The amount of unread data is inSize - inPos, not inSize: after a
+    // member has been decoded inPos is normally greater than zero.
+    if( inSize - inPos < sizeof (File_header) &&
+        !read_inbuf( inStream, inBuf, &inPos, &inSize ) ) return 1;
+    if( inSize - inPos < sizeof (File_header) )		// End Of File
+      {
+      if( !first_member ) break;
+      show_error( "error reading member header", 0, false ); return 1;
+      }
+    for( unsigned int i = 0; i < sizeof (File_header); ++i )
+      header[i] = inBuf[inPos++];
+    if( !Fh_verify_magic( header ) )
+      {
+      if( !first_member ) break;		// trailing garbage
+      show_error( "bad magic number (file not in lzip format)", 0, false );
+      return 2;
+      }
+    if( !first_member ) show_name( name );
+    if( !Fh_verify_version( header ) )
+      {
+      if( verbosity >= 0 )
+        fprintf( stderr, "version %d member format not supported, newer %s needed.\n",
+                 Fh_version( header ), program_name );
+      return 2;
+      }
+    if( Fh_get_dictionary_size( header ) < min_dictionary_size ||
+        Fh_get_dictionary_size( header ) > max_dictionary_size )
+      {
+      if( verbosity >= 0 )
+        fprintf( stderr, "invalid dictionary size in member header\n" );
+      return 2;
+      }
+
+    if( verbosity >= 2 )
+      fprintf( stderr, "version %d, dictionary size %7dB. ",
+               Fh_version( header ), Fh_get_dictionary_size( header ) );
+
+    /* 5 bytes of LZMA properties: one fixed byte followed by the
+       dictionary size in little endian order */
+    unsigned char props[LZMA_PROPS_SIZE];
+    props[0] = 93;			// 45 * 2 + 3
+    int ds = Fh_get_dictionary_size( header );
+    for( int i = 1; i <= 4; ++i ) { props[i] = ds & 0xFF; ds >>= 8; }
+
+    LzmaDec_Construct(&state);
+    if( LzmaDec_Allocate( &state, props, LZMA_PROPS_SIZE, &g_Alloc ) != 0 )
+      { show_error( "can not allocate memory", 0, false ); return 1; }
+    retval = Decode2( &state, outStream, inStream, inBuf, &inPos,
+                      &inSize, Fh_version( header ), testing );
+    LzmaDec_Free(&state, &g_Alloc);
+    if( retval != 0 ) break;
+    }
+  return retval;
+  }
+
+/* Compress `inStream' into a single member written to `outStream', using
+   the dictionary size and match length limit in `encoder_options'.
+   Options outside the valid ranges are a programming error and end the
+   program through internal_error.
+   Returns 0 on success and 1 if memory can not be allocated, the encoder
+   rejects the properties, the header can not be written, or encoding
+   fails. */
+static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream,
+                   const struct Lzma_options * const encoder_options )
+  {
+  CLzmaEncHandle enc = LzmaEnc_Create(&g_Alloc);
+  if(enc == 0)
+    { show_error( "can not allocate memory", 0, false ); return 1; }
+
+  File_header header;
+  Fh_set_magic( header );
+  if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) ||
+      encoder_options->match_len_limit < min_match_len_limit ||
+      encoder_options->match_len_limit > max_match_len )
+    internal_error( "invalid argument to encoder" );
+
+  CLzmaEncProps props;
+  LzmaEncProps_Init(&props);
+  props.dictSize = encoder_options->dictionary_size;
+  props.lc = literal_context_bits;
+  props.lp = 0;
+  props.pb = pos_state_bits;
+  props.fb = encoder_options->match_len_limit;
+  props.btMode = 1;
+  props.numHashBytes = 4;
+  props.mc = 16 + ( encoder_options->match_len_limit / 2 );
+
+  int retval = 0;
+  // If the encoder refuses the properties, nothing must be written: the
+  // header would describe parameters the encoder is not actually using.
+  if( LzmaEnc_SetProps(enc, &props) != 0 )
+    { show_error( "invalid encoder properties", 0, false ); retval = 1; }
+  else if( outStream->Write( outStream, header, sizeof (File_header) ) != sizeof (File_header) )
+    { show_error( "can not write output file", errno, false ); retval = 1; }
+  else if( LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc) != 0 )
+    { show_error( "data error", 0, false ); retval = 1; }
+  LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
+  return retval;
+  }
+
+int verbosity = 0;		/* message level: main sets -1 for -q and adds 1 per -v up to 4; show_error prints only when >= 0 */
+
+
+/* Report an error on stderr unless messages are suppressed (verbosity < 0).
+   A non-empty `msg' is printed prefixed by the program name and, when
+   `errcode' > 0, followed by the matching system error text.
+   If `help' is true, a hint to run the program with --help is added. */
+void show_error( const char * const msg, const int errcode, const bool help )
+  {
+  if( verbosity < 0 ) return;
+
+  const bool have_msg = ( msg && msg[0] != 0 );
+  if( have_msg )
+    {
+    fprintf( stderr, "%s: %s", program_name, msg );
+    if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) );
+    fputc( '\n', stderr );
+    }
+
+  const bool have_name = ( invocation_name && invocation_name[0] != 0 );
+  if( help && have_name )
+    fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name );
+  }
+
+
+/* Report a programming error and terminate the program with status 3.
+   The text "<program_name>: internal error: <msg>" is written to stderr
+   unless messages are suppressed (verbosity < 0).
+   The message is formatted directly instead of being assembled in a heap
+   buffer, so the report does not depend on an allocation succeeding. */
+void internal_error( const char * const msg )
+  {
+  if( verbosity >= 0 )
+    fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
+  exit( 3 );
+  }
+
+
+/* Return the index in known_extensions of the first entry whose `from'
+   suffix ends `name', or -1 if there is none. The name must be strictly
+   longer than the suffix, so a name consisting only of the suffix does
+   not match. */
+static int extension_index( const char * const name )
+  {
+  const size_t name_len = strlen( name );
+  for( int i = 0; known_extensions[i].from != 0; ++i )
+    {
+    const char * const ext = known_extensions[i].from;
+    const size_t ext_len = strlen( ext );
+    if( name_len <= ext_len ) continue;
+    if( memcmp( name + name_len - ext_len, ext, ext_len ) == 0 ) return i;
+    }
+  return -1;
+  }
+
+
+/* Set output_filename to `name' followed by the first suffix of
+   known_extensions (the name used for compressed output). */
+static void set_c_outname( const char * const name )
+  {
+  const char * const suffix = known_extensions[0].from;
+  const size_t name_len = strlen( name );
+  output_filename = resize_buffer( output_filename,
+                                   name_len + strlen( suffix ) + 1 );
+  strcpy( output_filename, name );
+  strcpy( output_filename + name_len, suffix );
+  }
+
+
+/* Set output_filename to the name of the decompressed file for `name'.
+   If `name' ends with a suffix listed in known_extensions, that suffix is
+   replaced by the corresponding `to' string. Otherwise ".out" is appended
+   and, unless messages are suppressed, a notice is printed to stderr. */
+static void set_d_outname( const char * const name )
+  {
+  const size_t name_len = strlen( name );
+  const int i = extension_index( name );
+  if( i >= 0 )
+    {
+    const char * const from = known_extensions[i].from;
+    const char * const to = known_extensions[i].to;
+    const size_t from_len = strlen( from );
+    if( name_len > from_len )
+      {
+      // Size the buffer with the replacement suffix of the matched entry
+      // (not of entry 0); this is always large enough for both copies.
+      output_filename = resize_buffer( output_filename,
+                                       name_len + strlen( to ) + 1 );
+      strcpy( output_filename, name );
+      strcpy( output_filename + name_len - from_len, to );
+      return;
+      }
+    }
+  output_filename = resize_buffer( output_filename, name_len + 4 + 1 );
+  strcpy( output_filename, name );
+  strcat( output_filename, ".out" );
+  if( verbosity >= 0 )
+    fprintf( stderr, "%s: can't guess original name for `%s' -- using `%s'.\n",
+             program_name, name, output_filename );
+  }
+
+
+CRC32 crc32;		/* NOTE(review): presumably the CRC table set up by CRC32_init() in main - confirm in pdlzip.h */
+
+
+/* Program entry point.
+   Parses the command line, derives the output file name from the mode and
+   the input name, opens both files and runs Encode or Decode on them.
+   At most one input file is accepted; no file name or "-" leaves the input
+   name empty, in which case the output name is empty too (the help text
+   describes this as standard input to standard output).
+   Returns the result of Encode / Decode, or 1 for command line errors,
+   files that can not be opened, or an output file that can not be closed. */
+int main( const int argc, const char * const argv[] )
+  {
+  // Mapping from gzip/bzip2 style 1..9 compression modes
+  // to the corresponding LZMA compression modes.
+  const struct Lzma_options option_mapping[] =
+    {
+    { 1 << 20,  10 },		// -1
+    { 3 << 19,  12 },		// -2
+    { 1 << 21,  17 },		// -3
+    { 3 << 20,  26 },		// -4
+    { 1 << 22,  44 },		// -5
+    { 1 << 23,  80 },		// -6
+    { 1 << 24, 108 },		// -7
+    { 3 << 23, 163 },		// -8
+    { 1 << 25, 273 } };		// -9
+  struct Lzma_options encoder_options = option_mapping[5];	// default = "-6"
+  enum Mode program_mode = m_compress;
+  bool to_stdout = false;
+  invocation_name = argv[0];
+  CRC32_init();
+  if (sizeof(UInt32) != 4 || sizeof(UInt64) != 8)
+    internal_error( "incorrect UInt32 or UInt64" );
+
+  const struct ap_Option options[] =
+    {
+    { '1', "fast",            ap_no  },
+    { '2', 0,                 ap_no  },
+    { '3', 0,                 ap_no  },
+    { '4', 0,                 ap_no  },
+    { '5', 0,                 ap_no  },
+    { '6', 0,                 ap_no  },
+    { '7', 0,                 ap_no  },
+    { '8', 0,                 ap_no  },
+    { '9', "best",            ap_no  },
+    { 'b', "member-size",     ap_yes },
+    { 'c', "stdout",          ap_no  },
+    { 'd', "decompress",      ap_no  },
+    { 'f', "force",           ap_no  },
+    { 'h', "help",            ap_no  },
+    { 'k', "keep",            ap_no  },
+    { 'm', "match-length",    ap_yes },
+    { 'q', "quiet",           ap_no  },
+    { 's', "dictionary-size", ap_yes },
+    { 'S', "volume-size",     ap_yes },
+    { 't', "test",            ap_no  },
+    { 'v', "verbose",         ap_no  },
+    { 'V', "version",         ap_no  },
+    {  0 , 0,                 ap_no  } };
+
+  struct Arg_parser parser;
+  if( !ap_init( &parser, argc, argv, options, 0 ) )
+    { show_error( "memory exhausted", 0, false ); return 1; }
+  if( ap_error( &parser ) )				// bad option
+    { show_error( ap_error( &parser ), 0, true ); return 1; }
+
+  int argind = 0;
+  for( ; argind < ap_arguments( &parser ); ++argind )
+    {
+    const int code = ap_code( &parser, argind );
+    const char * const arg = ap_argument( &parser, argind );
+    if( !code ) break;					// no more options
+    switch( code )
+      {
+      case '1': case '2': case '3':
+      case '4': case '5': case '6':
+      case '7': case '8': case '9':
+                encoder_options = option_mapping[code-'1']; break;
+      // -b, -f, -k and -S are accepted but have no effect in this program
+      case 'b': break;
+      case 'c': to_stdout = true; break;
+      case 'd': program_mode = m_decompress; break;
+      case 'f': break;
+      case 'h': show_help(); return 0;
+      case 'k': break;
+      case 'm': encoder_options.match_len_limit =
+                getnum( arg, 0, min_match_len_limit, max_match_len ); break;
+      case 'q': verbosity = -1; break;
+      case 's': encoder_options.dictionary_size = get_dict_size( arg );
+                break;
+      case 'S': break;
+      case 't': program_mode = m_test; break;
+      case 'v': if( verbosity < 4 ) ++verbosity; break;
+      case 'V': show_version(); return 0;
+      default : internal_error( "uncaught option" );
+      }
+    }
+
+  const char * input_filename = "";
+  if( ap_arguments( &parser ) > argind &&
+      strcmp( ap_argument( &parser, argind ), "-" ) )
+    input_filename = ap_argument( &parser, argind );
+  if( ap_arguments( &parser ) > argind + 1 )
+    { show_error( "too many file names", 0, true ); return 1; }
+
+  if( program_mode == m_test ) output_filename = "/dev/null";
+  else
+    {
+    if( to_stdout || !input_filename[0] ) output_filename = "";
+    else
+      {
+      if( program_mode == m_compress ) set_c_outname( input_filename );
+      else set_d_outname( input_filename );
+      }
+    }
+
+  CFileSeqInStream inStream;
+  CFileOutStream outStream;
+
+  FileSeqInStream_CreateVTable(&inStream);
+  File_Construct(&inStream.file);
+
+  FileOutStream_CreateVTable(&outStream);
+  File_Construct(&outStream.file);
+
+  if (InFile_Open(&inStream.file, input_filename) != 0)
+    {
+    show_error( "can not open input file", errno, false );
+    ap_free( &parser );
+    return 1;
+    }
+
+  if (OutFile_Open(&outStream.file, output_filename) != 0)
+    {
+    show_error( "can not open output file", errno, false );
+    File_Close(&inStream.file);
+    ap_free( &parser );
+    return 1;
+    }
+
+  show_name( input_filename );
+  int retval;
+  if( program_mode == m_compress )
+    retval = Encode( &outStream.s, &inStream.s, &encoder_options );
+  else
+    retval = Decode( &outStream.s, &inStream.s, input_filename, program_mode == m_test );
+
+  // Closing may be the point where the last data is written out; a
+  // failure here means the output file is incomplete.
+  if( File_Close(&outStream.file) != 0 && retval == 0 )
+    { show_error( "can not close output file", errno, false ); retval = 1; }
+  File_Close(&inStream.file);
+
+  ap_free( &parser );
+  return retval;
+  }
|