From 1808170f1c228a7ead06583ce92f81ccd0be00fe Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 15:19:41 +0100 Subject: Adding upstream version 1.0~rc2. Signed-off-by: Daniel Baumann --- main.c | 616 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 616 insertions(+) create mode 100644 main.c (limited to 'main.c') diff --git a/main.c b/main.c new file mode 100644 index 0000000..2674285 --- /dev/null +++ b/main.c @@ -0,0 +1,616 @@ +/* Pdlzip - A data compressor based on the LZMA algorithm + 2009-08-14 : Igor Pavlov : Public domain + Copyright (C) 2010 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "carg_parser.h" +#include "pdlzip.h" +#include "Alloc.h" +#include "7zFile.h" +#include "LzmaDec.h" +#include "LzmaEnc.h" + + +static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } +static void SzFree(void *p, void *address) { p = p; MyFree(address); } +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +const char * invocation_name = 0; +const char * const Program_name = "Pdlzip"; +const char * const program_name = "pdlzip"; +const char * const program_year = "2010"; + + +struct { const char * from; const char * to; } const known_extensions[] = { + { ".lz", "" }, + { ".tlz", ".tar" }, + { 0, 0 } }; + +struct Lzma_options + { + int dictionary_size; // 4KiB..512MiB + int match_len_limit; // 5..273 + }; + +enum Mode { m_compress = 0, m_decompress, m_test }; +char * output_filename = 0; + + +// assure at least a minimum size for buffer `buf' +inline void * resize_buffer( void * buf, const int min_size ) + { + if( buf ) buf = realloc( buf, min_size ); + else buf = malloc( min_size ); + return buf; + } + + +static void show_help() + { + printf( "%s - A data compressor based on the LZMA algorithm.\n", Program_name ); + printf( "\nUsage: %s [options] [file]\n", invocation_name ); + printf( "\nOptions:\n" ); + printf( " -h, --help display this help and exit\n" ); + printf( " -V, --version output version information and exit\n" ); + printf( " -c, --stdout send output to standard output\n" ); + printf( " -d, --decompress decompress\n" ); +// printf( " -f, --force overwrite existing output files\n" ); +// printf( " -k, --keep keep (don't delete) input files\n" ); + printf( " -m, --match-length= set match length limit in bytes [80]\n" ); + printf( " -q, --quiet suppress all messages\n" ); + printf( " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" ); + printf( " -t, --test test compressed file integrity\n" ); + printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" ); + printf( " -1 .. -9 set compression level [default 6]\n" ); + printf( " --fast alias for -1\n" ); + printf( " --best alias for -9\n" ); + printf( "If no file name is given, %s compresses or decompresses\n", program_name ); + printf( "from standard input to standard output.\n" ); + printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); + printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + printf( "\nReport bugs to lzip-bug@nongnu.org\n" ); +// printf( "Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html\n" ); + } + + +static void show_version() + { + printf( "%s %s\n", Program_name, PROGVERSION ); + printf( "Public Domain 2009 Igor Pavlov.\n" ); + printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + printf( "This is free software: you are free to change and redistribute it.\n" ); + printf( "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +static long long getnum( const char * const ptr, const int bs, + const long long llimit, const long long ulimit ) + { + errno = 0; + char *tail; + long long result = strtoll( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "bad or missing numerical argument", 0, true ); + exit( 1 ); + } + + if( !errno && tail[0] ) + { + int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0; + bool bad_multiplier = false; + switch( tail[0] ) + { + case ' ': break; + case 'b': if( bs > 0 ) { factor = bs; exponent = 1; } + else bad_multiplier = true; + break; + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; + break; + case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; + break; + default : bad_multiplier = true; + } + if( bad_multiplier ) + { + show_error( "bad multiplier in numerical argument", 0, true ); + exit( 1 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "numerical argument out of limits", 0, false ); + exit( 1 ); + } + return result; + } + + +static int get_dict_size( const char * const arg ) + { + char *tail; + int bits = strtol( arg, &tail, 0 ); + if( bits >= min_dictionary_bits && + bits <= max_dictionary_bits && *tail == 0 ) + return ( 1 << bits ); + return getnum( arg, 0, min_dictionary_size, max_dictionary_size ); + } + + +static void show_name( const char * const name ) + { + if( verbosity >= 1 ) + fprintf( stderr, " %s: ", ( name && name[0] ) ? name : "(stdin)" ); + } + + +#define IN_BUF_SIZE (1 << 16) +#define OUT_BUF_SIZE (1 << 16) + +static bool read_inbuf( ISeqInStream * const inStream, Byte inBuf[], + size_t * const inPos, size_t * const inSize ) + { + if( *inPos >= *inSize ) *inSize = 0; + else if( *inPos > 0 ) + { + memmove( inBuf, inBuf + *inPos, *inSize - *inPos ); + *inSize -= *inPos; + } + *inPos = 0; + size_t rest = IN_BUF_SIZE - *inSize; + if( rest > 0 ) + { + if( inStream->Read( inStream, inBuf + *inSize, &rest ) != 0 ) + { show_error( "read error", errno, false ); return false; } + *inSize += rest; + } + return true; + } + +static int Decode2( CLzmaDec *state, ISeqOutStream *outStream, + ISeqInStream *inStream, Byte inBuf[], size_t * const inPos, + size_t * const inSize, const int version, const bool testing ) + { + long long total_in = sizeof (File_header), total_out = 0; + Byte outBuf[OUT_BUF_SIZE]; + size_t outPos = 0; + uint32_t crc = 0xFFFFFFFF; + LzmaDec_Init(state); + for (;;) + { + if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) + return 1; + if( *inPos == *inSize ) + { show_error( "unexpected EOF", errno, false ); return 1; } + else + { + SizeT inProcessed = *inSize - *inPos; + SizeT outProcessed = OUT_BUF_SIZE - outPos; + ELzmaFinishMode finishMode = LZMA_FINISH_ANY; + ELzmaStatus status; + + if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, + inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) + { show_error( "data error", 0, false ); return 1; } + *inPos += inProcessed; + total_in += inProcessed; + outPos += outProcessed; + + if (outStream) + if (outStream->Write(outStream, outBuf, outPos) != outPos) + { show_error( "can not write output file", errno, false ); return 1; } + + CRC32_update_buf( &crc, outBuf, outPos ); + total_out += outPos; + outPos = 0; + + if (inProcessed == 0 && outProcessed == 0) + { + if( status != LZMA_STATUS_FINISHED_WITH_MARK ) + { show_error( "data error", 0, false ); return 1; } + bool error = false; + File_trailer trailer; + const size_t trailer_size = Ft_size( version ); + if( *inSize - *inPos < trailer_size && + !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; + if( *inSize - *inPos < trailer_size ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "trailer truncated at trailer position %u;" + " some checks may fail.\n", (unsigned int)(*inSize - *inPos) ); + for( size_t i = *inSize - *inPos; i < trailer_size; ++i ) + inBuf[*inPos+i] = 0; + } + for( size_t i = 0; i < trailer_size; ++i ) + trailer[i] = inBuf[(*inPos)++]; + total_in += trailer_size; + if( version == 0 ) Ft_set_member_size( trailer, total_in ); + if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFF ) ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n", + (unsigned int)Ft_get_data_crc( trailer ), + (unsigned int)( crc ^ 0xFFFFFFFF ) ); + } + if( Ft_get_data_size( trailer ) != total_out ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", + Ft_get_data_size( trailer ), total_out ); + } + if( Ft_get_member_size( trailer ) != total_in ) + { + error = true; + if( verbosity >= 0 ) + fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", + Ft_get_member_size( trailer ), total_in ); + } + if( !error && verbosity >= 3 ) + fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", + (unsigned int)Ft_get_data_crc( trailer ), + Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); + if( !error && verbosity >= 1 ) + { if( testing ) fprintf( stderr, "ok\n" ); + else fprintf( stderr, "done\n" ); } + if( error ) return 2; + return 0; + } + } + } + } + +static int Decode( ISeqOutStream *outStream, ISeqInStream *inStream, + const char * const name, const bool testing ) + { + CLzmaDec state; + File_header header; + Byte inBuf[IN_BUF_SIZE]; + size_t inPos = 0, inSize = 0; + int retval = 0; + + for( bool first_member = true; ; first_member = false ) + { + if( inSize < sizeof (File_header) && + !read_inbuf( inStream, inBuf, &inPos, &inSize ) ) return 1; + if( inSize < sizeof (File_header) ) // End Of File + { + if( !first_member ) break; + show_error( "error reading member header", 0, false ); return 1; + } + for( unsigned int i = 0; i < sizeof (File_header); ++i ) + header[i] = inBuf[inPos++]; + if( !Fh_verify_magic( header ) ) + { + if( !first_member ) break; // trailing garbage + show_error( "bad magic number (file not in lzip format)", 0, false ); + return 2; + } + if( !first_member ) show_name( name ); + if( !Fh_verify_version( header ) ) + { + if( verbosity >= 0 ) + fprintf( stderr, "version %d member format not supported, newer %s needed.\n", + Fh_version( header ), program_name ); + return 2; + } + if( Fh_get_dictionary_size( header ) < min_dictionary_size || + Fh_get_dictionary_size( header ) > max_dictionary_size ) + { + if( verbosity >= 0 ) + fprintf( stderr, "invalid dictionary size in member header" ); + return 2; + } + + if( verbosity >= 1 ) + { + if( verbosity >= 2 ) + fprintf( stderr, "version %d, dictionary size %7dB. ", + Fh_version( header ), Fh_get_dictionary_size( header ) ); + } + + /* 5 bytes of LZMA properties */ + unsigned char props[LZMA_PROPS_SIZE]; + props[0] = 93; // 45 * 2 + 3 + int ds = Fh_get_dictionary_size( header ); + for( int i = 1; i <= 4; ++i ) { props[i] = ds & 0xFF; ds >>= 8; } + + LzmaDec_Construct(&state); + if( LzmaDec_Allocate( &state, props, LZMA_PROPS_SIZE, &g_Alloc ) != 0 ) + { show_error( "can not allocate memory", 0, false ); return 1; } + retval = Decode2( &state, outStream, inStream, inBuf, &inPos, + &inSize, Fh_version( header ), testing ); + LzmaDec_Free(&state, &g_Alloc); + if( retval != 0 ) break; + } + return retval; + } + +static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream, + const struct Lzma_options * const encoder_options ) + { + CLzmaEncProps props; + + CLzmaEncHandle enc = LzmaEnc_Create(&g_Alloc); + if(enc == 0) + { show_error( "can not allocate memory", 0, false ); return 1; } + + LzmaEncProps_Init(&props); + props.dictSize = encoder_options->dictionary_size; + props.lc = literal_context_bits; + props.lp = 0; + props.pb = pos_state_bits; + props.fb = encoder_options->match_len_limit; + props.btMode = 1; + props.numHashBytes = 4; + props.mc = 16 + ( encoder_options->match_len_limit / 2 ); + LzmaEnc_SetProps(enc, &props); + + int retval = 0; + File_header header; + Fh_set_magic( header ); + if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || + encoder_options->match_len_limit < min_match_len_limit || + encoder_options->match_len_limit > max_match_len ) + internal_error( "invalid argument to encoder" ); + + if( outStream->Write( outStream, header, sizeof (File_header) ) != sizeof (File_header) ) + { show_error( "can not write output file", errno, false ); retval = 1; } + else + if( LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc) != 0 ) + { show_error( "data error", 0, false ); retval = 1; } + LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); + return retval; + } + +int verbosity = 0; + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity >= 0 ) + { + if( msg && msg[0] != 0 ) + { + fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); + fprintf( stderr, "\n" ); + } + if( help && invocation_name && invocation_name[0] != 0 ) + fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name ); + } + } + + +void internal_error( const char * const msg ) + { + const char * const e = "internal error: "; + char * s = resize_buffer( 0, strlen( e ) + strlen( msg ) + 1 ); + strcpy( s, e ); + strcat( s, msg ); + show_error( s, 0, false ); + free( s ); + exit( 3 ); + } + + +static int extension_index( const char * const name ) + { + for( int i = 0; known_extensions[i].from; ++i ) + { + const char * const ext = known_extensions[i].from; + if( strlen( name ) > strlen( ext ) && + strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) + return i; + } + return -1; + } + + +static void set_c_outname( const char * const name ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + + strlen( known_extensions[0].from ) + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, known_extensions[0].from ); + } + + +static void set_d_outname( const char * const name ) + { + const int i = extension_index( name ); + if( i >= 0 ) + { + const char * const from = known_extensions[i].from; + if( strlen( name ) > strlen( from ) ) + { + output_filename = resize_buffer( output_filename, strlen( name ) + + strlen( known_extensions[0].to ) + 1 ); + strcpy( output_filename, name ); + strcpy( output_filename + strlen( name ) - strlen( from ), + known_extensions[i].to ); + return; + } + } + output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); + strcpy( output_filename, name ); + strcat( output_filename, ".out" ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: can't guess original name for `%s' -- using `%s'.\n", + program_name, name, output_filename ); + } + + +CRC32 crc32; + + +int main( const int argc, const char * const argv[] ) + { + // Mapping from gzip/bzip2 style 1..9 compression modes + // to the corresponding LZMA compression modes. + const struct Lzma_options option_mapping[] = + { + { 1 << 20, 10 }, // -1 + { 3 << 19, 12 }, // -2 + { 1 << 21, 17 }, // -3 + { 3 << 20, 26 }, // -4 + { 1 << 22, 44 }, // -5 + { 1 << 23, 80 }, // -6 + { 1 << 24, 108 }, // -7 + { 3 << 23, 163 }, // -8 + { 1 << 25, 273 } }; // -9 + struct Lzma_options encoder_options = option_mapping[5]; // default = "-6" + enum Mode program_mode = m_compress; + bool force = false; + bool keep_input_files = false; + bool to_stdout = false; + invocation_name = argv[0]; + CRC32_init(); + if (sizeof(UInt32) != 4 || sizeof(UInt64) != 8) + internal_error( "incorrect UInt32 or UInt64" ); + + const struct ap_Option options[] = + { + { '1', "fast", ap_no }, + { '2', 0, ap_no }, + { '3', 0, ap_no }, + { '4', 0, ap_no }, + { '5', 0, ap_no }, + { '6', 0, ap_no }, + { '7', 0, ap_no }, + { '8', 0, ap_no }, + { '9', "best", ap_no }, + { 'b', "member-size", ap_yes }, + { 'c', "stdout", ap_no }, + { 'd', "decompress", ap_no }, + { 'f', "force", ap_no }, + { 'h', "help", ap_no }, + { 'k', "keep", ap_no }, + { 'm', "match-length", ap_yes }, +// { 'o', "output", ap_yes }, + { 'q', "quiet", ap_no }, + { 's', "dictionary-size", ap_yes }, + { 'S', "volume-size", ap_yes }, + { 't', "test", ap_no }, + { 'v', "verbose", ap_no }, + { 'V', "version", ap_no }, + { 0 , 0, ap_no } }; + + struct Arg_parser parser; + if( !ap_init( &parser, argc, argv, options, 0 ) ) + { show_error( "memory exhausted", 0, false ); return 1; } + if( ap_error( &parser ) ) // bad option + { show_error( ap_error( &parser ), 0, true ); return 1; } + + int argind = 0; + for( ; argind < ap_arguments( &parser ); ++argind ) + { + const int code = ap_code( &parser, argind ); + const char * const arg = ap_argument( &parser, argind ); + if( !code ) break; // no more options + switch( code ) + { + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + encoder_options = option_mapping[code-'1']; break; + case 'b': break; + case 'c': to_stdout = true; break; + case 'd': program_mode = m_decompress; break; + case 'f': force = true; break; + case 'h': show_help(); return 0; + case 'k': keep_input_files = true; break; + case 'm': encoder_options.match_len_limit = + getnum( arg, 0, min_match_len_limit, max_match_len ); break; +// case 'o': default_output_filename = arg; break; + case 'q': verbosity = -1; break; + case 's': encoder_options.dictionary_size = get_dict_size( arg ); + break; + case 'S': break; + case 't': program_mode = m_test; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + default : internal_error( "uncaught option" ); + } + } + + const char * input_filename = ""; + if( ap_arguments( &parser ) > argind && + strcmp( ap_argument( &parser, argind ), "-" ) ) + input_filename = ap_argument( &parser, argind ); + if( ap_arguments( &parser ) > argind + 1 ) + { show_error( "too many file names", 0, true ); return 1; } + + if( program_mode == m_test ) output_filename = "/dev/null"; + else + { + if( to_stdout || !input_filename[0] ) output_filename = ""; + else + { + if( program_mode == m_compress ) set_c_outname( input_filename ); + else set_d_outname( input_filename ); + } + } + + CFileSeqInStream inStream; + CFileOutStream outStream; + + FileSeqInStream_CreateVTable(&inStream); + File_Construct(&inStream.file); + + FileOutStream_CreateVTable(&outStream); + File_Construct(&outStream.file); + + if (InFile_Open(&inStream.file, input_filename) != 0) + { show_error( "can not open input file", errno, false ); return 1; } + + if (OutFile_Open(&outStream.file, output_filename) != 0) + { show_error( "can not open output file", errno, false ); return 1; } + + show_name( input_filename ); + int retval; + if( program_mode == m_compress ) + retval = Encode( &outStream.s, &inStream.s, &encoder_options ); + else + retval = Decode( &outStream.s, &inStream.s, input_filename, program_mode == m_test ); + + File_Close(&outStream.file); + File_Close(&inStream.file); + + ap_free( &parser ); + return retval; + } -- cgit v1.2.3