/* Pdlzip - Data compressor based on the LZMA algorithm 2009-08-14 : Igor Pavlov : Public domain Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #define _FILE_OFFSET_BITS 64 #include #include #include #include #include #include #include #if defined(__OS2__) #include #endif #include "pdarg_parser.h" #include "pdlzip.h" #include "Alloc.h" #include "7zFile.h" #include "LzmaDec.h" #include "LzmaEnc.h" #if CHAR_BIT != 8 #error "Environments where CHAR_BIT != 8 are not supported." #endif #ifndef LLONG_MAX #define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif #ifndef LLONG_MIN #define LLONG_MIN (-LLONG_MAX - 1LL) #endif #ifndef ULLONG_MAX #define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL #endif long long int llabs( long long int number ); static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } static void SzFree(void *p, void *address) { p = p; MyFree(address); } static ISzAlloc g_Alloc = { SzAlloc, SzFree }; const char * const Program_name = "Pdlzip"; const char * const program_name = "pdlzip"; const char * const program_year = "2012"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, { ".lzma", "" }, { 0, 0 } }; struct Lzma_options { int dictionary_size; /* 4KiB..512MiB */ int match_len_limit; /* 5..273 */ }; enum Mode { m_compress, m_decompress, m_test }; char * output_filename = 0; /* assure at least a minimum size for buffer 'buf' */ inline void * resize_buffer( void * buf, const int min_size ) { if( buf ) buf = realloc( buf, min_size ); else buf = malloc( min_size ); return buf; } static void show_help() { printf( "%s - A \"public domain\" version of the lzip data compressor\n", Program_name ); printf( "also able to decompress legacy lzma-alone (.lzma) files.\n" "\nUsage: %s [options] [file]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -c, --stdout send output to standard output\n" " -d, --decompress decompress\n" /* " -f, --force overwrite existing output files\n" */ /* " -k, --keep keep (don't delete) input files\n" */ " -m, --match-length= set match length limit in bytes [36]\n" " -q, --quiet suppress all messages\n" " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" " -t, --test test compressed file integrity\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" " -1 .. -9 set compression level [default 6]\n" " --fast alias for -1\n" " --best alias for -9\n" "If no file name is given, pdlzip compresses or decompresses\n" "from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" "The bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" "etc, you may need to use the --match-length and --dictionary-size\n" "options directly to achieve optimal performance.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html\n" ); } static void show_version() { printf( "%s %s\n", Program_name, PROGVERSION ); printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); printf( "Public Domain 2009 Igor Pavlov.\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" ); } static const char * format_num( long long num ) { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; enum { buf_size = 16, factor = 1024 }; static char buf[buf_size]; const char *p = ""; bool exact = ( num % factor == 0 ); int i; for( i = 0; i < 8 && ( llabs( num ) > 9999 || ( exact && llabs( num ) >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } snprintf( buf, buf_size, "%lld %s", num, p ); return buf; } static long long getnum( const char * const ptr, const long long llimit, const long long ulimit ) { long long result; char *tail; errno = 0; result = strtoll( ptr, &tail, 0 ); if( tail == ptr ) { show_error( "Bad or missing numerical argument.", 0, true ); exit( 1 ); } if( !errno && tail[0] ) { int factor = ( tail[1] == 'i' ) ? 1024 : 1000; int exponent = 0, i; bool bad_multiplier = false; switch( tail[0] ) { case ' ': break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; case 'P': exponent = 5; break; case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; break; case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; break; default : bad_multiplier = true; } if( bad_multiplier ) { show_error( "Bad multiplier in numerical argument.", 0, true ); exit( 1 ); } for( i = 0; i < exponent; ++i ) { if( LLONG_MAX / factor >= llabs( result ) ) result *= factor; else { errno = ERANGE; break; } } } if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { show_error( "Numerical argument out of limits.", 0, false ); exit( 1 ); } return result; } static int get_dict_size( const char * const arg ) { char *tail; int bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits && *tail == 0 ) return ( 1 << bits ); return getnum( arg, min_dictionary_size, max_dictionary_size ); } static void show_name( const char * const name ) { if( verbosity >= 1 ) fprintf( stderr, " %s: ", ( name && name[0] ) ? name : "(stdin)" ); } #define IN_BUF_SIZE (1 << 16) #define OUT_BUF_SIZE (1 << 16) static bool read_inbuf( ISeqInStream * const inStream, Byte inBuf[], size_t * const inPos, size_t * const inSize ) { size_t rest; if( *inPos >= *inSize ) *inSize = 0; else if( *inPos > 0 ) { memmove( inBuf, inBuf + *inPos, *inSize - *inPos ); *inSize -= *inPos; } *inPos = 0; rest = IN_BUF_SIZE - *inSize; if( rest > 0 ) { if( inStream->Read( inStream, inBuf + *inSize, &rest ) != 0 ) { show_error( "Read error", errno, false ); return false; } *inSize += rest; } return true; } static int lzma_Decode2( UInt64 unpackSize, CLzmaDec *state, ISeqOutStream *outStream, ISeqInStream *inStream, Byte inBuf[], size_t * const inPos, size_t * const inSize, const bool testing ) { long long total_in = 13, total_out = 0; Byte outBuf[OUT_BUF_SIZE]; size_t outPos = 0; const bool thereIsSize = (unpackSize != (UInt64)(Int64)-1); LzmaDec_Init(state); for (;;) { SizeT inProcessed; SizeT outProcessed = OUT_BUF_SIZE - outPos; ELzmaFinishMode finishMode = LZMA_FINISH_ANY; ELzmaStatus status; if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; inProcessed = *inSize - *inPos; if (thereIsSize && outProcessed > unpackSize) { outProcessed = (SizeT)unpackSize; finishMode = LZMA_FINISH_END; } if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) { show_error( "Data error.", 0, false ); return 1; } *inPos += inProcessed; total_in += inProcessed; outPos += outProcessed; unpackSize -= outProcessed; if (outStream) if (outStream->Write(outStream, outBuf, outPos) != outPos) { show_error( "Can not write output file", errno, false ); return 1; } total_out += outPos; outPos = 0; if( ( inProcessed == 0 && outProcessed == 0 ) || ( thereIsSize && unpackSize == 0 ) ) { if( ( thereIsSize && unpackSize != 0 ) || ( !thereIsSize && status != LZMA_STATUS_FINISHED_WITH_MARK ) ) { show_error( "Data error.", 0, false ); return 1; } if( verbosity >= 2 ) fprintf( stderr, "lzma-alone, dictionary size %7sB. ", format_num( state->prop.dicSize ) ); if( verbosity >= 3 ) fprintf( stderr, "uncompressed size %9lld, compressed size %8lld. ", total_out, total_in ); if( verbosity >= 1 ) { if( testing ) fprintf( stderr, "(apparently) ok\n" ); else fprintf( stderr, "(apparently) done\n" ); } return 0; } } } static int Decode2( CLzmaDec *state, ISeqOutStream *outStream, ISeqInStream *inStream, Byte inBuf[], size_t * const inPos, size_t * const inSize, const int member_version, const bool testing ) { long long total_in = Fh_size, total_out = 0; Byte outBuf[OUT_BUF_SIZE]; size_t outPos = 0; uint32_t crc = 0xFFFFFFFFU; LzmaDec_Init(state); for (;;) { SizeT inProcessed; SizeT outProcessed = OUT_BUF_SIZE - outPos; ELzmaFinishMode finishMode = LZMA_FINISH_ANY; ELzmaStatus status; if( *inPos == *inSize && !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; if( *inPos == *inSize ) { show_error( "Unexpected EOF.", 0, false ); return 1; } inProcessed = *inSize - *inPos; if( LzmaDec_DecodeToBuf( state, outBuf + outPos, &outProcessed, inBuf + *inPos, &inProcessed, finishMode, &status ) != 0 ) { show_error( "Data error.", 0, false ); return 1; } *inPos += inProcessed; total_in += inProcessed; outPos += outProcessed; if (outStream) if (outStream->Write(outStream, outBuf, outPos) != outPos) { show_error( "Can not write output file", errno, false ); return 1; } CRC32_update_buf( &crc, outBuf, outPos ); total_out += outPos; outPos = 0; if (inProcessed == 0 && outProcessed == 0) { File_trailer trailer; size_t i; const size_t trailer_size = Ft_versioned_size( member_version ); bool error = false; if( status != LZMA_STATUS_FINISHED_WITH_MARK ) { show_error( "Data error.", 0, false ); return 1; } if( *inSize - *inPos < trailer_size && !read_inbuf( inStream, inBuf, inPos, inSize ) ) return 1; if( *inSize - *inPos < trailer_size ) { error = true; if( verbosity >= 0 ) fprintf( stderr, "trailer truncated at trailer position %u;" " some checks may fail.\n", (unsigned int)(*inSize - *inPos) ); for( i = *inSize - *inPos; i < trailer_size; ++i ) inBuf[*inPos+i] = 0; } for( i = 0; i < trailer_size; ++i ) trailer[i] = inBuf[(*inPos)++]; total_in += trailer_size; if( member_version == 0 ) Ft_set_member_size( trailer, total_in ); if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFFU ) ) { error = true; if( verbosity >= 0 ) fprintf( stderr, "crc mismatch; trailer says %08X, data crc is %08X.\n", (unsigned int)Ft_get_data_crc( trailer ), (unsigned int)( crc ^ 0xFFFFFFFFU ) ); } if( Ft_get_data_size( trailer ) != total_out ) { error = true; if( verbosity >= 0 ) fprintf( stderr, "data size mismatch; trailer says %lld, data size is %lld.\n", Ft_get_data_size( trailer ), total_out ); } if( Ft_get_member_size( trailer ) != total_in ) { error = true; if( verbosity >= 0 ) fprintf( stderr, "member size mismatch; trailer says %lld, member size is %lld.\n", Ft_get_member_size( trailer ), total_in ); } if( !error && verbosity >= 3 ) fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", (unsigned int)Ft_get_data_crc( trailer ), Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); if( !error && verbosity >= 1 ) { if( testing ) fprintf( stderr, "ok\n" ); else fprintf( stderr, "done\n" ); } if( error ) return 2; return 0; } } } static int Decode( ISeqOutStream *outStream, ISeqInStream *inStream, const char * const name, const bool testing ) { UInt64 unpackSize = 0; CLzmaDec state; File_header header; Byte inBuf[IN_BUF_SIZE]; size_t inPos = 0, inSize = 0; int retval = 0; bool lzma_mode = false; bool first_member; /* 5 bytes of LZMA properties and 8 bytes of uncompressed size */ unsigned char props[LZMA_PROPS_SIZE+8]; for( first_member = true; ; first_member = false ) { int i; if( inSize < Fh_size && !read_inbuf( inStream, inBuf, &inPos, &inSize ) ) return 1; if( inSize < Fh_size ) /* End Of File */ { if( !first_member ) break; show_error( "Error reading member header.", 0, false ); return 1; } for( i = 0; i < Fh_size; ++i ) props[i] = header[i] = inBuf[inPos++]; if( !Fh_verify_magic( header ) ) { if( !first_member ) break; /* trailing garbage */ if( inSize >= 13 - Fh_size ) /* try lzma-alone */ { for( i = Fh_size; i < 13; ++i ) props[i] = inBuf[inPos++]; for( i = 0; i < 8; ++i ) unpackSize += (UInt64)props[LZMA_PROPS_SIZE+i] << (i * 8); if( ( props[12] == 0 || props[12] == 0xFF ) && props[12] == props[11] ) lzma_mode = true; } if( !lzma_mode ) { show_error( "Bad magic number (file not in lzip format).", 0, false ); return 2; } } if( !first_member ) show_name( name ); if( !lzma_mode ) { int ds, i; if( !Fh_verify_version( header ) ) { if( verbosity >= 0 ) fprintf( stderr, "version %d member format not supported, newer %s needed.\n", Fh_version( header ), program_name ); return 2; } if( Fh_get_dictionary_size( header ) < min_dictionary_size || Fh_get_dictionary_size( header ) > max_dictionary_size ) { if( verbosity >= 0 ) fprintf( stderr, "invalid dictionary size in member header.\n" ); return 2; } if( verbosity >= 2 ) fprintf( stderr, "version %d, dictionary size %7sB. ", Fh_version( header ), format_num( Fh_get_dictionary_size( header ) ) ); props[0] = 93; /* (45 * 2) + (9 * 0) + 3 */ ds = Fh_get_dictionary_size( header ); for( i = 1; i <= 4; ++i ) { props[i] = ds & 0xFF; ds >>= 8; } } LzmaDec_Construct(&state); if( LzmaDec_Allocate( &state, props, LZMA_PROPS_SIZE, &g_Alloc ) != 0 ) { show_error( "Can not allocate memory.", 0, false ); return 1; } if( !lzma_mode ) retval = Decode2( &state, outStream, inStream, inBuf, &inPos, &inSize, Fh_version( header ), testing ); else retval = lzma_Decode2( unpackSize, &state, outStream, inStream, inBuf, &inPos, &inSize, testing ); LzmaDec_Free(&state, &g_Alloc); if( retval != 0 || lzma_mode ) break; } return retval; } static int Encode( ISeqOutStream *outStream, ISeqInStream *inStream, const struct Lzma_options * const encoder_options ) { CLzmaEncProps props; int retval = 0; File_header header; CLzmaEncHandle enc = LzmaEnc_Create(&g_Alloc); if(enc == 0) { show_error( "Can not allocate memory.", 0, false ); return 1; } LzmaEncProps_Init(&props); props.dictSize = encoder_options->dictionary_size; props.lc = literal_context_bits; props.lp = 0; props.pb = pos_state_bits; props.fb = encoder_options->match_len_limit; props.btMode = 1; props.numHashBytes = 4; props.mc = 16 + ( encoder_options->match_len_limit / 2 ); LzmaEnc_SetProps(enc, &props); Fh_set_magic( header ); if( !Fh_set_dictionary_size( header, encoder_options->dictionary_size ) || encoder_options->match_len_limit < min_match_len_limit || encoder_options->match_len_limit > max_match_len ) internal_error( "invalid argument to encoder" ); if( outStream->Write( outStream, header, Fh_size ) != Fh_size ) { show_error( "Can not write output file", errno, false ); retval = 1; } else if( LzmaEnc_Encode(enc, outStream, inStream, NULL, &g_Alloc, &g_Alloc) != 0 ) { show_error( "Data error.", 0, false ); retval = 1; } LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc); return retval; } int verbosity = 0; void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity >= 0 ) { if( msg && msg[0] ) { fprintf( stderr, "%s: %s", program_name, msg ); if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); fprintf( stderr, "\n" ); } if( help && invocation_name && invocation_name[0] ) fprintf( stderr, "Try '%s --help' for more information.\n", invocation_name ); } } void internal_error( const char * const msg ) { if( verbosity >= 0 ) fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); exit( 3 ); } static int extension_index( const char * const name ) { int i; for( i = 0; known_extensions[i].from; ++i ) { const char * const ext = known_extensions[i].from; if( strlen( name ) > strlen( ext ) && strncmp( name + strlen( name ) - strlen( ext ), ext, strlen( ext ) ) == 0 ) return i; } return -1; } static void set_c_outname( const char * const name ) { output_filename = resize_buffer( output_filename, strlen( name ) + strlen( known_extensions[0].from ) + 1 ); strcpy( output_filename, name ); strcat( output_filename, known_extensions[0].from ); } static void set_d_outname( const char * const name ) { const int i = extension_index( name ); if( i >= 0 ) { const char * const from = known_extensions[i].from; if( strlen( name ) > strlen( from ) ) { output_filename = resize_buffer( output_filename, strlen( name ) + strlen( known_extensions[0].to ) + 1 ); strcpy( output_filename, name ); strcpy( output_filename + strlen( name ) - strlen( from ), known_extensions[i].to ); return; } } output_filename = resize_buffer( output_filename, strlen( name ) + 4 + 1 ); strcpy( output_filename, name ); strcat( output_filename, ".out" ); if( verbosity >= 1 ) fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n", program_name, name, output_filename ); } CRC32 crc32; int main( const int argc, const char * const argv[] ) { /* Mapping from gzip/bzip2 style 1..9 compression modes to the corresponding LZMA compression modes. */ const struct Lzma_options option_mapping[] = { { 1 << 20, 5 }, /* -0 */ { 1 << 20, 5 }, /* -1 */ { 3 << 19, 6 }, /* -2 */ { 1 << 21, 8 }, /* -3 */ { 3 << 20, 12 }, /* -4 */ { 1 << 22, 20 }, /* -5 */ { 1 << 23, 36 }, /* -6 */ { 1 << 24, 68 }, /* -7 */ { 3 << 23, 132 }, /* -8 */ { 1 << 25, 273 } }; /* -9 */ struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ enum Mode program_mode = m_compress; const char * input_filename = ""; CFileSeqInStream inStream; CFileOutStream outStream; int argind; int retval; bool force = false; bool keep_input_files = false; bool to_stdout = false; const struct ap_Option options[] = { { '0', 0, ap_no }, { '1', "fast", ap_no }, { '2', 0, ap_no }, { '3', 0, ap_no }, { '4', 0, ap_no }, { '5', 0, ap_no }, { '6', 0, ap_no }, { '7', 0, ap_no }, { '8', 0, ap_no }, { '9', "best", ap_no }, { 'b', "member-size", ap_yes }, { 'c', "stdout", ap_no }, { 'd', "decompress", ap_no }, { 'f', "force", ap_no }, { 'h', "help", ap_no }, { 'k', "keep", ap_no }, { 'm', "match-length", ap_yes }, { 'q', "quiet", ap_no }, { 's', "dictionary-size", ap_yes }, { 'S', "volume-size", ap_yes }, { 't', "test", ap_no }, { 'v', "verbose", ap_no }, { 'V', "version", ap_no }, { 0 , 0, ap_no } }; struct Arg_parser parser; invocation_name = argv[0]; CRC32_init(); if (sizeof(UInt32) != 4 || sizeof(UInt64) != 8) internal_error( "incorrect UInt32 or UInt64" ); if( !ap_init( &parser, argc, argv, options, 0 ) ) { show_error( "Memory exhausted.", 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } for( argind = 0; argind < ap_arguments( &parser ); ++argind ) { const int code = ap_code( &parser, argind ); const char * const arg = ap_argument( &parser, argind ); if( !code ) break; /* no more options */ switch( code ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break; case 'b': break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; case 'e': break; case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = getnum( arg, min_match_len_limit, max_match_len ); break; case 'q': verbosity = -1; break; case 's': encoder_options.dictionary_size = get_dict_size( arg ); break; case 'S': break; case 't': program_mode = m_test; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; default : internal_error( "uncaught option" ); } } /* end process options */ #if defined(__OS2__) _fsetmode( stdin, "b" ); _fsetmode( stdout, "b" ); #endif if( ap_arguments( &parser ) > argind && strcmp( ap_argument( &parser, argind ), "-" ) ) input_filename = ap_argument( &parser, argind ); if( ap_arguments( &parser ) > argind + 1 ) { show_error( "Too many file names.", 0, true ); return 1; } if( program_mode == m_test ) output_filename = "/dev/null"; else { if( to_stdout || !input_filename[0] ) output_filename = ""; else { if( program_mode == m_compress ) set_c_outname( input_filename ); else set_d_outname( input_filename ); } } FileSeqInStream_CreateVTable(&inStream); File_Construct(&inStream.file); FileOutStream_CreateVTable(&outStream); File_Construct(&outStream.file); if (InFile_Open(&inStream.file, input_filename) != 0) { show_error( "Can not open input file", errno, false ); return 1; } if (OutFile_Open(&outStream.file, output_filename) != 0) { show_error( "Can not open output file", errno, false ); return 1; } show_name( input_filename ); if( program_mode == m_compress ) retval = Encode( &outStream.s, &inStream.s, &encoder_options ); else retval = Decode( &outStream.s, &inStream.s, input_filename, program_mode == m_test ); File_Close(&outStream.file); File_Close(&inStream.file); ap_free( &parser ); return retval; }