diff options
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 227 |
1 files changed, 126 insertions, 101 deletions
@@ -1,6 +1,6 @@ /* Pdlzip - LZMA lossless data compressor 2009-08-14 : Igor Pavlov : Public domain - Copyright (C) 2010-2015 Antonio Diaz Diaz. + Copyright (C) 2010-2016 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -30,6 +30,7 @@ #include <fcntl.h> #include <limits.h> #include <signal.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -65,10 +66,11 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +int verbosity = 0; const char * const Program_name = "Pdlzip"; const char * const program_name = "pdlzip"; -const char * const program_year = "2015"; +const char * const program_year = "2016"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -87,30 +89,29 @@ enum Mode { m_compress, m_decompress, m_test }; char * output_filename = 0; int outfd = -1; -int verbosity = 0; -const mode_t usr_rw = S_IRUSR | S_IWUSR; -const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; -mode_t outfd_mode = S_IRUSR | S_IWUSR; bool delete_output_on_interrupt = false; static void show_help( void ) { - printf( "%s - A \"public domain\" version of the lzip data compressor\n", Program_name ); - printf( "also able to decompress legacy lzma-alone (.lzma) files.\n" - "Lzma-alone is a very bad format. If you keep any lzma-alone files, it is\n" - "advisable to recompress them to lzip format.\n" + printf( "%s - A permissively licensed implementation of the lzip data\n", Program_name ); + printf( "compressor also able to decompress legacy lzma-alone (.lzma) files.\n" + "\nLzma-alone is a very bad format; it is essentially a raw LZMA stream.\n" + "If you keep any lzma-alone files, it is advisable to recompress them to\n" + "lzip format. Lziprecover can convert lzma-alone files to lzip format\n" + "without recompressing.\n" "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" - " -c, --stdout send output to standard output\n" + " -a, --trailing-error exit with error status if trailing data\n" + " -c, --stdout write to standard output, keep input files\n" " -d, --decompress decompress\n" " -f, --force overwrite existing output files\n" " -F, --recompress force re-compression of compressed files\n" " -k, --keep keep (don't delete) input files\n" " -m, --match-length=<bytes> set match length limit in bytes [36]\n" - " -o, --output=<file> if reading stdin, place the output into <file>\n" + " -o, --output=<file> if reading standard input, write to <file>\n" " -q, --quiet suppress all messages\n" " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n" " -t, --test test compressed file integrity\n" @@ -118,13 +119,15 @@ static void show_help( void ) " -1 .. -9 set compression level [default 6]\n" " --fast alias for -1\n" " --best alias for -9\n" - "If no file names are given, pdlzip compresses or decompresses\n" - "from standard input to standard output.\n" + "If no file names are given, or if a file is '-', pdlzip compresses or\n" + "decompresses from standard input to standard output.\n" "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" - "The bidimensional parameter space of LZMA can't be mapped to a linear\n" + "Dictionary sizes 12 to 27 are interpreted as powers of two, meaning 2^12\n" + "to 2^27 bytes.\n" + "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n" "scale optimal for all files. If your files are large, very repetitive,\n" - "etc, you may need to use the --match-length and --dictionary-size\n" + "etc, you may need to use the --dictionary-size and --match-length\n" "options directly to achieve optimal performance. For example, -9m64\n" "usually compresses executables more (and faster) than -9.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" @@ -149,18 +152,21 @@ static void show_version( void ) static void show_header( const unsigned dictionary_size ) { - const char * const prefix[8] = - { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - enum { factor = 1024 }; - const char * p = ""; - const char * np = " "; - unsigned num = dictionary_size, i; - bool exact = ( num % factor == 0 ); - - for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) - { num /= factor; if( num % factor != 0 ) exact = false; - p = prefix[i]; np = ""; } - fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + if( verbosity >= 3 ) + { + const char * const prefix[8] = + { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; + enum { factor = 1024 }; + const char * p = ""; + const char * np = " "; + unsigned num = dictionary_size, i; + bool exact = ( num % factor == 0 ); + + for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; + p = prefix[i]; np = ""; } + fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); + } } @@ -180,12 +186,11 @@ static unsigned long getnum( const char * const ptr, if( !errno && tail[0] ) { - int factor = ( tail[1] == 'i' ) ? 1024 : 1000; - int exponent = 0, i; - bool bad_multiplier = false; + const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0; /* 0 = bad multiplier */ + int i; switch( tail[0] ) { - case ' ': break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -193,13 +198,10 @@ static unsigned long getnum( const char * const ptr, case 'T': exponent = 4; break; case 'G': exponent = 3; break; case 'M': exponent = 2; break; - case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; - break; - case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; - break; - default : bad_multiplier = true; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; } - if( bad_multiplier ) + if( exponent <= 0 ) { show_error( "Bad multiplier in numerical argument.", 0, true ); exit( 1 ); @@ -264,7 +266,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp if( infd < 0 ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Can't open input file '%s': %s.\n", + fprintf( stderr, "%s: Can't open input file '%s': %s\n", program_name, name, strerror( errno ) ); } else @@ -274,14 +276,14 @@ static int open_instream( const char * const name, struct stat * const in_statsp const bool can_read = ( i == 0 && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - const bool no_ofile = to_stdout || program_mode == m_test; + const bool no_ofile = ( to_stdout || program_mode == m_test ); if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) { if( verbosity >= 0 ) fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", program_name, name, ( can_read && !no_ofile ) ? - " and '--stdout' was not specified" : "" ); + ",\n and '--stdout' was not specified" : "" ); close( infd ); infd = -1; } @@ -334,24 +336,28 @@ static void set_d_outname( const char * const name, const int i ) strcpy( output_filename, name ); strcat( output_filename, ".out" ); if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n", + fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n", program_name, name, output_filename ); } -static bool open_outstream( const bool force ) +static bool open_outstream( const bool force, const bool from_stdin ) { + const mode_t usr_rw = S_IRUSR | S_IWUSR; + const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + const mode_t outfd_mode = from_stdin ? all_rw : usr_rw; int flags = O_CREAT | O_WRONLY | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open( output_filename, flags, outfd_mode ); - if( outfd < 0 && verbosity >= 0 ) + if( outfd >= 0 ) delete_output_on_interrupt = true; + else if( verbosity >= 0 ) { if( errno == EEXIST ) fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", program_name, output_filename ); else - fprintf( stderr, "%s: Can't create output file '%s': %s.\n", + fprintf( stderr, "%s: Can't create output file '%s': %s\n", program_name, output_filename, strerror( errno ) ); } return ( outfd >= 0 ); @@ -406,7 +412,11 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) warning = true; } - if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno, false ); + cleanup_and_fail( 1 ); + } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -444,7 +454,7 @@ static int compress( const struct Lzma_options * const encoder_options, } if( writeblock( outfd, header, Fh_size ) != Fh_size ) - { show_error( "Can not write output file", errno, false ); retval = 1; } + { show_error( "Can't write output file", errno, false ); retval = 1; } else if( LzmaEnc_Encode( encoder ) != 0 ) { Pp_show_msg( pp, "Encoder error." ); retval = 1; } @@ -516,7 +526,7 @@ static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, unpackSize -= outProcessed; if( outfd >= 0 && writeblock( outfd, outBuf, outPos ) != outPos ) - { show_error( "Can not write output file", errno, false ); return 1; } + { show_error( "Can't write output file", errno, false ); return 1; } total_out += outPos; outPos = 0; @@ -527,13 +537,18 @@ static int lzma_decode( uint64_t unpackSize, CLzmaDec *decoder, const int infd, if( ( thereIsSize && unpackSize != 0 ) || ( !thereIsSize && status != LZMA_STATUS_FINISHED_WITH_MARK ) ) { show_error( "Data error.", 0, false ); return 2; } - if( verbosity >= 2 ) - { fprintf( stderr, "lzma-alone, " ); show_header( decoder->dicBufSize ); } - if( verbosity >= 3 ) + if( verbosity >= 2 && total_out > 0 && total_in > 0 ) + fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + (double)total_out / total_in, + ( 8.0 * total_in ) / total_out, + 100.0 * ( 1.0 - ( (double)total_in / total_out ) ) ); + if( verbosity >= 4 ) fprintf( stderr, "uncompressed size %9llu, compressed size %8llu. ", total_out, total_in ); + if( verbosity >= 2 ) + fputs( "lzma-alone, ", stderr ); if( verbosity >= 1 ) - fprintf( stderr, testing ? "(apparently) ok\n" : "(apparently) done\n" ); + fputs( testing ? "(apparently) ok\n" : "(apparently) done\n", stderr ); return 0; } } @@ -569,7 +584,7 @@ static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], outPos += outProcessed; if( outfd >= 0 && writeblock( outfd, outBuf, outPos ) != outPos ) - { show_error( "Can not write output file", errno, false ); return 1; } + { show_error( "Can't write output file", errno, false ); return 1; } CRC32_update_buf( &crc, outBuf, outPos ); total_out += outPos; @@ -598,25 +613,26 @@ static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], for( i = 0; i < Ft_size; ++i ) trailer[i] = inBuf[(*inPos)++]; total_in += Ft_size; - if( Ft_get_data_crc( trailer ) != ( crc ^ 0xFFFFFFFFU ) ) + crc ^= 0xFFFFFFFFU; + if( Ft_get_data_crc( trailer ) != crc ) { error = true; if( verbosity >= 0 ) - fprintf( stderr, "CRC mismatch; trailer says %08X, data crc is %08X.\n", - Ft_get_data_crc( trailer ), crc ^ 0xFFFFFFFFU ); + fprintf( stderr, "CRC mismatch; trailer says %08X, data crc is %08X\n", + Ft_get_data_crc( trailer ), crc ); } if( Ft_get_data_size( trailer ) != total_out ) { error = true; if( verbosity >= 0 ) - fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n", + fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX)\n", Ft_get_data_size( trailer ), total_out, total_out ); } if( Ft_get_member_size( trailer ) != total_in ) { error = true; if( verbosity >= 0 ) - fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n", + fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX)\n", Ft_get_member_size( trailer ), total_in, total_in ); } if( !error && verbosity >= 2 && total_out > 0 && total_in > 0 ) @@ -626,8 +642,7 @@ static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], 100.0 * ( 1.0 - ( (double)total_in / total_out ) ) ); if( !error && verbosity >= 4 ) fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", - Ft_get_data_crc( trailer ), - Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); + crc, total_out, total_in ); if( error ) return 2; return 0; } @@ -636,7 +651,7 @@ static int lzip_decode( CLzmaDec *decoder, const int infd, uint8_t inBuf[], static int decompress( const int infd, struct Pretty_print * const pp, - const bool testing ) + const bool ignore_trailing, const bool testing ) { uint64_t unpackSize = 0; CLzmaDec decoder; @@ -649,31 +664,43 @@ static int decompress( const int infd, struct Pretty_print * const pp, for( first_member = true; ; first_member = false ) { - int i; - unsigned dictionary_size = 0; + int i, size; + unsigned dictionary_size; File_header header; if( inSize - inPos < lzma_header_size && !read_inbuf( infd, inBuf, &inPos, &inSize ) ) return 1; - if( inSize - inPos <= Fh_size ) /* End Of File */ + size = inSize - inPos; + for( i = 0; i < size && i < Fh_size; ++i ) + raw_props[i] = header[i] = inBuf[inPos++]; + if( size <= Fh_size ) /* End Of File */ { - if( first_member ) + if( first_member || Fh_verify_prefix( header, size ) ) { Pp_show_msg( pp, "File ends unexpectedly at member header." ); retval = 2; } break; } - for( i = 0; i < Fh_size; ++i ) raw_props[i] = header[i] = inBuf[inPos++]; if( !Fh_verify_magic( header ) ) { - if( !first_member ) break; /* trailing garbage */ + if( !first_member ) + { + if( !ignore_trailing ) + { Pp_show_msg( pp, "Trailing data not allowed." ); retval = 2; } + break; + } if( inSize - inPos >= lzma_header_size - Fh_size ) /* try lzma-alone */ { for( i = Fh_size; i < lzma_header_size; ++i ) raw_props[i] = inBuf[inPos++]; - for( i = 0; i < 8; ++i ) - unpackSize += (uint64_t)raw_props[LZMA_PROPS_SIZE+i] << (i * 8); if( ( raw_props[12] == 0 || raw_props[12] == 0xFF ) && raw_props[12] == raw_props[11] && raw_props[0] < (9 * 5 * 5) ) + { lzip_mode = false; + dictionary_size = 0; + for( i = 4; i >= 1; --i ) + { dictionary_size <<= 8; dictionary_size += raw_props[i]; } + for( i = 7; i >= 0; --i ) + { unpackSize <<= 8; unpackSize += raw_props[LZMA_PROPS_SIZE+i]; } + } } if( lzip_mode ) { @@ -693,8 +720,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } dictionary_size = Fh_get_dictionary_size( header ); - if( dictionary_size < min_dictionary_size || - dictionary_size > max_dictionary_size ) + if( !isvalid_ds( dictionary_size ) ) { Pp_show_msg( pp, "Invalid dictionary size in member header." ); retval = 2; break; } @@ -704,14 +730,10 @@ static int decompress( const int infd, struct Pretty_print * const pp, } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); - if( lzip_mode && verbosity >= 3 ) show_header( dictionary_size ); } + { Pp_show_msg( pp, 0 ); show_header( dictionary_size ); } if( !LzmaDec_Init( &decoder, raw_props ) ) - { - show_error( "Not enough memory.", 0, false ); - cleanup_and_fail( 1 ); - } + { Pp_show_msg( pp, "Not enough memory." ); return 1; } if( lzip_mode ) retval = lzip_decode( &decoder, infd, inBuf, &inPos, &inSize ); else @@ -720,10 +742,10 @@ static int decompress( const int infd, struct Pretty_print * const pp, LzmaDec_Free(&decoder); if( retval != 0 || !lzip_mode ) break; if( verbosity >= 2 ) - { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); } + { fputs( testing ? "ok\n" : "done\n", stderr ); Pp_reset( pp ); } } if( lzip_mode && verbosity == 1 && retval == 0 ) - fprintf( stderr, testing ? "ok\n" : "done\n" ); + fputs( testing ? "ok\n" : "done\n", stderr ); return retval; } @@ -753,11 +775,11 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { if( pp->first_post ) { - int i, len; + unsigned i; pp->first_post = false; fprintf( stderr, " %s: ", pp->name ); - len = pp->longest_name - strlen( pp->name ); - for( i = 0; i < len; ++i ) fprintf( stderr, " " ); + for( i = strlen( pp->name ); i < pp->longest_name; ++i ) + fputc( ' ', stderr ); if( !msg ) fflush( stderr ); } if( msg ) fprintf( stderr, "%s\n", msg ); @@ -804,18 +826,16 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) void show_error( const char * const msg, const int errcode, const bool help ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( msg && msg[0] ) { - if( msg && msg[0] ) - { - fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) fprintf( stderr, ": %s.", strerror( errcode ) ); - fprintf( stderr, "\n" ); - } - if( help ) - fprintf( stderr, "Try '%s --help' for more information.\n", - invocation_name ); + fprintf( stderr, "%s: %s", program_name, msg ); + if( errcode > 0 ) fprintf( stderr, ": %s", strerror( errcode ) ); + fputc( '\n', stderr ); } + if( help ) + fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); } @@ -855,7 +875,9 @@ int main( const int argc, const char * const argv[] ) int i; bool filenames_given = false; bool force = false; + bool ignore_trailing = true; bool keep_input_files = false; + bool stdin_used = false; bool recompress = false; bool to_stdout = false; struct Pretty_print pp; @@ -872,6 +894,7 @@ int main( const int argc, const char * const argv[] ) { '7', 0, ap_no }, { '8', 0, ap_no }, { '9', "best", ap_no }, + { 'a', "trailing-error", ap_no }, { 'b', "member-size", ap_yes }, { 'c', "stdout", ap_no }, { 'd', "decompress", ap_no }, @@ -910,6 +933,7 @@ int main( const int argc, const char * const argv[] ) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break; + case 'a': ignore_trailing = false; break; case 'b': break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; @@ -954,7 +978,7 @@ int main( const int argc, const char * const argv[] ) ( filenames_given || default_output_filename[0] ) ) set_signals(); - Pp_init( &pp, filenames, num_filenames ); + Pp_init( &pp, filenames, num_filenames, verbosity ); output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) @@ -966,6 +990,7 @@ int main( const int argc, const char * const argv[] ) if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) { + if( stdin_used ) continue; else stdin_used = true; input_filename = ""; infd = STDIN_FILENO; if( program_mode != m_test ) @@ -982,8 +1007,7 @@ int main( const int argc, const char * const argv[] ) strlen( default_output_filename ) + 1 ); strcpy( output_filename, default_output_filename ); } - outfd_mode = all_rw; - if( !open_outstream( force ) ) + if( !open_outstream( force, true ) ) { if( retval < 1 ) retval = 1; close( infd ); infd = -1; @@ -1007,8 +1031,7 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_compress ) set_c_outname( input_filename ); else set_d_outname( input_filename, eindex ); - outfd_mode = usr_rw; - if( !open_outstream( force ) ) + if( !open_outstream( force, false ) ) { if( retval < 1 ) retval = 1; close( infd ); infd = -1; @@ -1018,16 +1041,18 @@ int main( const int argc, const char * const argv[] ) } } - if( !check_tty( infd, program_mode ) ) return 1; + if( !check_tty( infd, program_mode ) ) + { + if( retval < 1 ) retval = 1; + cleanup_and_fail( retval ); + } - if( output_filename[0] && !to_stdout && program_mode != m_test ) - delete_output_on_interrupt = true; in_statsp = input_filename[0] ? &in_stats : 0; Pp_set_name( &pp, input_filename ); if( program_mode == m_compress ) tmp = compress( &encoder_options, infd, &pp ); else - tmp = decompress( infd, &pp, program_mode == m_test ); + tmp = decompress( infd, &pp, ignore_trailing, program_mode == m_test ); if( tmp > retval ) retval = tmp; if( tmp && program_mode != m_test ) cleanup_and_fail( retval ); |