From 44d4d7d11c87a21d329de628b9921a2f02fd7244 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 21 Feb 2022 17:26:33 +0100 Subject: Merging upstream version 1.12. Signed-off-by: Daniel Baumann --- ChangeLog | 9 ++- INSTALL | 4 +- LzmaEnc.h | 1 - Makefile.in | 2 +- NEWS | 32 +-------- README | 22 +++--- carg_parser.c | 110 ++++++++++++++++++---------- carg_parser.h | 14 ++-- configure | 6 +- doc/pdlzip.1 | 26 +++---- lzip.h | 3 +- main.c | 206 ++++++++++++++++++++++++++++++++--------------------- testsuite/check.sh | 18 +++-- 13 files changed, 257 insertions(+), 196 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1b5efdd..fe563fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2022-01-21 Antonio Diaz Diaz + + * Version 1.12 released. + * main.c (getnum): Show option name and valid range if error. + 2021-01-01 Antonio Diaz Diaz * Version 1.11 released. @@ -18,7 +23,7 @@ * main.c: Document option -0 and make it use a 64 KiB dict size. * main.c (main): Check return value of close( infd ). * main.c: Compile on DOS with DJGPP. - * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'. + * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'. * INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'. 2018-02-04 Antonio Diaz Diaz @@ -104,7 +109,7 @@ * Using LZMA SDK 9.10 (public domain) from Igor Pavlov. -Copyright (C) 2010-2021 Antonio Diaz Diaz. +Copyright (C) 2010-2022 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and diff --git a/INSTALL b/INSTALL index d06e04d..131c9a7 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C99 compiler. (gcc 3.3.6 or newer is recommended). -I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -69,7 +69,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010-2021 Antonio Diaz Diaz. +Copyright (C) 2010-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/LzmaEnc.h b/LzmaEnc.h index 1b18df7..b607be8 100644 --- a/LzmaEnc.h +++ b/LzmaEnc.h @@ -5,7 +5,6 @@ /* ---------- CLzmaEncHandle Interface ---------- */ /* LzmaEnc_* functions can return the following exit codes: -Returns: SZ_OK - OK SZ_ERROR_WRITE - Write callback error. */ diff --git a/Makefile.in b/Makefile.in index ec334f3..33e0a65 100644 --- a/Makefile.in +++ b/Makefile.in @@ -20,7 +20,7 @@ objs = carg_parser.o LzFind.o LzmaEnc.o LzmaDec.o main.o all : $(progname) $(progname) : $(objs) - $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs) main.o : main.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< diff --git a/NEWS b/NEWS index 76ecbb3..85cff1d 100644 --- a/NEWS +++ b/NEWS @@ -1,30 +1,4 @@ -Changes in version 1.11: +Changes in version 1.12: -Pdlzip now reports an error if a file name is empty (pdlzip -t ""). - -Option '-o, --output' now behaves like '-c, --stdout', but sending the -output unconditionally to a file instead of to standard output. See the new -description of '-o' in the manual. This change is backwards compatible only -when (de)compressing from standard input alone. Therefore commands like: - pdlzip -o foo.lz - bar < foo -must now be split into: - pdlzip -o foo.lz - < foo - pdlzip bar -or rewritten as: - pdlzip - bar < foo > foo.lz - -When using '-c' or '-o', pdlzip now checks whether the output is a terminal -only once. - -Pdlzip now does not even open the output file if the input file is a terminal. - -It is now an error to specify two different operations in the command line -(--decompress and --test). - -The words 'decompressed' and 'compressed' have been replaced with the -shorter 'out' and 'in' in the verbose output when decompressing or testing. - -The commands needed to extract files from a tar.lz archive have been -documented in the output of '--help' and in the man page. - -9 new test files have been added to the testsuite. +In case of error in a numerical argument to a command line option, pdlzip +now shows the name of the option and the range of valid values. diff --git a/README b/README index 1a843ab..0aa6bfd 100644 --- a/README +++ b/README @@ -2,19 +2,19 @@ Description Pdlzip is a permissively licensed implementation of the lzip data compressor, intended for those who can't distribute (or even use) GPL -licensed Free Software. (The name of pdlzip comes from 'public domain -lzip'). Pdlzip is written in C and is (hope)fully compatible with lzip 1.4 -or newer. +licensed Free Software. The name of pdlzip comes from 'public domain lzip'. +Pdlzip is written in C and is (hope)fully compatible with lzip 1.4 or newer. Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov -chain-Algorithm' (LZMA) stream format, chosen to maximize safety and -interoperability. Lzip can compress about as fast as gzip (lzip -0) or -compress most files more than bzip2 (lzip -9). Decompression speed is -intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from -a data recovery perspective. Lzip has been designed, written, and tested -with great care to replace gzip and bzip2 as the standard general-purpose -compressed format for unix-like systems. +chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip (lzip -0) or compress most files more than bzip2 +(lzip -9). Decompression speed is intermediate between gzip and bzip2. +Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip +has been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general-purpose compressed format for unix-like +systems. The lzip file format is designed for data sharing and long-term archiving, taking into account both data integrity and decoder availability: @@ -56,7 +56,7 @@ users of the most non-free platforms can share lzip files with everybody else. -Copyright (C) 2010-2021 Antonio Diaz Diaz. +Copyright (C) 2010-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/carg_parser.c b/carg_parser.c index d0c05d5..181ba23 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size ) } -static char push_back_record( struct Arg_parser * const ap, - const int code, const char * const argument ) +static char push_back_record( struct Arg_parser * const ap, const int code, + const char * const long_name, + const char * const argument ) { - const int len = strlen( argument ); struct ap_Record * p; void * tmp = ap_resize_buffer( ap->data, ( ap->data_size + 1 ) * sizeof (struct ap_Record) ); @@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap, ap->data = (struct ap_Record *)tmp; p = &(ap->data[ap->data_size]); p->code = code; - p->argument = 0; - tmp = ap_resize_buffer( p->argument, len + 1 ); - if( !tmp ) return 0; - p->argument = (char *)tmp; - strncpy( p->argument, argument, len + 1 ); + if( long_name ) + { + const int len = strlen( long_name ); + p->parsed_name = (char *)malloc( len + 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = p->parsed_name[1] = '-'; + strncpy( p->parsed_name + 2, long_name, len + 1 ); + } + else if( code > 0 && code < 256 ) + { + p->parsed_name = (char *)malloc( 2 + 1 ); + if( !p->parsed_name ) return 0; + p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0; + } + else p->parsed_name = 0; + if( argument ) + { + const int len = strlen( argument ); + p->argument = (char *)malloc( len + 1 ); + if( !p->argument ) { free( p->parsed_name ); return 0; } + strncpy( p->argument, argument, len + 1 ); + } + else p->argument = 0; ++ap->data_size; return 1; } @@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg ) static void free_data( struct Arg_parser * const ap ) { int i; - for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument ); + for( i = 0; i < ap->data_size; ++i ) + { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); } if( ap->data ) { free( ap->data ); ap->data = 0; } ap->data_size = 0; } +/* Return 0 only if out of memory. */ static char parse_long_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap, /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( strlen( options[i].name ) == len ) /* Exact match found */ + if( strlen( options[i].long_name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } else if( index < 0 ) index = i; /* First nonexact match found */ else if( options[index].code != options[i].code || @@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap, { if( options[index].has_arg == ap_no ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' doesn't allow an argument" ); return 1; } if( options[index].has_arg == ap_yes && !opt[len+3] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } - return push_back_record( ap, options[index].code, &opt[len+3] ); + return push_back_record( ap, options[index].code, + options[index].long_name, &opt[len+3] ); } if( options[index].has_arg == ap_yes ) { if( !arg || !arg[0] ) { - add_error( ap, "option '--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].long_name ); add_error( ap, "' requires an argument" ); return 1; } ++*argindp; - return push_back_record( ap, options[index].code, arg ); + return push_back_record( ap, options[index].code, + options[index].long_name, arg ); } - return push_back_record( ap, options[index].code, "" ); + return push_back_record( ap, options[index].code, + options[index].long_name, 0 ); } +/* Return 0 only if out of memory. */ static char parse_short_option( struct Arg_parser * const ap, const char * const opt, const char * const arg, const struct ap_Option options[], @@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { int index = -1, i; - const unsigned char code = opt[cind]; + const unsigned char c = opt[cind]; char code_str[2]; - code_str[0] = code; code_str[1] = 0; + code_str[0] = c; code_str[1] = 0; - if( code != 0 ) + if( c != 0 ) for( i = 0; options[i].code; ++i ) - if( code == options[i].code ) + if( c == options[i].code ) { index = i; break; } if( index < 0 ) @@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap, if( options[index].has_arg != ap_no && cind > 0 && opt[cind] ) { - if( !push_back_record( ap, code, &opt[cind] ) ) return 0; + if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0; ++*argindp; cind = 0; } else if( options[index].has_arg == ap_yes ) @@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap, return 1; } ++*argindp; cind = 0; - if( !push_back_record( ap, code, arg ) ) return 0; + if( !push_back_record( ap, c, 0, arg ) ) return 0; } - else if( !push_back_record( ap, code, "" ) ) return 0; + else if( !push_back_record( ap, c, 0, 0 ) ) return 0; } return 1; } @@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap, const char ** non_options = 0; /* skipped non-options */ int non_options_size = 0; /* number of skipped non-options */ int argind = 1; /* index in argv */ - int i; + char done = 0; /* false until success */ ap->data = 0; ap->error = 0; @@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap, if( ch2 == '-' ) { if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */ - else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out; } - else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0; + else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out; if( ap->error ) break; } else { if( in_order ) - { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; } + { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } else { void * tmp = ap_resize_buffer( non_options, ( non_options_size + 1 ) * sizeof *non_options ); - if( !tmp ) return 0; + if( !tmp ) goto out; non_options = (const char **)tmp; non_options[non_options_size++] = argv[argind++]; } @@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap, if( ap->error ) free_data( ap ); else { + int i; for( i = 0; i < non_options_size; ++i ) - if( !push_back_record( ap, 0, non_options[i] ) ) return 0; + if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out; while( argind < argc ) - if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; + if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; } - if( non_options ) free( non_options ); - return 1; + done = 1; +out: if( non_options ) free( non_options ); + return done; } @@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap ) int ap_code( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code; - else return 0; + if( i < 0 || i >= ap_arguments( ap ) ) return 0; + return ap->data[i].code; + } + + +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ) + { + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return ""; + return ap->data[i].parsed_name; } const char * ap_argument( const struct Arg_parser * const ap, const int i ) { - if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument; - else return ""; + if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].argument ) return ""; + return ap->data[i].argument; } diff --git a/carg_parser.h b/carg_parser.h index c5f2352..0c64861 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -24,9 +24,9 @@ message. 'options' is an array of 'struct ap_Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. Arg_parser normally makes it appear as if all the option arguments were specified before all the non-option arguments for the purposes @@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe }; struct ap_Option { int code; /* Short option letter or code ( code != 0 ) */ - const char * name; /* Long option name (maybe null) */ + const char * long_name; /* Long option name (maybe null) */ enum ap_Has_arg has_arg; }; @@ -58,6 +58,7 @@ struct ap_Option struct ap_Record { int code; + char * parsed_name; char * argument; }; @@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap ); Else ap_argument( i ) is the option's argument (or empty). */ int ap_code( const struct Arg_parser * const ap, const int i ); +/* Full name of the option parsed (short or long). */ +const char * ap_parsed_name( const struct Arg_parser * const ap, const int i ); + const char * ap_argument( const struct Arg_parser * const ap, const int i ); #ifdef __cplusplus diff --git a/configure b/configure index 8baa082..2a74adb 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Pdlzip - LZMA lossless data compressor -# Copyright (C) 2010-2021 Antonio Diaz Diaz. +# Copyright (C) 2010-2022 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=pdlzip -pkgversion=1.11 +pkgversion=1.12 progname=pdlzip srctrigger=doc/${progname}.1 @@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Pdlzip - LZMA lossless data compressor -# Copyright (C) 2010-2021 Antonio Diaz Diaz. +# Copyright (C) 2010-2022 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission diff --git a/doc/pdlzip.1 b/doc/pdlzip.1 index 8586d88..57a144f 100644 --- a/doc/pdlzip.1 +++ b/doc/pdlzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH PDLZIP "1" "January 2021" "pdlzip 1.11" "User Commands" +.TH PDLZIP "1" "January 2022" "pdlzip 1.12" "User Commands" .SH NAME pdlzip \- reduces the size of files .SH SYNOPSIS @@ -8,19 +8,19 @@ pdlzip \- reduces the size of files .SH DESCRIPTION Pdlzip is a permissively licensed implementation of the lzip data compressor, intended for those who can't distribute (or even use) GPL -licensed Free Software. (The name of pdlzip comes from 'public domain -lzip'). Pdlzip is written in C and is (hope)fully compatible with lzip 1.4 -or newer. +licensed Free Software. The name of pdlzip comes from 'public domain lzip'. +Pdlzip is written in C and is (hope)fully compatible with lzip 1.4 or newer. .PP Lzip is a lossless data compressor with a user interface similar to the one of gzip or bzip2. Lzip uses a simplified form of the 'Lempel\-Ziv\-Markov -chain\-Algorithm' (LZMA) stream format, chosen to maximize safety and -interoperability. Lzip can compress about as fast as gzip (lzip \fB\-0\fR) or -compress most files more than bzip2 (lzip \fB\-9\fR). Decompression speed is -intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from -a data recovery perspective. Lzip has been designed, written, and tested -with great care to replace gzip and bzip2 as the standard general\-purpose -compressed format for unix\-like systems. +chain\-Algorithm' (LZMA) stream format and provides a 3 factor integrity +checking to maximize interoperability and optimize safety. Lzip can compress +about as fast as gzip (lzip \fB\-0\fR) or compress most files more than bzip2 +(lzip \fB\-9\fR). Decompression speed is intermediate between gzip and bzip2. +Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip +has been designed, written, and tested with great care to replace gzip and +bzip2 as the standard general\-purpose compressed format for unix\-like +systems. .PP Pdlzip is also able to decompress legacy lzma\-alone (.lzma) files. Lzma\-alone is a very bad format; it is essentially a raw LZMA stream. @@ -101,7 +101,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands .PP Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or -invalid input file, 3 for an internal consistency error (eg, bug) which +invalid input file, 3 for an internal consistency error (e.g., bug) which caused pdlzip to panic. .PP Pdlzip includes public domain compression/decompression code from the LZMA @@ -111,7 +111,7 @@ Report bugs to lzip\-bug@nongnu.org .br Pdlzip home page: http://www.nongnu.org/lzip/pdlzip.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. Public Domain 2009 Igor Pavlov. License 2\-clause BSD. .br diff --git a/lzip.h b/lzip.h index db45f0b..eed4e1c 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Pdlzip - LZMA lossless data compressor - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -64,6 +64,7 @@ static inline void CRC32_init( void ) } } +/* about as fast as it is possible without messing with endianness */ static inline void CRC32_update_buf( uint32_t * const crc, const uint8_t * const buffer, const int size ) diff --git a/main.c b/main.c index 41933df..b0fab06 100644 --- a/main.c +++ b/main.c @@ -1,6 +1,6 @@ /* Pdlzip - LZMA lossless data compressor 2009-08-14 : Igor Pavlov : Public domain - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -21,7 +21,7 @@ Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error - (eg, bug) which caused pdlzip to panic. + (e.g., bug) which caused pdlzip to panic. */ #define _FILE_OFFSET_BITS 64 @@ -38,9 +38,9 @@ #include #include #include -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ #include -#if defined(__MSVCRT__) +#if defined __MSVCRT__ #define fchmod(x,y) 0 #define fchown(x,y,z) 0 #define SIGHUP SIGTERM @@ -52,7 +52,7 @@ #define S_IWOTH 0 #endif #endif -#if defined(__DJGPP__) +#if defined __DJGPP__ #define S_ISSOCK(x) 0 #define S_ISVTX 0 #endif @@ -71,6 +71,11 @@ #error "Environments where CHAR_BIT != 8 are not supported." #endif +#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \ + ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX ) +#error "Environments where 'size_t' is narrower than 'int' are not supported." +#endif + int verbosity = 0; static void cleanup_and_fail( const int retval ); static void show_error( const char * const msg, const int errcode, @@ -80,7 +85,7 @@ static void show_file_error( const char * const filename, static void internal_error( const char * const msg ); static const char * const program_name = "pdlzip"; -static const char * const program_year = "2021"; +static const char * const program_year = "2022"; static const char * invocation_name = "pdlzip"; /* default value */ static const struct { const char * from; const char * to; } known_extensions[] = { @@ -108,18 +113,18 @@ static void show_help( void ) { printf( "Pdlzip is a permissively licensed implementation of the lzip data\n" "compressor, intended for those who can't distribute (or even use) GPL\n" - "licensed Free Software. (The name of pdlzip comes from 'public domain\n" - "lzip'). Pdlzip is written in C and is (hope)fully compatible with lzip 1.4\n" - "or newer.\n" + "licensed Free Software. The name of pdlzip comes from 'public domain lzip'.\n" + "Pdlzip is written in C and is (hope)fully compatible with lzip 1.4 or newer.\n" "\nLzip is a lossless data compressor with a user interface similar to the one\n" "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n" - "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n" - "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n" - "compress most files more than bzip2 (lzip -9). Decompression speed is\n" - "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n" - "a data recovery perspective. Lzip has been designed, written, and tested\n" - "with great care to replace gzip and bzip2 as the standard general-purpose\n" - "compressed format for unix-like systems.\n" + "chain-Algorithm' (LZMA) stream format and provides a 3 factor integrity\n" + "checking to maximize interoperability and optimize safety. Lzip can compress\n" + "about as fast as gzip (lzip -0) or compress most files more than bzip2\n" + "(lzip -9). Decompression speed is intermediate between gzip and bzip2.\n" + "Lzip is better than gzip and bzip2 from a data recovery perspective. Lzip\n" + "has been designed, written, and tested with great care to replace gzip and\n" + "bzip2 as the standard general-purpose compressed format for unix-like\n" + "systems.\n" "\nPdlzip is also able to decompress legacy lzma-alone (.lzma) files.\n" "Lzma-alone is a very bad format; it is essentially a raw LZMA stream.\n" "If you keep any lzma-alone files, it is advisable to recompress them to\n" @@ -160,7 +165,7 @@ static void show_help( void ) "'tar -xf foo.tar.lz' or 'pdlzip -cd foo.tar.lz | tar -xf -'.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" - "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "invalid input file, 3 for an internal consistency error (e.g., bug) which\n" "caused pdlzip to panic.\n" "\nPdlzip includes public domain compression/decompression code from the LZMA\n" "SDK (Software Development Kit) written by Igor Pavlov.\n" @@ -202,8 +207,6 @@ struct Pretty_print static void Pp_init( struct Pretty_print * const pp, const char * const filenames[], const int num_filenames ) { - unsigned stdin_name_len; - int i; pp->name = 0; pp->padded_name = 0; pp->stdin_name = "(stdin)"; @@ -211,7 +214,8 @@ static void Pp_init( struct Pretty_print * const pp, pp->first_post = false; if( verbosity <= 0 ) return; - stdin_name_len = strlen( pp->stdin_name ); + const unsigned stdin_name_len = strlen( pp->stdin_name ); + int i; for( i = 0; i < num_filenames; ++i ) { const char * const s = filenames[i]; @@ -245,16 +249,14 @@ static void Pp_reset( struct Pretty_print * const pp ) static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { - if( verbosity >= 0 ) + if( verbosity < 0 ) return; + if( pp->first_post ) { - if( pp->first_post ) - { - pp->first_post = false; - fputs( pp->padded_name, stderr ); - if( !msg ) fflush( stderr ); - } - if( msg ) fprintf( stderr, "%s\n", msg ); + pp->first_post = false; + fputs( pp->padded_name, stderr ); + if( !msg ) fflush( stderr ); } + if( msg ) fprintf( stderr, "%s\n", msg ); } @@ -275,17 +277,53 @@ static void show_header( const unsigned dictionary_size ) } -static unsigned long getnum( const char * const ptr, +/* separate large numbers >= 100_000 in groups of 3 digits using '_' */ +static const char * format_num3( unsigned long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; /* circle of static buffers for printf */ + static int current = 0; + int i; + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; /* fill the buffer backwards */ + *p = 0; /* terminator */ + if( num > 1024 ) + { + char prefix = 0; /* try binary first, then si */ + for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 100000; + + for( i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + return p; + } + + +static unsigned long getnum( const char * const arg, + const char * const option_name, const unsigned long llimit, const unsigned long ulimit ) { - unsigned long result; char * tail; errno = 0; - result = strtoul( ptr, &tail, 0 ); - if( tail == ptr ) + unsigned long result = strtoul( arg, &tail, 0 ); + if( tail == arg ) { - show_error( "Bad or missing numerical argument.", 0, true ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); exit( 1 ); } @@ -308,7 +346,9 @@ static unsigned long getnum( const char * const ptr, } if( exponent <= 0 ) { - show_error( "Bad multiplier in numerical argument.", 0, true ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); exit( 1 ); } for( i = 0; i < exponent; ++i ) @@ -320,21 +360,24 @@ static unsigned long getnum( const char * const ptr, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits.", 0, false ); + if( verbosity >= 0 ) + fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); exit( 1 ); } return result; } -static int get_dict_size( const char * const arg ) +static int get_dict_size( const char * const arg, const char * const option_name ) { char * tail; const long bits = strtol( arg, &tail, 0 ); if( bits >= min_dictionary_bits && bits <= max_dictionary_bits_c && *tail == 0 ) return 1 << bits; - return getnum( arg, min_dictionary_size, max_dictionary_size_c ); + return getnum( arg, option_name, min_dictionary_size, max_dictionary_size_c ); } @@ -408,34 +451,31 @@ static int open_instream( const char * const name, struct stat * const in_statsp const enum Mode program_mode, const int eindex, const bool one_to_one, const bool recompress ) { - int infd = -1; if( program_mode == m_compress && !recompress && eindex >= 0 ) { if( verbosity >= 0 ) fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", program_name, name, known_extensions[eindex].from ); + return -1; } + int infd = open( name, O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( name, "Can't open input file", errno ); else { - infd = open( name, O_RDONLY | O_BINARY ); - if( infd < 0 ) - show_file_error( name, "Can't open input file", errno ); - else + const int i = fstat( infd, in_statsp ); + const mode_t mode = in_statsp->st_mode; + const bool can_read = ( i == 0 && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) { - const int i = fstat( infd, in_statsp ); - const mode_t mode = in_statsp->st_mode; - const bool can_read = ( i == 0 && - ( S_ISBLK( mode ) || S_ISCHR( mode ) || - S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) ) - { - if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", - program_name, name, ( can_read && one_to_one ) ? - ",\n and neither '-c' nor '-o' were specified" : "" ); - close( infd ); - infd = -1; - } + if( verbosity >= 0 ) + fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", + program_name, name, ( can_read && one_to_one ) ? + ",\n and neither '-c' nor '-o' were specified" : "" ); + close( infd ); + infd = -1; } } return infd; @@ -505,7 +545,7 @@ static bool check_tty_in( const char * const input_filename, const int infd, isatty( infd ) ) /* for example /dev/tty */ { show_file_error( input_filename, "I won't read compressed data from a terminal.", 0 ); - close( infd ); set_retval( retval, 1 ); + close( infd ); set_retval( retval, 2 ); if( program_mode != m_test ) cleanup_and_fail( *retval ); return false; } return true; @@ -814,12 +854,12 @@ static int decompress( const int infd, struct Pretty_print * const pp, for( first_member = true; ; first_member = false ) { - int i, size; + int i; unsigned dictionary_size = 0; /* keep gcc 3.3.6 happy */ Lzip_header header; if( inSize - inPos < lzma_header_size && !read_inbuf( infd, inBuf, &inPos, &inSize ) ) return 1; - size = inSize - inPos; + const int size = inSize - inPos; for( i = 0; i < size && i < Lh_size; ++i ) raw_props[i] = header[i] = inBuf[inPos++]; if( size <= Lh_size ) /* End Of File */ @@ -869,7 +909,6 @@ static int decompress( const int infd, struct Pretty_print * const pp, } if( lzip_mode ) { - int ds, i; if( !Lh_verify_version( header ) ) { if( verbosity >= 0 ) @@ -884,7 +923,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } raw_props[0] = 93; /* (45 * 2) + (9 * 0) + 3 */ - ds = dictionary_size; + int ds = dictionary_size; for( i = 1; i <= 4; ++i ) { raw_props[i] = ds & 0xFF; ds >>= 8; } } @@ -913,8 +952,8 @@ static int decompress( const int infd, struct Pretty_print * const pp, CRC32 crc32; -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. */ int readblock( const int fd, uint8_t * const buf, const int size ) { @@ -932,8 +971,8 @@ int readblock( const int fd, uint8_t * const buf, const int size ) } -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. +/* Return the number of bytes really written. + If (value returned < size), it is always an error. */ int writeblock( const int fd, const uint8_t * const buf, const int size ) { @@ -1000,23 +1039,15 @@ int main( const int argc, const char * const argv[] ) { 1 << 25, 273 } }; /* -9 */ struct Lzma_options encoder_options = option_mapping[6]; /* default = "-6" */ const char * default_output_filename = ""; - static struct Arg_parser parser; /* static because valgrind complains */ - static struct Pretty_print pp; /* and memory management in C sucks */ - static const char ** filenames = 0; - int num_filenames = 0; enum Mode program_mode = m_compress; - int argind = 0; - int failed_tests = 0; - int retval = 0; int i; - bool filenames_given = false; bool force = false; bool ignore_trailing = true; bool keep_input_files = false; bool loose_trailing = false; bool recompress = false; - bool stdin_used = false; bool to_stdout = false; + if( argc > 0 ) invocation_name = argv[0]; enum { opt_lt = 256 }; const struct ap_Option options[] = @@ -1051,19 +1082,22 @@ int main( const int argc, const char * const argv[] ) { opt_lt, "loose-trailing", ap_no }, { 0, 0, ap_no } }; - if( argc > 0 ) invocation_name = argv[0]; CRC32_init(); + /* static because valgrind complains and memory management in C sucks */ + static struct Arg_parser parser; if( !ap_init( &parser, argc, argv, options, 0 ) ) { show_error( mem_msg, 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } + int argind = 0; for( ; argind < ap_arguments( &parser ); ++argind ) { const int code = ap_code( &parser, argind ); - const char * const arg = ap_argument( &parser, argind ); if( !code ) break; /* no more options */ + const char * const pn = ap_parsed_name( &parser, argind ); + const char * const arg = ap_argument( &parser, argind ); switch( code ) { case '0': case '1': case '2': case '3': case '4': @@ -1078,12 +1112,12 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'm': encoder_options.match_len_limit = - getnum( arg, min_match_len_limit, max_match_len ); break; + getnum( arg, pn, min_match_len_limit, max_match_len ); break; case 'n': break; case 'o': if( strcmp( arg, "-" ) == 0 ) to_stdout = true; else { default_output_filename = arg; } break; case 'q': verbosity = -1; break; - case 's': encoder_options.dictionary_size = get_dict_size( arg ); + case 's': encoder_options.dictionary_size = get_dict_size( arg, pn ); break; case 'S': break; case 't': set_mode( &program_mode, m_test ); break; @@ -1094,15 +1128,17 @@ int main( const int argc, const char * const argv[] ) } } /* end process options */ -#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__) +#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif - num_filenames = max( 1, ap_arguments( &parser ) - argind ); + static const char ** filenames = 0; + int num_filenames = max( 1, ap_arguments( &parser ) - argind ); filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] ); filenames[0] = "-"; + bool filenames_given = false; for( i = 0; argind + i < ap_arguments( &parser ); ++i ) { filenames[i] = ap_argument( &parser, argind + i ); @@ -1123,16 +1159,18 @@ int main( const int argc, const char * const argv[] ) if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) ) set_signals( signal_handler ); + static struct Pretty_print pp; Pp_init( &pp, filenames, num_filenames ); + int failed_tests = 0; + int retval = 0; const bool one_to_one = !to_stdout && program_mode != m_test && !to_file; + bool stdin_used = false; for( i = 0; i < num_filenames; ++i ) { const char * input_filename = ""; int infd; - int tmp; struct stat in_stats; - const struct stat * in_statsp; Pp_set_name( &pp, filenames[i] ); if( strcmp( filenames[i], "-" ) == 0 ) @@ -1174,7 +1212,9 @@ int main( const int argc, const char * const argv[] ) return 1; /* check tty only once and don't try to delete a tty */ } - in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0; + const struct stat * const in_statsp = + ( input_filename[0] && one_to_one ) ? &in_stats : 0; + int tmp; if( program_mode == m_compress ) tmp = compress( &encoder_options, &pp, infd ); else diff --git a/testsuite/check.sh b/testsuite/check.sh index 80de80c..c2e22a7 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Pdlzip - LZMA lossless data compressor -# Copyright (C) 2010-2021 Antonio Diaz Diaz. +# Copyright (C) 2010-2022 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -92,6 +92,7 @@ done printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null +rm -f out || framework_failure printf "\ntesting decompression..." @@ -111,19 +112,23 @@ done lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO [ "${lines}" -eq 8 ] || test_failed $LINENO "${lines}" +"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO cat "${in_lz}" > copy.lz || framework_failure "${LZIP}" -dk copy.lz || test_failed $LINENO cmp in copy || test_failed $LINENO -printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d copy.lz 2> /dev/null +cat fox > copy || framework_failure +cat "${in_lz}" > out.lz || framework_failure +rm -f out || framework_failure +"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out [ $? = 1 ] || test_failed $LINENO +cmp fox copy || test_failed $LINENO +cmp in out || test_failed $LINENO "${LZIP}" -df copy.lz || test_failed $LINENO [ ! -e copy.lz ] || test_failed $LINENO cmp in copy || test_failed $LINENO +rm -f out || framework_failure printf "to be overwritten" > copy || framework_failure -"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null -[ $? = 1 ] || test_failed $LINENO "${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO cmp in copy || test_failed $LINENO rm -f out copy || framework_failure @@ -147,7 +152,7 @@ rm -f copy anyothername.out || framework_failure [ $? = 1 ] || test_failed $LINENO "${LZIP}" -cdq in "${in_lz}" > copy [ $? = 2 ] || test_failed $LINENO -cat copy in | cmp in - || test_failed $LINENO +cat copy in | cmp in - || test_failed $LINENO # copy must be empty "${LZIP}" -cdq nx_file.lz "${in_lz}" > copy [ $? = 1 ] || test_failed $LINENO cmp in copy || test_failed $LINENO @@ -295,7 +300,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \ [ $? = 2 ] || test_failed $LINENO $i done -"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do "${LZIP}" -cdq "${testdir}"/$i > out [ $? = 2 ] || test_failed $LINENO $i -- cgit v1.2.3