summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 10
-rw-r--r-- INSTALL 4
-rw-r--r-- Makefile.in 2
-rw-r--r-- NEWS 30
-rw-r--r-- README 4
-rw-r--r-- carg_parser.c 110
-rw-r--r-- carg_parser.h 14
-rwxr-xr-x configure 6
-rw-r--r-- decoder.c 43
-rw-r--r-- decoder.h 119
-rw-r--r-- doc/lunzip.1 6
-rw-r--r-- list.c 63
-rw-r--r-- lzip.h 9
-rw-r--r-- lzip_index.c 43
-rw-r--r-- lzip_index.h 2
-rw-r--r-- main.c 153
-rwxr-xr-x testsuite/check.sh 23
17 files changed, 367 insertions, 274 deletions
diff --git a/ChangeLog b/ChangeLog
index 08e3c0a..07d8d6e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2022-01-22 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.13 released.
+ * Decompression time has been reduced by 5-12% depending on the file.
+ * main.c (getnum): Show option name and valid range if error.
+
2021-01-01 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.12 released.
@@ -18,7 +24,7 @@
* lzip_index.c: Detect some kinds of corrupt trailers.
* main.c (main): Check return value of close( infd ).
* main.c: Compile on DOS with DJGPP.
- * configure: Accept appending to CFLAGS, 'CFLAGS+=OPTIONS'.
+ * configure: Accept appending to CFLAGS; 'CFLAGS+=OPTIONS'.
* INSTALL: Document use of CFLAGS+='-D __USE_MINGW_ANSI_STDIO'.
2018-02-05 Antonio Diaz Diaz <antonio@gnu.org>
@@ -115,7 +121,7 @@
* Created from the decompression code of clzip 1.1.
-Copyright (C) 2010-2021 Antonio Diaz Diaz.
+Copyright (C) 2010-2022 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute, and
diff --git a/INSTALL b/INSTALL
index 313b7dc..4282b1e 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C99 compiler. (gcc 3.3.6 or newer is recommended).
-I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards
+I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -69,7 +69,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2010-2021 Antonio Diaz Diaz.
+Copyright (C) 2010-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.
diff --git a/Makefile.in b/Makefile.in
index aff94d4..ffc4ce8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -20,7 +20,7 @@ objs = carg_parser.o lzip_index.o list.o decoder.o main.o
all : $(progname)
$(progname) : $(objs)
- $(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(objs)
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(objs)
main.o : main.c
$(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
diff --git a/NEWS b/NEWS
index 98e59f3..fee658c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,28 +1,6 @@
-Changes in version 1.12:
+Changes in version 1.13:
-Lunzip now reports an error if a file name is empty (lunzip -t "").
+Decompression time has been reduced by 5-12% depending on the file.
-Option '-o, --output' now behaves like '-c, --stdout', but sending the
-output unconditionally to a file instead of to standard output. See the new
-description of '-o' in the manual. This change is backwards compatible only
-when decompressing from standard input alone. Therefore commands like:
- lunzip -d -o foo - bar.lz < foo.lz
-must now be split into:
- lunzip -d -o foo - < foo.lz
- lunzip -d bar.lz
-or rewritten as:
- lunzip -d - bar.lz < foo.lz > foo
-
-Lunzip now does not even open the output file if the input file is a terminal.
-
-The words 'decompressed' and 'compressed' have been replaced with the
-shorter 'out' and 'in' in the verbose output when decompressing or testing.
-
-Option '--list' now reports corruption or truncation of the last header in a
-multimenber file specifically instead of showing the generic message "Last
-member in input file is truncated or corrupt."
-
-The commands needed to extract files from a tar.lz archive have been
-documented in the output of '--help' and in the man page.
-
-9 new test files have been added to the testsuite.
+In case of error in a numerical argument to a command line option, lunzip
+now shows the name of the option and the range of valid values.
diff --git a/README b/README
index b09c908..39cd00d 100644
--- a/README
+++ b/README
@@ -60,7 +60,7 @@ filename.lz becomes filename
filename.tlz becomes filename.tar
anyothername becomes anyothername.out
-Decompressing a file is much like copying or moving it; therefore lunzip
+Decompressing a file is much like copying or moving it. Therefore lunzip
preserves the access and modification dates, permissions, and, when
possible, ownership of the file just as 'cp -p' does. (If the user ID or
the group ID can't be duplicated, the file permission bits S_ISUID and
@@ -89,7 +89,7 @@ been compressed. Decompressed is used to refer to data which have undergone
the process of decompression.
-Copyright (C) 2010-2021 Antonio Diaz Diaz.
+Copyright (C) 2010-2022 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute, and modify it.
diff --git a/carg_parser.c b/carg_parser.c
index d0c05d5..181ba23 100644
--- a/carg_parser.c
+++ b/carg_parser.c
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006-2021 Antonio Diaz Diaz.
+ Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -32,10 +32,10 @@ static void * ap_resize_buffer( void * buf, const int min_size )
}
-static char push_back_record( struct Arg_parser * const ap,
- const int code, const char * const argument )
+static char push_back_record( struct Arg_parser * const ap, const int code,
+ const char * const long_name,
+ const char * const argument )
{
- const int len = strlen( argument );
struct ap_Record * p;
void * tmp = ap_resize_buffer( ap->data,
( ap->data_size + 1 ) * sizeof (struct ap_Record) );
@@ -43,11 +43,29 @@ static char push_back_record( struct Arg_parser * const ap,
ap->data = (struct ap_Record *)tmp;
p = &(ap->data[ap->data_size]);
p->code = code;
- p->argument = 0;
- tmp = ap_resize_buffer( p->argument, len + 1 );
- if( !tmp ) return 0;
- p->argument = (char *)tmp;
- strncpy( p->argument, argument, len + 1 );
+ if( long_name )
+ {
+ const int len = strlen( long_name );
+ p->parsed_name = (char *)malloc( len + 2 + 1 );
+ if( !p->parsed_name ) return 0;
+ p->parsed_name[0] = p->parsed_name[1] = '-';
+ strncpy( p->parsed_name + 2, long_name, len + 1 );
+ }
+ else if( code > 0 && code < 256 )
+ {
+ p->parsed_name = (char *)malloc( 2 + 1 );
+ if( !p->parsed_name ) return 0;
+ p->parsed_name[0] = '-'; p->parsed_name[1] = code; p->parsed_name[2] = 0;
+ }
+ else p->parsed_name = 0;
+ if( argument )
+ {
+ const int len = strlen( argument );
+ p->argument = (char *)malloc( len + 1 );
+ if( !p->argument ) { free( p->parsed_name ); return 0; }
+ strncpy( p->argument, argument, len + 1 );
+ }
+ else p->argument = 0;
++ap->data_size;
return 1;
}
@@ -68,12 +86,14 @@ static char add_error( struct Arg_parser * const ap, const char * const msg )
static void free_data( struct Arg_parser * const ap )
{
int i;
- for( i = 0; i < ap->data_size; ++i ) free( ap->data[i].argument );
+ for( i = 0; i < ap->data_size; ++i )
+ { free( ap->data[i].argument ); free( ap->data[i].parsed_name ); }
if( ap->data ) { free( ap->data ); ap->data = 0; }
ap->data_size = 0;
}
+/* Return 0 only if out of memory. */
static char parse_long_option( struct Arg_parser * const ap,
const char * const opt, const char * const arg,
const struct ap_Option options[],
@@ -87,9 +107,10 @@ static char parse_long_option( struct Arg_parser * const ap,
/* Test all long options for either exact match or abbreviated matches. */
for( i = 0; options[i].code != 0; ++i )
- if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 )
+ if( options[i].long_name &&
+ strncmp( options[i].long_name, &opt[2], len ) == 0 )
{
- if( strlen( options[i].name ) == len ) /* Exact match found */
+ if( strlen( options[i].long_name ) == len ) /* Exact match found */
{ index = i; exact = 1; break; }
else if( index < 0 ) index = i; /* First nonexact match found */
else if( options[index].code != options[i].code ||
@@ -117,35 +138,39 @@ static char parse_long_option( struct Arg_parser * const ap,
{
if( options[index].has_arg == ap_no )
{
- add_error( ap, "option '--" ); add_error( ap, options[index].name );
+ add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' doesn't allow an argument" );
return 1;
}
if( options[index].has_arg == ap_yes && !opt[len+3] )
{
- add_error( ap, "option '--" ); add_error( ap, options[index].name );
+ add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' requires an argument" );
return 1;
}
- return push_back_record( ap, options[index].code, &opt[len+3] );
+ return push_back_record( ap, options[index].code,
+ options[index].long_name, &opt[len+3] );
}
if( options[index].has_arg == ap_yes )
{
if( !arg || !arg[0] )
{
- add_error( ap, "option '--" ); add_error( ap, options[index].name );
+ add_error( ap, "option '--" ); add_error( ap, options[index].long_name );
add_error( ap, "' requires an argument" );
return 1;
}
++*argindp;
- return push_back_record( ap, options[index].code, arg );
+ return push_back_record( ap, options[index].code,
+ options[index].long_name, arg );
}
- return push_back_record( ap, options[index].code, "" );
+ return push_back_record( ap, options[index].code,
+ options[index].long_name, 0 );
}
+/* Return 0 only if out of memory. */
static char parse_short_option( struct Arg_parser * const ap,
const char * const opt, const char * const arg,
const struct ap_Option options[],
@@ -156,13 +181,13 @@ static char parse_short_option( struct Arg_parser * const ap,
while( cind > 0 )
{
int index = -1, i;
- const unsigned char code = opt[cind];
+ const unsigned char c = opt[cind];
char code_str[2];
- code_str[0] = code; code_str[1] = 0;
+ code_str[0] = c; code_str[1] = 0;
- if( code != 0 )
+ if( c != 0 )
for( i = 0; options[i].code; ++i )
- if( code == options[i].code )
+ if( c == options[i].code )
{ index = i; break; }
if( index < 0 )
@@ -176,7 +201,7 @@ static char parse_short_option( struct Arg_parser * const ap,
if( options[index].has_arg != ap_no && cind > 0 && opt[cind] )
{
- if( !push_back_record( ap, code, &opt[cind] ) ) return 0;
+ if( !push_back_record( ap, c, 0, &opt[cind] ) ) return 0;
++*argindp; cind = 0;
}
else if( options[index].has_arg == ap_yes )
@@ -188,9 +213,9 @@ static char parse_short_option( struct Arg_parser * const ap,
return 1;
}
++*argindp; cind = 0;
- if( !push_back_record( ap, code, arg ) ) return 0;
+ if( !push_back_record( ap, c, 0, arg ) ) return 0;
}
- else if( !push_back_record( ap, code, "" ) ) return 0;
+ else if( !push_back_record( ap, c, 0, 0 ) ) return 0;
}
return 1;
}
@@ -203,7 +228,7 @@ char ap_init( struct Arg_parser * const ap,
const char ** non_options = 0; /* skipped non-options */
int non_options_size = 0; /* number of skipped non-options */
int argind = 1; /* index in argv */
- int i;
+ char done = 0; /* false until success */
ap->data = 0;
ap->error = 0;
@@ -223,20 +248,20 @@ char ap_init( struct Arg_parser * const ap,
if( ch2 == '-' )
{
if( !argv[argind][2] ) { ++argind; break; } /* we found "--" */
- else if( !parse_long_option( ap, opt, arg, options, &argind ) ) return 0;
+ else if( !parse_long_option( ap, opt, arg, options, &argind ) ) goto out;
}
- else if( !parse_short_option( ap, opt, arg, options, &argind ) ) return 0;
+ else if( !parse_short_option( ap, opt, arg, options, &argind ) ) goto out;
if( ap->error ) break;
}
else
{
if( in_order )
- { if( !push_back_record( ap, 0, argv[argind++] ) ) return 0; }
+ { if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out; }
else
{
void * tmp = ap_resize_buffer( non_options,
( non_options_size + 1 ) * sizeof *non_options );
- if( !tmp ) return 0;
+ if( !tmp ) goto out;
non_options = (const char **)tmp;
non_options[non_options_size++] = argv[argind++];
}
@@ -245,13 +270,15 @@ char ap_init( struct Arg_parser * const ap,
if( ap->error ) free_data( ap );
else
{
+ int i;
for( i = 0; i < non_options_size; ++i )
- if( !push_back_record( ap, 0, non_options[i] ) ) return 0;
+ if( !push_back_record( ap, 0, 0, non_options[i] ) ) goto out;
while( argind < argc )
- if( !push_back_record( ap, 0, argv[argind++] ) ) return 0;
+ if( !push_back_record( ap, 0, 0, argv[argind++] ) ) goto out;
}
- if( non_options ) free( non_options );
- return 1;
+ done = 1;
+out: if( non_options ) free( non_options );
+ return done;
}
@@ -273,13 +300,20 @@ int ap_arguments( const struct Arg_parser * const ap )
int ap_code( const struct Arg_parser * const ap, const int i )
{
- if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].code;
- else return 0;
+ if( i < 0 || i >= ap_arguments( ap ) ) return 0;
+ return ap->data[i].code;
+ }
+
+
+const char * ap_parsed_name( const struct Arg_parser * const ap, const int i )
+ {
+ if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].parsed_name ) return "";
+ return ap->data[i].parsed_name;
}
const char * ap_argument( const struct Arg_parser * const ap, const int i )
{
- if( i >= 0 && i < ap_arguments( ap ) ) return ap->data[i].argument;
- else return "";
+ if( i < 0 || i >= ap_arguments( ap ) || !ap->data[i].argument ) return "";
+ return ap->data[i].argument;
}
diff --git a/carg_parser.h b/carg_parser.h
index c5f2352..0c64861 100644
--- a/carg_parser.h
+++ b/carg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C version)
- Copyright (C) 2006-2021 Antonio Diaz Diaz.
+ Copyright (C) 2006-2022 Antonio Diaz Diaz.
This library is free software. Redistribution and use in source and
binary forms, with or without modification, are permitted provided
@@ -24,9 +24,9 @@
message.
'options' is an array of 'struct ap_Option' terminated by an element
- containing a code which is zero. A null name means a short-only
- option. A code value outside the unsigned char range means a
- long-only option.
+ containing a code which is zero. A null long_name means a short-only
+ option. A code value outside the unsigned char range means a long-only
+ option.
Arg_parser normally makes it appear as if all the option arguments
were specified before all the non-option arguments for the purposes
@@ -50,7 +50,7 @@ enum ap_Has_arg { ap_no, ap_yes, ap_maybe };
struct ap_Option
{
int code; /* Short option letter or code ( code != 0 ) */
- const char * name; /* Long option name (maybe null) */
+ const char * long_name; /* Long option name (maybe null) */
enum ap_Has_arg has_arg;
};
@@ -58,6 +58,7 @@ struct ap_Option
struct ap_Record
{
int code;
+ char * parsed_name;
char * argument;
};
@@ -86,6 +87,9 @@ int ap_arguments( const struct Arg_parser * const ap );
Else ap_argument( i ) is the option's argument (or empty). */
int ap_code( const struct Arg_parser * const ap, const int i );
+/* Full name of the option parsed (short or long). */
+const char * ap_parsed_name( const struct Arg_parser * const ap, const int i );
+
const char * ap_argument( const struct Arg_parser * const ap, const int i );
#ifdef __cplusplus
diff --git a/configure b/configure
index 54fb2fb..e241235 100755
--- a/configure
+++ b/configure
@@ -1,12 +1,12 @@
#! /bin/sh
# configure script for Lunzip - Decompressor for the lzip format
-# Copyright (C) 2010-2021 Antonio Diaz Diaz.
+# Copyright (C) 2010-2022 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute, and modify it.
pkgname=lunzip
-pkgversion=1.12
+pkgversion=1.13
progname=lunzip
srctrigger=doc/${progname}.1
@@ -167,7 +167,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lunzip - Decompressor for the lzip format
-# Copyright (C) 2010-2021 Antonio Diaz Diaz.
+# Copyright (C) 2010-2022 Antonio Diaz Diaz.
# This file was generated automatically by configure. Don't edit.
#
# This Makefile is free software: you have unlimited permission
diff --git a/decoder.c b/decoder.c
index 63d30ea..b52b35f 100644
--- a/decoder.c
+++ b/decoder.c
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -32,8 +32,8 @@
CRC32 crc32;
-/* Returns the number of bytes really read.
- If (returned value < size) and (errno == 0), means EOF was reached.
+/* Return the number of bytes really read.
+ If (value returned < size) and (errno == 0), means EOF was reached.
*/
int readblock( const int fd, uint8_t * const buf, const int size )
{
@@ -51,8 +51,8 @@ int readblock( const int fd, uint8_t * const buf, const int size )
}
-/* Returns the number of bytes really written.
- If (returned value < size), it is always an error.
+/* Return the number of bytes really written.
+ If (value returned < size), it is always an error.
*/
static int writeblock( const int fd, const uint8_t * const buf, const int size )
{
@@ -118,8 +118,6 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
int size = Rd_read_data( d->rdec, trailer, Lt_size );
const unsigned long long data_size = LZd_data_position( d );
const unsigned long long member_size = Rd_member_position( d->rdec );
- unsigned td_crc;
- unsigned long long td_size, tm_size;
bool error = false;
if( size < Lt_size )
@@ -134,7 +132,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
while( size < Lt_size ) trailer[size++] = 0;
}
- td_crc = Lt_get_data_crc( trailer );
+ const unsigned td_crc = Lt_get_data_crc( trailer );
if( td_crc != LZd_crc( d ) )
{
error = true;
@@ -145,7 +143,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
td_crc, LZd_crc( d ) );
}
}
- td_size = Lt_get_data_size( trailer );
+ const unsigned long long td_size = Lt_get_data_size( trailer );
if( td_size != data_size )
{
error = true;
@@ -156,7 +154,7 @@ static bool LZd_verify_trailer( struct LZ_decoder * const d,
td_size, td_size, data_size, data_size );
}
}
- tm_size = Lt_get_member_size( trailer );
+ const unsigned long long tm_size = Lt_get_member_size( trailer );
if( tm_size != member_size )
{
error = true;
@@ -192,10 +190,6 @@ int LZd_decode_member( struct LZ_decoder * const d,
struct Pretty_print * const pp )
{
struct Range_decoder * const rdec = d->rdec;
- void (* const copy_block)
- ( struct LZ_decoder * const d, const unsigned distance, unsigned len ) =
- ( d->buffer_size >= d->dictionary_size ) ?
- &LZd_copy_block : &LZd_copy_block2;
Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[states][pos_states];
Bit_model bm_rep[states];
@@ -213,6 +207,7 @@ int LZd_decode_member( struct LZ_decoder * const d,
unsigned rep2 = 0; /* repeated distances */
unsigned rep3 = 0;
State state = 0;
+ const bool full_buffer = d->buffer_size >= d->dictionary_size;
Bm_array_init( bm_literal[0], (1 << literal_context_bits) * 0x300 );
Bm_array_init( bm_match[0], states * pos_states );
@@ -230,25 +225,19 @@ int LZd_decode_member( struct LZ_decoder * const d,
Rd_load( rdec );
while( !Rd_finished( rdec ) )
{
- int len;
const int pos_state = LZd_data_position( d ) & pos_state_mask;
if( Rd_decode_bit( rdec, &bm_match[state][pos_state] ) == 0 ) /* 1st bit */
{
/* literal byte */
Bit_model * const bm = bm_literal[get_lit_state(LZd_peek_prev( d ))];
- if( St_is_char( state ) )
- {
- state -= ( state < 4 ) ? state : 3;
+ if( ( state = St_set_char( state ) ) < 4 )
LZd_put_byte( d, Rd_decode_tree8( rdec, bm ) );
- }
else
- {
- state -= ( state < 10 ) ? 3 : 6;
LZd_put_byte( d, Rd_decode_matched( rdec, bm, LZd_peek( d, rep0 ) ) );
- }
continue;
}
/* match or repeated match */
+ int len;
if( Rd_decode_bit( rdec, &bm_rep[state] ) != 0 ) /* 2nd bit */
{
if( Rd_decode_bit( rdec, &bm_rep0[state] ) == 0 ) /* 3rd bit */
@@ -274,13 +263,12 @@ int LZd_decode_member( struct LZ_decoder * const d,
rep0 = distance;
}
state = St_set_rep( state );
- len = min_match_len + Rd_decode_len( rdec, &rep_len_model, pos_state );
+ len = Rd_decode_len( rdec, &rep_len_model, pos_state );
}
else /* match */
{
- unsigned distance;
- len = min_match_len + Rd_decode_len( rdec, &match_len_model, pos_state );
- distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] );
+ len = Rd_decode_len( rdec, &match_len_model, pos_state );
+ unsigned distance = Rd_decode_tree6( rdec, bm_dis_slot[get_len_state(len)] );
if( distance >= start_dis_model )
{
const unsigned dis_slot = distance;
@@ -321,7 +309,8 @@ int LZd_decode_member( struct LZ_decoder * const d,
( !d->pos_wrapped && rep0 >= LZd_data_position( d ) ) )
{ LZd_flush_data( d ); return 1; }
}
- copy_block( d, rep0, len );
+ if( full_buffer || rep0 < d->buffer_size ) LZd_copy_block( d, rep0, len );
+ else LZd_copy_block2( d, rep0, len );
}
LZd_flush_data( d );
return 2;
diff --git a/decoder.h b/decoder.h
index 0fe0110..0afdd83 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -101,12 +101,11 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec,
int i;
for( i = num_bits; i > 0; --i )
{
- bool bit;
Rd_normalize( rdec );
rdec->range >>= 1;
/* symbol <<= 1; */
/* if( rdec->code >= rdec->range ) { rdec->code -= rdec->range; symbol |= 1; } */
- bit = ( rdec->code >= rdec->range );
+ const bool bit = ( rdec->code >= rdec->range );
symbol <<= 1; symbol += bit;
rdec->code -= rdec->range & ( 0U - bit );
}
@@ -116,42 +115,75 @@ static inline unsigned Rd_decode( struct Range_decoder * const rdec,
static inline unsigned Rd_decode_bit( struct Range_decoder * const rdec,
Bit_model * const probability )
{
- uint32_t bound;
Rd_normalize( rdec );
- bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
if( rdec->code < bound )
{
rdec->range = bound;
- *probability += (bit_model_total - *probability) >> bit_model_move_bits;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
return 0;
}
else
{
- rdec->range -= bound;
rdec->code -= bound;
+ rdec->range -= bound;
*probability -= *probability >> bit_model_move_bits;
return 1;
}
}
-static inline unsigned Rd_decode_tree3( struct Range_decoder * const rdec,
- Bit_model bm[] )
+static inline void Rd_decode_symbol_bit( struct Range_decoder * const rdec,
+ Bit_model * const probability, unsigned * symbol )
{
- unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- return symbol & 7;
+ Rd_normalize( rdec );
+ *symbol <<= 1;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ *symbol |= 1;
+ }
+ }
+
+static inline void Rd_decode_symbol_bit_reversed( struct Range_decoder * const rdec,
+ Bit_model * const probability, unsigned * model,
+ unsigned * symbol, const int i )
+ {
+ Rd_normalize( rdec );
+ *model <<= 1;
+ const uint32_t bound = ( rdec->range >> bit_model_total_bits ) * *probability;
+ if( rdec->code < bound )
+ {
+ rdec->range = bound;
+ *probability += ( bit_model_total - *probability ) >> bit_model_move_bits;
+ }
+ else
+ {
+ rdec->code -= bound;
+ rdec->range -= bound;
+ *probability -= *probability >> bit_model_move_bits;
+ *model |= 1;
+ *symbol |= 1 << i;
+ }
}
static inline unsigned Rd_decode_tree6( struct Range_decoder * const rdec,
Bit_model bm[] )
{
- unsigned symbol = 2 | Rd_decode_bit( rdec, &bm[1] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ unsigned symbol = 1;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return symbol & 0x3F;
}
@@ -159,9 +191,14 @@ static inline unsigned Rd_decode_tree8( struct Range_decoder * const rdec,
Bit_model bm[] )
{
unsigned symbol = 1;
- int i;
- for( i = 0; i < 8; ++i )
- symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
return symbol & 0xFF;
}
@@ -173,21 +210,19 @@ Rd_decode_tree_reversed( struct Range_decoder * const rdec,
unsigned symbol = 0;
int i;
for( i = 0; i < num_bits; ++i )
- {
- const unsigned bit = Rd_decode_bit( rdec, &bm[model] );
- model <<= 1; model += bit;
- symbol |= ( bit << i );
- }
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, i );
return symbol;
}
static inline unsigned
Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] )
{
- unsigned symbol = Rd_decode_bit( rdec, &bm[1] );
- symbol += Rd_decode_bit( rdec, &bm[2+symbol] ) << 1;
- symbol += Rd_decode_bit( rdec, &bm[4+symbol] ) << 2;
- symbol += Rd_decode_bit( rdec, &bm[8+symbol] ) << 3;
+ unsigned model = 1;
+ unsigned symbol = 0;
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 0 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 1 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 2 );
+ Rd_decode_symbol_bit_reversed( rdec, &bm[model], &model, &symbol, 3 );
return symbol;
}
@@ -210,11 +245,24 @@ static inline unsigned Rd_decode_len( struct Range_decoder * const rdec,
struct Len_model * const lm,
const int pos_state )
{
+ Bit_model * bm;
+ unsigned mask, offset, symbol = 1;
+
if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 )
- return Rd_decode_tree3( rdec, lm->bm_low[pos_state] );
+ { bm = lm->bm_low[pos_state]; mask = 7; offset = 0; goto len3; }
if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 )
- return len_low_symbols + Rd_decode_tree3( rdec, lm->bm_mid[pos_state] );
- return len_low_symbols + len_mid_symbols + Rd_decode_tree8( rdec, lm->bm_high );
+ { bm = lm->bm_mid[pos_state]; mask = 7; offset = len_low_symbols; goto len3; }
+ bm = lm->bm_high; mask = 0xFF; offset = len_low_symbols + len_mid_symbols;
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+len3:
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ Rd_decode_symbol_bit( rdec, &bm[symbol], &symbol );
+ return ( symbol & mask ) + min_match_len + offset;
}
@@ -291,11 +339,10 @@ static inline void LZd_copy_block( struct LZ_decoder * const d,
}
}
+/* block is (at least partially) outside of the buffer */
static inline void LZd_copy_block2( struct LZ_decoder * const d,
const unsigned distance, unsigned len )
{
- if( d->buffer_size > distance ) /* block is in buffer */
- { LZd_copy_block( d, distance, len ); return; }
if( len < d->buffer_size - d->pos ) /* no wrap */
{
const unsigned offset = distance + 1 + d->stream_pos - d->pos;
diff --git a/doc/lunzip.1 b/doc/lunzip.1
index f96b431..9aa0300 100644
--- a/doc/lunzip.1
+++ b/doc/lunzip.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
-.TH LUNZIP "1" "January 2021" "lunzip 1.12" "User Commands"
+.TH LUNZIP "1" "January 2022" "lunzip 1.13" "User Commands"
.SH NAME
lunzip \- decompressor for the lzip format
.SH SYNOPSIS
@@ -77,7 +77,7 @@ To extract all the files from archive 'foo.tar.lz', use the commands
.PP
Exit status: 0 for a normal exit, 1 for environmental problems (file
not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or
-invalid input file, 3 for an internal consistency error (eg, bug) which
+invalid input file, 3 for an internal consistency error (e.g., bug) which
caused lunzip to panic.
.PP
The ideas embodied in lunzip are due to (at least) the following people:
@@ -90,7 +90,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lunzip home page: http://www.nongnu.org/lzip/lunzip.html
.SH COPYRIGHT
-Copyright \(co 2021 Antonio Diaz Diaz.
+Copyright \(co 2022 Antonio Diaz Diaz.
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/list.c b/list.c
index 8ab8c44..33de75c 100644
--- a/list.c
+++ b/list.c
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -52,17 +52,15 @@ int list_files( const char * const filenames[], const int num_filenames,
bool stdin_used = false;
for( i = 0; i < num_filenames; ++i )
{
- const char * input_filename;
- struct Lzip_index lzip_index;
- struct stat in_stats; /* not used */
- int infd;
const bool from_stdin = ( strcmp( filenames[i], "-" ) == 0 );
if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
- input_filename = from_stdin ? "(stdin)" : filenames[i];
- infd = from_stdin ? STDIN_FILENO :
+ const char * const input_filename = from_stdin ? "(stdin)" : filenames[i];
+ struct stat in_stats; /* not used */
+ const int infd = from_stdin ? STDIN_FILENO :
open_instream( input_filename, &in_stats, false, true );
if( infd < 0 ) { set_retval( &retval, 1 ); continue; }
+ struct Lzip_index lzip_index;
Li_init( &lzip_index, infd, ignore_trailing, loose_trailing );
close( infd );
if( lzip_index.retval != 0 )
@@ -71,37 +69,36 @@ int list_files( const char * const filenames[], const int num_filenames,
set_retval( &retval, lzip_index.retval );
Li_free( &lzip_index ); continue;
}
- if( verbosity >= 0 )
+ if( verbosity < 0 ) { Li_free( &lzip_index ); continue; }
+ const unsigned long long udata_size = Li_udata_size( &lzip_index );
+ const unsigned long long cdata_size = Li_cdata_size( &lzip_index );
+ total_comp += cdata_size; total_uncomp += udata_size; ++files;
+ const long members = lzip_index.members;
+ if( first_post )
{
- const unsigned long long udata_size = Li_udata_size( &lzip_index );
- const unsigned long long cdata_size = Li_cdata_size( &lzip_index );
- total_comp += cdata_size; total_uncomp += udata_size; ++files;
- if( first_post )
- {
- first_post = false;
- if( verbosity >= 1 ) fputs( " dict memb trail ", stdout );
- fputs( " uncompressed compressed saved name\n", stdout );
- }
- if( verbosity >= 1 )
- printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size ),
- lzip_index.members, Li_file_size( &lzip_index ) - cdata_size );
- list_line( udata_size, cdata_size, input_filename );
+ first_post = false;
+ if( verbosity >= 1 ) fputs( " dict memb trail ", stdout );
+ fputs( " uncompressed compressed saved name\n", stdout );
+ }
+ if( verbosity >= 1 )
+ printf( "%s %5ld %6lld ", format_ds( lzip_index.dictionary_size ),
+ members, Li_file_size( &lzip_index ) - cdata_size );
+ list_line( udata_size, cdata_size, input_filename );
- if( verbosity >= 2 && lzip_index.members > 1 )
+ if( verbosity >= 2 && members > 1 )
+ {
+ long i;
+ fputs( " member data_pos data_size member_pos member_size\n", stdout );
+ for( i = 0; i < members; ++i )
{
- long i;
- fputs( " member data_pos data_size member_pos member_size\n", stdout );
- for( i = 0; i < lzip_index.members; ++i )
- {
- const struct Block * db = Li_dblock( &lzip_index, i );
- const struct Block * mb = Li_mblock( &lzip_index, i );
- printf( "%6ld %14llu %14llu %14llu %14llu\n",
- i + 1, db->pos, db->size, mb->pos, mb->size );
- }
- first_post = true; /* reprint heading after list of members */
+ const struct Block * db = Li_dblock( &lzip_index, i );
+ const struct Block * mb = Li_mblock( &lzip_index, i );
+ printf( "%6ld %14llu %14llu %14llu %14llu\n",
+ i + 1, db->pos, db->size, mb->pos, mb->size );
}
- fflush( stdout );
+ first_post = true; /* reprint heading after list of members */
}
+ fflush( stdout );
Li_free( &lzip_index );
}
if( verbosity >= 0 && files > 1 )
diff --git a/lzip.h b/lzip.h
index 3961c33..4b77be8 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -131,6 +131,7 @@ static inline void CRC32_init( void )
}
}
+/* about as fast as it is possible without messing with endianness */
static inline void CRC32_update_buf( uint32_t * const crc,
const uint8_t * const buffer,
const int size )
@@ -228,12 +229,12 @@ static inline bool Lt_verify_consistency( const Lzip_trailer data )
{
const unsigned crc = Lt_get_data_crc( data );
const unsigned long long dsize = Lt_get_data_size( data );
- const unsigned long long msize = Lt_get_member_size( data );
- const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
- const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
+ const unsigned long long msize = Lt_get_member_size( data );
if( msize < min_member_size ) return false;
+ const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
if( mlimit > dsize && msize > mlimit ) return false;
+ const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
if( dlimit > msize && dsize > dlimit ) return false;
return true;
}
diff --git a/lzip_index.c b/lzip_index.c
index ca4df8d..559fd7a 100644
--- a/lzip_index.c
+++ b/lzip_index.c
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -133,44 +133,38 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
const bool ignore_trailing,
const bool loose_trailing )
{
+ if( *pos < min_member_size ) return false;
enum { block_size = 16384,
buffer_size = block_size + Lt_size - 1 + Lh_size };
uint8_t buffer[buffer_size];
int bsize = *pos % block_size; /* total bytes in buffer */
- int search_size, rd_size;
- unsigned long long ipos;
- int i;
- if( *pos < min_member_size ) return false;
if( bsize <= buffer_size - block_size ) bsize += block_size;
- search_size = bsize; /* bytes to search for trailer */
- rd_size = bsize; /* bytes to read from file */
- ipos = *pos - rd_size; /* aligned to block_size */
+ int search_size = bsize; /* bytes to search for trailer */
+ int rd_size = bsize; /* bytes to read from file */
+ unsigned long long ipos = *pos - rd_size; /* aligned to block_size */
while( true )
{
- const uint8_t max_msb = ( ipos + search_size ) >> 56;
if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
- { Li_set_errno_error( li, "Error seeking member trailer: " );
- return false; }
+ { Li_set_errno_error( li, "Error seeking member trailer: " ); return false; }
+ const uint8_t max_msb = ( ipos + search_size ) >> 56;
+ int i;
for( i = search_size; i >= Lt_size; --i )
if( buffer[i-1] <= max_msb ) /* most significant byte of member_size */
{
- Lzip_header header;
- const Lzip_header * header2;
const Lzip_trailer * const trailer =
(const Lzip_trailer *)( buffer + i - Lt_size );
const unsigned long long member_size = Lt_get_member_size( *trailer );
- unsigned dictionary_size;
- bool full_h2;
if( member_size == 0 ) /* skip trailing zeros */
{ while( i > Lt_size && buffer[i-9] == 0 ) --i; continue; }
if( member_size > ipos + i || !Lt_verify_consistency( *trailer ) )
continue;
+ Lzip_header header;
if( !Li_read_header( li, fd, header, ipos + i - member_size ) )
return false;
if( !Lh_verify( header ) ) continue;
- header2 = (const Lzip_header *)( buffer + i );
- full_h2 = bsize - i >= Lh_size;
+ const Lzip_header * header2 = (const Lzip_header *)( buffer + i );
+ const bool full_h2 = bsize - i >= Lh_size;
if( Lh_verify_prefix( *header2, bsize - i ) ) /* last member */
{
if( !full_h2 ) add_error( li, "Last member in input file is truncated." );
@@ -183,7 +177,7 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
if( !ignore_trailing )
{ add_error( li, trailing_msg ); li->retval = 2; return false; }
*pos = ipos + i - member_size;
- dictionary_size = Lh_get_dictionary_size( header );
+ const unsigned dictionary_size = Lh_get_dictionary_size( header );
if( li->dictionary_size < dictionary_size )
li->dictionary_size = dictionary_size;
return push_back_member( li, 0, Lt_get_data_size( *trailer ), *pos,
@@ -204,9 +198,6 @@ static bool Li_skip_trailing_data( struct Lzip_index * const li, const int fd,
bool Li_init( struct Lzip_index * const li, const int infd,
const bool ignore_trailing, const bool loose_trailing )
{
- Lzip_header header;
- unsigned long long pos;
- long i;
li->member_vector = 0;
li->error = 0;
li->insize = lseek( infd, 0, SEEK_END );
@@ -223,18 +214,17 @@ bool Li_init( struct Lzip_index * const li, const int infd,
{ add_error( li, "Input file is too long (2^63 bytes or more)." );
li->retval = 2; return false; }
+ Lzip_header header;
if( !Li_read_header( li, infd, header, 0 ) ) return false;
if( Li_check_header_error( li, header ) ) return false;
- pos = li->insize; /* always points to a header or to EOF */
+ unsigned long long pos = li->insize; /* always points to a header or to EOF */
while( pos >= min_member_size )
{
Lzip_trailer trailer;
- unsigned long long member_size;
- unsigned dictionary_size;
if( seek_read( infd, trailer, Lt_size, pos - Lt_size ) != Lt_size )
{ Li_set_errno_error( li, "Error reading member trailer: " ); break; }
- member_size = Lt_get_member_size( trailer );
+ const unsigned long long member_size = Lt_get_member_size( trailer );
if( member_size > pos || !Lt_verify_consistency( trailer ) )
{ /* bad trailer */
if( li->members <= 0 )
@@ -253,7 +243,7 @@ bool Li_init( struct Lzip_index * const li, const int infd,
break;
}
pos -= member_size;
- dictionary_size = Lh_get_dictionary_size( header );
+ const unsigned dictionary_size = Lh_get_dictionary_size( header );
if( li->dictionary_size < dictionary_size )
li->dictionary_size = dictionary_size;
if( !push_back_member( li, 0, Lt_get_data_size( trailer ), pos,
@@ -268,6 +258,7 @@ bool Li_init( struct Lzip_index * const li, const int infd,
return false;
}
Li_reverse_member_vector( li );
+ long i;
for( i = 0; ; ++i )
{
const long long end = block_end( li->member_vector[i].dblock );
diff --git a/lzip_index.h b/lzip_index.h
index 4e9cd44..0938533 100644
--- a/lzip_index.h
+++ b/lzip_index.h
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
diff --git a/main.c b/main.c
index b5e458e..73e29b4 100644
--- a/main.c
+++ b/main.c
@@ -1,5 +1,5 @@
/* Lunzip - Decompressor for the lzip format
- Copyright (C) 2010-2021 Antonio Diaz Diaz.
+ Copyright (C) 2010-2022 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@
Exit status: 0 for a normal exit, 1 for environmental problems
(file not found, invalid flags, I/O errors, etc), 2 to indicate a
corrupt or invalid input file, 3 for an internal consistency error
- (eg, bug) which caused lunzip to panic.
+ (e.g., bug) which caused lunzip to panic.
*/
#define _FILE_OFFSET_BITS 64
@@ -36,9 +36,9 @@
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
-#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
#include <io.h>
-#if defined(__MSVCRT__)
+#if defined __MSVCRT__
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define SIGHUP SIGTERM
@@ -50,7 +50,7 @@
#define S_IWOTH 0
#endif
#endif
-#if defined(__DJGPP__)
+#if defined __DJGPP__
#define S_ISSOCK(x) 0
#define S_ISVTX 0
#endif
@@ -68,10 +68,15 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
+#if ( defined SIZE_MAX && SIZE_MAX < UINT_MAX ) || \
+ ( defined SSIZE_MAX && SSIZE_MAX < INT_MAX )
+#error "Environments where 'size_t' is narrower than 'int' are not supported."
+#endif
+
int verbosity = 0;
static const char * const program_name = "lunzip";
-static const char * const program_year = "2021";
+static const char * const program_year = "2022";
static const char * invocation_name = "lunzip"; /* default value */
static const struct { const char * from; const char * to; } known_extensions[] = {
@@ -130,7 +135,7 @@ static void show_help( void )
"'tar -xf foo.tar.lz' or 'lunzip -cd foo.tar.lz | tar -xf -'.\n"
"\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
"not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
- "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
+ "invalid input file, 3 for an internal consistency error (e.g., bug) which\n"
"caused lunzip to panic.\n"
"\nThe ideas embodied in lunzip are due to (at least) the following people:\n"
"Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
@@ -174,8 +179,6 @@ struct Pretty_print
static void Pp_init( struct Pretty_print * const pp,
const char * const filenames[], const int num_filenames )
{
- unsigned stdin_name_len;
- int i;
pp->name = 0;
pp->padded_name = 0;
pp->stdin_name = "(stdin)";
@@ -183,7 +186,8 @@ static void Pp_init( struct Pretty_print * const pp,
pp->first_post = false;
if( verbosity <= 0 ) return;
- stdin_name_len = strlen( pp->stdin_name );
+ const unsigned stdin_name_len = strlen( pp->stdin_name );
+ int i;
for( i = 0; i < num_filenames; ++i )
{
const char * const s = filenames[i];
@@ -217,16 +221,14 @@ static void Pp_reset( struct Pretty_print * const pp )
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
{
- if( verbosity >= 0 )
+ if( verbosity < 0 ) return; /* guard clause: nothing is printed when verbosity is negative */
+ if( pp->first_post ) /* the padded name has not been printed since the flag was set */
{
- if( pp->first_post )
- {
- pp->first_post = false;
- fputs( pp->padded_name, stderr );
- if( !msg ) fflush( stderr );
- }
- if( msg ) fprintf( stderr, "%s\n", msg );
+ pp->first_post = false; /* print the name only once */
+ fputs( pp->padded_name, stderr );
+ if( !msg ) fflush( stderr ); /* no message follows now: flush so the name shows at once */
}
+ if( msg ) fprintf( stderr, "%s\n", msg ); /* the message, if any, is followed by a newline */
}
@@ -264,17 +266,53 @@ void show_header( const unsigned dictionary_size )
}
-static unsigned long getnum( const char * const ptr,
+/* separate large numbers >= 100_000 in groups of 3 digits using '_' */
+static const char * format_num3( unsigned long long num ) /* returns text held in a static buffer */
+ {
+ const char * const si_prefix = "kMGTPEZY"; /* one letter per exact factor of 1000 removed */
+ const char * const binary_prefix = "KMGTPEZY"; /* one letter per exact factor of 1024 removed; 'i' is appended */
+ enum { buffers = 8, bufsize = 4 * sizeof (long long) };
+ static char buffer[buffers][bufsize]; /* circle of static buffers for printf */
+ static int current = 0;
+ int i;
+ char * const buf = buffer[current++]; current %= buffers; /* take the next buffer; a result is overwritten after 'buffers' further calls */
+ char * p = buf + bufsize - 1; /* fill the buffer backwards */
+ *p = 0; /* terminator */
+ if( num > 1024 ) /* values up to 1024 are always printed in full */
+ {
+ char prefix = 0; /* try binary first, then si */
+ for( i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) /* divide only while num is an exact multiple of 1024 */
+ { num /= 1024; prefix = binary_prefix[i]; }
+ if( prefix ) *(--p) = 'i'; /* filled backwards, so 'i' is stored before the letter */
+ else
+ for( i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) /* no binary prefix fits: try exact multiples of 1000 */
+ { num /= 1000; prefix = si_prefix[i]; }
+ if( prefix ) *(--p) = prefix; /* store the prefix letter (binary or si), if any */
+ }
+ const bool split = num >= 100000; /* decided on the value left after removing any prefix */
+
+ for( i = 0; ; ) /* i counts the digits written since the last '_' */
+ {
+ *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; /* least significant digit first; stop after the last one */
+ if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } /* more digits follow: put '_' between groups of 3 */
+ }
+ return p; /* start of the text inside the static buffer */
+ }
+
+
+static unsigned long getnum( const char * const arg,
+ const char * const option_name,
const unsigned long llimit,
const unsigned long ulimit )
{
- unsigned long result;
char * tail;
errno = 0;
- result = strtoul( ptr, &tail, 0 );
- if( tail == ptr )
+ unsigned long long result = strtoul( arg, &tail, 0 );
+ if( tail == arg )
{
- show_error( "Bad or missing numerical argument.", 0, true );
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Bad or missing numerical argument in "
+ "option '%s'.\n", program_name, option_name );
exit( 1 );
}
@@ -297,7 +335,9 @@ static unsigned long getnum( const char * const ptr,
}
if( exponent <= 0 )
{
- show_error( "Bad multiplier in numerical argument.", 0, true );
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Bad multiplier in numerical argument of "
+ "option '%s'.\n", program_name, option_name );
exit( 1 );
}
for( i = 0; i < exponent; ++i )
@@ -309,21 +349,24 @@ static unsigned long getnum( const char * const ptr,
if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
if( errno )
{
- show_error( "Numerical argument out of limits.", 0, false );
+ if( verbosity >= 0 )
+ fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] "
+ "in option '%s'.\n", program_name, format_num3( llimit ),
+ format_num3( ulimit ), option_name );
exit( 1 );
}
return result;
}
-static int get_dict_size( const char * const arg )
+static int get_dict_size( const char * const arg, const char * const option_name ) /* 'option_name' is only forwarded to getnum */
{
char * tail;
const long bits = strtol( arg, &tail, 0 );
if( bits >= min_dictionary_bits &&
bits <= max_dictionary_bits && *tail == 0 )
return 1 << bits;
- return getnum( arg, min_dictionary_size, max_dictionary_size );
+ return getnum( arg, option_name, min_dictionary_size, max_dictionary_size ); /* not a bare bit count in range: parse 'arg' as a number within the dictionary size limits */
}
@@ -468,7 +511,7 @@ static bool check_tty_in( const char * const input_filename, const int infd,
if( isatty( infd ) ) /* for example /dev/tty */
{ show_file_error( input_filename,
"I won't read compressed data from a terminal.", 0 );
- close( infd ); set_retval( retval, 1 );
+ close( infd ); set_retval( retval, 2 );
if( program_mode != m_test ) cleanup_and_fail( *retval );
return false; }
return true;
@@ -559,12 +602,9 @@ static int decompress( const unsigned long long cfile_size, const int infd,
for( first_member = true; ; first_member = false )
{
- int result, size;
- unsigned dictionary_size;
Lzip_header header;
- struct LZ_decoder decoder;
Rd_reset_member_position( &rdec );
- size = Rd_read_data( &rdec, header, Lh_size );
+ const int size = Rd_read_data( &rdec, header, Lh_size );
if( Rd_finished( &rdec ) ) /* End Of File */
{
if( first_member )
@@ -594,20 +634,21 @@ static int decompress( const unsigned long long cfile_size, const int infd,
if( !Lh_verify_version( header ) )
{ Pp_show_msg( pp, bad_version( Lh_version( header ) ) );
retval = 2; break; }
- dictionary_size = Lh_get_dictionary_size( header );
+ const unsigned dictionary_size = Lh_get_dictionary_size( header );
if( !isvalid_ds( dictionary_size ) )
{ Pp_show_msg( pp, bad_dict_msg ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
Pp_show_msg( pp, 0 );
+ struct LZ_decoder decoder;
if( !LZd_init( &decoder, &rdec, buffer_size, dictionary_size, outfd ) )
{
Pp_show_msg( pp, "Not enough memory. Try a smaller output buffer size." );
retval = 1; break;
}
show_dprogress( cfile_size, partial_file_pos, &rdec, pp ); /* init */
- result = LZd_decode_member( &decoder, pp );
+ const int result = LZd_decode_member( &decoder, pp );
partial_file_pos += Rd_member_position( &rdec );
LZd_free( &decoder );
if( result != 0 )
@@ -696,23 +737,15 @@ void show_dprogress( const unsigned long long cfile_size,
int main( const int argc, const char * const argv[] )
{
const char * default_output_filename = "";
- static struct Arg_parser parser; /* static because valgrind complains */
- static struct Pretty_print pp; /* and memory management in C sucks */
- static const char ** filenames = 0;
- int num_filenames = 0;
unsigned buffer_size = max_dictionary_size;
enum Mode program_mode = m_compress;
- int argind = 0;
- int failed_tests = 0;
- int retval = 0;
int i;
- bool filenames_given = false;
bool force = false;
bool ignore_trailing = true;
bool keep_input_files = false;
bool loose_trailing = false;
- bool stdin_used = false;
bool to_stdout = false;
+ if( argc > 0 ) invocation_name = argv[0];
enum { opt_lt = 256 };
const struct ap_Option options[] =
@@ -734,19 +767,22 @@ int main( const int argc, const char * const argv[] )
{ opt_lt, "loose-trailing", ap_no },
{ 0 , 0, ap_no } };
- if( argc > 0 ) invocation_name = argv[0];
CRC32_init();
+ /* static because valgrind complains and memory management in C sucks */
+ static struct Arg_parser parser;
if( !ap_init( &parser, argc, argv, options, 0 ) )
{ show_error( mem_msg, 0, false ); return 1; }
if( ap_error( &parser ) ) /* bad option */
{ show_error( ap_error( &parser ), 0, true ); return 1; }
+ int argind = 0;
for( ; argind < ap_arguments( &parser ); ++argind )
{
const int code = ap_code( &parser, argind );
- const char * const arg = ap_argument( &parser, argind );
if( !code ) break; /* no more options */
+ const char * const pn = ap_parsed_name( &parser, argind );
+ const char * const arg = ap_argument( &parser, argind );
switch( code )
{
case 'a': ignore_trailing = false; break;
@@ -761,7 +797,7 @@ int main( const int argc, const char * const argv[] )
else { default_output_filename = arg; } break;
case 'q': verbosity = -1; break;
case 't': set_mode( &program_mode, m_test ); break;
- case 'u': buffer_size = get_dict_size( arg ); break;
+ case 'u': buffer_size = get_dict_size( arg, pn ); break;
case 'v': if( verbosity < 4 ) ++verbosity; break;
case 'V': show_version(); return 0;
case opt_lt: loose_trailing = true; break;
@@ -769,15 +805,17 @@ int main( const int argc, const char * const argv[] )
}
} /* end process options */
-#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
+#if defined __MSVCRT__ || defined __OS2__ || defined __DJGPP__
setmode( STDIN_FILENO, O_BINARY );
setmode( STDOUT_FILENO, O_BINARY );
#endif
- num_filenames = max( 1, ap_arguments( &parser ) - argind );
+ static const char ** filenames = 0;
+ int num_filenames = max( 1, ap_arguments( &parser ) - argind );
filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] );
filenames[0] = "-";
+ bool filenames_given = false;
for( i = 0; argind + i < ap_arguments( &parser ); ++i )
{
filenames[i] = ap_argument( &parser, argind + i );
@@ -816,17 +854,18 @@ int main( const int argc, const char * const argv[] )
if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
set_signals( signal_handler );
+ static struct Pretty_print pp;
Pp_init( &pp, filenames, num_filenames );
+ int failed_tests = 0;
+ int retval = 0;
const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
+ bool stdin_used = false;
for( i = 0; i < num_filenames; ++i )
{
- unsigned long long cfile_size;
const char * input_filename = "";
int infd;
- int tmp;
struct stat in_stats;
- const struct stat * in_statsp;
Pp_set_name( &pp, filenames[i] );
if( strcmp( filenames[i], "-" ) == 0 )
@@ -872,11 +911,13 @@ int main( const int argc, const char * const argv[] )
}
}
- in_statsp = ( input_filename[0] && one_to_one ) ? &in_stats : 0;
- cfile_size = ( input_filename[0] && S_ISREG( in_stats.st_mode ) ) ?
- ( in_stats.st_size + 99 ) / 100 : 0;
- tmp = decompress( cfile_size, infd, &pp, buffer_size, ignore_trailing,
- loose_trailing, program_mode == m_test );
+ const struct stat * const in_statsp =
+ ( input_filename[0] && one_to_one ) ? &in_stats : 0;
+ const unsigned long long cfile_size =
+ ( input_filename[0] && S_ISREG( in_stats.st_mode ) ) ?
+ ( in_stats.st_size + 99 ) / 100 : 0;
+ int tmp = decompress( cfile_size, infd, &pp, buffer_size, ignore_trailing,
+ loose_trailing, program_mode == m_test );
if( close( infd ) != 0 )
{ show_file_error( pp.name, "Error closing input file", errno );
set_retval( &tmp, 1 ); }
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 19928dd..c495ba1 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lunzip - Decompressor for the lzip format
-# Copyright (C) 2010-2021 Antonio Diaz Diaz.
+# Copyright (C) 2010-2022 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute, and modify it.
@@ -91,6 +91,7 @@ rm -f uin.lz || framework_failure
printf "LZIP\001-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\002-.............................." | "${LZIP}" -t 2> /dev/null
printf "LZIP\001+.............................." | "${LZIP}" -t 2> /dev/null
+rm -f out || framework_failure
printf "\ntesting decompression..."
@@ -114,19 +115,23 @@ lines=$("${LZIP}" -tvv "${in_em}" 2>&1 | wc -l) || test_failed $LINENO
lines=$("${LZIP}" -lvv "${in_em}" | wc -l) || test_failed $LINENO
[ "${lines}" -eq 11 ] || test_failed $LINENO "${lines}"
+"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
cat "${in_lz}" > copy.lz || framework_failure
"${LZIP}" -dk copy.lz || test_failed $LINENO
cmp in copy || test_failed $LINENO
-printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -d copy.lz 2> /dev/null
+cat fox > copy || framework_failure
+cat "${in_lz}" > out.lz || framework_failure
+rm -f out || framework_failure
+"${LZIP}" -d copy.lz out.lz 2> /dev/null # skip copy, decompress out
[ $? = 1 ] || test_failed $LINENO
+cmp fox copy || test_failed $LINENO
+cmp in out || test_failed $LINENO
"${LZIP}" -df copy.lz || test_failed $LINENO
[ ! -e copy.lz ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
+rm -f out || framework_failure
printf "to be overwritten" > copy || framework_failure
-"${LZIP}" -d -o copy < "${in_lz}" 2> /dev/null
-[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -df -o copy < "${in_lz}" || test_failed $LINENO
cmp in copy || test_failed $LINENO
rm -f out copy || framework_failure
@@ -154,7 +159,7 @@ rm -f copy anyothername.out || framework_failure
[ $? = 1 ] || test_failed $LINENO
"${LZIP}" -cdq in "${in_lz}" > copy
[ $? = 2 ] || test_failed $LINENO
-cat copy in | cmp in - || test_failed $LINENO
+cat copy in | cmp in - || test_failed $LINENO # copy must be empty
"${LZIP}" -cdq nx_file.lz "${in_lz}" > copy
[ $? = 1 ] || test_failed $LINENO
cmp in copy || test_failed $LINENO
@@ -207,6 +212,7 @@ printf "\ngarbage" >> copy2.lz || framework_failure
printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -df copy2.lz || test_failed $LINENO
cmp in2 copy2 || test_failed $LINENO
+rm -f copy2 || framework_failure
for i in 12 5120 6Ki 29 512KiB ; do
printf "to be overwritten" > copy || framework_failure
@@ -215,12 +221,12 @@ for i in 12 5120 6Ki 29 512KiB ; do
rm -f copy || framework_failure
"${LZIP}" -d -u$i -o copy "${in_lz}" || test_failed $LINENO $i
cmp in copy || test_failed $LINENO $i
- rm -f copy2 || framework_failure
"${LZIP}" -d -u$i -o copy2 "${in_lz}" "${in_lz}" ||
test_failed $LINENO $i
cmp in2 copy2 || test_failed $LINENO $i
+ rm -f copy2 || framework_failure
done
-rm -f in2 copy copy2 || framework_failure
+rm -f in2 copy || framework_failure
printf "\ntesting bad input..."
@@ -285,7 +291,6 @@ for i in fox_v2.lz fox_s11.lz fox_de20.lz \
[ $? = 2 ] || test_failed $LINENO $i
done
-"${LZIP}" -cd "${fox_lz}" > fox || test_failed $LINENO
for i in fox_bcrc.lz fox_crc0.lz fox_das46.lz fox_mes81.lz ; do
"${LZIP}" -cdq "${testdir}"/$i > out
[ $? = 2 ] || test_failed $LINENO $i