diff options
-rw-r--r-- | AUTHORS | 6 | ||||
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | Makefile.in | 4 | ||||
-rw-r--r-- | NEWS | 7 | ||||
-rw-r--r-- | README | 26 | ||||
-rwxr-xr-x | configure | 2 | ||||
-rw-r--r-- | decoder.c | 17 | ||||
-rw-r--r-- | decoder.h | 41 | ||||
-rw-r--r-- | doc/lunzip.1 | 19 | ||||
-rw-r--r-- | main.c | 140 | ||||
-rwxr-xr-x | testsuite/check.sh | 20 |
11 files changed, 245 insertions, 43 deletions
@@ -1 +1,7 @@ Lunzip was written by Antonio Diaz Diaz. + +The ideas embodied in lunzip are due to (at least) the following people: +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for +the definition of Markov chains), G.N.N. Martin (for the definition of +range encoding), Igor Pavlov (for putting all the above together in +LZMA), and Julian Seward (for bzip2's CLI). @@ -1,3 +1,9 @@ +2013-10-30 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.5-rc1 released. + * main.c: Added new option '-u, --buffer-size' (low memory mode). + * main.c (close_and_set_permissions): Behave like 'cp -p'. + 2013-09-17 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.4 released. diff --git a/Makefile.in b/Makefile.in index 307943b..e04819f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,8 +1,8 @@ DISTNAME = $(pkgname)-$(pkgversion) INSTALL = install -INSTALL_PROGRAM = $(INSTALL) -p -m 755 -INSTALL_DATA = $(INSTALL) -p -m 644 +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 SHELL = /bin/sh @@ -1,5 +1,6 @@ -Changes in version 1.4: +Changes in version 1.5: -File version is no more shown in status messages. +The new option "-u, --buffer-size", which activates a "low memory" +decompression mode, has been added. -Minor fixes. +File date, permissions, and ownership are now copied like "cp -p" does. @@ -5,13 +5,23 @@ small size makes it well suited for embedded devices or software installers that need to decompress files but do not need compression capabilities. Lunzip is fully compatible with lzip-1.4 or newer. +If the size of the output buffer is specified with the "--buffer-size" +option, lunzip uses the decompressed file as dictionary for distances +beyond the buffer size and is able to decompress any file using as +little memory as 50 kB, irrespective of the dictionary size used to +compress the file. Of course, the smaller the output buffer size used in +relation to the dictionary size, the more accesses to disk are needed +and the slower the decompression is. This "low memory" mode only works +when decompressing to a regular file. + The lzip file format is designed for long-term data archiving. It is clean, provides very safe 4 factor integrity checking, and is backed by the recovery capabilities of lziprecover. Lunzip uses the same well-defined exit status values used by lzip and -bzip2, which makes it safer when used in pipes or scripts than -decompressors returning ambiguous warning values, like gunzip. +bzip2, which makes it safer than decompressors returning ambiguous +warning values (like gunzip) when it is used as a back end for tar or +zutils. Lunzip replaces every file given in the command line with a decompressed version of itself. Each decompressed file has the same modification @@ -35,9 +45,15 @@ two or more compressed files. The result is the concatenation of the corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. -The amount of memory required by lunzip to decompress a file is only a -few tens of KiB larger than the dictionary size used to compress that -file. +The amount of memory required by lunzip to decompress a file is about +46 kB larger than the dictionary size used to compress that file, unless +the "--buffer-size" option is specified. + +The ideas embodied in lunzip are due to (at least) the following people: +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for +the definition of Markov chains), G.N.N. Martin (for the definition of +range encoding), Igor Pavlov (for putting all the above together in +LZMA), and Julian Seward (for bzip2's CLI). Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. @@ -6,7 +6,7 @@ # to copy, distribute and modify it. pkgname=lunzip -pkgversion=1.4 +pkgversion=1.5-rc1 progname=lunzip srctrigger=doc/${progname}.1 @@ -87,6 +87,15 @@ static int writeblock( const int fd, const uint8_t * const buf, const int size ) } +int seek_read( const int fd, uint8_t * const buf, const int size, + const int offset ) + { + if( lseek( fd, offset, SEEK_END ) >= 0 ) + return readblock( fd, buf, size ); + return 0; + } + + bool Rd_read_block( struct Range_decoder * const rdec ) { if( !rdec->at_stream_end ) @@ -193,6 +202,10 @@ int LZd_decode_member( struct LZ_decoder * const decoder, struct Pretty_print * const pp ) { struct Range_decoder * const rdec = decoder->rdec; + void (* const copy_block) + ( struct LZ_decoder * const decoder, const int distance, int len ) = + ( decoder->buffer_size >= decoder->dictionary_size ) ? + &LZd_copy_block : &LZd_copy_block2; unsigned rep0 = 0; /* rep[0-3] latest four distances */ unsigned rep1 = 0; /* used for efficient coding of */ unsigned rep2 = 0; /* repeated distances */ @@ -293,10 +306,10 @@ int LZd_decode_member( struct LZ_decoder * const decoder, rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state = St_set_match( state ); if( rep0 >= (unsigned)decoder->dictionary_size || - ( rep0 >= (unsigned)decoder->pos && !decoder->partial_data_pos ) ) + rep0 >= LZd_data_position( decoder ) ) { LZd_flush_data( decoder ); return 1; } } - LZd_copy_block( decoder, rep0, len ); + copy_block( decoder, rep0, len ); } } LZd_flush_data( decoder ); @@ -85,6 +85,7 @@ static inline void Rd_load( struct Range_decoder * const rdec ) for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; + rdec->code &= rdec->range; /* make sure that first byte is discarded */ } static inline void Rd_normalize( struct Range_decoder * const rdec ) @@ -259,6 +260,9 @@ void LZd_flush_data( struct LZ_decoder * const decoder ); bool LZd_verify_trailer( struct LZ_decoder * const decoder, struct Pretty_print * const pp ); +int seek_read( const int fd, uint8_t * const buf, const int size, + const int offset ); + static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder ) { const int i = @@ -269,9 +273,14 @@ static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder, const int distance ) { - int i = decoder->pos - distance - 1; - if( i < 0 ) i += decoder->buffer_size; - return decoder->buffer[i]; + uint8_t b; + const int i = decoder->pos - distance - 1; + if( i >= 0 ) b = decoder->buffer[i]; + else if( i + decoder->buffer_size >= decoder->pos ) + b = decoder->buffer[i+decoder->buffer_size]; + else if( seek_read( decoder->outfd, &b, 1, i - decoder->stream_pos ) != 1 ) + { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } + return b; } static inline void LZd_put_byte( struct LZ_decoder * const decoder, @@ -300,13 +309,35 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, } } +static inline void LZd_copy_block2( struct LZ_decoder * const decoder, + const int distance, int len ) + { + if( distance < decoder->buffer_size ) /* block is in buffer */ + { LZd_copy_block( decoder, distance, len ); return; } + if( len < decoder->buffer_size - decoder->pos ) /* no wrap */ + { + const int offset = decoder->pos - decoder->stream_pos - distance - 1; + if( len <= -offset ) /* block is in file */ + { + if( seek_read( decoder->outfd, decoder->buffer + decoder->pos, len, offset ) != len ) + { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } + decoder->pos += len; + return; + } + } + for( ; len > 0; --len ) + LZd_put_byte( decoder, LZd_get_byte( decoder, distance ) ); + } + static inline bool LZd_init( struct LZ_decoder * const decoder, const File_header header, - struct Range_decoder * const rde, const int ofd ) + struct Range_decoder * const rde, + const int buffer_size, const int ofd ) { decoder->partial_data_pos = 0; decoder->dictionary_size = Fh_get_dictionary_size( header ); - decoder->buffer_size = max( 65536, decoder->dictionary_size ); + decoder->buffer_size = + min( buffer_size, max( 65536, decoder->dictionary_size ) ); decoder->buffer = (uint8_t *)malloc( decoder->buffer_size ); if( !decoder->buffer ) return false; decoder->pos = 0; diff --git a/doc/lunzip.1 b/doc/lunzip.1 index 0d10c1d..76b51e1 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,12 +1,24 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LUNZIP "1" "September 2013" "Lunzip 1.4" "User Commands" +.TH LUNZIP "1" "October 2013" "Lunzip 1.5-rc1" "User Commands" .SH NAME Lunzip \- decompressor for lzip files .SH SYNOPSIS .B lunzip [\fIoptions\fR] [\fIfiles\fR] .SH DESCRIPTION -Lunzip \- Decompressor for lzip files. +Lunzip is a decompressor for lzip files. It is written in C and its +small size makes it well suited for embedded devices or software +installers that need to decompress files but do not need compression +capabilities. Lunzip is fully compatible with lzip\-1.4 or newer. +.PP +If the size of the output buffer is specified with the '\-\-buffer\-size' +option, lunzip uses the decompressed file as dictionary for distances +beyond the buffer size and is able to decompress any file using as +little memory as 50 kB, irrespective of the dictionary size used to +compress the file. Of course, the smaller the output buffer size used in +relation to the dictionary size, the more accesses to disk are needed +and the slower the decompression is. This 'low memory' mode only works +when decompressing to a regular file. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -36,6 +48,9 @@ suppress all messages \fB\-t\fR, \fB\-\-test\fR test compressed file integrity .TP +\fB\-u\fR, \fB\-\-buffer\-size=\fR<bytes> +set output buffer size in bytes +.TP \fB\-v\fR, \fB\-\-verbose\fR be verbose (a 2nd \fB\-v\fR gives more) .PP @@ -54,6 +54,10 @@ #include "lzip.h" #include "decoder.h" +#ifndef O_BINARY +#define O_BINARY 0 +#endif + #if CHAR_BIT != 8 #error "Environments where CHAR_BIT != 8 are not supported." #endif @@ -64,12 +68,6 @@ const char * const program_name = "lunzip"; const char * const program_year = "2013"; const char * invocation_name = 0; -#ifdef O_BINARY -const int o_binary = O_BINARY; -#else -const int o_binary = 0; -#endif - struct { const char * from; const char * to; } const known_extensions[] = { { ".lz", "" }, { ".tlz", ".tar" }, @@ -86,7 +84,18 @@ bool delete_output_on_interrupt = false; static void show_help( void ) { - printf( "%s - Decompressor for lzip files.\n", Program_name ); + printf( "Lunzip is a decompressor for lzip files. It is written in C and its\n" + "small size makes it well suited for embedded devices or software\n" + "installers that need to decompress files but do not need compression\n" + "capabilities. Lunzip is fully compatible with lzip-1.4 or newer.\n" + "\nIf the size of the output buffer is specified with the '--buffer-size'\n" + "option, lunzip uses the decompressed file as dictionary for distances\n" + "beyond the buffer size and is able to decompress any file using as\n" + "little memory as 50 kB, irrespective of the dictionary size used to\n" + "compress the file. Of course, the smaller the output buffer size used in\n" + "relation to the dictionary size, the more accesses to disk are needed\n" + "and the slower the decompression is. This 'low memory' mode only works\n" + "when decompressing to a regular file.\n" ); printf( "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -98,6 +107,7 @@ static void show_help( void ) " -o, --output=<file> if reading stdin, place the output into <file>\n" " -q, --quiet suppress all messages\n" " -t, --test test compressed file integrity\n" + " -u, --buffer-size=<bytes> set output buffer size in bytes\n" " -v, --verbose be verbose (a 2nd -v gives more)\n" "If no file names are given, lunzip decompresses from standard input to\n" "standard output.\n" @@ -137,6 +147,73 @@ static void show_header( const File_header header ) } +static unsigned long getnum( const char * const ptr, + const unsigned long llimit, + const unsigned long ulimit ) + { + unsigned long result; + char * tail; + errno = 0; + result = strtoul( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + exit( 1 ); + } + + if( !errno && tail[0] ) + { + int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0, i; + bool bad_multiplier = false; + switch( tail[0] ) + { + case ' ': break; + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; else bad_multiplier = true; + break; + case 'k': if( factor == 1000 ) exponent = 1; else bad_multiplier = true; + break; + default : bad_multiplier = true; + } + if( bad_multiplier ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + exit( 1 ); + } + for( i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits.", 0, false ); + exit( 1 ); + } + return result; + } + + +static int get_dict_size( const char * const arg ) + { + char * tail; + int bits = strtol( arg, &tail, 0 ); + if( bits >= min_dictionary_bits && + bits <= max_dictionary_bits && *tail == 0 ) + return ( 1 << bits ); + return getnum( arg, min_dictionary_size, max_dictionary_size ); + } + + static int extension_index( const char * const name ) { int i; @@ -152,9 +229,9 @@ static int extension_index( const char * const name ) static int open_instream( const char * const name, struct stat * const in_statsp, - const bool testing, const bool to_stdout ) + const bool no_ofile ) { - int infd = open( name, O_RDONLY | o_binary ); + int infd = open( name, O_RDONLY | O_BINARY ); if( infd < 0 ) { if( verbosity >= 0 ) @@ -168,7 +245,6 @@ static int open_instream( const char * const name, struct stat * const in_statsp const bool can_read = ( i == 0 && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - const bool no_ofile = to_stdout || testing; if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) { if( verbosity >= 0 ) @@ -224,7 +300,7 @@ static void set_d_outname( const char * const name, const int i ) static bool open_outstream( const bool force ) { - int flags = O_CREAT | O_WRONLY | o_binary; + int flags = O_APPEND | O_CREAT | O_RDWR | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open( output_filename, flags, outfd_mode ); @@ -263,10 +339,14 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) bool warning = false; if( in_statsp ) { + const mode_t mode = in_statsp->st_mode; /* fchown will in many cases return with EPERM, which can be safely ignored. */ - if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && - errno != EPERM ) || - fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true; + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; } if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); outfd = -1; @@ -283,8 +363,8 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) } -static int decompress( const int infd, struct Pretty_print * const pp, - const bool testing ) +static int decompress( const int buffer_size, const int infd, + struct Pretty_print * const pp, const bool testing ) { unsigned long long partial_file_pos = 0; struct Range_decoder rdec; @@ -292,7 +372,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, bool first_member; if( !Rd_init( &rdec, infd ) ) { - show_error( "Not enough memory. Find a machine with more memory.", 0, false ); + show_error( "Not enough memory.", 0, false ); cleanup_and_fail( 1 ); } @@ -332,9 +412,9 @@ static int decompress( const int infd, struct Pretty_print * const pp, if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); } - if( !LZd_init( &decoder, header, &rdec, outfd ) ) + if( !LZd_init( &decoder, header, &rdec, buffer_size, outfd ) ) { - show_error( "Not enough memory. Find a machine with more memory.", 0, false ); + show_error( "Not enough memory. Try a smaller output buffer size.", 0, false ); cleanup_and_fail( 1 ); } result = LZd_decode_member( &decoder, pp ); @@ -431,6 +511,7 @@ int main( const int argc, const char * const argv[] ) const char * default_output_filename = ""; const char ** filenames = 0; int num_filenames = 0; + int buffer_size = max_dictionary_size; int infd = -1; int argind = 0; int retval = 0; @@ -453,6 +534,7 @@ int main( const int argc, const char * const argv[] ) { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, { 't', "test", ap_no }, + { 'u', "buffer-size", ap_yes }, { 'v', "verbose", ap_no }, { 'V', "version", ap_no }, { 0 , 0, ap_no } }; @@ -463,7 +545,7 @@ int main( const int argc, const char * const argv[] ) CRC32_init(); if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( "Memory exhausted.", 0, false ); return 1; } + { show_error( "Not enough memory.", 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } @@ -483,6 +565,7 @@ int main( const int argc, const char * const argv[] ) case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 't': testing = true; break; + case 'u': buffer_size = get_dict_size( arg ); break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; default : internal_error( "uncaught option" ); @@ -507,6 +590,19 @@ int main( const int argc, const char * const argv[] ) if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true; } + if( buffer_size < max_dictionary_size ) + { + if( to_stdout || testing ) + { show_error( "'--buffer-size' is incompatible with '--stdout' and '--test'.", 0, false ); + return 1; } + if( !default_output_filename[0] ) + for( i = 0; i < num_filenames; ++i ) + if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 ) + { show_error( "Output file must be specified when decompressing from stdin with a\n" + " reduced buffer size.", 0, false ); + return 1; } + } + if( !to_stdout && !testing && ( filenames_given || default_output_filename[0] ) ) set_signals(); @@ -547,7 +643,7 @@ int main( const int argc, const char * const argv[] ) else { input_filename = filenames[i]; - infd = open_instream( input_filename, &in_stats, testing, to_stdout ); + infd = open_instream( input_filename, &in_stats, to_stdout || testing ); if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; } if( !testing ) { @@ -576,7 +672,7 @@ int main( const int argc, const char * const argv[] ) delete_output_on_interrupt = true; in_statsp = input_filename[0] ? &in_stats : 0; Pp_set_name( &pp, input_filename ); - tmp = decompress( infd, &pp, testing ); + tmp = decompress( buffer_size, infd, &pp, testing ); if( tmp > retval ) retval = tmp; if( tmp && !testing ) cleanup_and_fail( retval ); diff --git a/testsuite/check.sh b/testsuite/check.sh index c4f612e..52d5122 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -27,6 +27,12 @@ fail=0 printf "testing lunzip-%s..." "$2" +"${LZIP}" -cqu-1 "${in_lz}" > /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cqu0 "${in_lz}" > /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cqu4095 "${in_lz}" > /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -tq in if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -tq < in @@ -45,11 +51,23 @@ if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi cmp in copy || fail=1 printf . +cat "${in_lz}" > copy.lz || framework_failure +"${LZIP}" -df copy.lz || fail=1 +cmp in copy || fail=1 +printf . + printf "to be overwritten" > copy || framework_failure "${LZIP}" -df -o copy < "${in_lz}" || fail=1 cmp in copy || fail=1 printf . +for i in 12 4096 4Ki 29 512KiB ; do + printf "to be overwritten" > copy || framework_failure + "${LZIP}" -df -u$i -o copy < "${in_lz}" || fail=1 + cmp in copy || fail=1 + printf . +done + cat "${in_lz}" > anyothername || framework_failure "${LZIP}" -d anyothername || fail=1 cmp in anyothername.out || fail=1 @@ -65,7 +83,7 @@ printf . printf "garbage" >> copy2.lz || framework_failure printf "to be overwritten" > copy2 || framework_failure -"${LZIP}" -dfk copy2.lz || fail=1 +"${LZIP}" -df copy2.lz || fail=1 cmp in2 copy2 || fail=1 printf . |