From 28b75c3e8bd03f49ef5bd6caa4e385761ea502ea Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 06:05:06 +0100 Subject: Merging upstream version 1.5~rc2. Signed-off-by: Daniel Baumann --- ChangeLog | 7 ++- INSTALL | 2 +- Makefile.in | 6 +-- NEWS | 4 +- README | 28 +++++----- carg_parser.c | 2 +- carg_parser.h | 2 +- configure | 6 +-- decoder.c | 121 +++++++++++++++++++++--------------------- decoder.h | 153 ++++++++++++++++++++++++++--------------------------- doc/lunzip.1 | 4 +- lzip.h | 88 +++++++++--------------------- main.c | 47 ++++++---------- testsuite/check.sh | 4 +- 14 files changed, 217 insertions(+), 257 deletions(-) diff --git a/ChangeLog b/ChangeLog index df75519..1ad704f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2014-01-21 Antonio Diaz Diaz + + * Version 1.5-rc2 released. + * Minor optimizations. + 2013-10-30 Antonio Diaz Diaz * Version 1.5-rc1 released. @@ -43,7 +48,7 @@ * Created from the decompression code of clzip 1.1. -Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index 5f44118..62f4988 100644 --- a/INSTALL +++ b/INSTALL @@ -58,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index e04819f..28e7809 100644 --- a/Makefile.in +++ b/Makefile.in @@ -37,8 +37,8 @@ doc : man info : $(VPATH)/doc/$(pkgname).info -$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo - cd $(VPATH)/doc && makeinfo $(pkgname).texinfo +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi man : $(VPATH)/doc/$(progname).1 @@ -87,7 +87,7 @@ uninstall-man : dist : doc ln -sf $(VPATH) $(DISTNAME) - tar -cvf $(DISTNAME).tar \ + tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ $(DISTNAME)/AUTHORS \ $(DISTNAME)/COPYING \ $(DISTNAME)/ChangeLog \ diff --git a/NEWS b/NEWS index cefaf2d..ef7657d 100644 --- a/NEWS +++ b/NEWS @@ -3,4 +3,6 @@ Changes in version 1.5: The new option "-u, --buffer-size", which activates a "low memory" decompression mode, has been added. -File date, permissions, and ownership are now copied like "cp -p" does. +Copying of file dates, permissions, and ownership now behaves like "cp -p". +(If the user ID or the group ID can't be duplicated, the file permission +bits S_ISUID and S_ISGID are cleared). diff --git a/README b/README index a64ac21..3a06da8 100644 --- a/README +++ b/README @@ -12,7 +12,12 @@ little memory as 50 kB, irrespective of the dictionary size used to compress the file. Of course, the smaller the output buffer size used in relation to the dictionary size, the more accesses to disk are needed and the slower the decompression is. This "low memory" mode only works -when decompressing to a regular file. +when decompressing to a regular file and is intended for systems without +enough memory (RAM + swap) to keep the whole dictionary at once. + +The amount of memory required by lunzip to decompress a file is about +46 kB larger than the dictionary size used to compress that file, unless +the "--buffer-size" option is specified. The lzip file format is designed for long-term data archiving. It is clean, provides very safe 4 factor integrity checking, and is backed by @@ -23,12 +28,6 @@ bzip2, which makes it safer than decompressors returning ambiguous warning values (like gunzip) when it is used as a back end for tar or zutils. -Lunzip replaces every file given in the command line with a decompressed -version of itself. Each decompressed file has the same modification -date, permissions, and, when possible, ownership as the corresponding -compressed file. Lunzip is able to read from some types of non regular -files if the "--stdout" option is specified. - Lunzip attempts to guess the name for the decompressed file from that of the compressed file as follows: @@ -36,6 +35,15 @@ filename.lz becomes filename filename.tlz becomes filename.tar anyothername becomes anyothername.out +Decompressing a file is much like copying or moving it; therefore lunzip +preserves the access and modification dates, permissions, and, when +possible, ownership of the file just as "cp -p" does. (If the user ID or +the group ID can't be duplicated, the file permission bits S_ISUID and +S_ISGID are cleared). + +Lunzip is able to read from some types of non regular files if the +"--stdout" option is specified. + If no file names are specified, lunzip decompresses from standard input to standard output. In this case, lunzip will decline to read compressed input from a terminal. @@ -45,10 +53,6 @@ two or more compressed files. The result is the concatenation of the corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. -The amount of memory required by lunzip to decompress a file is about -46 kB larger than the dictionary size used to compress that file, unless -the "--buffer-size" option is specified. - The ideas embodied in lunzip are due to (at least) the following people: Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of @@ -56,7 +60,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 378b5e3..1dfcb2b 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/carg_parser.h b/carg_parser.h index 41aa7b3..b153da6 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/configure b/configure index e23fea5..e2097c8 100755 --- a/configure +++ b/configure @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Lunzip - Decompressor for lzip files -# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lunzip -pkgversion=1.5-rc1 +pkgversion=1.5-rc2 progname=lunzip srctrigger=doc/${progname}.1 @@ -165,7 +165,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lunzip - Decompressor for lzip files -# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission diff --git a/decoder.c b/decoder.c index 369dc86..b9eb2cd 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for lzip files - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,7 +62,7 @@ static int readblock( const int fd, uint8_t * const buf, const int size ) const int n = read( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; else if( n == 0 ) break; /* EOF */ - else if( errno != EINTR && errno != EAGAIN ) break; + else if( errno != EINTR ) break; errno = 0; } return size - rest; @@ -80,7 +80,7 @@ static int writeblock( const int fd, const uint8_t * const buf, const int size ) { const int n = write( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; - else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; + else if( n < 0 && errno != EINTR ) break; errno = 0; } return size - rest; @@ -111,31 +111,33 @@ bool Rd_read_block( struct Range_decoder * const rdec ) } -void LZd_flush_data( struct LZ_decoder * const decoder ) +void LZd_flush_data( struct LZ_decoder * const d ) { - if( decoder->pos > decoder->stream_pos ) + if( d->pos > d->stream_pos ) { - const int size = decoder->pos - decoder->stream_pos; - CRC32_update_buf( &decoder->crc, decoder->buffer + decoder->stream_pos, size ); - if( decoder->outfd >= 0 && - writeblock( decoder->outfd, decoder->buffer + decoder->stream_pos, size ) != size ) + const int size = d->pos - d->stream_pos; + CRC32_update_buf( &d->crc, d->buffer + d->stream_pos, size ); + if( d->outfd >= 0 && + writeblock( d->outfd, d->buffer + d->stream_pos, size ) != size ) { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } - if( decoder->pos >= decoder->buffer_size ) - { decoder->partial_data_pos += decoder->pos; decoder->pos = 0; } - decoder->stream_pos = decoder->pos; + if( d->pos >= d->buffer_size ) + { d->partial_data_pos += d->pos; d->pos = 0; } + d->stream_pos = d->pos; } } -bool LZd_verify_trailer( struct LZ_decoder * const decoder, +bool LZd_verify_trailer( struct LZ_decoder * const d, struct Pretty_print * const pp ) { File_trailer trailer; - const unsigned long long member_size = - Rd_member_position( decoder->rdec ) + Ft_size; + const unsigned long long member_size = Rd_member_position( d->rdec ) + Ft_size; + unsigned long long trailer_data_size; + unsigned long long trailer_member_size; + unsigned trailer_crc; bool error = false; - int size = Rd_read_data( decoder->rdec, trailer, Ft_size ); + int size = Rd_read_data( d->rdec, trailer, Ft_size ); if( size < Ft_size ) { error = true; @@ -148,63 +150,65 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, while( size < Ft_size ) trailer[size++] = 0; } - if( decoder->rdec->code != 0 ) + if( d->rdec->code != 0 ) { error = true; Pp_show_msg( pp, "Range decoder final code is not zero" ); } - if( Ft_get_data_crc( trailer ) != LZd_crc( decoder ) ) + trailer_crc = Ft_get_data_crc( trailer ); + if( trailer_crc != LZd_crc( d ) ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n", - Ft_get_data_crc( trailer ), LZd_crc( decoder ) ); + trailer_crc, LZd_crc( d ) ); } } - if( Ft_get_data_size( trailer ) != LZd_data_position( decoder ) ) + trailer_data_size = Ft_get_data_size( trailer ); + if( trailer_data_size != LZd_data_position( d ) ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n", - Ft_get_data_size( trailer ), LZd_data_position( decoder ), LZd_data_position( decoder ) ); + trailer_data_size, LZd_data_position( d ), LZd_data_position( d ) ); } } - if( Ft_get_member_size( trailer ) != member_size ) + trailer_member_size = Ft_get_member_size( trailer ); + if( trailer_member_size != member_size ) { error = true; if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n", - Ft_get_member_size( trailer ), member_size, member_size ); + trailer_member_size, member_size, member_size ); } } - if( !error && verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 ) + if( !error && verbosity >= 2 && LZd_data_position( d ) > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", - (double)LZd_data_position( decoder ) / member_size, - ( 8.0 * member_size ) / LZd_data_position( decoder ), - 100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( decoder ) ) ) ); + (double)LZd_data_position( d ) / member_size, + ( 8.0 * member_size ) / LZd_data_position( d ), + 100.0 * ( 1.0 - ( (double)member_size / LZd_data_position( d ) ) ) ); if( !error && verbosity >= 4 ) fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ", - Ft_get_data_crc( trailer ), - Ft_get_data_size( trailer ), Ft_get_member_size( trailer ) ); + trailer_crc, trailer_data_size, trailer_member_size ); return !error; } /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found. */ -int LZd_decode_member( struct LZ_decoder * const decoder, +int LZd_decode_member( struct LZ_decoder * const d, struct Pretty_print * const pp ) { - struct Range_decoder * const rdec = decoder->rdec; + struct Range_decoder * const rdec = d->rdec; void (* const copy_block) - ( struct LZ_decoder * const decoder, const int distance, int len ) = - ( decoder->buffer_size >= decoder->dictionary_size ) ? + ( struct LZ_decoder * const d, const int distance, int len ) = + ( d->buffer_size >= (int)d->dictionary_size ) ? &LZd_copy_block : &LZd_copy_block2; unsigned rep0 = 0; /* rep[0-3] latest four distances */ unsigned rep1 = 0; /* used for efficient coding of */ @@ -215,37 +219,37 @@ int LZd_decode_member( struct LZ_decoder * const decoder, Rd_load( rdec ); while( !Rd_finished( rdec ) ) { - const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( Rd_decode_bit( rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ + const int pos_state = LZd_data_position( d ) & pos_state_mask; + if( Rd_decode_bit( rdec, &d->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { - const uint8_t prev_byte = LZd_get_prev_byte( decoder ); + const uint8_t prev_byte = LZd_get_prev_byte( d ); if( St_is_char( state ) ) { state -= ( state < 4 ) ? state : 3; - LZd_put_byte( decoder, Rd_decode_tree( rdec, - decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); + LZd_put_byte( d, Rd_decode_tree( rdec, + d->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { state -= ( state < 10 ) ? 3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( rdec, - decoder->bm_literal[get_lit_state(prev_byte)], - LZd_get_byte( decoder, rep0 ) ) ); + LZd_put_byte( d, Rd_decode_matched( rdec, + d->bm_literal[get_lit_state(prev_byte)], + LZd_get_byte( d, rep0 ) ) ); } } else { int len; - if( Rd_decode_bit( rdec, &decoder->bm_rep[state] ) != 0 ) /* 2nd bit */ + if( Rd_decode_bit( rdec, &d->bm_rep[state] ) != 0 ) /* 2nd bit */ { - if( Rd_decode_bit( rdec, &decoder->bm_rep0[state] ) != 0 ) /* 3rd bit */ + if( Rd_decode_bit( rdec, &d->bm_rep0[state] ) != 0 ) /* 3rd bit */ { unsigned distance; - if( Rd_decode_bit( rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */ + if( Rd_decode_bit( rdec, &d->bm_rep1[state] ) == 0 ) /* 4th bit */ distance = rep1; else { - if( Rd_decode_bit( rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */ + if( Rd_decode_bit( rdec, &d->bm_rep2[state] ) == 0 ) /* 5th bit */ distance = rep2; else { distance = rep3; rep3 = rep2; } @@ -256,19 +260,19 @@ int LZd_decode_member( struct LZ_decoder * const decoder, } else { - if( Rd_decode_bit( rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + if( Rd_decode_bit( rdec, &d->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ { state = St_set_short_rep( state ); - LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; } + LZd_put_byte( d, LZd_get_byte( d, rep0 ) ); continue; } } state = St_set_rep( state ); - len = min_match_len + Rd_decode_len( rdec, &decoder->rep_len_model, pos_state ); + len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); } else { int dis_slot; const unsigned rep0_saved = rep0; - len = min_match_len + Rd_decode_len( rdec, &decoder->match_len_model, pos_state ); - dis_slot = Rd_decode_tree6( rdec, decoder->bm_dis_slot[get_len_state(len)] ); + len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else { @@ -276,19 +280,19 @@ int LZd_decode_member( struct LZ_decoder * const decoder, rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) rep0 += Rd_decode_tree_reversed( rdec, - decoder->bm_dis + rep0 - dis_slot - 1, direct_bits ); + d->bm_dis + rep0 - dis_slot - 1, direct_bits ); else { rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += Rd_decode_tree_reversed4( rdec, decoder->bm_align ); + rep0 += Rd_decode_tree_reversed4( rdec, d->bm_align ); if( rep0 == 0xFFFFFFFFU ) /* Marker found */ { rep0 = rep0_saved; Rd_normalize( rdec ); - LZd_flush_data( decoder ); + LZd_flush_data( d ); if( len == min_match_len ) /* End Of Stream marker */ { - if( LZd_verify_trailer( decoder, pp ) ) return 0; else return 3; + if( LZd_verify_trailer( d, pp ) ) return 0; else return 3; } if( len == min_match_len + 1 ) /* Sync Flush marker */ { @@ -305,13 +309,12 @@ int LZd_decode_member( struct LZ_decoder * const decoder, } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state = St_set_match( state ); - if( rep0 >= (unsigned)decoder->dictionary_size || - rep0 >= LZd_data_position( decoder ) ) - { LZd_flush_data( decoder ); return 1; } + if( rep0 >= d->dictionary_size || rep0 >= LZd_data_position( d ) ) + { LZd_flush_data( d ); return 1; } } - copy_block( decoder, rep0, len ); + copy_block( d, rep0, len ); } } - LZd_flush_data( decoder ); + LZd_flush_data( d ); return 2; } diff --git a/decoder.h b/decoder.h index 118fe23..3b97014 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for lzip files - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -180,9 +180,9 @@ static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] ) { int model = 1; - int symbol = 0; - int bit = Rd_decode_bit( rdec, &bm[model] ); - model = (model << 1) + bit; symbol |= bit; + int symbol = Rd_decode_bit( rdec, &bm[model] ); + int bit; + model = (model << 1) + symbol; bit = Rd_decode_bit( rdec, &bm[model] ); model = (model << 1) + bit; symbol |= (bit << 1); bit = Rd_decode_bit( rdec, &bm[model] ); @@ -196,8 +196,7 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, { Bit_model * const bm1 = bm + 0x100; int symbol = 1; - int i; - for( i = 7; i >= 0; --i ) + while( symbol < 0x100 ) { int match_bit, bit; match_byte <<= 1; @@ -211,7 +210,7 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, break; } } - return symbol - 0x100; + return symbol & 0xFF; } static inline int Rd_decode_len( struct Range_decoder * const rdec, @@ -231,7 +230,8 @@ static inline int Rd_decode_len( struct Range_decoder * const rdec, struct LZ_decoder { unsigned long long partial_data_pos; - int dictionary_size; + struct Range_decoder * rdec; + unsigned dictionary_size; int buffer_size; uint8_t * buffer; /* output buffer */ int pos; /* current pos in buffer */ @@ -250,128 +250,123 @@ struct LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - struct Range_decoder * rdec; struct Len_model match_len_model; struct Len_model rep_len_model; }; -void LZd_flush_data( struct LZ_decoder * const decoder ); +void LZd_flush_data( struct LZ_decoder * const d ); -bool LZd_verify_trailer( struct LZ_decoder * const decoder, +bool LZd_verify_trailer( struct LZ_decoder * const d, struct Pretty_print * const pp ); int seek_read( const int fd, uint8_t * const buf, const int size, const int offset ); -static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder ) +static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d ) { - const int i = - ( ( decoder->pos > 0 ) ? decoder->pos : decoder->buffer_size ) - 1; - return decoder->buffer[i]; + const int i = ( ( d->pos > 0 ) ? d->pos : d->buffer_size ) - 1; + return d->buffer[i]; } -static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder, +static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d, const int distance ) { uint8_t b; - const int i = decoder->pos - distance - 1; - if( i >= 0 ) b = decoder->buffer[i]; - else if( i + decoder->buffer_size >= decoder->pos ) - b = decoder->buffer[i+decoder->buffer_size]; - else if( seek_read( decoder->outfd, &b, 1, i - decoder->stream_pos ) != 1 ) + const int i = d->pos - distance - 1; + if( i >= 0 ) b = d->buffer[i]; + else if( i + d->buffer_size >= d->pos ) + b = d->buffer[i+d->buffer_size]; + else if( seek_read( d->outfd, &b, 1, i - d->stream_pos ) != 1 ) { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } return b; } -static inline void LZd_put_byte( struct LZ_decoder * const decoder, - const uint8_t b ) +static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b ) { - decoder->buffer[decoder->pos] = b; - if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder ); + d->buffer[d->pos] = b; + if( ++d->pos >= d->buffer_size ) LZd_flush_data( d ); } -static inline void LZd_copy_block( struct LZ_decoder * const decoder, +static inline void LZd_copy_block( struct LZ_decoder * const d, const int distance, int len ) { - int i = decoder->pos - distance - 1; - if( i < 0 ) i += decoder->buffer_size; - if( len < decoder->buffer_size - max( decoder->pos, i ) && - len <= abs( decoder->pos - i ) ) /* no wrap, no overlap */ + int i = d->pos - distance - 1; + if( i < 0 ) i += d->buffer_size; + if( len < d->buffer_size - max( d->pos, i ) && len <= abs( d->pos - i ) ) { - memcpy( decoder->buffer + decoder->pos, decoder->buffer + i, len ); - decoder->pos += len; + memcpy( d->buffer + d->pos, d->buffer + i, len ); /* no wrap, no overlap */ + d->pos += len; } else for( ; len > 0; --len ) { - decoder->buffer[decoder->pos] = decoder->buffer[i]; - if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder ); - if( ++i >= decoder->buffer_size ) i = 0; + d->buffer[d->pos] = d->buffer[i]; + if( ++d->pos >= d->buffer_size ) LZd_flush_data( d ); + if( ++i >= d->buffer_size ) i = 0; } } -static inline void LZd_copy_block2( struct LZ_decoder * const decoder, +static inline void LZd_copy_block2( struct LZ_decoder * const d, const int distance, int len ) { - if( distance < decoder->buffer_size ) /* block is in buffer */ - { LZd_copy_block( decoder, distance, len ); return; } - if( len < decoder->buffer_size - decoder->pos ) /* no wrap */ + if( distance < d->buffer_size ) /* block is in buffer */ + { LZd_copy_block( d, distance, len ); return; } + if( len < d->buffer_size - d->pos ) /* no wrap */ { - const int offset = decoder->pos - decoder->stream_pos - distance - 1; + const int offset = d->pos - d->stream_pos - distance - 1; if( len <= -offset ) /* block is in file */ { - if( seek_read( decoder->outfd, decoder->buffer + decoder->pos, len, offset ) != len ) + if( seek_read( d->outfd, d->buffer + d->pos, len, offset ) != len ) { show_error( "Seek error", errno, false ); cleanup_and_fail( 1 ); } - decoder->pos += len; + d->pos += len; return; } } for( ; len > 0; --len ) - LZd_put_byte( decoder, LZd_get_byte( decoder, distance ) ); + LZd_put_byte( d, LZd_get_byte( d, distance ) ); } -static inline bool LZd_init( struct LZ_decoder * const decoder, - const File_header header, +static inline bool LZd_init( struct LZ_decoder * const d, struct Range_decoder * const rde, - const int buffer_size, const int ofd ) + const int buffer_size, + const int dict_size, const int ofd ) { - decoder->partial_data_pos = 0; - decoder->dictionary_size = Fh_get_dictionary_size( header ); - decoder->buffer_size = - min( buffer_size, max( 65536, decoder->dictionary_size ) ); - decoder->buffer = (uint8_t *)malloc( decoder->buffer_size ); - if( !decoder->buffer ) return false; - decoder->pos = 0; - decoder->stream_pos = 0; - decoder->crc = 0xFFFFFFFFU; - decoder->outfd = ofd; - - Bm_array_init( decoder->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( decoder->bm_match[0], states * pos_states ); - Bm_array_init( decoder->bm_rep, states ); - Bm_array_init( decoder->bm_rep0, states ); - Bm_array_init( decoder->bm_rep1, states ); - Bm_array_init( decoder->bm_rep2, states ); - Bm_array_init( decoder->bm_len[0], states * pos_states ); - Bm_array_init( decoder->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( decoder->bm_align, dis_align_size ); - - decoder->rdec = rde; - Lm_init( &decoder->match_len_model ); - Lm_init( &decoder->rep_len_model ); - decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */ + d->partial_data_pos = 0; + d->rdec = rde; + d->dictionary_size = dict_size; + d->buffer_size = min( buffer_size, max( 65536, dict_size ) ); + d->buffer = (uint8_t *)malloc( d->buffer_size ); + if( !d->buffer ) return false; + d->pos = 0; + d->stream_pos = 0; + d->crc = 0xFFFFFFFFU; + d->outfd = ofd; + + Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( d->bm_match[0], states * pos_states ); + Bm_array_init( d->bm_rep, states ); + Bm_array_init( d->bm_rep0, states ); + Bm_array_init( d->bm_rep1, states ); + Bm_array_init( d->bm_rep2, states ); + Bm_array_init( d->bm_len[0], states * pos_states ); + Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( d->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( d->bm_align, dis_align_size ); + + Lm_init( &d->match_len_model ); + Lm_init( &d->rep_len_model ); + d->buffer[d->buffer_size-1] = 0; /* prev_byte of first_byte */ return true; } -static inline void LZd_free( struct LZ_decoder * const decoder ) - { free( decoder->buffer ); } +static inline void LZd_free( struct LZ_decoder * const d ) + { free( d->buffer ); } -static inline unsigned LZd_crc( const struct LZ_decoder * const decoder ) - { return decoder->crc ^ 0xFFFFFFFFU; } +static inline unsigned LZd_crc( const struct LZ_decoder * const d ) + { return d->crc ^ 0xFFFFFFFFU; } static inline unsigned long long -LZd_data_position( const struct LZ_decoder * const decoder ) - { return decoder->partial_data_pos + decoder->pos; } +LZd_data_position( const struct LZ_decoder * const d ) + { return d->partial_data_pos + d->pos; } -int LZd_decode_member( struct LZ_decoder * const decoder, +int LZd_decode_member( struct LZ_decoder * const d, struct Pretty_print * const pp ); diff --git a/doc/lunzip.1 b/doc/lunzip.1 index 76b51e1..4557839 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH LUNZIP "1" "October 2013" "Lunzip 1.5-rc1" "User Commands" +.TH LUNZIP "1" "January 2014" "Lunzip 1.5-rc2" "User Commands" .SH NAME Lunzip \- decompressor for lzip files .SH SYNOPSIS @@ -66,7 +66,7 @@ Report bugs to lzip\-bug@nongnu.org .br Lunzip home page: http://www.nongnu.org/lzip/lunzip.html .SH COPYRIGHT -Copyright \(co 2013 Antonio Diaz Diaz. +Copyright \(co 2014 Antonio Diaz Diaz. License GPLv3+: GNU GPL version 3 or later .br This is free software: you are free to change and redistribute it. diff --git a/lzip.h b/lzip.h index a8640bf..39bde22 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for lzip files - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -116,12 +116,29 @@ struct Pretty_print { const char * name; const char * stdin_name; - int longest_name; + unsigned longest_name; bool first_post; }; -void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames ); +static inline void Pp_init( struct Pretty_print * const pp, + const char * const filenames[], const int num_filenames ) + { + unsigned stdin_name_len; + int i; + pp->name = 0; + pp->stdin_name = "(stdin)"; + pp->longest_name = 0; + pp->first_post = false; + stdin_name_len = strlen( pp->stdin_name ); + + for( i = 0; i < num_filenames; ++i ) + { + const char * const s = filenames[i]; + const unsigned len = (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ); + if( len > pp->longest_name ) pp->longest_name = len; + } + if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; + } static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) @@ -154,11 +171,9 @@ static inline void CRC32_init( void ) } } -static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) - { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } - static inline void CRC32_update_buf( uint32_t * const crc, - const uint8_t * const buffer, const int size ) + const uint8_t * const buffer, + const int size ) { int i; for( i = 0; i < size; ++i ) @@ -166,14 +181,6 @@ static inline void CRC32_update_buf( uint32_t * const crc, } -static inline int real_bits( unsigned value ) - { - int bits = 0; - while( value > 0 ) { value >>= 1; ++bits; } - return bits; - } - - static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ typedef uint8_t File_header[6]; /* 0-3 magic bytes */ @@ -181,9 +188,6 @@ typedef uint8_t File_header[6]; /* 0-3 magic bytes */ /* 5 coded_dict_size */ enum { Fh_size = 6 }; -static inline void Fh_set_magic( File_header data ) - { memcpy( data, magic_string, 4 ); data[4] = 1; } - static inline bool Fh_verify_magic( const File_header data ) { return ( memcmp( data, magic_string, 4 ) == 0 ); } @@ -201,25 +205,6 @@ static inline unsigned Fh_get_dictionary_size( const File_header data ) return sz; } -static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz ) - { - if( sz >= min_dictionary_size && sz <= max_dictionary_size ) - { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const unsigned base_size = 1 << data[5]; - const unsigned wedge = base_size / 16; - int i; - for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; - } - return false; - } - typedef uint8_t File_trailer[20]; /* 0-3 CRC32 of the uncompressed data */ @@ -231,45 +216,24 @@ enum { Ft_size = 20 }; static inline unsigned Ft_get_data_crc( const File_trailer data ) { unsigned tmp = 0; - int i; - for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + int i; for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) - { - int i; - for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } - } - static inline unsigned long long Ft_get_data_size( const File_trailer data ) { unsigned long long tmp = 0; - int i; - for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + int i; for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - static inline unsigned long long Ft_get_member_size( const File_trailer data ) { unsigned long long tmp = 0; - int i; - for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + int i; for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } -static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - /* defined in main.c */ extern int verbosity; diff --git a/main.c b/main.c index 1862ffd..70bcb37 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* Lunzip - Decompressor for lzip files - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -65,7 +65,7 @@ const char * const Program_name = "Lunzip"; const char * const program_name = "lunzip"; -const char * const program_year = "2013"; +const char * const program_year = "2014"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -130,14 +130,14 @@ static void show_version( void ) } -static void show_header( const File_header header ) +static void show_header( const unsigned dictionary_size ) { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; enum { factor = 1024 }; const char * p = ""; const char * np = " "; - unsigned num = Fh_get_dictionary_size( header ), i; + unsigned num = dictionary_size, i; bool exact = ( num % factor == 0 ); for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) @@ -231,7 +231,9 @@ static int extension_index( const char * const name ) static int open_instream( const char * const name, struct stat * const in_statsp, const bool no_ofile ) { - int infd = open( name, O_RDONLY | O_BINARY ); + int infd; + do infd = open( name, O_RDONLY | O_BINARY ); + while( infd < 0 && errno == EINTR ); if( infd < 0 ) { if( verbosity >= 0 ) @@ -303,7 +305,8 @@ static bool open_outstream( const bool force ) int flags = O_APPEND | O_CREAT | O_RDWR | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; - outfd = open( output_filename, flags, outfd_mode ); + do outfd = open( output_filename, flags, outfd_mode ); + while( outfd < 0 && errno == EINTR ); if( outfd < 0 && verbosity >= 0 ) { if( errno == EEXIST ) @@ -379,6 +382,7 @@ static int decompress( const int buffer_size, const int infd, for( first_member = true; ; first_member = false ) { int result; + unsigned dictionary_size; File_header header; struct LZ_decoder decoder; Rd_reset_member_position( &rdec ); @@ -404,15 +408,17 @@ static int decompress( const int buffer_size, const int infd, Fh_version( header ) ); } retval = 2; break; } - if( Fh_get_dictionary_size( header ) < min_dictionary_size || - Fh_get_dictionary_size( header ) > max_dictionary_size ) + dictionary_size = Fh_get_dictionary_size( header ); + if( dictionary_size < min_dictionary_size || + dictionary_size > max_dictionary_size ) { Pp_show_msg( pp, "Invalid dictionary size in member header" ); retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); } + { Pp_show_msg( pp, 0 ); + if( verbosity >= 3 ) show_header( dictionary_size ); } - if( !LZd_init( &decoder, header, &rdec, buffer_size, outfd ) ) + if( !LZd_init( &decoder, &rdec, buffer_size, dictionary_size, outfd ) ) { show_error( "Not enough memory. Try a smaller output buffer size.", 0, false ); cleanup_and_fail( 1 ); @@ -459,27 +465,6 @@ static void set_signals( void ) } -void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames ) - { - unsigned stdin_name_len; - int i; - pp->name = 0; - pp->stdin_name = "(stdin)"; - pp->longest_name = 0; - pp->first_post = false; - stdin_name_len = strlen( pp->stdin_name ); - - for( i = 0; i < num_filenames; ++i ) - { - const char * const s = filenames[i]; - const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) ); - if( len > pp->longest_name ) pp->longest_name = len; - } - if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; - } - - void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity >= 0 ) diff --git a/testsuite/check.sh b/testsuite/check.sh index 52d5122..21199b7 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lunzip - Decompressor for lzip files -# Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -33,6 +33,8 @@ if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqu4095 "${in_lz}" > /dev/null if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cqu513MiB "${in_lz}" > /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -tq in if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -tq < in -- cgit v1.2.3