From e83684e70d693debd8ec1beee56507507ccd1ec9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 06:00:36 +0100 Subject: Adding upstream version 1.3~rc1. Signed-off-by: Daniel Baumann --- ChangeLog | 8 ++ INSTALL | 7 +- Makefile.in | 4 +- NEWS | 11 ++- README | 12 +++ carg_parser.c | 10 +-- carg_parser.h | 2 +- configure | 28 ++++-- decoder.c | 71 +++++++-------- decoder.h | 71 ++++++--------- doc/lunzip.1 | 7 +- lunzip.h | 263 ------------------------------------------------------ lzip.h | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.c | 23 +++-- 14 files changed, 423 insertions(+), 375 deletions(-) delete mode 100644 lunzip.h create mode 100644 lzip.h diff --git a/ChangeLog b/ChangeLog index b08eb00..c999a79 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2013-05-18 Antonio Diaz Diaz + + * Version 1.3-rc1 released. + * Decompression time has been reduced by 1%. + * main.c (show_header): Show header version if verbosity >= 4. + * Ignore option '-n, --threads' for compatibility with plzip. + * configure: Options now accept a separate argument. + 2013-02-18 Antonio Diaz Diaz * Version 1.2 released. diff --git a/INSTALL b/INSTALL index 53f1df3..2ec1487 100644 --- a/INSTALL +++ b/INSTALL @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.7.2 and 3.3.6, but the code should compile with any +I use gcc 4.8.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -36,8 +36,9 @@ the main archive. typing 'make install-bin', 'make install-info' or 'make install-man' respectively. -5a. Type 'make install-as-lzip' to install the program and any data - files and documentation, and link the program to the name 'lzip'. + Instead of 'make install', you can type 'make install-as-lzip' to + install the program and any data files and documentation, and link + the program to the name 'lzip'. 
Another way diff --git a/Makefile.in b/Makefile.in index 378f626..7fc59ac 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,8 +29,8 @@ main.o : main.c $(objs) : Makefile carg_parser.o : carg_parser.h -decoder.o : lunzip.h decoder.h -main.o : carg_parser.h lunzip.h decoder.h +decoder.o : lzip.h decoder.h +main.o : carg_parser.h lzip.h decoder.h doc : man diff --git a/NEWS b/NEWS index b9ed0f4..037f35b 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,10 @@ -Changes in version 1.2: +Changes in version 1.3: -Decompression time has been reduced by 12%. +Decompression time has been reduced by 1%. -The target "install-as-lzip" has been added to the Makefile. +File version is now shown only if verbosity >= 4. -The target "install-bin" has been added to the Makefile. +Option "-n, --threads" is now accepted and ignored for compatibility +with plzip. + +"configure" now accepts options with a separate argument. diff --git a/README b/README index 217fb0a..f02c51b 100644 --- a/README +++ b/README @@ -5,6 +5,18 @@ small size makes it well suited for embedded devices or software installers that need to decompress files but do not need compression capabilities. +Lunzip uses the same well-defined exit status values used by bzip2, +which makes it safer when used in pipes or scripts than compressors +returning ambiguous warning values, like gzip. + +The 4 factor integrity checking of the lzip format guarantees that the +decompressed version of the data is identical to the original. This +guards against corruption of the compressed data, and against undetected +bugs in lunzip (hopefully very unlikely). The chances of data corruption +going undetected are microscopic. Be aware, though, that the check +occurs upon decompression, so it can only tell you that something is +wrong. It can't help you recover the original uncompressed data. + If you ever need to recover data from a damaged lzip file, try the lziprecover program. 
diff --git a/carg_parser.c b/carg_parser.c index 973bb7e..a86f76f 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -89,15 +89,14 @@ static char parse_long_option( struct Arg_parser * const ap, int * const argindp ) { unsigned len; - int index = -1; - int i; + int index = -1, i; char exact = 0, ambig = 0; for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; /* Test all long options for either exact match or abbreviated matches. */ for( i = 0; options[i].code != 0; ++i ) - if( options[i].name && !strncmp( options[i].name, &opt[2], len ) ) + if( options[i].name && strncmp( options[i].name, &opt[2], len ) == 0 ) { if( strlen( options[i].name ) == len ) /* Exact match found */ { index = i; exact = 1; break; } @@ -165,8 +164,7 @@ static char parse_short_option( struct Arg_parser * const ap, while( cind > 0 ) { - int index = -1; - int i; + int index = -1, i; const unsigned char code = opt[cind]; char code_str[2]; code_str[0] = code; code_str[1] = 0; diff --git a/carg_parser.h b/carg_parser.h index 3575dd7..41aa7b3 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/configure b/configure index 703fe5d..5ce6cc3 100755 --- a/configure +++ b/configure @@ -5,12 +5,10 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. 
-args= -no_create= pkgname=lunzip -pkgversion=1.2 +pkgversion=1.3-rc1 progname=lunzip -srctrigger=lunzip.h +srctrigger=doc/lunzip.1 # clear some things potentially inherited from environment. LC_ALL=C @@ -36,10 +34,12 @@ if [ ! -x /bin/gcc ] && fi # Loop over all args -while [ -n "$1" ] ; do +args= +no_create= +while [ $# != 0 ] ; do # Get the first arg, and shuffle - option=$1 + option=$1 ; arg2=no shift # Add the argument quoted to args @@ -74,6 +74,14 @@ while [ -n "$1" ] ; do --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --srcdir=*) srcdir=${optarg} ;; --prefix=*) prefix=${optarg} ;; --exec-prefix=*) exec_prefix=${optarg} ;; @@ -93,6 +101,14 @@ while [ -n "$1" ] ; do echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 exit 1 ;; esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to \"${option}\"" 1>&2 + exit 1 + fi + fi done # Find the source files, if location was not specified. 
diff --git a/decoder.c b/decoder.c index 25f7c73..55259be 100644 --- a/decoder.c +++ b/decoder.c @@ -25,7 +25,7 @@ #include #include -#include "lunzip.h" +#include "lzip.h" #include "decoder.h" @@ -124,10 +124,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, File_trailer trailer; const int trailer_size = Ft_versioned_size( decoder->member_version ); const unsigned long long member_size = - Rd_member_position( decoder->range_decoder ) + trailer_size; + Rd_member_position( decoder->rdec ) + trailer_size; bool error = false; - int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size ); + int size = Rd_read_data( decoder->rdec, trailer, trailer_size ); if( size < trailer_size ) { error = true; @@ -139,8 +139,10 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, } while( size < trailer_size ) trailer[size++] = 0; } + if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - if( decoder->range_decoder->code != 0 ) + + if( decoder->rdec->code != 0 ) { error = true; Pp_show_msg( pp, "Range decoder final code is not zero" ); @@ -175,7 +177,7 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, Ft_get_member_size( trailer ), member_size, member_size ); } } - if( !error && pp->verbosity >= 3 && LZd_data_position( decoder ) > 0 && member_size > 0 ) + if( !error && pp->verbosity >= 2 && LZd_data_position( decoder ) > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. 
", (double)LZd_data_position( decoder ) / member_size, ( 8.0 * member_size ) / LZd_data_position( decoder ), @@ -193,88 +195,87 @@ bool LZd_verify_trailer( struct LZ_decoder * const decoder, int LZd_decode_member( struct LZ_decoder * const decoder, struct Pretty_print * const pp ) { + struct Range_decoder * const rdec = decoder->rdec; unsigned rep0 = 0; /* rep[0-3] latest four distances */ unsigned rep1 = 0; /* used for efficient coding of */ unsigned rep2 = 0; /* repeated distances */ unsigned rep3 = 0; - State state = 0; - Rd_load( decoder->range_decoder ); - while( !Rd_finished( decoder->range_decoder ) ) + Rd_load( rdec ); + while( !Rd_finished( rdec ) ) { const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 ) + if( Rd_decode_bit( rdec, &decoder->bm_match[state][pos_state] ) == 0 ) /* 1st bit */ { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); if( St_is_char( state ) ) { state -= ( state < 4 ) ? state : 3; - LZd_put_byte( decoder, Rd_decode_tree( decoder->range_decoder, + LZd_put_byte( decoder, Rd_decode_tree( rdec, decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { state -= ( state < 10 ) ? 
3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( decoder->range_decoder, - decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, rep0 ) ) ); + LZd_put_byte( decoder, Rd_decode_matched( rdec, + decoder->bm_literal[get_lit_state(prev_byte)], + LZd_get_byte( decoder, rep0 ) ) ); } } else { int len; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep[state] ) == 1 ) + if( Rd_decode_bit( rdec, &decoder->bm_rep[state] ) == 1 ) /* 2nd bit */ { - len = 0; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep0[state] ) == 1 ) + if( Rd_decode_bit( rdec, &decoder->bm_rep0[state] ) == 0 ) /* 3rd bit */ + { + if( Rd_decode_bit( rdec, &decoder->bm_len[state][pos_state] ) == 0 ) /* 4th bit */ + { state = St_set_short_rep( state ); + LZd_put_byte( decoder, LZd_get_byte( decoder, rep0 ) ); continue; } + } + else { unsigned distance; - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep1[state] ) == 0 ) + if( Rd_decode_bit( rdec, &decoder->bm_rep1[state] ) == 0 ) /* 4th bit */ distance = rep1; else { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_rep2[state] ) == 0 ) + if( Rd_decode_bit( rdec, &decoder->bm_rep2[state] ) == 0 ) /* 5th bit */ distance = rep2; - else { distance = rep3; rep3 = rep2; } + else + { distance = rep3; rep3 = rep2; } rep2 = rep1; } rep1 = rep0; rep0 = distance; } - else - { - if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_len[state][pos_state] ) == 0 ) - { state = St_set_short_rep( state ); len = 1; } - } - if( len == 0 ) - { - state = St_set_rep( state ); - len = min_match_len + Led_decode( &decoder->rep_match_len_decoder, decoder->range_decoder, pos_state ); - } + state = St_set_rep( state ); + len = min_match_len + Rd_decode_len( rdec, &decoder->rep_len_model, pos_state ); } else { int dis_slot; const unsigned rep0_saved = rep0; - len = min_match_len + Led_decode( &decoder->len_decoder, decoder->range_decoder, pos_state ); - dis_slot = Rd_decode_tree6( decoder->range_decoder, 
decoder->bm_dis_slot[get_dis_state(len)] ); + len = min_match_len + Rd_decode_len( rdec, &decoder->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( rdec, decoder->bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - rep0 += Rd_decode_tree_reversed( decoder->range_decoder, + rep0 += Rd_decode_tree_reversed( rdec, decoder->bm_dis + rep0 - dis_slot - 1, direct_bits ); else { - rep0 += Rd_decode( decoder->range_decoder, direct_bits - dis_align_bits ) << dis_align_bits; - rep0 += Rd_decode_tree_reversed4( decoder->range_decoder, decoder->bm_align ); + rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; + rep0 += Rd_decode_tree_reversed4( rdec, decoder->bm_align ); if( rep0 == 0xFFFFFFFFU ) /* Marker found */ { rep0 = rep0_saved; - Rd_normalize( decoder->range_decoder ); + Rd_normalize( rdec ); LZd_flush_data( decoder ); if( len == min_match_len ) /* End Of Stream marker */ { @@ -282,7 +283,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder, } if( len == min_match_len + 1 ) /* Sync Flush marker */ { - Rd_load( decoder->range_decoder ); continue; + Rd_load( rdec ); continue; } if( pp->verbosity >= 0 ) { diff --git a/decoder.h b/decoder.h index 29b164a..7bf78bc 100644 --- a/decoder.h +++ b/decoder.h @@ -140,24 +140,24 @@ static inline int Rd_decode_bit( struct Range_decoder * const rdec, static inline int Rd_decode_tree( struct Range_decoder * const rdec, Bit_model bm[], const int num_bits ) { - int model = 1; + int symbol = 1; int i; for( i = num_bits; i > 0; --i ) - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << num_bits); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << num_bits); } static inline int Rd_decode_tree6( struct Range_decoder * const rdec, Bit_model bm[] ) { - int model = 1; 
- model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - model = ( model << 1 ) | Rd_decode_bit( rdec, &bm[model] ); - return model - (1 << 6); + int symbol = 1; + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); + return symbol - (1 << 6); } static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, @@ -213,36 +213,17 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, return symbol - 0x100; } - -struct Len_decoder - { - Bit_model choice1; - Bit_model choice2; - Bit_model bm_low[pos_states][len_low_symbols]; - Bit_model bm_mid[pos_states][len_mid_symbols]; - Bit_model bm_high[len_high_symbols]; - }; - -static inline void Led_init( struct Len_decoder * const len_decoder ) - { - Bm_init( &len_decoder->choice1 ); - Bm_init( &len_decoder->choice2 ); - Bm_array_init( len_decoder->bm_low[0], pos_states * len_low_symbols ); - Bm_array_init( len_decoder->bm_mid[0], pos_states * len_mid_symbols ); - Bm_array_init( len_decoder->bm_high, len_high_symbols ); - } - -static inline int Led_decode( struct Len_decoder * const len_decoder, - struct Range_decoder * const rdec, - const int pos_state ) +static inline int Rd_decode_len( struct Range_decoder * const rdec, + struct Len_model * const lm, + const int pos_state ) { - if( Rd_decode_bit( rdec, &len_decoder->choice1 ) == 0 ) - return Rd_decode_tree( rdec, len_decoder->bm_low[pos_state], len_low_bits ); 
- if( Rd_decode_bit( rdec, &len_decoder->choice2 ) == 0 ) + if( Rd_decode_bit( rdec, &lm->choice1 ) == 0 ) + return Rd_decode_tree( rdec, lm->bm_low[pos_state], len_low_bits ); + if( Rd_decode_bit( rdec, &lm->choice2 ) == 0 ) return len_low_symbols + - Rd_decode_tree( rdec, len_decoder->bm_mid[pos_state], len_mid_bits ); + Rd_decode_tree( rdec, lm->bm_mid[pos_state], len_mid_bits ); return len_low_symbols + len_mid_symbols + - Rd_decode_tree( rdec, len_decoder->bm_high, len_high_bits ); + Rd_decode_tree( rdec, lm->bm_high, len_high_bits ); } @@ -269,9 +250,9 @@ struct LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - struct Range_decoder * range_decoder; - struct Len_decoder len_decoder; - struct Len_decoder rep_match_len_decoder; + struct Range_decoder * rdec; + struct Len_model match_len_model; + struct Len_model rep_len_model; }; void LZd_flush_data( struct LZ_decoder * const decoder ); @@ -322,7 +303,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, static inline bool LZd_init( struct LZ_decoder * const decoder, const File_header header, - struct Range_decoder * const rdec, const int ofd ) + struct Range_decoder * const rde, const int ofd ) { decoder->partial_data_pos = 0; decoder->dictionary_size = Fh_get_dictionary_size( header ); @@ -346,9 +327,9 @@ static inline bool LZd_init( struct LZ_decoder * const decoder, Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); Bm_array_init( decoder->bm_align, dis_align_size ); - decoder->range_decoder = rdec; - Led_init( &decoder->len_decoder ); - Led_init( &decoder->rep_match_len_decoder ); + decoder->rdec = rde; + Lm_init( &decoder->match_len_model ); + Lm_init( &decoder->rep_len_model ); decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */ return true; } diff --git a/doc/lunzip.1 b/doc/lunzip.1 index 820aa8b..8fe9741 100644 --- a/doc/lunzip.1 +++ b/doc/lunzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! 
It was generated by help2man 1.37.1. -.TH LUNZIP "1" "February 2013" "Lunzip 1.2" "User Commands" +.TH LUNZIP "1" "May 2013" "Lunzip 1.3-rc1" "User Commands" .SH NAME Lunzip \- small decompressor for lzip files .SH SYNOPSIS @@ -41,6 +41,11 @@ be verbose (a 2nd \fB\-v\fR gives more) .PP If no file names are given, lunzip decompresses from standard input to standard output. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems (file +not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or +invalid input file, 3 for an internal consistency error (eg, bug) which +caused lunzip to panic. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br diff --git a/lunzip.h b/lunzip.h deleted file mode 100644 index 4437ec6..0000000 --- a/lunzip.h +++ /dev/null @@ -1,263 +0,0 @@ -/* Lunzip - Decompressor for lzip files - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . -*/ - -#ifndef max - #define max(x,y) ((x) >= (y) ? (x) : (y)) -#endif -#ifndef min - #define min(x,y) ((x) <= (y) ? 
(x) : (y)) -#endif - -typedef int State; - -enum { states = 12 }; - -static inline bool St_is_char( const State st ) { return st < 7; } - -static inline State St_set_char( const State st ) - { - static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; - return next[st]; - } - -static inline State St_set_match( const State st ) - { return ( ( st < 7 ) ? 7 : 10 ); } - -static inline State St_set_rep( const State st ) - { return ( ( st < 7 ) ? 8 : 11 ); } - -static inline State St_set_short_rep( const State st ) - { return ( ( st < 7 ) ? 9 : 11 ); } - - -enum { - min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, - max_dictionary_bits = 29, - max_dictionary_size = 1 << max_dictionary_bits, - literal_context_bits = 3, - pos_state_bits = 2, - pos_states = 1 << pos_state_bits, - pos_state_mask = pos_states - 1, - - dis_slot_bits = 6, - start_dis_model = 4, - end_dis_model = 14, - modeled_distances = 1 << (end_dis_model / 2), - dis_align_bits = 4, - dis_align_size = 1 << dis_align_bits, - - len_low_bits = 3, - len_mid_bits = 3, - len_high_bits = 8, - len_low_symbols = 1 << len_low_bits, - len_mid_symbols = 1 << len_mid_bits, - len_high_symbols = 1 << len_high_bits, - max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, - - min_match_len = 2, /* must be 2 */ - max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ - min_match_len_limit = 5, - - max_dis_states = 4 }; - -static inline int get_dis_state( const int len ) - { return min( len - min_match_len, max_dis_states - 1 ); } - -static inline int get_lit_state( const uint8_t prev_byte ) - { return ( prev_byte >> ( 8 - literal_context_bits ) ); } - - -enum { bit_model_move_bits = 5, - bit_model_total_bits = 11, - bit_model_total = 1 << bit_model_total_bits }; - -typedef int Bit_model; - -static inline void Bm_init( Bit_model * const probability ) - { *probability = bit_model_total / 2; } - -static inline void Bm_array_init( Bit_model * const p, const int 
size ) - { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } - - -struct Pretty_print - { - const char * name; - const char * stdin_name; - int longest_name; - int verbosity; - bool first_post; - }; - -void Pp_init( struct Pretty_print * const pp, const char * const filenames[], - const int num_filenames, const int v ); - -static inline void Pp_set_name( struct Pretty_print * const pp, - const char * const filename ) - { - if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) - pp->name = filename; - else pp->name = pp->stdin_name; - pp->first_post = true; - } - -static inline void Pp_reset( struct Pretty_print * const pp ) - { if( pp->name && pp->name[0] ) pp->first_post = true; } -void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); - - -typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. */ - -extern CRC32 crc32; - -static inline void CRC32_init( void ) - { - unsigned n; - for( n = 0; n < 256; ++n ) - { - unsigned c = n; - int k; - for( k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } - crc32[n] = c; - } - } - -static inline void CRC32_update_byte( uint32_t * crc, const uint8_t byte ) - { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } - -static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffer, - const int size ) - { - int i; - for( i = 0; i < size; ++i ) - *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); - } - - -static inline int real_bits( unsigned value ) - { - int bits = 0; - while( value > 0 ) { value >>= 1; ++bits; } - return bits; - } - - -static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ - -typedef uint8_t File_header[6]; /* 0-3 magic bytes */ - /* 4 version */ - /* 5 coded_dict_size */ -enum { Fh_size = 6 }; - -static inline void Fh_set_magic( File_header data ) - { memcpy( data, magic_string, 4 ); data[4] = 1; } - -static inline bool Fh_verify_magic( const File_header data ) - { return ( memcmp( 
data, magic_string, 4 ) == 0 ); } - -static inline uint8_t Fh_version( const File_header data ) - { return data[4]; } - -static inline bool Fh_verify_version( const File_header data ) - { return ( data[4] <= 1 ); } - -static inline int Fh_get_dictionary_size( const File_header data ) - { - int sz = ( 1 << ( data[5] & 0x1F ) ); - if( sz > min_dictionary_size && sz <= max_dictionary_size ) - sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 ); - return sz; - } - -static inline bool Fh_set_dictionary_size( File_header data, const int sz ) - { - if( sz >= min_dictionary_size && sz <= max_dictionary_size ) - { - data[5] = real_bits( sz - 1 ); - if( sz > min_dictionary_size ) - { - const int base_size = 1 << data[5]; - const int wedge = base_size / 16; - int i; - for( i = 7; i >= 1; --i ) - if( base_size - ( i * wedge ) >= sz ) - { data[5] |= ( i << 5 ); break; } - } - return true; - } - return false; - } - - -typedef uint8_t File_trailer[20]; - /* 0-3 CRC32 of the uncompressed data */ - /* 4-11 size of the uncompressed data */ - /* 12-19 member size including header and trailer */ - -enum { Ft_size = 20 }; - -static inline int Ft_versioned_size( const int version ) - { return ( ( version >= 1 ) ? 
20 : 12 ); } - -static inline unsigned Ft_get_data_crc( const File_trailer data ) - { - unsigned tmp = 0; - int i; - for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) - { - int i; - for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } - } - -static inline unsigned long long Ft_get_data_size( const File_trailer data ) - { - unsigned long long tmp = 0; - int i; - for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - -static inline unsigned long long Ft_get_member_size( const File_trailer data ) - { - unsigned long long tmp = 0; - int i; - for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } - return tmp; - } - -static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } - - -/* defined in main.c */ -void cleanup_and_fail( const int retval ); -void show_error( const char * const msg, const int errcode, const bool help ); diff --git a/lzip.h b/lzip.h new file mode 100644 index 0000000..d391fb1 --- /dev/null +++ b/lzip.h @@ -0,0 +1,281 @@ +/* Lunzip - Decompressor for lzip files + Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef max + #define max(x,y) ((x) >= (y) ? (x) : (y)) +#endif +#ifndef min + #define min(x,y) ((x) <= (y) ? (x) : (y)) +#endif + +typedef int State; + +enum { states = 12 }; + +static inline bool St_is_char( const State st ) { return st < 7; } + +static inline State St_set_char( const State st ) + { + static const State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + return next[st]; + } + +static inline State St_set_match( const State st ) + { return ( ( st < 7 ) ? 7 : 10 ); } + +static inline State St_set_rep( const State st ) + { return ( ( st < 7 ) ? 8 : 11 ); } + +static inline State St_set_short_rep( const State st ) + { return ( ( st < 7 ) ? 9 : 11 ); } + + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + literal_context_bits = 3, + pos_state_bits = 2, + pos_states = 1 << pos_state_bits, + pos_state_mask = pos_states - 1, + + dis_slot_bits = 6, + start_dis_model = 4, + end_dis_model = 14, + modeled_distances = 1 << (end_dis_model / 2), /* 128 */ + dis_align_bits = 4, + dis_align_size = 1 << dis_align_bits, + + len_low_bits = 3, + len_mid_bits = 3, + len_high_bits = 8, + len_low_symbols = 1 << len_low_bits, + len_mid_symbols = 1 << len_mid_bits, + len_high_symbols = 1 << len_high_bits, + max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + + min_match_len = 2, /* must be 2 */ + max_match_len = min_match_len + max_len_symbols - 1, /* 273 */ + min_match_len_limit = 5, + + max_dis_states = 4 }; + +static inline int get_dis_state( const int len ) + { return min( len - min_match_len, max_dis_states - 1 ); } + +static inline int get_lit_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + + +enum { bit_model_move_bits = 5, + bit_model_total_bits = 11, + 
bit_model_total = 1 << bit_model_total_bits }; + +typedef int Bit_model; + +static inline void Bm_init( Bit_model * const probability ) + { *probability = bit_model_total / 2; } + +static inline void Bm_array_init( Bit_model * const p, const int size ) + { int i = 0; while( i < size ) p[i++] = bit_model_total / 2; } + +struct Len_model + { + Bit_model choice1; + Bit_model choice2; + Bit_model bm_low[pos_states][len_low_symbols]; + Bit_model bm_mid[pos_states][len_mid_symbols]; + Bit_model bm_high[len_high_symbols]; + }; + +static inline void Lm_init( struct Len_model * const lm ) + { + Bm_init( &lm->choice1 ); + Bm_init( &lm->choice2 ); + Bm_array_init( lm->bm_low[0], pos_states * len_low_symbols ); + Bm_array_init( lm->bm_mid[0], pos_states * len_mid_symbols ); + Bm_array_init( lm->bm_high, len_high_symbols ); + } + + +struct Pretty_print + { + const char * name; + const char * stdin_name; + int longest_name; + int verbosity; + bool first_post; + }; + +void Pp_init( struct Pretty_print * const pp, const char * const filenames[], + const int num_filenames, const int v ); + +static inline void Pp_set_name( struct Pretty_print * const pp, + const char * const filename ) + { + if( filename && filename[0] && strcmp( filename, "-" ) != 0 ) + pp->name = filename; + else pp->name = pp->stdin_name; + pp->first_post = true; + } + +static inline void Pp_reset( struct Pretty_print * const pp ) + { if( pp->name && pp->name[0] ) pp->first_post = true; } +void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); + + +typedef uint32_t CRC32[256]; /* Table of CRCs of all 8-bit messages. 
*/ + +extern CRC32 crc32; + +static inline void CRC32_init( void ) + { + unsigned n; + for( n = 0; n < 256; ++n ) + { + unsigned c = n; + int k; + for( k = 0; k < 8; ++k ) + { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; } + crc32[n] = c; + } + } + +static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) + { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } + +static inline void CRC32_update_buf( uint32_t * const crc, + const uint8_t * const buffer, const int size ) + { + int i; + for( i = 0; i < size; ++i ) + *crc = crc32[(*crc^buffer[i])&0xFF] ^ ( *crc >> 8 ); + } + + +static inline int real_bits( unsigned value ) + { + int bits = 0; + while( value > 0 ) { value >>= 1; ++bits; } + return bits; + } + + +static const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; /* "LZIP" */ + +typedef uint8_t File_header[6]; /* 0-3 magic bytes */ + /* 4 version */ + /* 5 coded_dict_size */ +enum { Fh_size = 6 }; + +static inline void Fh_set_magic( File_header data ) + { memcpy( data, magic_string, 4 ); data[4] = 1; } + +static inline bool Fh_verify_magic( const File_header data ) + { return ( memcmp( data, magic_string, 4 ) == 0 ); } + +static inline uint8_t Fh_version( const File_header data ) + { return data[4]; } + +static inline bool Fh_verify_version( const File_header data ) + { return ( data[4] <= 1 ); } + +static inline unsigned Fh_get_dictionary_size( const File_header data ) + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + +static inline bool Fh_set_dictionary_size( File_header data, const int sz ) + { + if( sz >= min_dictionary_size && sz <= max_dictionary_size ) + { + data[5] = real_bits( sz - 1 ); + if( sz > min_dictionary_size ) + { + const int base_size = 1 << data[5]; + const int wedge = base_size / 16; + int i; + for( i = 7; i >= 1; --i ) + if( base_size - ( i * wedge ) >= sz ) + { data[5] |= ( i << 5 ); break; } + } + 
return true; + } + return false; + } + + +typedef uint8_t File_trailer[20]; + /* 0-3 CRC32 of the uncompressed data */ + /* 4-11 size of the uncompressed data */ + /* 12-19 member size including header and trailer */ + +enum { Ft_size = 20 }; + +static inline int Ft_versioned_size( const int version ) + { return ( ( version >= 1 ) ? 20 : 12 ); } + +static inline unsigned Ft_get_data_crc( const File_trailer data ) + { + unsigned tmp = 0; + int i; + for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) + { + int i; + for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } + } + +static inline unsigned long long Ft_get_data_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + +static inline unsigned long long Ft_get_member_size( const File_trailer data ) + { + unsigned long long tmp = 0; + int i; + for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + +static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) + { + int i; + for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } + } + + +/* defined in main.c */ +void cleanup_and_fail( const int retval ); +void show_error( const char * const msg, const int errcode, const bool help ); diff --git a/main.c b/main.c index 9ba2280..75c4ea9 100644 --- a/main.c +++ b/main.c @@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ /* - Return values: 0 for a normal exit, 1 for environmental problems + Exit status: 0 for a normal exit, 1 for environmental problems (file not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (eg, bug) which caused lunzip to panic. @@ -51,7 +51,7 @@ #endif #include "carg_parser.h" -#include "lunzip.h" +#include "lzip.h" #include "decoder.h" #if CHAR_BIT != 8 @@ -101,6 +101,10 @@ static void show_help( void ) " -v, --verbose be verbose (a 2nd -v gives more)\n" "If no file names are given, lunzip decompresses from standard input to\n" "standard output.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n" + "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n" + "invalid input file, 3 for an internal consistency error (eg, bug) which\n" + "caused lunzip to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" "Lunzip home page: http://www.nongnu.org/lzip/lunzip.html\n" ); } @@ -129,8 +133,9 @@ void show_header( const File_header header ) for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } - fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", - Fh_version( header ), np, num, p ); + if( verbosity >= 4 ) + fprintf( stderr, "version %d, ", Fh_version( header ) ); + fprintf( stderr, "dictionary size %s%4u %sB. 
", np, num, p ); } @@ -326,7 +331,7 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) - { Pp_show_msg( pp, 0 ); if( verbosity >= 2 ) show_header( header ); } + { Pp_show_msg( pp, 0 ); if( verbosity >= 3 ) show_header( header ); } if( !LZd_init( &decoder, header, &rdec, outfd ) ) { @@ -350,13 +355,11 @@ static int decompress( const int infd, struct Pretty_print * const pp, retval = 2; break; } if( verbosity >= 2 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); Pp_reset( pp ); } + { fprintf( stderr, testing ? "ok\n" : "done\n" ); Pp_reset( pp ); } } Rd_free( &rdec ); if( verbosity == 1 && retval == 0 ) - { if( testing ) fprintf( stderr, "ok\n" ); - else fprintf( stderr, "done\n" ); } + fprintf( stderr, testing ? "ok\n" : "done\n" ); return retval; } @@ -448,6 +451,7 @@ int main( const int argc, const char * const argv[] ) { 'f', "force", ap_no }, { 'h', "help", ap_no }, { 'k', "keep", ap_no }, + { 'n', "threads", ap_yes }, { 'o', "output", ap_yes }, { 'q', "quiet", ap_no }, { 't', "test", ap_no }, @@ -477,6 +481,7 @@ int main( const int argc, const char * const argv[] ) case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; + case 'n': break; case 'o': default_output_filename = arg; break; case 'q': verbosity = -1; break; case 't': testing = true; break; -- cgit v1.2.3