From 2f1db1facab89cd2eda64441f9b730a3831f5746 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 08:23:45 +0100 Subject: Merging upstream version 1.6~pre3. Signed-off-by: Daniel Baumann --- ChangeLog | 33 ++++++++++++++++++++------------- INSTALL | 3 --- NEWS | 7 +++++++ configure | 6 +++--- decoder.cc | 33 +++++++++++++++++++++------------ decoder.h | 19 +++++++++++++------ doc/lzip.1 | 4 ++-- doc/lzip.info | 10 +++++----- doc/lzip.texinfo | 12 ++++++------ encoder.cc | 14 +++++++------- encoder.h | 4 ++-- lzdiff | 20 ++++++++++---------- lzgrep | 12 ++++++------ lzip.h | 3 ++- main.cc | 18 +++++++++--------- 15 files changed, 113 insertions(+), 85 deletions(-) diff --git a/ChangeLog b/ChangeLog index dfd0c86..dcba1f1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,11 @@ -2009-05-21 Antonio Diaz +2009-06-03 Antonio Diaz Diaz + + * Version 1.6-pre3 released. + * Added decompression support for Sync Flush marker. + * Removed some "bashisms" from lzdiff and lzgrep. + * Dictionary size for options "-1" to "-4" has been changed. + +2009-05-21 Antonio Diaz Diaz * Version 1.6-pre2 released. * Decompression time has been reduced by 17%. @@ -9,7 +16,7 @@ * testsuite/check.sh: Test lziprecover. * Export LC_ALL=C in all scripts. -2009-04-27 Antonio Diaz +2009-04-27 Antonio Diaz Diaz * Version 1.6-pre1 released. * Makefile.in: Extra files are now installed by default. @@ -17,7 +24,7 @@ decompression of corrupt files. * "--test" no more needs "/dev/null". -2009-04-12 Antonio Diaz +2009-04-12 Antonio Diaz Diaz * Version 1.5 released. * lzip.h: Coded dictionary size implemented in File_header. @@ -31,7 +38,7 @@ * Makefile.in: Man page is now installed by default. * testsuite/check.sh: Verify that files are open in binary mode. -2009-01-24 Antonio Diaz +2009-01-24 Antonio Diaz Diaz * Version 1.4 released. * Implemented compression of version 1 files. @@ -42,7 +49,7 @@ * Added "lziprecover", a member recoverer program. * testsuite/unzcrash.cc: Test all 1-byte errors. -2008-12-21 Antonio Diaz +2008-12-21 Antonio Diaz Diaz * Version 1.3 released. * This version automatically chooses the smallest possible @@ -51,7 +58,7 @@ * Implemented decompression of version 1 files. * testsuite/check.sh: Replaced "diff -q" with "cmp". -2008-12-10 Antonio Diaz +2008-12-10 Antonio Diaz Diaz * Version 1.2 released. * encoder.cc: A 1-byte read outside allocated memory has been fixed. @@ -63,31 +70,31 @@ non-compressed files. * "make install-info" should now work on Debian and OS X. -2008-11-17 Antonio Diaz +2008-11-17 Antonio Diaz Diaz * Version 1.1 released. * Changed short name of option "--dictionary-size" to "-s". * Changed short name of option "--match-length" to "-m". * Changed LONG_LONG_MAX to LLONG_MAX. -2008-10-14 Antonio Diaz +2008-10-14 Antonio Diaz Diaz * Version 1.0 released. * "-tvv" shows file version and dictionary size. -2008-09-30 Antonio Diaz +2008-09-30 Antonio Diaz Diaz * Version 0.5 released. * Decompression is now 1% faster. -2008-09-23 Antonio Diaz +2008-09-23 Antonio Diaz Diaz * Version 0.4 released. * Code cleanup for global variable "verbosity". * Regained the compression ratio of 0.2 with 5% faster speed. * Fixed compilation on sistems where size_t != unsigned int. -2008-09-15 Antonio Diaz +2008-09-15 Antonio Diaz Diaz * Version 0.3 released. * encoder.cc: Compression is now 15% faster, 1% worse. @@ -95,13 +102,13 @@ * main.cc (decompress): Show "done" instead of "ok" when not testing. * encoder.h: Use trials[] to return the list of pairs. -2008-09-09 Antonio Diaz +2008-09-09 Antonio Diaz Diaz * Version 0.2 released. * encoder.cc: Small improvements in compression speed. * Small documentation changes. -2008-08-20 Antonio Diaz +2008-08-20 Antonio Diaz Diaz * Version 0.1 released. diff --git a/INSTALL b/INSTALL index 7ca2833..4e5ac9a 100644 --- a/INSTALL +++ b/INSTALL @@ -32,9 +32,6 @@ the main archive. 5. Type `make install' to install the program and any data files and documentation. -6. Optionally, type `make install-extra' to install the lziprecover - program and the included wrapper scripts (lzdiff, lzgrep). - Another way ----------- diff --git a/NEWS b/NEWS index 86ccf02..0d39426 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ Changes in version 1.6: Decompression time has been reduced by 17%. +Decompression support for the "Sync Flush marker" from lzlib has been +added. + Support for .tbz extension has been added to lzdiff and lzgrep. Man pages for lzdiff, lzgrep and lziprecover have been added to the @@ -17,3 +20,7 @@ Lzdiff, lzgrep and lziprecover are now installed by default. The dependence of "--test" on the existence of "/dev/null" has been removed. + +Some "bashisms" have been removed from lzdiff and lzgrep. + +Dictionary size for options "-1" to "-4" has been changed. diff --git a/configure b/configure index c31a1d8..b917d94 100755 --- a/configure +++ b/configure @@ -5,13 +5,13 @@ # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2009-05-21 +# Date of this version: 2009-06-03 invocation_name=$0 args= no_create= progname=lzip -progversion=1.6-pre2 +progversion=1.6-pre3 srctrigger=lzip.h # clear some things potentially inherited from environment. @@ -100,7 +100,7 @@ while [ x"$1" != x ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; LDFLAGS=*) LDFLAGS=${optarg} ;; - --build=* | --enable-* | --with-* | --*dir=* | *=* | *-*-*) ;; + --* | *=* | *-*-*) ;; *) echo "configure: Unrecognized option: \"${option}\"; use --help for usage." 1>&2 exit 1 ;; diff --git a/decoder.cc b/decoder.cc index aeac2cd..0cb6188 100644 --- a/decoder.cc +++ b/decoder.cc @@ -17,6 +17,7 @@ #define _FILE_OFFSET_BITS 64 +#include #include #include #include @@ -45,16 +46,15 @@ bool Input_buffer::read_block() void LZ_decoder::flush_data() { - if( !member_finished ) + const int size = pos - stream_pos; + if( size > 0 ) { - crc32.update( crc_, buffer, pos ); - if( odes_ >= 0 ) - { - const int wr = writeblock( odes_, (char *)buffer, pos ); - if( wr != pos ) throw Error( "write error" ); - } + crc32.update( crc_, buffer + stream_pos, size ); + if( odes_ >= 0 && + writeblock( odes_, (char *)buffer + stream_pos, size ) != size ) + throw Error( "write error" ); if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; } - else member_finished = true; + stream_pos = pos; } } @@ -180,9 +180,8 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } else { - rep3 = rep2; rep2 = rep1; rep1 = rep0; + unsigned int rep0_saved = rep0; len = min_match_len + len_decoder.decode( range_decoder, pos_state ); - state.set_match(); const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits ); if( dis_slot < start_dis_model ) rep0 = dis_slot; else @@ -197,10 +196,17 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits ); if( rep0 == 0xFFFFFFFF ) // Marker found { + rep0 = rep0_saved; range_decoder.normalize(); flush_data(); if( len == min_match_len ) // End Of Stream marker - { if( verify_trailer( pp ) ) return 0; else return 3; } + { + if( verify_trailer( pp ) ) return 0; else return 3; + } + if( len == min_match_len + 1 ) // Sync Flush marker + { + range_decoder.reload(); continue; + } if( verbosity >= 0 ) { pp(); @@ -208,9 +214,12 @@ int LZ_decoder::decode_member( const Pretty_print & pp ) } return 4; } - if( rep0 >= (unsigned int)buffer_size ) { flush_data(); return 1; } + if( rep0 >= (unsigned int)dictionary_size ) + { flush_data(); return 1; } } } + rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; + state.set_match(); } copy_block( rep0, len ); prev_byte = get_byte( 0 ); diff --git a/decoder.h b/decoder.h index aaf85a6..290ab97 100644 --- a/decoder.h +++ b/decoder.h @@ -18,7 +18,6 @@ class Input_buffer { enum { buffer_size = 65536 }; - uint8_t * const buffer; int pos; int stream_pos; // when reached, a new block must be read @@ -73,6 +72,13 @@ public: return ibuf.get_byte(); } + void reload() throw() + { + code = 0; + range = 0xFFFFFFFF; + for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte(); + } + void normalize() { if( range <= 0x00FFFFFF ) @@ -101,8 +107,7 @@ public: int decode_bit( Bit_model & bm ) { - if( range <= 0x00FFFFFF ) - { range <<= 8; code = (code << 8) | get_byte(); } + normalize(); const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability; if( code < bound ) { @@ -204,12 +209,13 @@ class LZ_decoder { long long partial_data_pos; const int format_version; + const int dictionary_size; const int buffer_size; uint8_t * const buffer; int pos; + int stream_pos; // first byte not yet written to file uint32_t crc_; const int odes_; - bool member_finished; Bit_model bm_match[State::states][pos_states]; Bit_model bm_rep[State::states]; @@ -264,12 +270,13 @@ public: : partial_data_pos( 0 ), format_version( header.version ), - buffer_size( header.dictionary_size() ), + dictionary_size( header.dictionary_size() ), + buffer_size( std::max( 65536, dictionary_size ) ), buffer( new uint8_t[buffer_size] ), pos( 0 ), + stream_pos( 0 ), crc_( 0xFFFFFFFF ), odes_( odes ), - member_finished( false ), range_decoder( sizeof header, ibuf ), literal_decoder() {} diff --git a/doc/lzip.1 b/doc/lzip.1 index bcd4c5a..0ec87e3 100644 --- a/doc/lzip.1 +++ b/doc/lzip.1 @@ -1,7 +1,7 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.36. -.TH LZIP "1" "May 2009" "Lzip 1.6-pre2" "User Commands" +.TH LZIP "1" "June 2009" "Lzip 1.6-pre3" "User Commands" .SH NAME -Lzip \- manual page for Lzip 1.6-pre2 +Lzip \- manual page for Lzip 1.6-pre3 .SH SYNOPSIS .B lzip [\fIoptions\fR] [\fIfiles\fR] diff --git a/doc/lzip.info b/doc/lzip.info index e1e5029..dc66a8b 100644 --- a/doc/lzip.info +++ b/doc/lzip.info @@ -11,7 +11,7 @@ File: lzip.info, Node: Top, Next: Introduction, Up: (dir) Lzip **** -This manual is for Lzip (version 1.6-pre2, 21 May 2009). +This manual is for Lzip (version 1.6-pre3, 3 June 2009). * Menu: @@ -256,10 +256,10 @@ The format for running lzip is: slower than `-1'. These options have no effect when decompressing. Level Dictionary size Match length limit - -1 4MiB 10 bytes - -2 4MiB 12 bytes - -3 4MiB 17 bytes - -4 4MiB 26 bytes + -1 1MiB 10 bytes + -2 1MiB 12 bytes + -3 1MiB 17 bytes + -4 2MiB 26 bytes -5 4MiB 44 bytes -6 8MiB 80 bytes -7 16MiB 108 bytes diff --git a/doc/lzip.texinfo b/doc/lzip.texinfo index 87e223f..ec20af9 100644 --- a/doc/lzip.texinfo +++ b/doc/lzip.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 21 May 2009 -@set VERSION 1.6-pre2 +@set UPDATED 3 June 2009 +@set VERSION 1.6-pre3 @dircategory Data Compression @direntry @@ -276,10 +276,10 @@ as shown in the table below. Note that @samp{-9} can be much slower than @multitable {Level} {Dictionary size} {Match length limit} @item Level @tab Dictionary size @tab Match length limit -@item -1 @tab 4MiB @tab 10 bytes -@item -2 @tab 4MiB @tab 12 bytes -@item -3 @tab 4MiB @tab 17 bytes -@item -4 @tab 4MiB @tab 26 bytes +@item -1 @tab 1MiB @tab 10 bytes +@item -2 @tab 1MiB @tab 12 bytes +@item -3 @tab 1MiB @tab 17 bytes +@item -4 @tab 2MiB @tab 26 bytes @item -5 @tab 4MiB @tab 44 bytes @item -6 @tab 8MiB @tab 80 bytes @item -7 @tab 16MiB @tab 108 bytes diff --git a/encoder.cc b/encoder.cc index 291f1d1..287fe75 100644 --- a/encoder.cc +++ b/encoder.cc @@ -47,6 +47,7 @@ Matchfinder::Matchfinder( const int dict_size, const int len_limit, const int ides ) : partial_data_pos( 0 ), + after_size( max_match_len ), pos( 0 ), cyclic_pos( 0 ), stream_pos( 0 ), @@ -55,8 +56,7 @@ Matchfinder::Matchfinder( const int dict_size, const int len_limit, prev_positions( new int32_t[num_prev_positions] ), at_stream_end( false ) { - const int buffer_size_limit = ( 2 * dict_size ) + - max_num_trials + max_match_len; + const int buffer_size_limit = ( 2 * dict_size ) + max_num_trials + after_size; buffer_size = std::max( 65536, dict_size ); buffer = (uint8_t *)std::malloc( buffer_size ); if( !buffer ) throw std::bad_alloc(); @@ -72,7 +72,7 @@ Matchfinder::Matchfinder( const int dict_size, const int len_limit, dictionary_size_ = std::max( min_dictionary_size, stream_pos ); else dictionary_size_ = dict_size; pos_limit = buffer_size; - if( !at_stream_end ) pos_limit -= max_match_len; + if( !at_stream_end ) pos_limit -= after_size; prev_pos_tree = new int32_t[2*dictionary_size_]; for( int i = 0; i < num_prev_positions; ++i ) prev_positions[i] = -1; } @@ -96,6 +96,7 @@ bool Matchfinder::move_pos() throw() if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0; if( ++pos >= pos_limit ) { + if( pos > stream_pos ) { pos = stream_pos; return false; } if( !at_stream_end ) { const int offset = pos - dictionary_size_ - max_num_trials; @@ -110,7 +111,6 @@ bool Matchfinder::move_pos() throw() if( prev_pos_tree[i] >= 0 ) prev_pos_tree[i] -= offset; return read_block(); } - else if( pos > stream_pos ) { pos = stream_pos; return false; } } return true; } @@ -456,7 +456,7 @@ int LZ_encoder::best_pair_sequence( const int reps[num_rep_distances], // End Of Stream mark => (dis == 0xFFFFFFFF, len == min_match_len) -void LZ_encoder::flush( const State & state ) +void LZ_encoder::full_flush( const State & state ) { const int pos_state = ( matchfinder.data_position() ) & pos_state_mask; range_encoder.encode_bit( bm_match[state()][pos_state], 1 ); @@ -515,7 +515,7 @@ bool LZ_encoder::encode_member( const long long member_size ) while( true ) { - if( matchfinder.finished() ) { flush( state ); return true; } + if( matchfinder.finished() ) { full_flush( state ); return true; } if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } int ahead = best_pair_sequence( rep_distances, state ); @@ -580,7 +580,7 @@ bool LZ_encoder::encode_member( const long long member_size ) if( range_encoder.member_position() >= member_size_limit ) { if( !matchfinder.dec_pos( ahead ) ) return false; - flush( state ); + full_flush( state ); return true; } if( ahead <= 0 ) break; diff --git a/encoder.h b/encoder.h index 41daa48..fe44e71 100644 --- a/encoder.h +++ b/encoder.h @@ -143,6 +143,7 @@ class Matchfinder long long partial_data_pos; int dictionary_size_; // bytes to keep in buffer before pos + const int after_size; // bytes to keep in buffer after pos int buffer_size; uint8_t * buffer; int pos; @@ -199,7 +200,6 @@ public: class Range_encoder { enum { buffer_size = 65536 }; - uint64_t low; long long partial_member_pos; uint8_t * const buffer; @@ -569,7 +569,7 @@ class LZ_encoder int best_pair_sequence( const int reps[num_rep_distances], const State & state ); - void flush( const State & state ); + void full_flush( const State & state ); public: LZ_encoder( Matchfinder & mf, const File_header & header, const int odes ); diff --git a/lzdiff b/lzdiff index c3c605d..b4e60d0 100755 --- a/lzdiff +++ b/lzdiff @@ -46,7 +46,7 @@ while [ x"$1" != x ] ; do echo "Lzip home page: http://www.nongnu.org/lzip/lzip.html" exit 0 ;; --version | --ve* | -V) - echo "Lzdiff 0.4" + echo "Lzdiff 0.5" echo "Copyright (C) 2009 Antonio Diaz Diaz." echo "This script is free software: you have unlimited permission" echo "to copy, distribute and modify it." @@ -62,7 +62,7 @@ while [ x"$1" != x ] ; do --cmp) diff_prog=cmp ;; -) - echo "${invocation_name}: reading from stdin not supported\n" + echo "${invocation_name}: reading from stdin not supported" exit 1 ;; --) ;; @@ -93,21 +93,21 @@ fi if test -z "${file2}"; then case "${file1}" in *.gz) - file2=`echo "${file1}" | sed 's/.gz$//'` ;; + file2=`printf "%s" "${file1}" | sed 's/.gz$//'` ;; *.tgz) - file2=`echo "${file1}" | sed 's/tgz$/tar/'` ;; + file2=`printf "%s" "${file1}" | sed 's/tgz$/tar/'` ;; *.bz2) - file2=`echo "${file1}" | sed 's/.bz2$//'` ;; + file2=`printf "%s" "${file1}" | sed 's/.bz2$//'` ;; *.tbz) - file2=`echo "${file1}" | sed 's/tbz$/tar/'` ;; + file2=`printf "%s" "${file1}" | sed 's/tbz$/tar/'` ;; *.tbz2) - file2=`echo "${file1}" | sed 's/tbz2$/tar/'` ;; + file2=`printf "%s" "${file1}" | sed 's/tbz2$/tar/'` ;; *.lz) - file2=`echo "${file1}" | sed 's/.lz$//'` ;; + file2=`printf "%s" "${file1}" | sed 's/.lz$//'` ;; *.tlz) - file2=`echo "${file1}" | sed 's/tlz$/tar/'` ;; + file2=`printf "%s" "${file1}" | sed 's/tlz$/tar/'` ;; *) - file2="${file1}"; file1="${file1}${default_ext}" ;; + file2="${file1}${default_ext}" ;; esac fi diff --git a/lzgrep b/lzgrep index 0991f58..9cb2e49 100755 --- a/lzgrep +++ b/lzgrep @@ -44,7 +44,7 @@ while [ x"$1" != x ] ; do echo "Lzip home page: http://www.nongnu.org/lzip/lzip.html" exit 0 ;; --version | --ve* | -V) - echo "Lzgrep 0.4" + echo "Lzgrep 0.5" echo "Copyright (C) 2009 Antonio Diaz Diaz." echo "This script is free software: you have unlimited permission" echo "to copy, distribute and modify it." @@ -56,7 +56,7 @@ while [ x"$1" != x ] ; do --lz*) default_prog=lzip ;; -[drRzZ] | --di* | --exc* | --inc* | --nu* | --rec*) - echo "${invocation_name}: option $1 not supported\n" + echo "${invocation_name}: option $1 not supported" exit 1 ;; -e?* | -f?* | --file=* | --reg*=*) args="${args} $1"; have_pat=1 ;; @@ -116,15 +116,15 @@ for i in "$@" ; do fi ;; esac if test ${list} -eq 1; then - ${prog} -- "$i" | grep ${args} 2>&1 > /dev/null && echo $i + ${prog} -- "$i" | grep ${args} 2>&1 > /dev/null && echo "$i" r=$? elif test $# -eq 1 -o ${no_name} -eq 1; then ${prog} -- "$i" | grep ${args} r=$? else - j=${i//\\/\\\\} - j=${j//|/\\|} - j=${j//&/\\&} + j=`printf "%s" "$i" | sed 's/\\\\/\\\\\\\\/g'` + j=`printf "%s" "$j" | sed 's/|/\\\\|/g'` + j=`printf "%s" "$j" | sed 's/&/\\\\&/g'` j=`printf "%s" "$j" | tr '\n' ' '` ${prog} -- "$i" | grep ${args} | sed "s|^|${j}:|" r=$? diff --git a/lzip.h b/lzip.h index a50941f..0a2293d 100644 --- a/lzip.h +++ b/lzip.h @@ -157,6 +157,8 @@ public: } }; +extern const CRC32 crc32; + const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; @@ -272,7 +274,6 @@ struct Error }; extern int verbosity; -extern const CRC32 crc32; void show_error( const char * msg, const int errcode = 0, const bool help = false ) throw(); void internal_error( const char * msg ); diff --git a/main.cc b/main.cc index 87d186a..2cb998c 100644 --- a/main.cc +++ b/main.cc @@ -84,7 +84,7 @@ void show_help() throw() { std::printf( "%s - A data compressor based on the LZMA algorithm.\n", Program_name ); std::printf( "\nUsage: %s [options] [files]\n", invocation_name ); - std::printf( "Options:\n" ); + std::printf( "\nOptions:\n" ); std::printf( " -h, --help display this help and exit\n" ); std::printf( " -V, --version output version information and exit\n" ); std::printf( " -b, --member-size= set member size limit in bytes\n" ); @@ -102,7 +102,7 @@ void show_help() throw() std::printf( " -1 .. -9 set compression level [default 6]\n" ); std::printf( " --fast alias for -1\n" ); std::printf( " --best alias for -9\n" ); - std::printf( "If no file names are given, lzip compresses or decompresses\n" ); + std::printf( "If no file names are given, %s compresses or decompresses\n", program_name ); std::printf( "from standard input to standard output.\n" ); std::printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); std::printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); @@ -645,10 +645,10 @@ int main( const int argc, const char * argv[] ) // to the corresponding LZMA compression modes. const lzma_options option_mapping[] = { - { 1 << 22, 10 }, // -1 - { 1 << 22, 12 }, // -2 - { 1 << 22, 17 }, // -3 - { 1 << 22, 26 }, // -4 + { 1 << 20, 10 }, // -1 + { 1 << 20, 12 }, // -2 + { 1 << 20, 17 }, // -3 + { 1 << 21, 26 }, // -4 { 1 << 22, 44 }, // -5 { 1 << 23, 80 }, // -6 { 1 << 24, 108 }, // -7 @@ -741,13 +741,13 @@ int main( const int argc, const char * argv[] ) if( filenames_given ) set_signals(); Pretty_print pp( filenames ); - if( program_mode == m_compress ) + if( program_mode == m_test ) + outhandle = -1; + else if( program_mode == m_compress ) { dis_slots.init(); prob_prices.init(); } - else if( program_mode == m_test ) - outhandle = -1; int retval = 0; for( unsigned int i = 0; i < filenames.size(); ++i ) -- cgit v1.2.3