From baab8c4746bececfa4d7a048ed723a97405dab79 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 6 Nov 2015 12:33:49 +0100 Subject: Merging upstream version 1.3. Signed-off-by: Daniel Baumann --- ChangeLog | 29 +++-- INSTALL | 26 ++-- Makefile.in | 4 +- NEWS | 16 +-- README | 15 ++- carg_parser.c | 17 +-- carg_parser.h | 17 +-- clzip.h | 27 ++-- configure | 62 ++++----- decoder.c | 37 +++--- decoder.h | 83 ++++++------ doc/clzip.1 | 16 ++- doc/clzip.info | 113 +++++++++-------- doc/clzip.texinfo | 106 +++++++++------- encoder.c | 361 +++++++++++++++++++++++++++-------------------------- encoder.h | 277 ++++++++++++++++++++-------------------- main.c | 189 +++++++++++++++------------- testsuite/check.sh | 9 +- 18 files changed, 736 insertions(+), 668 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3a8a0a1..b8264e5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,30 +1,41 @@ +2012-02-25 Antonio Diaz Diaz + + * Version 1.3 released. + * main.c (close_and_set_permissions): Inability to change output + file attributes has been downgraded from error to warning. + * encoder.c (Mf_init): Return false if out of memory instead of + calling cleanup_and_fail. + * Small change in '--help' output and man page. + * Changed quote characters in messages as advised by GNU Standards. + * configure: 'datadir' renamed to 'datarootdir'. + 2011-05-18 Antonio Diaz Diaz * Version 1.2 released. - * main.c: Added new option `-F, --recompress'. + * main.c: Added new option '-F, --recompress'. * main.c (decompress): Print only one status line for each - multimember file when only one `-v' is specified. + multi-member file when only one '-v' is specified. * encoder.h (Lee_update_prices): Update high length symbol prices - independently of the value of `pos_state'. This gives better - compression for large values of `--match-length' without being + independently of the value of 'pos_state'. This gives better + compression for large values of '--match-length' without being slower. 
* encoder.h encoder.c: Optimize pair price calculations. This - reduces compression time for large values of `--match-length' + reduces compression time for large values of '--match-length' by up to 6%. 2011-01-11 Antonio Diaz Diaz * Version 1.1 released. - * Code has been converted to `C89 + long long' from C99. + * Code has been converted to 'C89 + long long' from C99. * main.c: Fixed warning about fchown return value being ignored. - * decoder.c: `-tvvvv' now shows compression ratio. + * decoder.c: '-tvvvv' now shows compression ratio. * main.c: Match length limit set by options -1 to -8 has been reduced to extend range of use towards gzip. Lower numbers now compress less but faster. (-1 now takes 43% less time for only 20% larger compressed size). * encoder.c: Compression of option -9 has been slightly increased. * main.c (open_instream): Do not show the message - " and `--stdout' was not specified" for directories, etc. + " and '--stdout' was not specified" for directories, etc. * New examples have been added to the manual. 2010-04-05 Antonio Diaz Diaz @@ -34,7 +45,7 @@ * Translated to C from the C++ source of lzip 1.10. -Copyright (C) 2010, 2011 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index dee400d..a0aad32 100644 --- a/INSTALL +++ b/INSTALL @@ -18,7 +18,7 @@ This creates the directory ./clzip[version] containing the source from the main archive. 2. Change to clzip directory and run configure. - (Try `configure --help' for usage instructions). + (Try 'configure --help' for usage instructions). cd clzip[version] ./configure @@ -27,30 +27,30 @@ the main archive. make -4. Optionally, type `make check' to run the tests that come with clzip. +4. Optionally, type 'make check' to run the tests that come with clzip. -5. 
Type `make install' to install the program and any data files and +5. Type 'make install' to install the program and any data files and documentation. Another way ----------- You can also compile clzip into a separate directory. To do this, you -must use a version of `make' that supports the `VPATH' variable, such -as GNU `make'. `cd' to the directory where you want the object files -and executables to go and run the `configure' script. `configure' -automatically checks for the source code in `.', in `..' and in the -directory that `configure' is in. - -`configure' recognizes the option `--srcdir=DIR' to control where to -look for the sources. Usually `configure' can determine that directory +must use a version of 'make' that supports the 'VPATH' variable, such +as GNU 'make'. 'cd' to the directory where you want the object files +and executables to go and run the 'configure' script. 'configure' +automatically checks for the source code in '.', in '..' and in the +directory that 'configure' is in. + +'configure' recognizes the option '--srcdir=DIR' to control where to +look for the sources. Usually 'configure' can determine that directory automatically. -After running `configure', you can run `make' and `make install' as +After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2010, 2011 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. 
diff --git a/Makefile.in b/Makefile.in index ec79b02..31524a2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -16,10 +16,10 @@ objs = carg_parser.o decoder.o encoder.o main.o all : $(progname) $(progname) : $(objs) - $(CC) $(LDFLAGS) -o $@ $^ + $(CC) $(LDFLAGS) -o $@ $(objs) $(progname)_profiled : $(objs) - $(CC) $(LDFLAGS) -pg -o $@ $^ + $(CC) $(LDFLAGS) -pg -o $@ $(objs) main.o : main.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< diff --git a/NEWS b/NEWS index 6a4e828..36f565f 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,12 @@ -Changes in version 1.2: +Changes in version 1.3: -The option "-F, --recompress", which forces recompression of files whose -name already has the ".lz" or ".tlz" suffix, has been added. +Inability to change output file attributes has been downgraded from +error to warning. -Print only one status line for each multimember file when only one "-v" -is specified. +A small change has been made in the "--help" output and man page. -For large values of "--match-length", compression ratio has been -slightly increased and compression time has been reduced by up to 6%. +Quote characters in messages have been changed as advised by GNU Coding +Standards. + +Configure option "--datadir" has been renamed to "--datarootdir" to +follow GNU Standards. diff --git a/README b/README index bb97dfc..71b1ad8 100644 --- a/README +++ b/README @@ -11,6 +11,9 @@ compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ compiler. +If you ever need to recover data from a damaged lzip file, try the +lziprecover program. + Clzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed file has the same modification date, permissions, and, when possible, @@ -28,11 +31,11 @@ or more compressed files. The result is the concatenation of the corresponding uncompressed files. 
Integrity testing of concatenated compressed files is also supported. -Clzip can produce multimember files and safely recover, with lziprecover, -the undamaged members in case of file damage. Clzip can also split the -compressed output in volumes of a given size, even when reading from -standard input. This allows the direct creation of multivolume -compressed tar archives. +Clzip can produce multi-member files and safely recover, with +lziprecover, the undamaged members in case of file damage. Clzip can +also split the compressed output in volumes of a given size, even when +reading from standard input. This allows the direct creation of +multivolume compressed tar archives. Clzip will automatically use the smallest possible dictionary size without exceeding the given limit. Keep in mind that the decompression @@ -65,7 +68,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash). -Copyright (C) 2010, 2011 Antonio Diaz Diaz. +Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 52c8658..326bd41 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,6 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Antonio Diaz Diaz. 
This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +32,7 @@ #include "carg_parser.h" -/* assure at least a minimum size for buffer `buf' */ +/* assure at least a minimum size for buffer 'buf' */ static void * ap_resize_buffer( void * buf, const int min_size ) { if( buf ) buf = realloc( buf, min_size ); @@ -108,31 +109,31 @@ static char parse_long_option( struct Arg_parser * const ap, if( ambig && !exact ) { - add_error( ap, "option `" ); add_error( ap, opt ); + add_error( ap, "option '" ); add_error( ap, opt ); add_error( ap, "' is ambiguous" ); return 1; } if( index < 0 ) /* nothing found */ { - add_error( ap, "unrecognized option `" ); add_error( ap, opt ); + add_error( ap, "unrecognized option '" ); add_error( ap, opt ); add_error( ap, "'" ); return 1; } ++*argindp; - if( opt[len+2] ) /* `--<long_option>=<argument>' syntax */ + if( opt[len+2] ) /* '--<long_option>=<argument>' syntax */ { if( options[index].has_arg == ap_no ) { - add_error( ap, "option `--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].name ); add_error( ap, "' doesn't allow an argument" ); return 1; } if( options[index].has_arg == ap_yes && !opt[len+3] ) { - add_error( ap, "option `--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].name ); add_error( ap, "' requires an argument" ); return 1; } @@ -143,7 +144,7 @@ static char parse_long_option( struct Arg_parser * const ap, { if( !arg || !arg[0] ) { - add_error( ap, "option `--" ); add_error( ap, options[index].name ); + add_error( ap, "option '--" ); add_error( ap, options[index].name ); add_error( ap, "' requires an argument" ); return 1; } diff --git a/carg_parser.h b/carg_parser.h index 0f61c12..3575dd7 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,6 @@ /* Arg_parser - POSIX/GNU command line argument parser. 
(C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 + Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,13 +26,13 @@ Public License. */ -/* Arg_parser reads the arguments in `argv' and creates a number of +/* Arg_parser reads the arguments in 'argv' and creates a number of option codes, option arguments and non-option arguments. - In case of error, `ap_error' returns a non-null pointer to an error + In case of error, 'ap_error' returns a non-null pointer to an error message. - `options' is an array of `struct ap_Option' terminated by an element + 'options' is an array of 'struct ap_Option' terminated by an element containing a code which is zero. A null name means a short-only option. A code value outside the unsigned char range means a long-only option. @@ -40,13 +41,13 @@ were specified before all the non-option arguments for the purposes of parsing, even if the user of your program intermixed option and non-option arguments. If you want the arguments in the exact order - the user typed them, call `ap_init' with `in_order' = true. + the user typed them, call 'ap_init' with 'in_order' = true. - The argument `--' terminates all options; any following arguments are + The argument '--' terminates all options; any following arguments are treated as non-option arguments, even if they begin with a hyphen. - The syntax for optional option arguments is `-<short_option><argument>' - (without whitespace), or `--<long_option>=<argument>'. + The syntax for optional option arguments is '-<short_option><argument>' + (without whitespace), or '--<long_option>=<argument>'. */ #ifdef __cplusplus diff --git a/clzip.h b/clzip.h index b534d66..42c014e 100644 --- a/clzip.h +++ b/clzip.h @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -108,7 +108,7 @@ static inline void Bm_init( Bit_model * const probability ) struct Pretty_print { - const char * name_; + const char * name; const char * stdin_name; int longest_name; int verbosity; @@ -122,13 +122,13 @@ static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) { if( filename && filename[0] && strcmp( filename, "-" ) ) - pp->name_ = filename; - else pp->name_ = pp->stdin_name; + pp->name = filename; + else pp->name = pp->stdin_name; pp->first_post = true; } static inline void Pp_reset( struct Pretty_print * const pp ) - { if( pp->name_ && pp->name_[0] ) pp->first_post = true; } + { if( pp->name && pp->name[0] ) pp->first_post = true; } void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ); @@ -160,11 +160,11 @@ static inline void CRC32_update_buf( uint32_t * crc, const uint8_t * const buffe } -static inline int real_bits( const int value ) +static inline int real_bits( const unsigned int value ) { - int bits = 0, i, mask; - for( i = 1, mask = 1; mask > 0; ++i, mask <<= 1 ) - if( value & mask ) bits = i; + int bits = 0, i = 1; + unsigned int mask = 1; + for( ; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i; return bits; } @@ -177,15 +177,10 @@ typedef uint8_t File_header[6]; /* 0-3 magic bytes */ enum { Fh_size = 6 }; static inline void Fh_set_magic( File_header data ) - { - memcpy( data, magic_string, 4 ); - data[4] = 1; - } + { memcpy( data, magic_string, 4 ); data[4] = 1; } static inline bool Fh_verify_magic( const File_header data ) - { - return ( memcmp( data, magic_string, 4 ) == 0 ); - } + { return ( memcmp( data, magic_string, 4 ) == 0 ); } static inline uint8_t Fh_version( const File_header data ) { return data[4]; } diff --git a/configure b/configure index c6401de..3ecf2bc 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! 
/bin/sh # configure script for Clzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2010, 2011 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -8,7 +8,7 @@ args= no_create= pkgname=clzip -pkgversion=1.2 +pkgversion=1.3 progname=clzip srctrigger=clzip.h @@ -19,10 +19,9 @@ srcdir= prefix=/usr/local exec_prefix='$(prefix)' bindir='$(exec_prefix)/bin' -datadir='$(prefix)/share' -infodir='$(datadir)/info' -mandir='$(datadir)/man' -sysconfdir='$(prefix)/etc' +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' CC= CPPFLAGS= CFLAGS='-Wall -W -O2' @@ -40,12 +39,12 @@ while [ -n "$1" ] ; do # Split out the argument for options that take them case ${option} in - *=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,'` ;; + *=*) optarg=`echo ${option} | sed -e 's,^[^=]*=,,;s,/$,,'` ;; esac # Process the options case ${option} in - --help | --he* | -h) + --help | -h) echo "Usage: configure [options]" echo echo "Options: [defaults in brackets]" @@ -55,42 +54,31 @@ while [ -n "$1" ] ; do echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" - echo " --datadir=DIR base directory for doc and data [${datadir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" echo " --infodir=DIR info files directory [${infodir}]" echo " --mandir=DIR man pages directory [${mandir}]" - echo " --sysconfdir=DIR read-only single-machine data directory [${sysconfdir}]" echo " CC=COMPILER C compiler to use [gcc]" echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" echo " CFLAGS=OPTIONS command line options for the C compiler [${CFLAGS}]" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" echo exit 0 ;; - 
--version | --ve* | -V) + --version | -V) echo "Configure script for ${pkgname} version ${pkgversion}" exit 0 ;; - --srcdir* | --sr*) - srcdir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --prefix* | --pr*) - prefix=`echo ${optarg} | sed -e 's,/$,,'` ;; - --exec-prefix* | --ex*) - exec_prefix=`echo ${optarg} | sed -e 's,/$,,'` ;; - --bindir* | --bi*) - bindir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --datadir* | --da*) - datadir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --infodir* | --inf*) - infodir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --mandir* | --ma*) - mandir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --sysconfdir* | --sy*) - sysconfdir=`echo ${optarg} | sed -e 's,/$,,'` ;; - --no-create | --no-c*) - no_create=yes ;; - - CC=*) CC=${optarg} ;; + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --no-create) no_create=yes ;; + + CC=*) CC=${optarg} ;; CPPFLAGS=*) CPPFLAGS=${optarg} ;; - CFLAGS=*) CFLAGS=${optarg} ;; - LDFLAGS=*) LDFLAGS=${optarg} ;; + CFLAGS=*) CFLAGS=${optarg} ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; --* | *=* | *-*-*) ;; *) @@ -154,10 +142,9 @@ echo "VPATH = ${srcdir}" echo "prefix = ${prefix}" echo "exec_prefix = ${exec_prefix}" echo "bindir = ${bindir}" -echo "datadir = ${datadir}" +echo "datarootdir = ${datarootdir}" echo "infodir = ${infodir}" echo "mandir = ${mandir}" -echo "sysconfdir = ${sysconfdir}" echo "CC = ${CC}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CFLAGS = ${CFLAGS}" @@ -165,7 +152,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Clzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2010, 2011 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. 
# # This Makefile is free software: you have unlimited permission @@ -178,10 +165,9 @@ VPATH = ${srcdir} prefix = ${prefix} exec_prefix = ${exec_prefix} bindir = ${bindir} -datadir = ${datadir} +datarootdir = ${datarootdir} infodir = ${infodir} mandir = ${mandir} -sysconfdir = ${sysconfdir} CC = ${CC} CPPFLAGS = ${CPPFLAGS} CFLAGS = ${CFLAGS} diff --git a/decoder.c b/decoder.c index caaaa2f..02f7f0b 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -65,7 +65,7 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) if( rest <= 0 ) break; n = write( fd, buf + size - rest, rest ); if( n > 0 ) rest -= n; - else if( errno && errno != EINTR && errno != EAGAIN ) break; + else if( n < 0 && errno != EINTR && errno != EAGAIN ) break; } return ( rest > 0 ) ? 
size - rest : size; } @@ -91,7 +91,7 @@ void LZd_flush_data( struct LZ_decoder * const decoder ) const int size = decoder->pos - decoder->stream_pos; if( size > 0 ) { - CRC32_update_buf( &decoder->crc_, decoder->buffer + decoder->stream_pos, size ); + CRC32_update_buf( &decoder->crc, decoder->buffer + decoder->stream_pos, size ); if( decoder->outfd >= 0 && writeblock( decoder->outfd, decoder->buffer + decoder->stream_pos, size ) != size ) { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } @@ -105,31 +105,26 @@ void LZd_flush_data( struct LZ_decoder * const decoder ) bool LZd_verify_trailer( struct LZ_decoder * const decoder, struct Pretty_print * const pp ) { - int i; File_trailer trailer; const int trailer_size = Ft_versioned_size( decoder->member_version ); const long long member_size = Rd_member_position( decoder->range_decoder ) + trailer_size; bool error = false; - for( i = 0; i < trailer_size && !error; ++i ) + int size = Rd_read_data( decoder->range_decoder, trailer, trailer_size ); + if( size < trailer_size ) { - if( !Rd_finished( decoder->range_decoder ) ) - trailer[i] = Rd_get_byte( decoder->range_decoder ); - else + error = true; + if( pp->verbosity >= 0 ) { - error = true; - if( pp->verbosity >= 0 ) - { - Pp_show_msg( pp, 0 ); - fprintf( stderr, "Trailer truncated at trailer position %d;" - " some checks may fail.\n", i ); - } - for( ; i < trailer_size; ++i ) trailer[i] = 0; + Pp_show_msg( pp, 0 ); + fprintf( stderr, "Trailer truncated at trailer position %d;" + " some checks may fail.\n", size ); } + while( size < trailer_size ) trailer[size++] = 0; } if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - if( !Rd_code_is_zero( decoder->range_decoder ) ) + if( decoder->range_decoder->code != 0 ) { error = true; Pp_show_msg( pp, "Range decoder final code is not zero" ); @@ -190,11 +185,9 @@ int LZd_decode_member( struct LZ_decoder * const decoder, State state = 0; Rd_load( decoder->range_decoder ); - while( 
true ) + while( !Rd_finished( decoder->range_decoder ) ) { const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( Rd_finished( decoder->range_decoder ) ) - { LZd_flush_data( decoder ); return 2; } if( Rd_decode_bit( decoder->range_decoder, &decoder->bm_match[state][pos_state] ) == 0 ) { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); @@ -271,7 +264,7 @@ int LZd_decode_member( struct LZ_decoder * const decoder, if( pp->verbosity >= 0 ) { Pp_show_msg( pp, 0 ); - fprintf( stderr, "Unsupported marker code `%d'.\n", len ); + fprintf( stderr, "Unsupported marker code '%d'.\n", len ); } return 4; } @@ -286,4 +279,6 @@ int LZd_decode_member( struct LZ_decoder * const decoder, LZd_copy_block( decoder, rep0, len ); } } + LZd_flush_data( decoder ); + return 2; } diff --git a/decoder.h b/decoder.h index f109c54..6445d1b 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,33 +31,27 @@ struct Range_decoder bool Rd_read_block( struct Range_decoder * const rdec ); -static inline void Rd_init( struct Range_decoder * const rdec, const int ifd ) +static inline bool Rd_init( struct Range_decoder * const rdec, const int ifd ) { rdec->partial_member_pos = 0; rdec->buffer = (uint8_t *)malloc( rd_buffer_size ); - if( !rdec->buffer ) - { - show_error( "Not enough memory. 
Find a machine with more memory.", 0, false ); - cleanup_and_fail( 1 ); - } + if( !rdec->buffer ) return false; rdec->pos = 0; rdec->stream_pos = 0; rdec->code = 0; rdec->range = 0xFFFFFFFFU; rdec->infd = ifd; rdec->at_stream_end = false; + return true; } static inline void Rd_free( struct Range_decoder * const rdec ) - { free( rdec->buffer ); rdec->buffer = 0; } - -static inline bool Rd_code_is_zero( struct Range_decoder * const rdec ) - { return ( rdec->code == 0 ); } + { free( rdec->buffer ); } static inline bool Rd_finished( struct Range_decoder * const rdec ) { return rdec->pos >= rdec->stream_pos && !Rd_read_block( rdec ); } -static inline long long Rd_member_position( struct Range_decoder * const rdec ) +static inline long long Rd_member_position( const struct Range_decoder * const rdec ) { return rdec->partial_member_pos + rdec->pos; } static inline void Rd_reset_member_position( struct Range_decoder * const rdec ) @@ -69,6 +63,20 @@ static inline uint8_t Rd_get_byte( struct Range_decoder * const rdec ) return rdec->buffer[rdec->pos++]; } +static inline int Rd_read_data( struct Range_decoder * const rdec, + uint8_t * const outbuf, const int size ) + { + int rest = size; + while( rest > 0 && !Rd_finished( rdec ) ) + { + const int rd = min( rest, rdec->stream_pos - rdec->pos ); + memcpy( outbuf + size - rest, rdec->buffer + rdec->pos, rd ); + rdec->pos += rd; + rest -= rd; + } + return ( rest > 0 ) ? 
size - rest : size; + } + static inline void Rd_load( struct Range_decoder * const rdec ) { int i; @@ -223,27 +231,27 @@ struct Literal_decoder Bit_model bm_literal[1<<literal_context_bits][0x300]; }; -static inline void Lid_init( struct Literal_decoder * const literal_decoder ) +static inline void Lid_init( struct Literal_decoder * const lidec ) { int i, j; for( i = 0; i < 1<<literal_context_bits; ++i ) for( j = 0; j < 0x300; ++j ) - Bm_init( &literal_decoder->bm_literal[i][j] ); + Bm_init( &lidec->bm_literal[i][j] ); } -static inline int Lid_state( const int prev_byte ) +static inline int Lid_state( const uint8_t prev_byte ) { return ( prev_byte >> ( 8 - literal_context_bits ) ); } -static inline uint8_t Lid_decode( struct Literal_decoder * const literal_decoder, +static inline uint8_t Lid_decode( struct Literal_decoder * const lidec, struct Range_decoder * const rdec, const uint8_t prev_byte ) - { return Rd_decode_tree( rdec, literal_decoder->bm_literal[Lid_state(prev_byte)], 8 ); } + { return Rd_decode_tree( rdec, lidec->bm_literal[Lid_state(prev_byte)], 8 ); } -static inline uint8_t Lid_decode_matched( struct Literal_decoder * const literal_decoder, +static inline uint8_t Lid_decode_matched( struct Literal_decoder * const lidec, struct Range_decoder * const rdec, const uint8_t prev_byte, const uint8_t match_byte ) - { return Rd_decode_matched( rdec, literal_decoder->bm_literal[Lid_state(prev_byte)], match_byte ); } + { return Rd_decode_matched( rdec, lidec->bm_literal[Lid_state(prev_byte)], match_byte ); } struct LZ_decoder @@ -254,7 +262,7 @@ struct LZ_decoder uint8_t * buffer; /* output buffer */ int pos; /* current pos in buffer */ int stream_pos; /* first byte not yet written to file */ - uint32_t crc_; + uint32_t crc; int outfd; /* output file descriptor */ int member_version; @@ -279,14 +287,14 @@ void LZd_flush_data( struct LZ_decoder * const decoder ); bool LZd_verify_trailer( struct LZ_decoder * const decoder, struct Pretty_print * const pp ); -static inline uint8_t LZd_get_prev_byte( struct LZ_decoder * const decoder ) - { - const int i = - ( ( decoder->pos > 0 ) ? 
decoder->pos : decoder->buffer_size ) - 1; - return decoder->buffer[i]; - } +static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder ) + { + const int i = + ( ( decoder->pos > 0 ) ? decoder->pos : decoder->buffer_size ) - 1; + return decoder->buffer[i]; + } -static inline uint8_t LZd_get_byte( struct LZ_decoder * const decoder, +static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder, const int distance ) { int i = decoder->pos - distance - 1; @@ -312,7 +320,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, memcpy( decoder->buffer + decoder->pos, decoder->buffer + i, len ); decoder->pos += len; } - else for( ; len > 0 ; --len ) + else for( ; len > 0; --len ) { decoder->buffer[decoder->pos] = decoder->buffer[i]; if( ++decoder->pos >= decoder->buffer_size ) LZd_flush_data( decoder ); @@ -320,7 +328,7 @@ static inline void LZd_copy_block( struct LZ_decoder * const decoder, } } -static inline void LZd_init( struct LZ_decoder * const decoder, +static inline bool LZd_init( struct LZ_decoder * const decoder, const File_header header, struct Range_decoder * const rdec, const int ofd ) { @@ -329,14 +337,10 @@ static inline void LZd_init( struct LZ_decoder * const decoder, decoder->dictionary_size = Fh_get_dictionary_size( header ); decoder->buffer_size = max( 65536, decoder->dictionary_size ); decoder->buffer = (uint8_t *)malloc( decoder->buffer_size ); - if( !decoder->buffer ) - { - show_error( "Not enough memory. 
Find a machine with more memory.", 0, false ); - cleanup_and_fail( 1 ); - } + if( !decoder->buffer ) return false; decoder->pos = 0; decoder->stream_pos = 0; - decoder->crc_ = 0xFFFFFFFFU; + decoder->crc = 0xFFFFFFFFU; decoder->outfd = ofd; decoder->member_version = Fh_version( header ); @@ -365,15 +369,16 @@ static inline void LZd_init( struct LZ_decoder * const decoder, Led_init( &decoder->rep_match_len_decoder ); Lid_init( &decoder->literal_decoder ); decoder->buffer[decoder->buffer_size-1] = 0; /* prev_byte of first_byte */ + return true; } static inline void LZd_free( struct LZ_decoder * const decoder ) - { free( decoder->buffer ); decoder->buffer = 0; } + { free( decoder->buffer ); } -static inline uint32_t LZd_crc( struct LZ_decoder * const decoder ) - { return decoder->crc_ ^ 0xFFFFFFFFU; } +static inline uint32_t LZd_crc( const struct LZ_decoder * const decoder ) + { return decoder->crc ^ 0xFFFFFFFFU; } -static inline long long LZd_data_position( struct LZ_decoder * const decoder ) +static inline long long LZd_data_position( const struct LZ_decoder * const decoder ) { return decoder->partial_data_pos + decoder->pos; } int LZd_decode_member( struct LZ_decoder * const decoder, diff --git a/doc/clzip.1 b/doc/clzip.1 index 1b2cac8..48401e2 100644 --- a/doc/clzip.1 +++ b/doc/clzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. 
-.TH CLZIP "1" "May 2011" "Clzip 1.2" "User Commands" +.TH CLZIP "1" "February 2012" "Clzip 1.3" "User Commands" .SH NAME Clzip \- reduces the size of files .SH SYNOPSIS @@ -15,7 +15,7 @@ display this help and exit \fB\-V\fR, \fB\-\-version\fR output version information and exit .TP -\fB\-b\fR, \fB\-\-member\-size=\fR +\fB\-b\fR, \fB\-\-member\-size=\fR set member size limit in bytes .TP \fB\-c\fR, \fB\-\-stdout\fR @@ -33,7 +33,7 @@ force recompression of compressed files \fB\-k\fR, \fB\-\-keep\fR keep (don't delete) input files .TP -\fB\-m\fR, \fB\-\-match\-length=\fR +\fB\-m\fR, \fB\-\-match\-length=\fR set match length limit in bytes [36] .TP \fB\-o\fR, \fB\-\-output=\fR @@ -42,10 +42,10 @@ if reading stdin, place the output into \fB\-q\fR, \fB\-\-quiet\fR suppress all messages .TP -\fB\-s\fR, \fB\-\-dictionary\-size=\fR +\fB\-s\fR, \fB\-\-dictionary\-size=\fR set dictionary size limit in bytes [8MiB] .TP -\fB\-S\fR, \fB\-\-volume\-size=\fR +\fB\-S\fR, \fB\-\-volume\-size=\fR set volume size limit in bytes .TP \fB\-t\fR, \fB\-\-test\fR @@ -67,12 +67,16 @@ If no file names are given, clzip compresses or decompresses from standard input to standard output. Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +The bidimensional parameter space of LZMA can't be mapped to a linear +scale optimal for all files. If your files are large, very repetitive, +etc, you may need to use the \fB\-\-match\-length\fR and \fB\-\-dictionary\-size\fR +options directly to achieve optimal performance. .SH "REPORTING BUGS" Report bugs to lzip\-bug@nongnu.org .br Clzip home page: http://www.nongnu.org/lzip/clzip.html .SH COPYRIGHT -Copyright \(co 2011 Antonio Diaz Diaz. +Copyright \(co 2012 Antonio Diaz Diaz. License GPLv3+: GNU GPL version 3 or later .br This is free software: you are free to change and redistribute it. 
diff --git a/doc/clzip.info b/doc/clzip.info index 3551973..e4b9d3e 100644 --- a/doc/clzip.info +++ b/doc/clzip.info @@ -12,7 +12,7 @@ File: clzip.info, Node: Top, Next: Introduction, Up: (dir) Clzip Manual ************ -This manual is for Clzip (version 1.2, 18 May 2011). +This manual is for Clzip (version 1.3, 25 February 2012). * Menu: @@ -25,7 +25,7 @@ This manual is for Clzip (version 1.2, 18 May 2011). * Concept Index:: Index of concepts - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -47,6 +47,9 @@ fully compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ compiler. + If you ever need to recover data from a damaged lzip file, try the +lziprecover program. + Clzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed file has the same modification date, permissions, and, when possible, @@ -64,7 +67,7 @@ two or more compressed files. The result is the concatenation of the corresponding uncompressed files. Integrity testing of concatenated compressed files is also supported. - Clzip can produce multimember files and safely recover, with + Clzip can produce multi-member files and safely recover, with lziprecover, the undamaged members in case of file damage. Clzip can also split the compressed output in volumes of a given size, even when reading from standard input. This allows the direct creation of @@ -185,12 +188,12 @@ The format for running clzip is: `--version' Print the version number of clzip on the standard output and exit. -`-b SIZE' -`--member-size=SIZE' - Produce a multimember file and set the member size limit to SIZE - bytes. Minimum member size limit is 100kB. Small member size may - degrade compression ratio, so use it only when needed. 
The default - is to produce single-member files. +`-b BYTES' +`--member-size=BYTES' + Produce a multi-member file and set the member size limit to BYTES. + Minimum member size limit is 100kB. Small member size may degrade + compression ratio, so use it only when needed. The default is to + produce single-member files. `-c' `--stdout' @@ -205,7 +208,7 @@ The format for running clzip is: `-f' `--force' - Force overwrite of output file. + Force overwrite of output files. `-F' `--recompress' @@ -217,8 +220,8 @@ The format for running clzip is: Keep (don't delete) input files during compression or decompression. -`-m LENGTH' -`--match-length=LENGTH' +`-m BYTES' +`--match-length=BYTES' Set the match length limit in bytes. After a match this long is found, the search is finished. Valid values range from 5 to 273. Larger values usually give better compression ratios but longer @@ -237,26 +240,26 @@ The format for running clzip is: `--quiet' Quiet operation. Suppress all messages. -`-s SIZE' -`--dictionary-size=SIZE' +`-s BYTES' +`--dictionary-size=BYTES' Set the dictionary size limit in bytes. Valid values range from 4KiB to 512MiB. Clzip will use the smallest possible dictionary size for each member without exceeding this limit. Note that dictionary sizes are quantized. If the specified size does not match one of the valid sizes, it will be rounded upwards by adding - up to (SIZE / 16) to it. + up to (BYTES / 16) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. -`-S SIZE' -`--volume-size=SIZE' +`-S BYTES' +`--volume-size=BYTES' Split the compressed output into several volume files with names `original_name00001.lz', `original_name00002.lz', etc, and set the - volume size limit to SIZE bytes. Each volume is a complete, maybe - multimember, lzip file. Minimum volume size limit is 100kB. 
Small + volume size limit to BYTES. Each volume is a complete, maybe + multi-member, lzip file. Minimum volume size limit is 100kB. Small volume size may degrade compression ratio, so use it only when needed. @@ -269,11 +272,12 @@ The format for running clzip is: `-v' `--verbose' - Verbose mode. When compressing, show the compression ratio for - each file processed. When decompressing or testing, further -v's - (up to 4) increase the verbosity level, showing status, dictionary - size, compression ratio, and trailer contents (CRC, data size, - member size). + Verbose mode. + When compressing, show the compression ratio for each file + processed. + When decompressing or testing, further -v's (up to 4) increase the + verbosity level, showing status, dictionary size, compression + ratio, and trailer contents (CRC, data size, member size). `-1 .. -9' Set the compression parameters (dictionary size and match length @@ -376,7 +380,7 @@ additional information before, between, or after them. `Member size (8 bytes)' Total size of the member, including header and trailer. This - facilitates safe recovery of undamaged members from multimember + facilitates safe recovery of undamaged members from multi-member files. @@ -388,64 +392,70 @@ File: clzip.info, Node: Examples, Next: Problems, Prev: File Format, Up: Top WARNING! Even if clzip is bug-free, other causes may result in a corrupt compressed file (bugs in the system libraries, memory errors, etc). -Therefore, if the data you are going to compress is important give the +Therefore, if the data you are going to compress is important, give the `--keep' option to clzip and do not remove the original file until you verify the compressed file with a command like `clzip -cd file.lz | cmp file -'. -Example 1: Replace a regular file with its compressed version file.lz +Example 1: Replace a regular file with its compressed version `file.lz' and show the compression ratio. 
clzip -v file -Example 2: Like example 1 but the created file.lz is multimember with a -member size of 1MiB. The compression ratio is not shown. +Example 2: Like example 1 but the created `file.lz' is multi-member +with a member size of 1MiB. The compression ratio is not shown. clzip -b 1MiB file -Example 3: Restore a regular file from its compressed version file.lz. -If the operation is successful, file.lz is removed. +Example 3: Restore a regular file from its compressed version +`file.lz'. If the operation is successful, `file.lz' is removed. clzip -d file.lz -Example 4: Verify the integrity of the compressed file file.lz and show -status. +Example 4: Verify the integrity of the compressed file `file.lz' and +show status. clzip -tv file.lz Example 5: Compress a whole floppy in /dev/fd0 and send the output to -file.lz. +`file.lz'. clzip -c /dev/fd0 > file.lz -Example 6: Decompress file.lz partially until 10KiB of decompressed data -are produced. +Example 6: Decompress `file.lz' partially until 10KiB of decompressed +data are produced. clzip -cd file.lz | dd bs=1024 count=10 -Example 7: Create a multivolume compressed tar archive with a volume +Example 7: Decompress `file.lz' partially from decompressed byte 10000 +to decompressed byte 15000 (5000 bytes are produced). + + clzip -cd file.lz | dd bs=1000 skip=10 count=5 + + +Example 8: Create a multivolume compressed tar archive with a volume size of 1440KiB. tar -c some_directory | clzip -S 1440KiB -o volume_name -Example 8: Extract a multivolume compressed tar archive. +Example 9: Extract a multivolume compressed tar archive. clzip -cd volume_name*.lz | tar -xf - -Example 9: Create a multivolume compressed backup of a big database file -with a volume size of 650MB, where each volume is a multimember file -with a member size of 32MiB. +Example 10: Create a multivolume compressed backup of a big database +file with a volume size of 650MB, where each volume is a multi-member +file with a member size of 32MiB. 
- clzip -b 32MiB -S 650MB big_database + clzip -b 32MiB -S 650MB big_db  File: clzip.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top @@ -486,12 +496,17 @@ Concept Index  Tag Table: Node: Top226 -Node: Introduction903 -Node: Algorithm4480 -Node: Invoking Clzip7004 -Node: File Format12275 -Node: Examples14269 -Node: Problems16038 -Node: Concept Index16564 +Node: Introduction914 +Node: Algorithm4584 +Node: Invoking Clzip7108 +Node: File Format12380 +Node: Examples14375 +Node: Problems16336 +Node: Concept Index16862  End Tag Table + + +Local Variables: +coding: iso-8859-15 +End: diff --git a/doc/clzip.texinfo b/doc/clzip.texinfo index 85396ab..284ed3f 100644 --- a/doc/clzip.texinfo +++ b/doc/clzip.texinfo @@ -1,12 +1,13 @@ \input texinfo @c -*-texinfo-*- @c %**start of header @setfilename clzip.info +@documentencoding ISO-8859-15 @settitle Clzip Manual @finalout @c %**end of header -@set UPDATED 18 May 2011 -@set VERSION 1.2 +@set UPDATED 25 February 2012 +@set VERSION 1.3 @dircategory Data Compression @direntry @@ -44,7 +45,7 @@ This manual is for Clzip (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2010, 2011 Antonio Diaz Diaz. +Copyright @copyright{} 2010, 2011, 2012 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -65,6 +66,9 @@ compatible with lzip-1.4 or newer. Clzip is in fact a C language version of lzip, intended for embedded devices or systems lacking a C++ compiler. +If you ever need to recover data from a damaged lzip file, try the +lziprecover program. + Clzip replaces every file given in the command line with a compressed version of itself, with the name "original_name.lz". Each compressed file has the same modification date, permissions, and, when possible, @@ -82,11 +86,11 @@ or more compressed files. The result is the concatenation of the corresponding uncompressed files. 
Integrity testing of concatenated compressed files is also supported. -Clzip can produce multimember files and safely recover, with lziprecover, -the undamaged members in case of file damage. Clzip can also split the -compressed output in volumes of a given size, even when reading from -standard input. This allows the direct creation of multivolume -compressed tar archives. +Clzip can produce multi-member files and safely recover, with +lziprecover, the undamaged members in case of file damage. Clzip can +also split the compressed output in volumes of a given size, even when +reading from standard input. This allows the direct creation of +multivolume compressed tar archives. The amount of memory required for compression is about 5 MiB plus 1 or 2 times the dictionary size limit (1 if input file size is less than @@ -210,10 +214,10 @@ Print an informative help message describing the options and exit. @itemx --version Print the version number of clzip on the standard output and exit. -@item -b @var{size} -@itemx --member-size=@var{size} -Produce a multimember file and set the member size limit to @var{size} -bytes. Minimum member size limit is 100kB. Small member size may degrade +@item -b @var{bytes} +@itemx --member-size=@var{bytes} +Produce a multi-member file and set the member size limit to @var{bytes}. +Minimum member size limit is 100kB. Small member size may degrade compression ratio, so use it only when needed. The default is to produce single-member files. @@ -229,7 +233,7 @@ Decompress. @item -f @itemx --force -Force overwrite of output file. +Force overwrite of output files. @item -F @itemx --recompress @@ -240,8 +244,8 @@ Force recompression of files whose name already has the @samp{.lz} or @itemx --keep Keep (don't delete) input files during compression or decompression. -@item -m @var{length} -@itemx --match-length=@var{length} +@item -m @var{bytes} +@itemx --match-length=@var{bytes} Set the match length limit in bytes. 
After a match this long is found, the search is finished. Valid values range from 5 to 273. Larger values usually give better compression ratios but longer compression times. @@ -259,25 +263,25 @@ compressing and splitting the output in volumes. @itemx --quiet Quiet operation. Suppress all messages. -@item -s @var{size} -@itemx --dictionary-size=@var{size} +@item -s @var{bytes} +@itemx --dictionary-size=@var{bytes} Set the dictionary size limit in bytes. Valid values range from 4KiB to 512MiB. Clzip will use the smallest possible dictionary size for each member without exceeding this limit. Note that dictionary sizes are quantized. If the specified size does not match one of the valid sizes, -it will be rounded upwards by adding up to (@var{size} / 16) to it. +it will be rounded upwards by adding up to (@var{bytes} / 16) to it. For maximum compression you should use a dictionary size limit as large as possible, but keep in mind that the decompression memory requirement is affected at compression time by the choice of dictionary size limit. -@item -S @var{size} -@itemx --volume-size=@var{size} +@item -S @var{bytes} +@itemx --volume-size=@var{bytes} Split the compressed output into several volume files with names @samp{original_name00001.lz}, @samp{original_name00002.lz}, etc, and set -the volume size limit to @var{size} bytes. Each volume is a complete, -maybe multimember, lzip file. Minimum volume size limit is 100kB. Small -volume size may degrade compression ratio, so use it only when needed. +the volume size limit to @var{bytes}. Each volume is a complete, maybe +multi-member, lzip file. Minimum volume size limit is 100kB. Small volume +size may degrade compression ratio, so use it only when needed. @item -t @itemx --test @@ -287,8 +291,8 @@ Use it together with @samp{-v} to see information about the file. @item -v @itemx --verbose -Verbose mode. -When compressing, show the compression ratio for each file processed. 
+Verbose mode.@* +When compressing, show the compression ratio for each file processed.@* When decompressing or testing, further -v's (up to 4) increase the verbosity level, showing status, dictionary size, compression ratio, and trailer contents (CRC, data size, member size). @@ -404,7 +408,7 @@ Size of the uncompressed original data. @item Member size (8 bytes) Total size of the member, including header and trailer. This facilitates -safe recovery of undamaged members from multimember files. +safe recovery of undamaged members from multi-member files. @end table @@ -415,15 +419,15 @@ safe recovery of undamaged members from multimember files. WARNING! Even if clzip is bug-free, other causes may result in a corrupt compressed file (bugs in the system libraries, memory errors, etc). -Therefore, if the data you are going to compress is important give the +Therefore, if the data you are going to compress is important, give the @samp{--keep} option to clzip and do not remove the original file until -you verify the compressed file with a command like @w{@samp{clzip -cd -file.lz | cmp file -}}. +you verify the compressed file with a command like +@w{@samp{clzip -cd file.lz | cmp file -}}. @sp 1 @noindent -Example 1: Replace a regular file with its compressed version file.lz -and show the compression ratio. +Example 1: Replace a regular file with its compressed version +@samp{file.lz} and show the compression ratio. @example clzip -v file @@ -431,8 +435,8 @@ clzip -v file @sp 1 @noindent -Example 2: Like example 1 but the created file.lz is multimember with a -member size of 1MiB. The compression ratio is not shown. +Example 2: Like example 1 but the created @samp{file.lz} is multi-member +with a member size of 1MiB. The compression ratio is not shown. @example clzip -b 1MiB file @@ -440,8 +444,9 @@ clzip -b 1MiB file @sp 1 @noindent -Example 3: Restore a regular file from its compressed version file.lz. -If the operation is successful, file.lz is removed. 
+Example 3: Restore a regular file from its compressed version +@samp{file.lz}. If the operation is successful, @samp{file.lz} is +removed. @example clzip -d file.lz @@ -449,8 +454,8 @@ clzip -d file.lz @sp 1 @noindent -Example 4: Verify the integrity of the compressed file file.lz and show -status. +Example 4: Verify the integrity of the compressed file @samp{file.lz} +and show status. @example clzip -tv file.lz @@ -459,7 +464,7 @@ clzip -tv file.lz @sp 1 @noindent Example 5: Compress a whole floppy in /dev/fd0 and send the output to -file.lz. +@samp{file.lz}. @example clzip -c /dev/fd0 > file.lz @@ -467,8 +472,8 @@ clzip -c /dev/fd0 > file.lz @sp 1 @noindent -Example 6: Decompress file.lz partially until 10KiB of decompressed data -are produced. +Example 6: Decompress @samp{file.lz} partially until 10KiB of +decompressed data are produced. @example clzip -cd file.lz | dd bs=1024 count=10 @@ -476,7 +481,16 @@ clzip -cd file.lz | dd bs=1024 count=10 @sp 1 @noindent -Example 7: Create a multivolume compressed tar archive with a volume +Example 7: Decompress @samp{file.lz} partially from decompressed byte +10000 to decompressed byte 15000 (5000 bytes are produced). + +@example +clzip -cd file.lz | dd bs=1000 skip=10 count=5 +@end example + +@sp 1 +@noindent +Example 8: Create a multivolume compressed tar archive with a volume size of 1440KiB. @example @@ -485,7 +499,7 @@ tar -c some_directory | clzip -S 1440KiB -o volume_name @sp 1 @noindent -Example 8: Extract a multivolume compressed tar archive. +Example 9: Extract a multivolume compressed tar archive. @example clzip -cd volume_name*.lz | tar -xf - @@ -493,12 +507,12 @@ clzip -cd volume_name*.lz | tar -xf - @sp 1 @noindent -Example 9: Create a multivolume compressed backup of a big database file -with a volume size of 650MB, where each volume is a multimember file -with a member size of 32MiB. 
+Example 10: Create a multivolume compressed backup of a big database +file with a volume size of 650MB, where each volume is a multi-member +file with a member size of 32MiB. @example -clzip -b 32MiB -S 650MB big_database +clzip -b 32MiB -S 650MB big_db @end example diff --git a/encoder.c b/encoder.c index 20180d7..84e2586 100644 --- a/encoder.c +++ b/encoder.c @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,9 +19,9 @@ #include #include +#include #include #include -#include #include "clzip.h" #include "encoder.h" @@ -40,65 +40,75 @@ bool Mf_read_block( struct Matchfinder * const mf ) mf->stream_pos += rd; if( rd != size && errno ) { show_error( "Read error", errno, false ); cleanup_and_fail( 1 ); } - mf->at_stream_end = ( rd < size ); + if( rd < size ) + { mf->at_stream_end = true; mf->pos_limit = mf->buffer_size; } } return mf->pos < mf->stream_pos; } -void Mf_init( struct Matchfinder * const mf, +void Mf_normalize_pos( struct Matchfinder * const mf ) + { + if( mf->pos > mf->stream_pos ) + internal_error( "pos > stream_pos in Mf_normalize_pos" ); + if( !mf->at_stream_end ) + { + int i; + const int offset = mf->pos - mf->dictionary_size - before_size; + const int size = mf->stream_pos - offset; + memmove( mf->buffer, mf->buffer + offset, size ); + mf->partial_data_pos += offset; + mf->pos -= offset; + mf->stream_pos -= offset; + for( i = 0; i < num_prev_positions; ++i ) + if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset; + for( i = 0; i < 2 * mf->dictionary_size; ++i ) + if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset; + Mf_read_block( mf ); + } + } + + +bool Mf_init( struct Matchfinder * const mf, const int dict_size, const int len_limit, const int ifd ) { const int 
buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; int i; mf->partial_data_pos = 0; - mf->prev_positions = - (int32_t *)malloc( num_prev_positions * sizeof (int32_t) ); - if( !mf->prev_positions ) - { - show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } + mf->prev_positions = (int32_t *)malloc( num_prev_positions * sizeof (int32_t) ); + if( !mf->prev_positions ) return false; mf->pos = 0; mf->cyclic_pos = 0; mf->stream_pos = 0; - mf->match_len_limit_ = len_limit; + mf->match_len_limit = len_limit; mf->cycles = ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256; mf->infd = ifd; mf->at_stream_end = false; + for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1; mf->buffer_size = max( 65536, dict_size ); mf->buffer = (uint8_t *)malloc( mf->buffer_size ); - if( !mf->buffer ) - { - show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } + if( !mf->buffer ) { free( mf->prev_positions ); return false; } if( Mf_read_block( mf ) && !mf->at_stream_end && mf->buffer_size < buffer_size_limit ) { mf->buffer_size = buffer_size_limit; - mf->buffer = (uint8_t *)realloc( mf->buffer, mf->buffer_size ); - if( !mf->buffer ) - { - show_error( "Not enough memory. 
Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } + uint8_t * const tmp = (uint8_t *)realloc( mf->buffer, mf->buffer_size ); + if( !tmp ) + { free( mf->buffer ); free( mf->prev_positions ); return false; } + mf->buffer = tmp; Mf_read_block( mf ); } if( mf->at_stream_end && mf->stream_pos < dict_size ) - mf->dictionary_size_ = max( min_dictionary_size, mf->stream_pos ); - else mf->dictionary_size_ = dict_size; + mf->dictionary_size = max( min_dictionary_size, mf->stream_pos ); + else mf->dictionary_size = dict_size; mf->pos_limit = mf->buffer_size; if( !mf->at_stream_end ) mf->pos_limit -= after_size; mf->prev_pos_tree = - (int32_t *)malloc( 2 * mf->dictionary_size_ * sizeof (int32_t) ); + (int32_t *)malloc( 2 * mf->dictionary_size * sizeof (int32_t) ); if( !mf->prev_pos_tree ) - { - show_error( "Not enough memory. Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } - for( i = 0; i < num_prev_positions; ++i ) mf->prev_positions[i] = -1; + { free( mf->buffer ); free( mf->prev_positions ); return false; } + return true; } @@ -116,32 +126,6 @@ void Mf_reset( struct Matchfinder * const mf ) } -void Mf_move_pos( struct Matchfinder * const mf ) - { - if( ++mf->cyclic_pos >= mf->dictionary_size_ ) mf->cyclic_pos = 0; - if( ++mf->pos >= mf->pos_limit ) - { - if( mf->pos > mf->stream_pos ) - internal_error( "pos > stream_pos in Mf_move_pos" ); - if( !mf->at_stream_end ) - { - int i; - const int offset = mf->pos - mf->dictionary_size_ - before_size; - const int size = mf->stream_pos - offset; - memmove( mf->buffer, mf->buffer + offset, size ); - mf->partial_data_pos += offset; - mf->pos -= offset; - mf->stream_pos -= offset; - for( i = 0; i < num_prev_positions; ++i ) - if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset; - for( i = 0; i < 2 * mf->dictionary_size_; ++i ) - if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset; - Mf_read_block( mf ); - } - } - } - - int Mf_longest_match_len( struct Matchfinder 
* const mf, int * const distances ) { int32_t * ptr0 = mf->prev_pos_tree + ( mf->cyclic_pos << 1 ); @@ -150,11 +134,11 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ) const uint8_t * newdata; int len = 0, len0 = 0, len1 = 0; int maxlen = min_match_len - 1; - const int min_pos = (mf->pos >= mf->dictionary_size_) ? - (mf->pos - mf->dictionary_size_ + 1) : 0; + const int min_pos = (mf->pos >= mf->dictionary_size) ? + (mf->pos - mf->dictionary_size + 1) : 0; const uint8_t * const data = mf->buffer + mf->pos; int count, delta, key2, key3, key4, newpos, tmp; - int len_limit = mf->match_len_limit_; + int len_limit = mf->match_len_limit; if( len_limit > Mf_available_bytes( mf ) ) { @@ -187,7 +171,6 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ) newpos = mf->prev_positions[key4]; mf->prev_positions[key4] = mf->pos; - for( count = mf->cycles; ; ) { if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; } @@ -199,7 +182,7 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ) newptr = mf->prev_pos_tree + ( ( mf->cyclic_pos - delta + - ( ( mf->cyclic_pos >= delta ) ? 0 : mf->dictionary_size_ ) ) << 1 ); + ( ( mf->cyclic_pos >= delta ) ? 
0 : mf->dictionary_size ) ) << 1 ); if( len < len_limit ) { @@ -234,42 +217,44 @@ int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ) } -void Re_flush_data( struct Range_encoder * const range_encoder ) +void Re_flush_data( struct Range_encoder * const renc ) { - if( range_encoder->pos > 0 ) + if( renc->pos > 0 ) { - if( range_encoder->outfd >= 0 && - writeblock( range_encoder->outfd, range_encoder->buffer, - range_encoder->pos ) != range_encoder->pos ) + if( renc->outfd >= 0 && + writeblock( renc->outfd, renc->buffer, + renc->pos ) != renc->pos ) { show_error( "Write error", errno, false ); cleanup_and_fail( 1 ); } - range_encoder->partial_member_pos += range_encoder->pos; - range_encoder->pos = 0; + renc->partial_member_pos += renc->pos; + renc->pos = 0; } } void Lee_encode( struct Len_encoder * const len_encoder, - struct Range_encoder * const range_encoder, + struct Range_encoder * const renc, int symbol, const int pos_state ) { symbol -= min_match_len; if( symbol < len_low_symbols ) { - Re_encode_bit( range_encoder, &len_encoder->choice1, 0 ); - Re_encode_tree( range_encoder, len_encoder->bm_low[pos_state], symbol, len_low_bits ); + Re_encode_bit( renc, &len_encoder->choice1, 0 ); + Re_encode_tree( renc, len_encoder->bm_low[pos_state], symbol, len_low_bits ); } else { - Re_encode_bit( range_encoder, &len_encoder->choice1, 1 ); + Re_encode_bit( renc, &len_encoder->choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - Re_encode_bit( range_encoder, &len_encoder->choice2, 0 ); - Re_encode_tree( range_encoder, len_encoder->bm_mid[pos_state], symbol - len_low_symbols, len_mid_bits ); + Re_encode_bit( renc, &len_encoder->choice2, 0 ); + Re_encode_tree( renc, len_encoder->bm_mid[pos_state], + symbol - len_low_symbols, len_mid_bits ); } else { - Re_encode_bit( range_encoder, &len_encoder->choice2, 1 ); - Re_encode_tree( range_encoder, len_encoder->bm_high, symbol - len_low_symbols - len_mid_symbols, len_high_bits ); + 
Re_encode_bit( renc, &len_encoder->choice2, 1 ); + Re_encode_tree( renc, len_encoder->bm_high, + symbol - len_low_symbols - len_mid_symbols, len_high_bits ); } } if( --len_encoder->counters[pos_state] <= 0 ) @@ -277,11 +262,32 @@ void Lee_encode( struct Len_encoder * const len_encoder, } + /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ +void LZe_full_flush( struct LZ_encoder * const encoder, const State state ) + { + int i; + const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + File_trailer trailer; + Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); + Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); + LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); + Re_flush( &encoder->range_encoder ); + Ft_set_data_crc( trailer, LZe_crc( encoder ) ); + Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); + Ft_set_member_size( trailer, Re_member_position( &encoder->range_encoder ) + + Ft_size ); + for( i = 0; i < Ft_size; ++i ) + Re_put_byte( &encoder->range_encoder, trailer[i] ); + Re_flush_data( &encoder->range_encoder ); + } + + void LZe_fill_align_prices( struct LZ_encoder * const encoder ) { int i; for( i = 0; i < dis_align_size; ++i ) - encoder->align_prices[i] = price_symbol_reversed( encoder->bm_align, i, dis_align_bits ); + encoder->align_prices[i] = + price_symbol_reversed( encoder->bm_align, i, dis_align_bits ); encoder->align_price_count = dis_align_size; } @@ -295,7 +301,8 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) const int direct_bits = ( dis_slot >> 1 ) - 1; const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; const int price = - price_symbol_reversed( encoder->bm_dis + base - dis_slot, dis - base, direct_bits ); + price_symbol_reversed( encoder->bm_dis + base - dis_slot, + dis - base, direct_bits ); for( dis_state = 0; dis_state < max_dis_states; ++dis_state ) 
encoder->dis_prices[dis_state][dis] = price; } @@ -320,17 +327,63 @@ void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) } +bool LZe_init( struct LZ_encoder * const encoder, + struct Matchfinder * const mf, + const File_header header, const int outfd ) + { + int i, j; + encoder->longest_match_found = 0; + encoder->crc = 0xFFFFFFFFU; + + for( i = 0; i < states; ++i ) + { + for( j = 0; j < pos_states; ++j ) + { + Bm_init( &encoder->bm_match[i][j] ); + Bm_init( &encoder->bm_len[i][j] ); + } + Bm_init( &encoder->bm_rep[i] ); + Bm_init( &encoder->bm_rep0[i] ); + Bm_init( &encoder->bm_rep1[i] ); + Bm_init( &encoder->bm_rep2[i] ); + } + for( i = 0; i < max_dis_states; ++i ) + for( j = 0; j < 1<<dis_slot_bits; ++j ) + Bm_init( &encoder->bm_dis_slot[i][j] ); + for( i = 0; i < modeled_distances-end_dis_model+1; ++i ) + Bm_init( &encoder->bm_dis[i] ); + for( i = 0; i < dis_align_size; ++i ) + Bm_init( &encoder->bm_align[i] ); + + encoder->matchfinder = mf; + if( !Re_init( &encoder->range_encoder, outfd ) ) return false; + Lee_init( &encoder->len_encoder, encoder->matchfinder->match_len_limit ); + Lee_init( &encoder->rep_match_len_encoder, encoder->matchfinder->match_len_limit ); + Lie_init( &encoder->literal_encoder ); + encoder->num_dis_slots = + 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); + + LZe_fill_align_prices( encoder ); + + for( i = 0; i < Fh_size; ++i ) + Re_put_byte( &encoder->range_encoder, header[i] ); + return true; + } + + /* Return value == number of bytes advanced (ahead). trials[0]..trials[retval-1] contain the steps to encode. - ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. */ + ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. 
+*/ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, - const int reps[num_rep_distances], const State state ) + const int reps[num_rep_distances], + const State state ) { int main_len, i, rep, cur = 0, num_trials; int replens[num_rep_distances]; int rep_index = 0; - if( encoder->longest_match_found > 0 ) /* from previous call */ + if( encoder->longest_match_found > 0 ) /* from previous call */ { main_len = encoder->longest_match_found; encoder->longest_match_found = 0; @@ -342,20 +395,21 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, replens[i] = Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len ); if( replens[i] > replens[rep_index] ) rep_index = i; } - if( replens[rep_index] >= Mf_match_len_limit( encoder->matchfinder ) ) + if( replens[rep_index] >= encoder->matchfinder->match_len_limit ) { encoder->trials[0].dis = rep_index; encoder->trials[0].price = replens[rep_index]; - LZe_move_pos( encoder, replens[rep_index], true ); + LZe_move_pos( encoder, replens[rep_index] ); return replens[rep_index]; } - if( main_len >= Mf_match_len_limit( encoder->matchfinder ) ) + if( main_len >= encoder->matchfinder->match_len_limit ) { - encoder->trials[0].dis = encoder->match_distances[Mf_match_len_limit( encoder->matchfinder )] + - num_rep_distances; + encoder->trials[0].dis = + encoder->match_distances[encoder->matchfinder->match_len_limit] + + num_rep_distances; encoder->trials[0].price = main_len; - LZe_move_pos( encoder, main_len, true ); + LZe_move_pos( encoder, main_len ); return main_len; } @@ -368,17 +422,21 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 ); encoder->trials[0].state = state; - for( i = 0; i < num_rep_distances; ++i ) encoder->trials[0].reps[i] = reps[i]; + for( i = 0; i < num_rep_distances; ++i ) + encoder->trials[0].reps[i] = reps[i]; encoder->trials[1].dis = -1; encoder->trials[1].prev_index = 0; 
encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] ); if( St_is_char( state ) ) - encoder->trials[1].price += Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte ); + encoder->trials[1].price += + Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte ); else - encoder->trials[1].price += Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte ); + encoder->trials[1].price += + Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte ); if( match_byte == cur_byte ) - Tr_update( &encoder->trials[1], 0, 0, rep_match_price + LZe_price_rep_len1( encoder, state, pos_state ) ); + Tr_update( &encoder->trials[1], 0, 0, rep_match_price + + LZe_price_rep_len1( encoder, state, pos_state ) ); if( main_len < min_match_len ) { @@ -404,7 +462,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, encoder->trials[len].dis = encoder->match_distances[len] + num_rep_distances; encoder->trials[len].prev_index = 0; encoder->trials[len].price = normal_match_price + - LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state ); + LZe_price_pair( encoder, encoder->match_distances[len], len, pos_state ); } } @@ -435,7 +493,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, return cur; } newlen = LZe_read_match_distances( encoder ); - if( newlen >= Mf_match_len_limit( encoder->matchfinder ) ) + if( newlen >= encoder->matchfinder->match_len_limit ) { encoder->longest_match_found = newlen; LZe_backward( encoder, cur ); @@ -449,6 +507,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, for( i = 0; i < num_rep_distances; ++i ) cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; + if( prev_index == cur - 1 ) { if( cur_trial->dis == 0 ) St_set_short_rep( &cur_trial->state ); @@ -466,11 +525,14 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, cur_byte = Mf_peek( encoder->matchfinder, 0 ); match_byte = Mf_peek( encoder->matchfinder, 
-cur_trial->reps[0]-1 ); - next_price = cur_trial->price + price0( encoder->bm_match[cur_trial->state][pos_state] ); + next_price = cur_trial->price + + price0( encoder->bm_match[cur_trial->state][pos_state] ); if( St_is_char( cur_trial->state ) ) - next_price += Lie_price_symbol( &encoder->literal_encoder, prev_byte, cur_byte ); + next_price += Lie_price_symbol( &encoder->literal_encoder, + prev_byte, cur_byte ); else - next_price += Lie_price_matched( &encoder->literal_encoder, prev_byte, cur_byte, match_byte ); + next_price += Lie_price_matched( &encoder->literal_encoder, + prev_byte, cur_byte, match_byte ); Mf_move_pos( encoder->matchfinder ); next_trial = &encoder->trials[cur+1]; @@ -486,7 +548,7 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, len_limit = min( min( max_num_trials - 1 - cur, Mf_available_bytes( encoder->matchfinder ) ), - Mf_match_len_limit( encoder->matchfinder ) ); + encoder->matchfinder->match_len_limit ); if( len_limit < min_match_len ) continue; for( rep = 0; rep < num_rep_distances; ++rep ) @@ -545,82 +607,21 @@ int LZe_sequence_optimizer( struct LZ_encoder * const encoder, } - /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ -void LZe_full_flush( struct LZ_encoder * const encoder, const State state ) - { - int i; - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - File_trailer trailer; - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); - LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); - Re_flush( &encoder->range_encoder ); - Ft_set_data_crc( trailer, LZe_crc( encoder ) ); - Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); - Ft_set_member_size( trailer, LZe_member_position( encoder ) + Ft_size ); - for( i = 0; i < Ft_size; ++i ) - Re_put_byte( &encoder->range_encoder, trailer[i] ); - Re_flush_data( &encoder->range_encoder 
); - } - - -void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, - const File_header header, const int outfd ) - { - int i, j; - encoder->longest_match_found = 0; - encoder->crc_ = 0xFFFFFFFFU; - - for( i = 0; i < states; ++i ) - { - for( j = 0; j < pos_states; ++j ) - { - Bm_init( &encoder->bm_match[i][j] ); - Bm_init( &encoder->bm_len[i][j] ); - } - Bm_init( &encoder->bm_rep[i] ); - Bm_init( &encoder->bm_rep0[i] ); - Bm_init( &encoder->bm_rep1[i] ); - Bm_init( &encoder->bm_rep2[i] ); - } - for( i = 0; i < max_dis_states; ++i ) - for( j = 0; j < 1<bm_dis_slot[i][j] ); - for( i = 0; i < modeled_distances-end_dis_model+1; ++i ) - Bm_init( &encoder->bm_dis[i] ); - for( i = 0; i < dis_align_size; ++i ) - Bm_init( &encoder->bm_align[i] ); - - encoder->matchfinder = mf; - Re_init( &encoder->range_encoder, outfd ); - Lee_init( &encoder->len_encoder, Mf_match_len_limit( encoder->matchfinder ) ), - Lee_init( &encoder->rep_match_len_encoder, Mf_match_len_limit( encoder->matchfinder ) ), - Lie_init( &encoder->literal_encoder ); - encoder->num_dis_slots = 2 * real_bits( Mf_dictionary_size( encoder->matchfinder ) - 1 ); - - LZe_fill_align_prices( encoder ); - - for( i = 0; i < Fh_size; ++i ) - Re_put_byte( &encoder->range_encoder, header[i] ); - } - - bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size ) { const long long member_size_limit = member_size - Ft_size - max_marker_size; const int fill_count = - ( Mf_match_len_limit( encoder->matchfinder ) > 12 ) ? 512 : 2048; + ( encoder->matchfinder->match_len_limit > 12 ) ? 
512 : 2048; int fill_counter = 0; - int ahead; - int i; + int ahead, i; int rep_distances[num_rep_distances]; State state = 0; for( i = 0; i < num_rep_distances; ++i ) rep_distances[i] = 0; if( Mf_data_position( encoder->matchfinder ) != 0 || - LZe_member_position( encoder ) != Fh_size ) + Re_member_position( &encoder->range_encoder ) != Fh_size ) return false; /* can be called only once */ if( !Mf_finished( encoder->matchfinder ) ) /* encode first byte */ @@ -629,46 +630,50 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][0], 0 ); Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte ); - CRC32_update_byte( &encoder->crc_, cur_byte ); - LZe_move_pos( encoder, 1, false ); + CRC32_update_byte( &encoder->crc, cur_byte ); + Mf_longest_match_len( encoder->matchfinder, 0 ); + Mf_move_pos( encoder->matchfinder ); } - while( true ) + while( !Mf_finished( encoder->matchfinder ) ) { - if( Mf_finished( encoder->matchfinder ) ) - { LZe_full_flush( encoder, state ); return true; } if( fill_counter <= 0 ) { LZe_fill_distance_prices( encoder ); fill_counter = fill_count; } ahead = LZe_sequence_optimizer( encoder, rep_distances, state ); - if( ahead <= 0 ) return false; + if( ahead <= 0 ) return false; /* can't happen */ fill_counter -= ahead; for( i = 0; ; ) { - const int pos_state = ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask; + const int pos_state = + ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask; const int dis = encoder->trials[i].dis; const int len = encoder->trials[i].price; bool bit = ( dis < 0 && len == 1 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], !bit ); - if( bit ) /* literal byte */ + Re_encode_bit( &encoder->range_encoder, + &encoder->bm_match[state][pos_state], !bit ); + if( bit ) /* literal byte */ { const 
uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 ); const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead ); - CRC32_update_byte( &encoder->crc_, cur_byte ); + CRC32_update_byte( &encoder->crc, cur_byte ); if( St_is_char( state ) ) - Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte ); + Lie_encode( &encoder->literal_encoder, &encoder->range_encoder, + prev_byte, cur_byte ); else { - const uint8_t match_byte = Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 ); - Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, prev_byte, cur_byte, match_byte ); + const uint8_t match_byte = + Mf_peek( encoder->matchfinder, -ahead-rep_distances[0]-1 ); + Lie_encode_matched( &encoder->literal_encoder, &encoder->range_encoder, + prev_byte, cur_byte, match_byte ); } St_set_char( &state ); } else /* match or repeated match */ { - CRC32_update_buf( &encoder->crc_, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len ); + CRC32_update_buf( &encoder->crc, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len ); LZe_mtf_reps( dis, rep_distances ); bit = ( dis < num_rep_distances ); Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], bit ); @@ -698,7 +703,7 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, } } ahead -= len; i += len; - if( LZe_member_position( encoder ) >= member_size_limit ) + if( Re_member_position( &encoder->range_encoder ) >= member_size_limit ) { if( !Mf_dec_pos( encoder->matchfinder, ahead ) ) return false; LZe_full_flush( encoder, state ); @@ -707,4 +712,6 @@ bool LZe_encode_member( struct LZ_encoder * const encoder, if( ahead <= 0 ) break; } } + LZe_full_flush( encoder, state ); + return true; } diff --git a/encoder.h b/encoder.h index e7a6481..13df958 100644 --- a/encoder.h +++ b/encoder.h @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. 
+ Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -142,7 +142,7 @@ enum { /* bytes to keep in buffer before dictionary */ num_prev_positions3 = 1 << 18, num_prev_positions2 = 1 << 16, num_prev_positions = num_prev_positions4 + num_prev_positions3 + - num_prev_positions2 }; + num_prev_positions2 }; struct Matchfinder { @@ -150,43 +150,44 @@ struct Matchfinder uint8_t * buffer; /* input buffer */ int32_t * prev_positions; /* last seen position of key */ int32_t * prev_pos_tree; - int dictionary_size_; /* bytes to keep in buffer before pos */ + int dictionary_size; /* bytes to keep in buffer before pos */ int buffer_size; int pos; /* current pos in buffer */ int cyclic_pos; /* current pos in dictionary */ int stream_pos; /* first byte not yet read from file */ int pos_limit; /* when reached, a new block must be read */ - int match_len_limit_; + int match_len_limit; int cycles; int infd; /* input file descriptor */ bool at_stream_end; /* stream_pos shows real end of file */ }; bool Mf_read_block( struct Matchfinder * const mf ); +void Mf_normalize_pos( struct Matchfinder * const mf ); -void Mf_init( struct Matchfinder * const mf, +bool Mf_init( struct Matchfinder * const mf, const int dict_size, const int len_limit, const int ifd ); static inline void Mf_free( struct Matchfinder * const mf ) { free( mf->prev_pos_tree ); mf->prev_pos_tree = 0; - free( mf->prev_positions ); mf->prev_positions = 0; free( mf->buffer ); mf->buffer = 0; + free( mf->prev_positions ); mf->prev_positions = 0; } -static inline uint8_t Mf_peek( struct Matchfinder * const mf, const int i ) +static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i ) { return mf->buffer[mf->pos+i]; } -static inline int Mf_available_bytes( struct Matchfinder * const mf ) + +static inline int Mf_available_bytes( const struct Matchfinder * const mf ) { 
return mf->stream_pos - mf->pos; } -static inline long long Mf_data_position( struct Matchfinder * const mf ) + +static inline long long Mf_data_position( const struct Matchfinder * const mf ) { return mf->partial_data_pos + mf->pos; } -static inline int Mf_dictionary_size( struct Matchfinder * const mf ) - { return mf->dictionary_size_; } -static inline bool Mf_finished( struct Matchfinder * const mf ) + +static inline bool Mf_finished( const struct Matchfinder * const mf ) { return mf->at_stream_end && mf->pos >= mf->stream_pos; } -static inline int Mf_match_len_limit( struct Matchfinder * const mf ) - { return mf->match_len_limit_; } -static inline const uint8_t * Mf_ptr_to_current_pos( struct Matchfinder * const mf ) + +static inline const uint8_t * Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) { return mf->buffer + mf->pos; } static inline bool Mf_dec_pos( struct Matchfinder * const mf, @@ -195,25 +196,30 @@ static inline bool Mf_dec_pos( struct Matchfinder * const mf, if( ahead < 0 || mf->pos < ahead ) return false; mf->pos -= ahead; mf->cyclic_pos -= ahead; - if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size_; + if( mf->cyclic_pos < 0 ) mf->cyclic_pos += mf->dictionary_size; return true; } -static inline int Mf_true_match_len( struct Matchfinder * const mf, +static inline int Mf_true_match_len( const struct Matchfinder * const mf, const int index, const int distance, int len_limit ) { - const uint8_t * const data = mf->buffer + mf->pos + index - distance; + const uint8_t * const data = mf->buffer + mf->pos + index; int i = 0; if( index + len_limit > Mf_available_bytes( mf ) ) len_limit = Mf_available_bytes( mf ) - index; - while( i < len_limit && data[i] == data[i+distance] ) ++i; + while( i < len_limit && data[i-distance] == data[i] ) ++i; return i; } +static inline void Mf_move_pos( struct Matchfinder * const mf ) + { + if( ++mf->cyclic_pos >= mf->dictionary_size ) mf->cyclic_pos = 0; + if( ++mf->pos >= mf->pos_limit ) 
Mf_normalize_pos( mf ); + } + void Mf_reset( struct Matchfinder * const mf ); -void Mf_move_pos( struct Matchfinder * const mf ); int Mf_longest_match_len( struct Matchfinder * const mf, int * const distances ); @@ -231,89 +237,85 @@ struct Range_encoder uint8_t cache; }; -void Re_flush_data( struct Range_encoder * const range_encoder ); +void Re_flush_data( struct Range_encoder * const renc ); -static inline void Re_put_byte( struct Range_encoder * const range_encoder, +static inline void Re_put_byte( struct Range_encoder * const renc, const uint8_t b ) { - range_encoder->buffer[range_encoder->pos] = b; - if( ++range_encoder->pos >= re_buffer_size ) Re_flush_data( range_encoder ); + renc->buffer[renc->pos] = b; + if( ++renc->pos >= re_buffer_size ) Re_flush_data( renc ); } -static inline void Re_shift_low( struct Range_encoder * const range_encoder ) +static inline void Re_shift_low( struct Range_encoder * const renc ) { - const uint32_t carry = range_encoder->low >> 32; - if( range_encoder->low < 0xFF000000U || carry == 1 ) + const bool carry = ( renc->low > 0xFFFFFFFFU ); + if( carry || renc->low < 0xFF000000U ) { - Re_put_byte( range_encoder, range_encoder->cache + carry ); - for( ; range_encoder->ff_count > 0; --range_encoder->ff_count ) - Re_put_byte( range_encoder, 0xFF + carry ); - range_encoder->cache = range_encoder->low >> 24; + Re_put_byte( renc, renc->cache + carry ); + for( ; renc->ff_count > 0; --renc->ff_count ) + Re_put_byte( renc, 0xFF + carry ); + renc->cache = renc->low >> 24; } - else ++range_encoder->ff_count; - range_encoder->low = ( range_encoder->low & 0x00FFFFFFU ) << 8; + else ++renc->ff_count; + renc->low = ( renc->low & 0x00FFFFFFU ) << 8; } -static inline void Re_init( struct Range_encoder * const range_encoder, - const int ofd ) - { - range_encoder->low = 0; - range_encoder->partial_member_pos = 0; - range_encoder->buffer = (uint8_t *)malloc( re_buffer_size ); - if( !range_encoder->buffer ) - { - show_error( "Not enough memory. 
Try a smaller dictionary size.", 0, false ); - cleanup_and_fail( 1 ); - } - range_encoder->pos = 0; - range_encoder->range = 0xFFFFFFFFU; - range_encoder->ff_count = 0; - range_encoder->outfd = ofd; - range_encoder->cache = 0; +static inline bool Re_init( struct Range_encoder * const renc, const int ofd ) + { + renc->low = 0; + renc->partial_member_pos = 0; + renc->buffer = (uint8_t *)malloc( re_buffer_size ); + if( !renc->buffer ) return false; + renc->pos = 0; + renc->range = 0xFFFFFFFFU; + renc->ff_count = 0; + renc->outfd = ofd; + renc->cache = 0; + return true; } -static inline void Re_free( struct Range_encoder * const range_encoder ) - { free( range_encoder->buffer ); range_encoder->buffer = 0; } +static inline void Re_free( struct Range_encoder * const renc ) + { free( renc->buffer ); } -static inline long long Re_member_position( struct Range_encoder * const range_encoder ) - { return range_encoder->partial_member_pos + range_encoder->pos + range_encoder->ff_count; } +static inline long long Re_member_position( const struct Range_encoder * const renc ) + { return renc->partial_member_pos + renc->pos + renc->ff_count; } -static inline void Re_flush( struct Range_encoder * const range_encoder ) - { int i; for( i = 0; i < 5; ++i ) Re_shift_low( range_encoder ); } +static inline void Re_flush( struct Range_encoder * const renc ) + { int i; for( i = 0; i < 5; ++i ) Re_shift_low( renc ); } -static inline void Re_encode( struct Range_encoder * const range_encoder, +static inline void Re_encode( struct Range_encoder * const renc, const int symbol, const int num_bits ) { int i; for( i = num_bits - 1; i >= 0; --i ) { - range_encoder->range >>= 1; - if( (symbol >> i) & 1 ) range_encoder->low += range_encoder->range; - if( range_encoder->range <= 0x00FFFFFFU ) - { range_encoder->range <<= 8; Re_shift_low( range_encoder ); } + renc->range >>= 1; + if( (symbol >> i) & 1 ) renc->low += renc->range; + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( 
renc ); } } } -static inline void Re_encode_bit( struct Range_encoder * const range_encoder, +static inline void Re_encode_bit( struct Range_encoder * const renc, Bit_model * const probability, const int bit ) { - const uint32_t bound = ( range_encoder->range >> bit_model_total_bits ) * *probability; + const uint32_t bound = ( renc->range >> bit_model_total_bits ) * *probability; if( !bit ) { - range_encoder->range = bound; + renc->range = bound; *probability += (bit_model_total - *probability) >> bit_model_move_bits; } else { - range_encoder->low += bound; - range_encoder->range -= bound; + renc->low += bound; + renc->range -= bound; *probability -= *probability >> bit_model_move_bits; } - if( range_encoder->range <= 0x00FFFFFFU ) - { range_encoder->range <<= 8; Re_shift_low( range_encoder ); } + if( renc->range <= 0x00FFFFFFU ) + { renc->range <<= 8; Re_shift_low( renc ); } } -static inline void Re_encode_tree( struct Range_encoder * const range_encoder, +static inline void Re_encode_tree( struct Range_encoder * const renc, Bit_model bm[], const int symbol, const int num_bits ) { int mask = ( 1 << ( num_bits - 1 ) ); @@ -322,13 +324,13 @@ static inline void Re_encode_tree( struct Range_encoder * const range_encoder, for( i = num_bits; i > 0; --i, mask >>= 1 ) { const int bit = ( symbol & mask ); - Re_encode_bit( range_encoder, &bm[model], bit ); + Re_encode_bit( renc, &bm[model], bit ); model <<= 1; if( bit ) model |= 1; } } -static inline void Re_encode_tree_reversed( struct Range_encoder * const range_encoder, +static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, Bit_model bm[], int symbol, const int num_bits ) { int model = 1; @@ -336,13 +338,13 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const range_e for( i = num_bits; i > 0; --i ) { const int bit = symbol & 1; - Re_encode_bit( range_encoder, &bm[model], bit ); + Re_encode_bit( renc, &bm[model], bit ); model = ( model << 1 ) | bit; symbol >>= 1; } } -static 
inline void Re_encode_matched( struct Range_encoder * const range_encoder, +static inline void Re_encode_matched( struct Range_encoder * const renc, Bit_model bm[], int symbol, int match_byte ) { int model = 1; @@ -351,14 +353,14 @@ static inline void Re_encode_matched( struct Range_encoder * const range_encoder { const int match_bit = ( match_byte >> i ) & 1; int bit = ( symbol >> i ) & 1; - Re_encode_bit( range_encoder, &bm[(match_bit<<8)+model+0x100], bit ); + Re_encode_bit( renc, &bm[(match_bit<<8)+model+0x100], bit ); model = ( model << 1 ) | bit; if( match_bit != bit ) { while( --i >= 0 ) { bit = ( symbol >> i ) & 1; - Re_encode_bit( range_encoder, &bm[model], bit ); + Re_encode_bit( renc, &bm[model], bit ); model = ( model << 1 ) | bit; } break; @@ -393,11 +395,11 @@ static inline void Lee_update_prices( struct Len_encoder * const len_encoder, pps[len] = tmp + price0( len_encoder->choice2 ) + price_symbol( len_encoder->bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); for( ; len < len_encoder->len_symbols; ++len ) - /* using 4 slots per value makes "Lee_price" faster */ - len_encoder->prices[3][len] = len_encoder->prices[2][len] = - len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->choice2 ) + - price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + /* using 4 slots per value makes "Lee_price" faster */ + len_encoder->prices[3][len] = len_encoder->prices[2][len] = + len_encoder->prices[1][len] = len_encoder->prices[0][len] = + tmp + price1( len_encoder->choice2 ) + + price_symbol( len_encoder->bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); len_encoder->counters[pos_state] = len_encoder->len_symbols; } @@ -420,10 +422,10 @@ static inline void Lee_init( struct Len_encoder * const len_encoder, } void Lee_encode( struct Len_encoder * const len_encoder, - struct Range_encoder * const range_encoder, int symbol, - const int pos_state ); + struct Range_encoder 
* const renc, + int symbol, const int pos_state ); -static inline int Lee_price( struct Len_encoder * const len_encoder, +static inline int Lee_price( const struct Len_encoder * const len_encoder, const int symbol, const int pos_state ) { return len_encoder->prices[pos_state][symbol - min_match_len]; } @@ -433,34 +435,38 @@ struct Literal_encoder Bit_model bm_literal[1<> ( 8 - literal_context_bits ) ); } - -static inline void Lie_init( struct Literal_encoder * const literal_encoder ) +static inline void Lie_init( struct Literal_encoder * const lienc ) { int i, j; for( i = 0; i < 1<bm_literal[i][j] ); + Bm_init( &lienc->bm_literal[i][j] ); } -static inline void Lie_encode( struct Literal_encoder * const literal_encoder, - struct Range_encoder * const range_encoder, +static inline int Lie_state( const uint8_t prev_byte ) + { return ( prev_byte >> ( 8 - literal_context_bits ) ); } + +static inline void Lie_encode( struct Literal_encoder * const lienc, + struct Range_encoder * const renc, uint8_t prev_byte, uint8_t symbol ) - { Re_encode_tree( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } + { Re_encode_tree( renc, lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } -static inline void Lie_encode_matched( struct Literal_encoder * const literal_encoder, - struct Range_encoder * const range_encoder, - uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { Re_encode_matched( range_encoder, literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); } +static inline void Lie_encode_matched( struct Literal_encoder * const lienc, + struct Range_encoder * const renc, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { Re_encode_matched( renc, lienc->bm_literal[Lie_state(prev_byte)], + symbol, match_byte ); } -static inline int Lie_price_symbol( struct Literal_encoder * const literal_encoder, +static inline int Lie_price_symbol( const struct Literal_encoder * const lienc, uint8_t prev_byte, uint8_t symbol ) - 
{ return price_symbol( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } + { return price_symbol( lienc->bm_literal[Lie_state(prev_byte)], symbol, 8 ); } -static inline int Lie_price_matched( struct Literal_encoder * const literal_encoder, - uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { return price_matched( literal_encoder->bm_literal[Lie_state(prev_byte)], symbol, match_byte ); } +static inline int Lie_price_matched( const struct Literal_encoder * const lienc, + uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) + { return price_matched( lienc->bm_literal[Lie_state(prev_byte)], + symbol, match_byte ); } enum { infinite_price = 0x0FFFFFFF, @@ -487,7 +493,7 @@ static inline void Tr_update( struct Trial * const trial, struct LZ_encoder { int longest_match_found; - uint32_t crc_; + uint32_t crc; Bit_model bm_match[states][pos_states]; Bit_model bm_rep[states]; @@ -515,11 +521,20 @@ struct LZ_encoder int align_price_count; }; +void LZe_full_flush( struct LZ_encoder * const encoder, const State state ); + void LZe_fill_align_prices( struct LZ_encoder * const encoder ); void LZe_fill_distance_prices( struct LZ_encoder * const encoder ); -static inline uint32_t LZe_crc( struct LZ_encoder * const encoder ) - { return encoder->crc_ ^ 0xFFFFFFFFU; } +bool LZe_init( struct LZ_encoder * const encoder, + struct Matchfinder * const mf, + const File_header header, const int outfd ); + +static inline void LZe_free( struct LZ_encoder * const encoder ) + { Re_free( &encoder->range_encoder ); } + +static inline uint32_t LZe_crc( const struct LZ_encoder * const encoder ) + { return encoder->crc ^ 0xFFFFFFFFU; } /* move-to-front dis in/into reps */ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) @@ -538,13 +553,15 @@ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) } } -static inline int LZe_price_rep_len1( struct LZ_encoder * const encoder, +static inline int LZe_price_rep_len1( const 
struct LZ_encoder * const encoder, const State state, const int pos_state ) { - return price0( encoder->bm_rep0[state] ) + price0( encoder->bm_len[state][pos_state] ); + return price0( encoder->bm_rep0[state] ) + + price0( encoder->bm_len[state][pos_state] ); } -static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int rep, +static inline int LZe_price_rep( const struct LZ_encoder * const encoder, + const int rep, const State state, const int pos_state ) { int price; @@ -561,7 +578,7 @@ static inline int LZe_price_rep( struct LZ_encoder * const encoder, const int re return price; } -static inline int LZe_price_dis( struct LZ_encoder * const encoder, +static inline int LZe_price_dis( const struct LZ_encoder * const encoder, const int dis, const int dis_state ) { if( dis < modeled_distances ) @@ -571,7 +588,7 @@ static inline int LZe_price_dis( struct LZ_encoder * const encoder, encoder->align_prices[dis & (dis_align_size - 1)]; } -static inline int LZe_price_pair( struct LZ_encoder * const encoder, +static inline int LZe_price_pair( const struct LZ_encoder * const encoder, const int dis, const int len, const int pos_state ) { @@ -598,12 +615,15 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const uint32_t direct_dis = dis - base; if( dis_slot < end_dis_model ) - Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_dis + base - dis_slot, - direct_dis, direct_bits ); + Re_encode_tree_reversed( &encoder->range_encoder, + encoder->bm_dis + base - dis_slot, + direct_dis, direct_bits ); else { - Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, direct_bits - dis_align_bits ); - Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, direct_dis, dis_align_bits ); + Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, + direct_bits - dis_align_bits ); + Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, + direct_dis, dis_align_bits ); if( 
--encoder->align_price_count <= 0 ) LZe_fill_align_prices( encoder ); } } @@ -611,19 +631,21 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, static inline int LZe_read_match_distances( struct LZ_encoder * const encoder ) { - int len = Mf_longest_match_len( encoder->matchfinder, encoder->match_distances ); - if( len == Mf_match_len_limit( encoder->matchfinder ) ) - len += Mf_true_match_len( encoder->matchfinder, len, encoder->match_distances[len] + 1, max_match_len - len ); + int len = Mf_longest_match_len( encoder->matchfinder, + encoder->match_distances ); + if( len == encoder->matchfinder->match_len_limit && len < max_match_len ) + len += Mf_true_match_len( encoder->matchfinder, len, + encoder->match_distances[len] + 1, + max_match_len - len ); return len; } -static inline void LZe_move_pos( struct LZ_encoder * const encoder, - int n, bool skip ) +static inline void LZe_move_pos( struct LZ_encoder * const encoder, int n ) { + if( --n >= 0 ) Mf_move_pos( encoder->matchfinder ); while( --n >= 0 ) { - if( skip ) skip = false; - else Mf_longest_match_len( encoder->matchfinder, 0 ); + Mf_longest_match_len( encoder->matchfinder, 0 ); Mf_move_pos( encoder->matchfinder ); } } @@ -642,19 +664,8 @@ static inline void LZe_backward( struct LZ_encoder * const encoder, int cur ) } int LZe_sequence_optimizer( struct LZ_encoder * const encoder, - const int reps[num_rep_distances], const State state ); - -void LZe_full_flush( struct LZ_encoder * const encoder, const State state ); - -void LZe_init( struct LZ_encoder * const encoder, struct Matchfinder * const mf, - const File_header header, const int outfd ); - -static inline void LZe_free( struct LZ_encoder * const encoder ) - { - Re_free( &encoder->range_encoder ); - } - -bool LZe_encode_member( struct LZ_encoder * const encoder, const long long member_size ); + const int reps[num_rep_distances], + const State state ); -static inline long long LZe_member_position( struct LZ_encoder * const encoder ) - 
{ return Re_member_position( &encoder->range_encoder ); } +bool LZe_encode_member( struct LZ_encoder * const encoder, + const long long member_size ); diff --git a/main.c b/main.c index f24893a..91215a9 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* Clzip - Data compressor based on the LZMA algorithm - Copyright (C) 2010, 2011 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,14 +24,14 @@ #define _FILE_OFFSET_BITS 64 #include +#include #include #include -#include #include +#include +#include #include #include -#include -#include #include #include #include @@ -74,7 +74,7 @@ long long int llabs( long long int number ); const char * const Program_name = "Clzip"; const char * const program_name = "clzip"; -const char * const program_year = "2011"; +const char * const program_year = "2012"; const char * invocation_name = 0; #ifdef O_BINARY @@ -99,11 +99,13 @@ enum Mode { m_compress, m_decompress, m_test }; char * output_filename = 0; int outfd = -1; int verbosity = 0; +const mode_t usr_rw = S_IRUSR | S_IWUSR; +const mode_t all_rw = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; mode_t outfd_mode = S_IRUSR | S_IWUSR; bool delete_output_on_interrupt = false; -/* assure at least a minimum size for buffer `buf' */ +/* assure at least a minimum size for buffer 'buf' */ static void * resize_buffer( void * buf, const int min_size ) { if( buf ) buf = realloc( buf, min_size ); @@ -121,31 +123,35 @@ static void show_help() { printf( "%s - Data compressor based on the LZMA algorithm.\n", Program_name ); printf( "\nUsage: %s [options] [files]\n", invocation_name ); - printf( "\nOptions:\n" ); - printf( " -h, --help display this help and exit\n" ); - printf( " -V, --version output version information and exit\n" ); - printf( " -b, --member-size= set member size limit in bytes\n" ); - printf( " -c, 
--stdout send output to standard output\n" ); - printf( " -d, --decompress decompress\n" ); - printf( " -f, --force overwrite existing output files\n" ); - printf( " -F, --recompress force recompression of compressed files\n" ); - printf( " -k, --keep keep (don't delete) input files\n" ); - printf( " -m, --match-length= set match length limit in bytes [36]\n" ); - printf( " -o, --output= if reading stdin, place the output into \n" ); - printf( " -q, --quiet suppress all messages\n" ); - printf( " -s, --dictionary-size= set dictionary size limit in bytes [8MiB]\n" ); - printf( " -S, --volume-size= set volume size limit in bytes\n" ); - printf( " -t, --test test compressed file integrity\n" ); - printf( " -v, --verbose be verbose (a 2nd -v gives more)\n" ); - printf( " -1 .. -9 set compression level [default 6]\n" ); - printf( " --fast alias for -1\n" ); - printf( " --best alias for -9\n" ); - printf( "If no file names are given, %s compresses or decompresses\n", program_name ); - printf( "from standard input to standard output.\n" ); - printf( "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" ); - printf( "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); - printf( "\nReport bugs to lzip-bug@nongnu.org\n" ); - printf( "Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" ); + printf( "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --member-size= set member size limit in bytes\n" + " -c, --stdout send output to standard output\n" + " -d, --decompress decompress\n" + " -f, --force overwrite existing output files\n" + " -F, --recompress force recompression of compressed files\n" + " -k, --keep keep (don't delete) input files\n" + " -m, --match-length= set match length limit in bytes [36]\n" + " -o, --output= if reading stdin, place the output into \n" + " -q, --quiet suppress all messages\n" + " -s, --dictionary-size= set dictionary size limit 
in bytes [8MiB]\n" + " -S, --volume-size= set volume size limit in bytes\n" + " -t, --test test compressed file integrity\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -1 .. -9 set compression level [default 6]\n" + " --fast alias for -1\n" + " --best alias for -9\n" + "If no file names are given, clzip compresses or decompresses\n" + "from standard input to standard output.\n" + "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" + "The bidimensional parameter space of LZMA can't be mapped to a linear\n" + "scale optimal for all files. If your files are large, very repetitive,\n" + "etc, you may need to use the --match-length and --dictionary-size\n" + "options directly to achieve optimal performance.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Clzip home page: http://www.nongnu.org/lzip/clzip.html\n" ); } @@ -153,9 +159,9 @@ static void show_version() { printf( "%s %s\n", Program_name, PROGVERSION ); printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - printf( "License GPLv3+: GNU GPL version 3 or later \n" ); - printf( "This is free software: you are free to change and redistribute it.\n" ); - printf( "There is NO WARRANTY, to the extent permitted by law.\n" ); + printf( "License GPLv3+: GNU GPL version 3 or later \n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); } @@ -166,11 +172,12 @@ static const char * format_num( long long num ) enum { buf_size = 16, factor = 1024 }; static char buf[buf_size]; const char *p = ""; + bool exact = ( num % factor == 0 ); int i; for( i = 0; i < 8 && ( llabs( num ) > 9999 || - ( llabs( num ) >= factor && num % factor == 0 ) ); ++i ) - { num /= factor; p = prefix[i]; } + ( exact && llabs( num ) >= factor ) ); ++i ) + { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; } snprintf( buf, buf_size, 
"%lld %s", num, p ); return buf; } @@ -251,7 +258,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp if( program_mode == m_compress && !recompress && eindex >= 0 ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file `%s' already has `%s' suffix.\n", + fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", program_name, name, known_extensions[eindex].from ); } else @@ -260,7 +267,7 @@ static int open_instream( const char * const name, struct stat * const in_statsp if( infd < 0 ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Can't open input file `%s': %s.\n", + fprintf( stderr, "%s: Can't open input file '%s': %s.\n", program_name, name, strerror( errno ) ); } else @@ -273,10 +280,10 @@ static int open_instream( const char * const name, struct stat * const in_statsp if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) { if( verbosity >= 0 ) - fprintf( stderr, "%s: Input file `%s' is not a regular file%s.\n", + fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", program_name, name, ( can_read && !to_stdout ) ? 
- " and `--stdout' was not specified" : "" ); + " and '--stdout' was not specified" : "" ); close( infd ); infd = -1; } @@ -329,7 +336,7 @@ static void set_d_outname( const char * const name, const int i ) strcpy( output_filename, name ); strcat( output_filename, ".out" ); if( verbosity >= 1 ) - fprintf( stderr, "%s: Can't guess original name for `%s' -- using `%s'.\n", + fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'.\n", program_name, name, output_filename ); } @@ -343,10 +350,10 @@ static bool open_outstream( const bool force ) if( outfd < 0 && verbosity >= 0 ) { if( errno == EEXIST ) - fprintf( stderr, "%s: Output file %s already exists, skipping.\n", + fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", program_name, output_filename ); else - fprintf( stderr, "%s: Can't create output file `%s': %s.\n", + fprintf( stderr, "%s: Can't create output file '%s': %s.\n", program_name, output_filename, strerror( errno ) ); } return ( outfd >= 0 ); @@ -376,7 +383,7 @@ void cleanup_and_fail( const int retval ) { delete_output_on_interrupt = false; if( verbosity >= 0 ) - fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n", + fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", program_name, output_filename ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( remove( output_filename ) != 0 && errno != ENOENT ) @@ -389,31 +396,26 @@ void cleanup_and_fail( const int retval ) /* Set permissions, owner and times. */ static void close_and_set_permissions( const struct stat * const in_statsp ) { - bool error = false; + bool warning = false; if( in_statsp ) { + /* fchown will in many cases return with EPERM, which can be safely ignored. */ if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && errno != EPERM ) || - fchmod( outfd, in_statsp->st_mode ) != 0 ) - error = true; - /* fchown will in many cases return with EPERM, which can be safely ignored. 
*/ + fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true; } - if( close( outfd ) == 0 ) outfd = -1; - else cleanup_and_fail( 1 ); + if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); + outfd = -1; delete_output_on_interrupt = false; - if( !in_statsp ) return; - if( !error ) + if( in_statsp ) { struct utimbuf t; t.actime = in_statsp->st_atime; t.modtime = in_statsp->st_mtime; - if( utime( output_filename, &t ) != 0 ) error = true; + if( utime( output_filename, &t ) != 0 ) warning = true; } - if( error ) - { + if( warning && verbosity >= 1 ) show_error( "Can't change output file attributes.", 0, false ); - cleanup_and_fail( 1 ); - } } @@ -449,21 +451,29 @@ static int compress( const long long member_size, const long long volume_size, encoder_options->match_len_limit > max_match_len ) internal_error( "invalid argument to encoder" ); - Mf_init( &matchfinder, Fh_get_dictionary_size( header ), - encoder_options->match_len_limit, infd ); - Fh_set_dictionary_size( header, Mf_dictionary_size( &matchfinder ) ); + if( !Mf_init( &matchfinder, Fh_get_dictionary_size( header ), + encoder_options->match_len_limit, infd ) ) + { + Pp_show_msg( pp, "Not enough memory. Try a smaller dictionary size" ); + return 1; + } + Fh_set_dictionary_size( header, matchfinder.dictionary_size ); while( true ) /* encode one member per iteration */ { struct LZ_encoder encoder; const long long size = min( member_size, volume_size - partial_volume_size ); - LZe_init( &encoder, &matchfinder, header, outfd ); + if( !LZe_init( &encoder, &matchfinder, header, outfd ) ) + { + show_error( "Not enough memory. 
Try a smaller dictionary size.", 0, false ); + cleanup_and_fail( 1 ); + } if( !LZe_encode_member( &encoder, size ) ) { Pp_show_msg( pp, "Encoder error" ); retval = 1; break; } in_size += Mf_data_position( &matchfinder ); - out_size += LZe_member_position( &encoder ); - partial_volume_size += LZe_member_position( &encoder ); + out_size += Re_member_position( &encoder.range_encoder ); + partial_volume_size += Re_member_position( &encoder.range_encoder ); LZe_free( &encoder ); if( Mf_finished( &matchfinder ) ) break; if( partial_volume_size >= volume_size - min_dictionary_size ) @@ -484,7 +494,7 @@ static int compress( const long long member_size, const long long volume_size, if( retval == 0 && verbosity >= 1 ) { if( in_size <= 0 || out_size <= 0 ) - fprintf( stderr, "No data compressed.\n" ); + fprintf( stderr, " no data compressed.\n" ); else fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " "%5.2f%% saved, %lld in, %lld out.\n", @@ -503,17 +513,20 @@ static int decompress( const int infd, struct Pretty_print * const pp, { long long partial_file_pos = 0; struct Range_decoder rdec; - int retval = 0, i, result; + int retval = 0, result; bool first_member; - Rd_init( &rdec, infd ); + if( !Rd_init( &rdec, infd ) ) + { + show_error( "Not enough memory. Find a machine with more memory.", 0, false ); + cleanup_and_fail( 1 ); + } for( first_member = true; ; first_member = false, Pp_reset( pp ) ) { File_header header; struct LZ_decoder decoder; Rd_reset_member_position( &rdec ); - for( i = 0; i < Fh_size; ++i ) - header[i] = Rd_get_byte( &rdec ); + Rd_read_data( &rdec, header, Fh_size ); if( Rd_finished( &rdec ) ) /* End Of File */ { if( first_member ) @@ -547,8 +560,12 @@ static int decompress( const int infd, struct Pretty_print * const pp, Fh_version( header ), format_num( Fh_get_dictionary_size( header ) ) ); } - LZd_init( &decoder, header, &rdec, outfd ); + if( !LZd_init( &decoder, header, &rdec, outfd ) ) + { + show_error( "Not enough memory. 
Find a machine with more memory.", 0, false ); + cleanup_and_fail( 1 ); + } result = LZd_decode_member( &decoder, pp ); partial_file_pos += Rd_member_position( &rdec ); LZd_free( &decoder ); @@ -598,7 +615,7 @@ void Pp_init( struct Pretty_print * const pp, const char * const filenames[], { unsigned int stdin_name_len; int i; - pp->name_ = 0; + pp->name = 0; pp->stdin_name = "(stdin)"; pp->longest_name = 0; pp->verbosity = v; @@ -623,8 +640,8 @@ void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { int i, len; pp->first_post = false; - fprintf( stderr, " %s: ", pp->name_ ); - len = pp->longest_name - strlen( pp->name_ ); + fprintf( stderr, " %s: ", pp->name ); + len = pp->longest_name - strlen( pp->name ); for( i = 0; i < len; ++i ) fprintf( stderr, " " ); if( !msg ) fflush( stderr ); } @@ -644,7 +661,7 @@ void show_error( const char * const msg, const int errcode, const bool help ) fprintf( stderr, "\n" ); } if( help && invocation_name && invocation_name[0] ) - fprintf( stderr, "Try `%s --help' for more information.\n", + fprintf( stderr, "Try '%s --help' for more information.\n", invocation_name ); } } @@ -708,7 +725,6 @@ int main( const int argc, const char * const argv[] ) { 'b', "member-size", ap_yes }, { 'c', "stdout", ap_no }, { 'd', "decompress", ap_no }, - { 'e', "extreme", ap_no }, { 'f', "force", ap_no }, { 'F', "recompress", ap_no }, { 'h', "help", ap_no }, @@ -727,6 +743,7 @@ int main( const int argc, const char * const argv[] ) invocation_name = argv[0]; CRC32_init(); + if( !ap_init( &parser, argc, argv, options, 0 ) ) { show_error( "Memory exhausted.", 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ @@ -746,7 +763,6 @@ int main( const int argc, const char * const argv[] ) case 'b': member_size = getnum( arg, 100000, LLONG_MAX / 2 ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; - case 'e': break; /* ignored by now */ case 'f': force = true; break; case 'F': recompress 
= true; break; case 'h': show_help(); return 0; @@ -766,10 +782,18 @@ int main( const int argc, const char * const argv[] ) } /* end process options */ #if defined(__MSVCRT__) || defined(__OS2__) - _setmode( STDIN_FILENO, O_BINARY ); - _setmode( STDOUT_FILENO, O_BINARY ); + _fsetmode( stdin, "b" ); + _fsetmode( stdout, "b" ); #endif + if( program_mode == m_test ) + outfd = -1; + else if( program_mode == m_compress ) + { + Dis_slots_init(); + Prob_prices_init(); + } + for( ; argind < ap_arguments( &parser ); ++argind ) { if( strcmp( ap_argument( &parser, argind ), "-" ) ) @@ -790,13 +814,6 @@ int main( const int argc, const char * const argv[] ) set_signals(); Pp_init( &pp, filenames, num_filenames, verbosity ); - if( program_mode == m_test ) - outfd = -1; - else if( program_mode == m_compress ) - { - Dis_slots_init(); - Prob_prices_init(); - } output_filename = resize_buffer( output_filename, 1 ); for( i = 0; i < num_filenames; ++i ) @@ -824,7 +841,7 @@ int main( const int argc, const char * const argv[] ) strlen( default_output_filename ) + 1 ); strcpy( output_filename, default_output_filename ); } - outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; + outfd_mode = all_rw; if( !open_outstream( force ) ) { if( outfd == -1 && retval < 1 ) retval = 1; @@ -849,7 +866,7 @@ int main( const int argc, const char * const argv[] ) if( program_mode == m_compress ) set_c_outname( input_filename, volume_size != LLONG_MAX ); else set_d_outname( input_filename, eindex ); - outfd_mode = S_IRUSR | S_IWUSR; + outfd_mode = usr_rw; if( !open_outstream( force ) ) { if( outfd == -1 && retval < 1 ) retval = 1; diff --git a/testsuite/check.sh b/testsuite/check.sh index 50bec2e..9d07625 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Clzip - Data compressor based on the LZMA algorithm -# Copyright (C) 2010, 2011 Antonio Diaz Diaz. +# Copyright (C) 2010, 2011, 2012 Antonio Diaz Diaz. 
# # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -19,12 +19,13 @@ fi if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -printf "testing clzip-%s..." "$2" cd "${objdir}"/tmp cat "${testdir}"/test.txt > in || framework_failure fail=0 +printf "testing clzip-%s..." "$2" + "${LZIP}" -t "${testdir}"/test_v0.lz || fail=1 printf . "${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1 @@ -43,7 +44,7 @@ printf . cmp in copy || fail=1 printf . -"${LZIP}" -cf "${testdir}"/test_v1.lz > out 2>/dev/null +"${LZIP}" -cfq "${testdir}"/test_v1.lz > out if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi "${LZIP}" -cF "${testdir}"/test_v1.lz > out || fail=1 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 @@ -81,7 +82,7 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9 ; do printf . done -"${LZIP}" -$i < in > anyothername || fail=1 +"${LZIP}" < in > anyothername || fail=1 "${LZIP}" -d anyothername || fail=1 cmp in anyothername.out || fail=1 printf . -- cgit v1.2.3