diff options
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | INSTALL | 6 | ||||
-rw-r--r-- | NEWS | 23 | ||||
-rw-r--r-- | README | 19 | ||||
-rw-r--r-- | bbexample.c | 2 | ||||
-rw-r--r-- | cbuffer.c | 2 | ||||
-rwxr-xr-x | configure | 18 | ||||
-rw-r--r-- | decoder.c | 57 | ||||
-rw-r--r-- | decoder.h | 8 | ||||
-rw-r--r-- | doc/lzlib.info | 125 | ||||
-rw-r--r-- | doc/lzlib.texinfo | 87 | ||||
-rw-r--r-- | doc/minilzip.1 | 6 | ||||
-rw-r--r-- | encoder.c | 75 | ||||
-rw-r--r-- | encoder.h | 48 | ||||
-rw-r--r-- | lzcheck.c | 4 | ||||
-rw-r--r-- | lzip.h | 13 | ||||
-rw-r--r-- | lzlib.c | 34 | ||||
-rw-r--r-- | lzlib.h | 4 | ||||
-rw-r--r-- | main.c | 21 | ||||
-rwxr-xr-x | testsuite/check.sh | 42 |
21 files changed, 321 insertions, 282 deletions
@@ -4,4 +4,4 @@ The ideas embodied in lzlib are due to (at least) the following people: Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the definition of Markov chains), G.N.N. Martin (for the definition of range encoding), Igor Pavlov (for putting all the above together in -LZMA), and Julian Seward (for bzip2's CLI and the idea of unzcrash). +LZMA), and Julian Seward (for bzip2's CLI). @@ -1,3 +1,10 @@ +2013-07-28 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.5-rc1 released. + * Removed decompression support for version 0 files. + * The LZ_compress_sync_flush mechanism has been fixed (again). + * Minor fixes. + 2013-05-28 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.4 released. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C compiler. -I use gcc 4.8.0 and 3.3.6, but the code should compile with any +I use gcc 4.8.1 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -10,9 +10,9 @@ Procedure --------- 1. Unpack the archive if you have not done so already: - lzip -cd lzlib[version].tar.lz | tar -xf - + tar -xf lzlib[version].tar.lz or - gzip -cd lzlib[version].tar.gz | tar -xf - + lzip -cd lzlib[version].tar.lz | tar -xf - This creates the directory ./lzlib[version] containing the source from the main archive. @@ -1,21 +1,8 @@ -Changes in version 1.4: +Changes in version 1.5: -Multi-step trials have been implemented. +Decompression support for deprecated version 0 files has been removed. -Compression ratio has been slightly increased. +A bug has been fixed that would make an instance of "struct LZ_Encoder" +unresponsive if "LZ_compress_sync_flush" is called at the wrong moment. -Compression time has been reduced by 8%. - -Decompression time has been reduced by 7%. - -Arguments and return values of functions in lzlib.h have been changed -from 'long long' to 'unsigned long long'. - -The minimum size of the input compression buffer has been reduced to 64KiB. - -"LZ_decompress_read" now tells "LZ_header_error" from "LZ_unexpected_eof" -the same way as lzip does when the EOF happens at the header. - -The target "install-as-lzip" has been added to the Makefile. - -The target "install-bin" has been added to the Makefile. +Minor fixes. @@ -5,6 +5,10 @@ and decompression functions, including integrity checking of the decompressed data. The compressed data format used by the library is the lzip format. Lzlib is written in C. +The lzip file format is designed for long-term data archiving. It is +clean, provides very safe 4 factor integrity checking, and is backed by +the recovery capabilities of lziprecover. + The functions and variables forming the interface of the compression library are declared in the file lzlib.h. Usage examples of the library are given in the files main.c and bbexample.c from the source @@ -31,9 +35,18 @@ any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. Lzlib implements a simplified version of the LZMA (Lempel-Ziv-Markov -chain-Algorithm) algorithm. The original LZMA algorithm was designed by -Igor Pavlov. For a description of the LZMA algorithm, see the Lzip -manual. +chain-Algorithm) algorithm. The high compression of LZMA comes from +combining two basic, well-proven compression ideas: sliding dictionaries +(LZ77/78) and markov models (the thing used by every compression +algorithm that uses a range encoder or similar order-0 entropy coder as +its last stage) with segregation of contexts according to what the bits +are used for. + +The ideas embodied in lzlib are due to (at least) the following people: +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for +the definition of Markov chains), G.N.N. Martin (for the definition of +range encoding), Igor Pavlov (for putting all the above together in +LZMA), and Julian Seward (for bzip2's CLI). Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. diff --git a/bbexample.c b/bbexample.c index 1b33978..4a924fc 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,4 +1,4 @@ -/* Buff to buff example - A test program for the lzlib library +/* Buff to buff example - Test program for the lzlib library Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you have unlimited permission @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -1,17 +1,17 @@ #! /bin/sh -# configure script for Lzlib - A compression library for lzip files +# configure script for Lzlib - Compression library for lzip files # Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lzlib -pkgversion=1.4 +pkgversion=1.5-rc1 soversion=1 progname=minilzip progname_shared= libname=lz -srctrigger=${libname}lib.h +srctrigger=doc/${pkgname}.texinfo # clear some things potentially inherited from environment. LC_ALL=C @@ -113,14 +113,14 @@ while [ $# != 0 ] ; do *=* | *-*-*) ;; *) echo "configure: unrecognized option: '${option}'" 1>&2 - echo "Try 'configure --help' for more information." + echo "Try 'configure --help' for more information." 1>&2 exit 1 ;; esac # Check if the option took a separate argument if [ "${arg2}" = yes ] ; then if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift - else echo "configure: Missing argument to \"${option}\"" 1>&2 + else echo "configure: Missing argument to '${option}'" 1>&2 exit 1 fi fi @@ -138,10 +138,8 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - exec 1>&2 - echo - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" - echo "configure: (At least ${srctrigger} is missing)." + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -181,7 +179,7 @@ echo "CFLAGS = ${CFLAGS}" echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF -# Makefile for Lzlib - A compression library for lzip files +# Makefile for Lzlib - Compression library for lzip files # Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -28,16 +28,13 @@ static bool LZd_verify_trailer( struct LZ_decoder * const decoder ) { File_trailer trailer; - const int trailer_size = Ft_versioned_size( decoder->member_version ); const unsigned long long member_size = - decoder->rdec->member_position + trailer_size; + decoder->rdec->member_position + Ft_size; - int size = Rd_read_data( decoder->rdec, trailer, trailer_size ); - if( size < trailer_size ) + int size = Rd_read_data( decoder->rdec, trailer, Ft_size ); + if( size < Ft_size ) return false; - if( decoder->member_version == 0 ) Ft_set_member_size( trailer, member_size ); - return ( decoder->rdec->code == 0 && Ft_get_data_crc( trailer ) == LZd_crc( decoder ) && Ft_get_data_size( trailer ) == LZd_data_position( decoder ) && @@ -49,38 +46,39 @@ static bool LZd_verify_trailer( struct LZ_decoder * const decoder ) 3 = trailer error, 4 = unknown marker found. */ static int LZd_decode_member( struct LZ_decoder * const decoder ) { + struct Range_decoder * const rdec = decoder->rdec; State * const state = &decoder->state; + if( decoder->member_finished ) return 0; - if( !Rd_try_reload( decoder->rdec, false ) ) return 0; + if( !Rd_try_reload( rdec, false ) ) return 0; if( decoder->verify_trailer_pending ) { - if( Rd_available_bytes( decoder->rdec ) < Ft_versioned_size( decoder->member_version ) && - !decoder->rdec->at_stream_end ) + if( Rd_available_bytes( rdec ) < Ft_size && !rdec->at_stream_end ) return 0; decoder->verify_trailer_pending = false; decoder->member_finished = true; if( LZd_verify_trailer( decoder ) ) return 0; else return 3; } - while( !Rd_finished( decoder->rdec ) ) + while( !Rd_finished( rdec ) ) { const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( !Rd_enough_available_bytes( decoder->rdec ) || + if( !Rd_enough_available_bytes( rdec ) || !LZd_enough_free_bytes( decoder ) ) return 0; - if( Rd_decode_bit( decoder->rdec, &decoder->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ + if( Rd_decode_bit( rdec, &decoder->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ { const uint8_t prev_byte = LZd_get_prev_byte( decoder ); if( St_is_char( *state ) ) { *state -= ( *state < 4 ) ? *state : 3; - LZd_put_byte( decoder, Rd_decode_tree( decoder->rdec, + LZd_put_byte( decoder, Rd_decode_tree( rdec, decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { *state -= ( *state < 10 ) ? 3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( decoder->rdec, + LZd_put_byte( decoder, Rd_decode_matched( rdec, decoder->bm_literal[get_lit_state(prev_byte)], LZd_get_byte( decoder, decoder->rep0 ) ) ); } @@ -88,22 +86,22 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) else { int len; - if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep[*state] ) == 1 ) /* 2nd bit */ + if( Rd_decode_bit( rdec, &decoder->bm_rep[*state] ) == 1 ) /* 2nd bit */ { - if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep0[*state] ) == 0 ) /* 3rd bit */ + if( Rd_decode_bit( rdec, &decoder->bm_rep0[*state] ) == 0 ) /* 3rd bit */ { - if( Rd_decode_bit( decoder->rdec, &decoder->bm_len[*state][pos_state] ) == 0 ) /* 4th bit */ + if( Rd_decode_bit( rdec, &decoder->bm_len[*state][pos_state] ) == 0 ) /* 4th bit */ { *state = St_set_short_rep( *state ); LZd_put_byte( decoder, LZd_get_byte( decoder, decoder->rep0 ) ); continue; } } else { unsigned distance; - if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep1[*state] ) == 0 ) /* 4th bit */ + if( Rd_decode_bit( rdec, &decoder->bm_rep1[*state] ) == 0 ) /* 4th bit */ distance = decoder->rep1; else { - if( Rd_decode_bit( decoder->rdec, &decoder->bm_rep2[*state] ) == 0 ) /* 5th bit */ + if( Rd_decode_bit( rdec, &decoder->bm_rep2[*state] ) == 0 ) /* 5th bit */ distance = decoder->rep2; else { distance = decoder->rep3; decoder->rep3 = decoder->rep2; } @@ -113,42 +111,41 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) decoder->rep0 = distance; } *state = St_set_rep( *state ); - len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->rep_len_model, pos_state ); + len = min_match_len + Rd_decode_len( rdec, &decoder->rep_len_model, pos_state ); } else { int dis_slot; const unsigned rep0_saved = decoder->rep0; - len = min_match_len + Rd_decode_len( decoder->rdec, &decoder->match_len_model, pos_state ); - dis_slot = Rd_decode_tree6( decoder->rdec, decoder->bm_dis_slot[get_dis_state(len)] ); + len = min_match_len + Rd_decode_len( rdec, &decoder->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( rdec, decoder->bm_dis_slot[get_dis_state(len)] ); if( dis_slot < start_dis_model ) decoder->rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; decoder->rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - decoder->rep0 += Rd_decode_tree_reversed( decoder->rdec, + decoder->rep0 += Rd_decode_tree_reversed( rdec, decoder->bm_dis + decoder->rep0 - dis_slot - 1, direct_bits ); else { - decoder->rep0 += Rd_decode( decoder->rdec, direct_bits - dis_align_bits ) << dis_align_bits; - decoder->rep0 += Rd_decode_tree_reversed4( decoder->rdec, decoder->bm_align ); + decoder->rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; + decoder->rep0 += Rd_decode_tree_reversed4( rdec, decoder->bm_align ); if( decoder->rep0 == 0xFFFFFFFFU ) /* Marker found */ { decoder->rep0 = rep0_saved; - Rd_normalize( decoder->rdec ); + Rd_normalize( rdec ); if( len == min_match_len ) /* End Of Stream marker */ { - if( Rd_available_bytes( decoder->rdec ) < Ft_versioned_size( decoder->member_version ) && - !decoder->rdec->at_stream_end ) + if( Rd_available_bytes( rdec ) < Ft_size && !rdec->at_stream_end ) { decoder->verify_trailer_pending = true; return 0; } decoder->member_finished = true; if( LZd_verify_trailer( decoder ) ) return 0; else return 3; } if( len == min_match_len + 1 ) /* Sync Flush marker */ { - if( Rd_try_reload( decoder->rdec, true ) ) continue; + if( Rd_try_reload( rdec, true ) ) continue; else return 0; } return 4; @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -33,8 +33,8 @@ struct Range_decoder unsigned long long member_position; uint32_t code; uint32_t range; - bool reload_pending; bool at_stream_end; + bool reload_pending; }; static inline bool Rd_init( struct Range_decoder * const rdec ) @@ -43,8 +43,8 @@ static inline bool Rd_init( struct Range_decoder * const rdec ) rdec->member_position = 0; rdec->code = 0; rdec->range = 0xFFFFFFFFU; - rdec->reload_pending = false; rdec->at_stream_end = false; + rdec->reload_pending = false; return true; } @@ -311,7 +311,6 @@ struct LZ_decoder unsigned long long partial_data_pos; int dictionary_size; uint32_t crc; - int member_version; bool member_finished; bool verify_trailer_pending; unsigned rep0; /* rep[0-3] latest four distances */ @@ -391,7 +390,6 @@ static inline bool LZd_init( struct LZ_decoder * const decoder, return false; decoder->partial_data_pos = 0; decoder->crc = 0xFFFFFFFFU; - decoder->member_version = Fh_version( header ); decoder->member_finished = false; decoder->verify_trailer_pending = false; decoder->rep0 = 0; diff --git a/doc/lzlib.info b/doc/lzlib.info index 84e0501..47460f9 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -3,7 +3,7 @@ lzlib.texinfo. INFO-DIR-SECTION Data Compression START-INFO-DIR-ENTRY -* Lzlib: (lzlib). A compression library for lzip files +* Lzlib: (lzlib). Compression library for lzip files END-INFO-DIR-ENTRY @@ -12,22 +12,22 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.4, 28 May 2013). +This manual is for Lzlib (version 1.5-rc1, 28 July 2013). * Menu: * Introduction:: Purpose and features of Lzlib -* Library Version:: Checking library version +* Library version:: Checking library version * Buffering:: Sizes of Lzlib's buffers -* Parameter Limits:: Min / max values for some parameters -* Compression Functions:: Descriptions of the compression functions -* Decompression Functions:: Descriptions of the decompression functions -* Error Codes:: Meaning of codes returned by functions -* Error Messages:: Error messages corresponding to error codes -* Data Format:: Detailed format of the compressed data +* Parameter limits:: Min / max values for some parameters +* Compression functions:: Descriptions of the compression functions +* Decompression functions:: Descriptions of the decompression functions +* Error codes:: Meaning of codes returned by functions +* Error messages:: Error messages corresponding to error codes +* Data format:: Detailed format of the compressed data * Examples:: A small tutorial with examples * Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Concept index:: Index of concepts Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. @@ -36,7 +36,7 @@ This manual is for Lzlib (version 1.4, 28 May 2013). copy, distribute and modify it. -File: lzlib.info, Node: Introduction, Next: Library Version, Prev: Top, Up: Top +File: lzlib.info, Node: Introduction, Next: Library version, Prev: Top, Up: Top 1 Introduction ************** @@ -46,6 +46,10 @@ and decompression functions, including integrity checking of the decompressed data. The compressed data format used by the library is the lzip format. Lzlib is written in C. + The lzip file format is designed for long-term data archiving. It is +clean, provides very safe 4 factor integrity checking, and is backed by +the recovery capabilities of lziprecover. + The functions and variables forming the interface of the compression library are declared in the file `lzlib.h'. Usage examples of the library are given in the files `main.c' and `bbexample.c' from the @@ -73,14 +77,23 @@ compressed data, so the library should never crash even in case of corrupted input. Lzlib implements a simplified version of the LZMA (Lempel-Ziv-Markov -chain-Algorithm) algorithm. The original LZMA algorithm was designed by -Igor Pavlov. For a description of the LZMA algorithm, see the Lzip -manual. +chain-Algorithm) algorithm. The high compression of LZMA comes from +combining two basic, well-proven compression ideas: sliding dictionaries +(LZ77/78) and markov models (the thing used by every compression +algorithm that uses a range encoder or similar order-0 entropy coder as +its last stage) with segregation of contexts according to what the bits +are used for. + + The ideas embodied in lzlib are due to (at least) the following +people: Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey +Markov (for the definition of Markov chains), G.N.N. Martin (for the +definition of range encoding), Igor Pavlov (for putting all the above +together in LZMA), and Julian Seward (for bzip2's CLI). -File: lzlib.info, Node: Library Version, Next: Buffering, Prev: Introduction, Up: Top +File: lzlib.info, Node: Library version, Next: Buffering, Prev: Introduction, Up: Top -2 Library Version +2 Library version ***************** -- Function: const char * LZ_version ( void ) @@ -98,7 +111,7 @@ application. error( "bad library version" ); -File: lzlib.info, Node: Buffering, Next: Parameter Limits, Prev: Library Version, Up: Top +File: lzlib.info, Node: Buffering, Next: Parameter limits, Prev: Library version, Up: Top 3 Buffering *********** @@ -126,9 +139,9 @@ minimum sizes: member currently being decompressed or 64KiB, whichever is larger. -File: lzlib.info, Node: Parameter Limits, Next: Compression Functions, Prev: Buffering, Up: Top +File: lzlib.info, Node: Parameter limits, Next: Compression functions, Prev: Buffering, Up: Top -4 Parameter Limits +4 Parameter limits ****************** These functions provide minimum and maximum values for some parameters. @@ -155,9 +168,9 @@ Current values are shown in square brackets. Returns the largest valid match length limit [273]. -File: lzlib.info, Node: Compression Functions, Next: Decompression Functions, Prev: Parameter Limits, Up: Top +File: lzlib.info, Node: Compression functions, Next: Decompression functions, Prev: Parameter limits, Up: Top -5 Compression Functions +5 Compression functions *********************** These are the functions used to compress data. In case of error, all of @@ -254,7 +267,7 @@ calling `LZ_compress_errno' before using it. -- Function: enum LZ_Errno LZ_compress_errno ( struct LZ_Encoder * const ENCODER ) - Returns the current error code for ENCODER (*note Error Codes::). + Returns the current error code for ENCODER (*note Error codes::). -- Function: int LZ_compress_finished ( struct LZ_Encoder * const ENCODER ) @@ -287,9 +300,9 @@ calling `LZ_compress_errno' before using it. perhaps not yet read. -File: lzlib.info, Node: Decompression Functions, Next: Error Codes, Prev: Compression Functions, Up: Top +File: lzlib.info, Node: Decompression functions, Next: Error codes, Prev: Compression functions, Up: Top -6 Decompression Functions +6 Decompression functions ************************* These are the functions used to decompress data. In case of error, all @@ -370,7 +383,7 @@ verified by calling `LZ_decompress_errno' before using it. -- Function: enum LZ_Errno LZ_decompress_errno ( struct LZ_Decoder * const DECODER ) - Returns the current error code for DECODER (*note Error Codes::). + Returns the current error code for DECODER (*note Error codes::). -- Function: int LZ_decompress_finished ( struct LZ_Decoder * const DECODER ) @@ -419,9 +432,9 @@ verified by calling `LZ_decompress_errno' before using it. but perhaps not yet read. -File: lzlib.info, Node: Error Codes, Next: Error Messages, Prev: Decompression Functions, Up: Top +File: lzlib.info, Node: Error codes, Next: Error messages, Prev: Decompression functions, Up: Top -7 Error Codes +7 Error codes ************* Most library functions return -1 to indicate that they have failed. But @@ -471,9 +484,9 @@ whether a call failed. If the call failed, then you can examine Problems::). -File: lzlib.info, Node: Error Messages, Next: Data Format, Prev: Error Codes, Up: Top +File: lzlib.info, Node: Error messages, Next: Data format, Prev: Error codes, Up: Top -8 Error Messages +8 Error messages **************** -- Function: const char * LZ_strerror ( const enum LZ_Errno LZ_ERRNO ) @@ -487,9 +500,9 @@ File: lzlib.info, Node: Error Messages, Next: Data Format, Prev: Error Codes, `LZ_(de)compress_errno'. -File: lzlib.info, Node: Data Format, Next: Examples, Prev: Error Messages, Up: Top +File: lzlib.info, Node: Data format, Next: Examples, Prev: Error messages, Up: Top -9 Data Format +9 Data format ************* Perfection is reached, not when there is no longer anything to add, but @@ -545,6 +558,8 @@ with no additional information before, between, or after them. The lzma stream, finished by an end of stream marker. Uses default values for encoder properties. See the lzip manual for a full description. + Lzip only uses the LZMA marker `2' ("End Of Stream" marker). Lzlib + also uses the LZMA marker `3' ("Sync Flush" marker). `CRC32 (4 bytes)' CRC of the uncompressed original data. @@ -560,7 +575,7 @@ with no additional information before, between, or after them. -File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top +File: lzlib.info, Node: Examples, Next: Problems, Prev: Data format, Up: Top 10 A small tutorial with examples ********************************* @@ -680,7 +695,7 @@ next member in case of data error. 7) LZ_decompress_close -File: lzlib.info, Node: Problems, Next: Concept Index, Prev: Examples, Up: Top +File: lzlib.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top 11 Reporting Bugs ***************** @@ -696,9 +711,9 @@ by running `minilzip --version' or in `LZ_version_string' from `lzlib.h'. -File: lzlib.info, Node: Concept Index, Prev: Problems, Up: Top +File: lzlib.info, Node: Concept index, Prev: Problems, Up: Top -Concept Index +Concept index ************* @@ -706,34 +721,34 @@ Concept Index * buffering: Buffering. (line 6) * bugs: Problems. (line 6) -* compression functions: Compression Functions. (line 6) -* data format: Data Format. (line 6) -* decompression functions: Decompression Functions. +* compression functions: Compression functions. (line 6) +* data format: Data format. (line 6) +* decompression functions: Decompression functions. (line 6) -* error codes: Error Codes. (line 6) -* error messages: Error Messages. (line 6) +* error codes: Error codes. (line 6) +* error messages: Error messages. (line 6) * examples: Examples. (line 6) * getting help: Problems. (line 6) * introduction: Introduction. (line 6) -* library version: Library Version. (line 6) -* parameter limits: Parameter Limits. (line 6) +* library version: Library version. (line 6) +* parameter limits: Parameter limits. (line 6) Tag Table: -Node: Top219 -Node: Introduction1319 -Node: Library Version3165 -Node: Buffering3810 -Node: Parameter Limits4929 -Node: Compression Functions5886 -Node: Decompression Functions12096 -Node: Error Codes18257 -Node: Error Messages20196 -Node: Data Format20775 -Node: Examples23268 -Node: Problems27351 -Node: Concept Index27923 +Node: Top217 +Node: Introduction1322 +Node: Library version3903 +Node: Buffering4548 +Node: Parameter limits5667 +Node: Compression functions6624 +Node: Decompression functions12834 +Node: Error codes18995 +Node: Error messages20934 +Node: Data format21513 +Node: Examples24136 +Node: Problems28219 +Node: Concept index28791 End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index c08303e..2830694 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -6,19 +6,19 @@ @finalout @c %**end of header -@set UPDATED 28 May 2013 -@set VERSION 1.4 +@set UPDATED 28 July 2013 +@set VERSION 1.5-rc1 @dircategory Data Compression @direntry -* Lzlib: (lzlib). A compression library for lzip files +* Lzlib: (lzlib). Compression library for lzip files @end direntry @ifnothtml @titlepage @title Lzlib -@subtitle A compression library for lzip files +@subtitle Compression library for lzip files @subtitle for Lzlib version @value{VERSION}, @value{UPDATED} @author by Antonio Diaz Diaz @@ -36,17 +36,17 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @menu * Introduction:: Purpose and features of Lzlib -* Library Version:: Checking library version +* Library version:: Checking library version * Buffering:: Sizes of Lzlib's buffers -* Parameter Limits:: Min / max values for some parameters -* Compression Functions:: Descriptions of the compression functions -* Decompression Functions:: Descriptions of the decompression functions -* Error Codes:: Meaning of codes returned by functions -* Error Messages:: Error messages corresponding to error codes -* Data Format:: Detailed format of the compressed data +* Parameter limits:: Min / max values for some parameters +* Compression functions:: Descriptions of the compression functions +* Decompression functions:: Descriptions of the decompression functions +* Error codes:: Meaning of codes returned by functions +* Error messages:: Error messages corresponding to error codes +* Data format:: Detailed format of the compressed data * Examples:: A small tutorial with examples * Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Concept index:: Index of concepts @end menu @sp 1 @@ -65,6 +65,10 @@ and decompression functions, including integrity checking of the decompressed data. The compressed data format used by the library is the lzip format. Lzlib is written in C. +The lzip file format is designed for long-term data archiving. It is +clean, provides very safe 4 factor integrity checking, and is backed by +the recovery capabilities of lziprecover. + The functions and variables forming the interface of the compression library are declared in the file @samp{lzlib.h}. Usage examples of the library are given in the files @samp{main.c} and @samp{bbexample.c} from @@ -91,13 +95,22 @@ any signal handler. The decoder checks the consistency of the compressed data, so the library should never crash even in case of corrupted input. Lzlib implements a simplified version of the LZMA (Lempel-Ziv-Markov -chain-Algorithm) algorithm. The original LZMA algorithm was designed by -Igor Pavlov. For a description of the LZMA algorithm, see the Lzip -manual. - - -@node Library Version -@chapter Library Version +chain-Algorithm) algorithm. The high compression of LZMA comes from +combining two basic, well-proven compression ideas: sliding dictionaries +(LZ77/78) and markov models (the thing used by every compression +algorithm that uses a range encoder or similar order-0 entropy coder as +its last stage) with segregation of contexts according to what the bits +are used for. + +The ideas embodied in lzlib are due to (at least) the following people: +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for +the definition of Markov chains), G.N.N. Martin (for the definition of +range encoding), Igor Pavlov (for putting all the above together in +LZMA), and Julian Seward (for bzip2's CLI). + + +@node Library version +@chapter Library version @cindex library version @deftypefun {const char *} LZ_version ( void ) @@ -150,8 +163,8 @@ whichever is larger. @end itemize -@node Parameter Limits -@chapter Parameter Limits +@node Parameter limits +@chapter Parameter limits @cindex parameter limits These functions provide minimum and maximum values for some parameters. @@ -182,8 +195,8 @@ Returns the largest valid match length limit [273]. @end deftypefun -@node Compression Functions -@chapter Compression Functions +@node Compression functions +@chapter Compression functions @cindex compression functions These are the functions used to compress data. In case of error, all of @@ -288,7 +301,7 @@ accept a @var{size} up to the returned number of bytes. @deftypefun {enum LZ_Errno} LZ_compress_errno ( struct LZ_Encoder * const @var{encoder} ) -Returns the current error code for @var{encoder} (@pxref{Error Codes}). +Returns the current error code for @var{encoder} (@pxref{Error codes}). @end deftypefun @@ -328,8 +341,8 @@ perhaps not yet read. @end deftypefun -@node Decompression Functions -@chapter Decompression Functions +@node Decompression functions +@chapter Decompression functions @cindex decompression functions These are the functions used to decompress data. In case of error, all @@ -421,7 +434,7 @@ will accept a @var{size} up to the returned number of bytes. @deftypefun {enum LZ_Errno} LZ_decompress_errno ( struct LZ_Decoder * const @var{decoder} ) -Returns the current error code for @var{decoder} (@pxref{Error Codes}). +Returns the current error code for @var{decoder} (@pxref{Error codes}). @end deftypefun @@ -480,8 +493,8 @@ perhaps not yet read. @end deftypefun -@node Error Codes -@chapter Error Codes +@node Error codes +@chapter Error codes @cindex error codes Most library functions return -1 to indicate that they have failed. But @@ -538,8 +551,8 @@ A bug was detected in the library. Please, report it (@pxref{Problems}). @end deftypevr -@node Error Messages -@chapter Error Messages +@node Error messages +@chapter Error messages @cindex error messages @deftypefun {const char *} LZ_strerror ( const enum LZ_Errno @var{lz_errno} ) @@ -553,8 +566,8 @@ The value of @var{lz_errno} normally comes from a call to @end deftypefun -@node Data Format -@chapter Data Format +@node Data format +@chapter Data format @cindex data format Perfection is reached, not when there is no longer anything to add, but @@ -612,7 +625,9 @@ Valid values for dictionary size range from 4KiB to 512MiB. @item Lzma stream The lzma stream, finished by an end of stream marker. Uses default values -for encoder properties. See the lzip manual for a full description. +for encoder properties. See the lzip manual for a full description.@* +Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib +also uses the LZMA marker @samp{3} ("Sync Flush" marker). @item CRC32 (4 bytes) CRC of the uncompressed original data. @@ -793,8 +808,8 @@ find by running @w{@samp{minilzip --version}} or in @samp{LZ_version_string} from @samp{lzlib.h}. -@node Concept Index -@unnumbered Concept Index +@node Concept index +@unnumbered Concept index @printindex cp diff --git a/doc/minilzip.1 b/doc/minilzip.1 index eba916d..da2c211 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,12 +1,12 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH MINILZIP "1" "May 2013" "Minilzip 1.4" "User Commands" +.TH MINILZIP "1" "July 2013" "Minilzip 1.5-rc1" "User Commands" .SH NAME Minilzip \- reduces the size of files .SH SYNOPSIS .B minilzip [\fIoptions\fR] [\fIfiles\fR] .SH DESCRIPTION -Minilzip \- A test program for the lzlib library. +Minilzip \- Test program for the lzlib library. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -82,7 +82,7 @@ Report bugs to lzip\-bug@nongnu.org Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT Copyright \(co 2013 Antonio Diaz Diaz. -Using Lzlib 1.4 +Using Lzlib 1.5\-rc1 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -51,7 +51,8 @@ static bool Mf_init( struct Matchfinder * const mf, const int dict_size, const int match_len_limit ) { const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; - int i, size; + unsigned size; + int i; mf->partial_data_pos = 0; mf->match_len_limit = match_len_limit; @@ -62,6 +63,7 @@ static bool Mf_init( struct Matchfinder * const mf, 16 + ( match_len_limit / 2 ) : 256; mf->at_stream_end = false; mf->been_flushed = false; + mf->flushing = false; mf->buffer_size = max( 65536, buffer_size_limit ); mf->buffer = (uint8_t *)malloc( mf->buffer_size ); @@ -69,7 +71,7 @@ static bool Mf_init( struct Matchfinder * const mf, mf->dictionary_size = dict_size; mf->pos_limit = mf->buffer_size - after_size; size = 1 << max( 16, real_bits( mf->dictionary_size - 1 ) - 2 ); - if( mf->dictionary_size > 1 << 26 ) + if( mf->dictionary_size > 1 << 26 ) /* 64 MiB */ size >>= 1; mf->key4_mask = size - 1; size += num_prev_positions2; @@ -77,7 +79,8 @@ static bool Mf_init( struct Matchfinder * const mf, mf->num_prev_positions = size; size += ( 2 * ( mf->dictionary_size + 1 ) ); - mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); + if( size * sizeof (int32_t) <= size ) mf->prev_positions = 0; + else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); if( !mf->prev_positions ) { free( mf->buffer ); return false; } mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; @@ -116,6 +119,7 @@ static void Mf_reset( struct Matchfinder * const mf ) mf->cyclic_pos = 0; mf->at_stream_end = false; mf->been_flushed = false; + mf->flushing = false; for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; } @@ -264,18 +268,17 @@ static bool LZe_full_flush( struct LZ_encoder * const encoder, const State state const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; File_trailer trailer; if( encoder->member_finished || - Cb_free_bytes( &encoder->range_encoder.cb ) < max_marker_size + Ft_size ) + Cb_free_bytes( &encoder->renc.cb ) < max_marker_size + Ft_size ) return false; - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); + Re_encode_bit( &encoder->renc, &encoder->bm_match[state][pos_state], 1 ); + Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], 0 ); LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); - Re_flush( &encoder->range_encoder ); + Re_flush( &encoder->renc ); Ft_set_data_crc( trailer, LZe_crc( encoder ) ); Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); - Ft_set_member_size( trailer, Re_member_position( &encoder->range_encoder ) + - Ft_size ); + Ft_set_member_size( trailer, Re_member_position( &encoder->renc ) + Ft_size ); for( i = 0; i < Ft_size; ++i ) - Cb_put_byte( &encoder->range_encoder.cb, trailer[i] ); + Cb_put_byte( &encoder->renc.cb, trailer[i] ); return true; } @@ -286,12 +289,12 @@ static bool LZe_sync_flush( struct LZ_encoder * const encoder ) const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; const State state = encoder->state; if( encoder->member_finished || - Cb_free_bytes( &encoder->range_encoder.cb ) < max_marker_size ) + Cb_free_bytes( &encoder->renc.cb ) < max_marker_size ) return false; - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[state], 0 ); + Re_encode_bit( &encoder->renc, &encoder->bm_match[state][pos_state], 1 ); + Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], 0 ); LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len + 1, pos_state ); - Re_flush( &encoder->range_encoder ); + Re_flush( &encoder->renc ); return true; } @@ -363,7 +366,7 @@ static bool LZe_init( struct LZ_encoder * const encoder, Bm_array_init( encoder->bm_align, dis_align_size ); encoder->matchfinder = mf; - if( !Re_init( &encoder->range_encoder ) ) return false; + if( !Re_init( &encoder->renc ) ) return false; Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit ); Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit ); encoder->num_dis_slots = @@ -376,13 +379,13 @@ static bool LZe_init( struct LZ_encoder * const encoder, encoder->member_finished = false; for( i = 0; i < Fh_size; ++i ) - Cb_put_byte( &encoder->range_encoder.cb, header[i] ); + Cb_put_byte( &encoder->renc.cb, header[i] ); return true; } /* Return value == number of bytes advanced (ahead). - trials[0]..trials[retval-1] contain the steps to encode. + trials[0]..trials[ahead-1] contain the steps to encode. ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. */ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, @@ -583,8 +586,7 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, if( St_is_char( cur_state ) ) next_price += LZe_price_literal( encoder, prev_byte, cur_byte ); else - next_price += LZe_price_matched( encoder, - prev_byte, cur_byte, match_byte ); + next_price += LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); if( !Mf_move_pos( encoder->matchfinder ) ) return 0; /* try last updates to next trial */ @@ -741,15 +743,15 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, } -static bool LZe_encode_member( struct LZ_encoder * const encoder, - const bool finish ) +static bool LZe_encode_member( struct LZ_encoder * const encoder ) { const int fill_count = ( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512; int ahead, i; State * const state = &encoder->state; + if( encoder->member_finished ) return true; - if( Re_member_position( &encoder->range_encoder ) >= encoder->member_size_limit ) + if( Re_member_position( &encoder->renc ) >= encoder->member_size_limit ) { if( LZe_full_flush( encoder, *state ) ) encoder->member_finished = true; return true; @@ -761,10 +763,10 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, const uint8_t prev_byte = 0; uint8_t cur_byte; if( Mf_available_bytes( encoder->matchfinder ) < max_match_len && - !encoder->matchfinder->at_stream_end ) + !Mf_flushing_or_end( encoder->matchfinder ) ) return true; cur_byte = Mf_peek( encoder->matchfinder, 0 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_match[*state][0], 0 ); + Re_encode_bit( &encoder->renc, &encoder->bm_match[*state][0], 0 ); LZe_encode_literal( encoder, prev_byte, cur_byte ); CRC32_update_byte( &encoder->crc, cur_byte ); Mf_get_match_pairs( encoder->matchfinder, 0 ); @@ -774,7 +776,7 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, while( !Mf_finished( encoder->matchfinder ) ) { if( !Mf_enough_available_bytes( encoder->matchfinder ) || - !Re_enough_free_bytes( &encoder->range_encoder ) ) return true; + !Re_enough_free_bytes( &encoder->renc ) ) return true; if( encoder->pending_num_pairs == 0 ) { if( encoder->fill_counter <= 0 ) @@ -794,7 +796,7 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, const int len = encoder->trials[i].price; bool bit = ( dis < 0 && len == 1 ); - Re_encode_bit( &encoder->range_encoder, + Re_encode_bit( &encoder->renc, &encoder->bm_match[*state][pos_state], !bit ); if( bit ) /* literal byte */ { @@ -811,28 +813,28 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, } *state = St_set_char( *state ); } - else /* match or repeated match */ + else /* match or repeated match */ { CRC32_update_buf( &encoder->crc, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len ); LZe_mtf_reps( dis, encoder->rep_distances ); bit = ( dis < num_rep_distances ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep[*state], bit ); + Re_encode_bit( &encoder->renc, &encoder->bm_rep[*state], bit ); if( bit ) { bit = ( dis == 0 ); - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep0[*state], !bit ); + Re_encode_bit( &encoder->renc, &encoder->bm_rep0[*state], !bit ); if( bit ) - Re_encode_bit( &encoder->range_encoder, &encoder->bm_len[*state][pos_state], len > 1 ); + Re_encode_bit( &encoder->renc, &encoder->bm_len[*state][pos_state], len > 1 ); else { - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep1[*state], dis > 1 ); + Re_encode_bit( &encoder->renc, &encoder->bm_rep1[*state], dis > 1 ); if( dis > 1 ) - Re_encode_bit( &encoder->range_encoder, &encoder->bm_rep2[*state], dis > 2 ); + Re_encode_bit( &encoder->renc, &encoder->bm_rep2[*state], dis > 2 ); } if( len == 1 ) *state = St_set_short_rep( *state ); else { - Lee_encode( &encoder->rep_len_encoder, &encoder->range_encoder, len, pos_state ); + Lee_encode( &encoder->rep_len_encoder, &encoder->renc, len, pos_state ); *state = St_set_rep( *state ); } } @@ -844,7 +846,7 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, } } ahead -= len; i += len; - if( Re_member_position( &encoder->range_encoder ) >= encoder->member_size_limit ) + if( Re_member_position( &encoder->renc ) >= encoder->member_size_limit ) { if( !Mf_dec_pos( encoder->matchfinder, ahead ) ) return false; if( LZe_full_flush( encoder, *state ) ) encoder->member_finished = true; @@ -853,7 +855,6 @@ static bool LZe_encode_member( struct LZ_encoder * const encoder, if( ahead <= 0 ) break; } } - if( finish && LZe_full_flush( encoder, *state ) ) - encoder->member_finished = true; + if( LZe_full_flush( encoder, *state ) ) encoder->member_finished = true; return true; } @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -239,10 +239,9 @@ struct Matchfinder int num_prev_positions; /* size of prev_positions */ bool at_stream_end; /* stream_pos shows real end of file */ bool been_flushed; + bool flushing; }; -static bool Mf_normalize_pos( struct Matchfinder * const mf ); - static int Mf_write_data( struct Matchfinder * const mf, const uint8_t * const inbuf, const int size ) { @@ -255,6 +254,8 @@ static int Mf_write_data( struct Matchfinder * const mf, return sz; } +static bool Mf_normalize_pos( struct Matchfinder * const mf ); + static inline void Mf_free( struct Matchfinder * const mf ) { free( mf->prev_positions ); @@ -271,16 +272,18 @@ static inline unsigned long long Mf_data_position( const struct Matchfinder * const mf ) { return mf->partial_data_pos + mf->pos; } +static inline void Mf_finish( struct Matchfinder * const mf ) + { mf->at_stream_end = true; mf->flushing = false; } + static inline bool Mf_finished( const struct Matchfinder * const mf ) { return mf->at_stream_end && mf->pos >= mf->stream_pos; } -static inline const uint8_t * -Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) - { return mf->buffer + mf->pos; } - static inline void Mf_set_flushing( struct Matchfinder * const mf, const bool flushing ) - { mf->at_stream_end = flushing; } + { mf->flushing = flushing; } + +static inline bool Mf_flushing_or_end( const struct Matchfinder * const mf ) + { return mf->at_stream_end || mf->flushing; } static inline int Mf_free_bytes( const struct Matchfinder * const mf ) { if( mf->at_stream_end ) return 0; return mf->buffer_size - mf->stream_pos; } @@ -288,9 +291,13 @@ static inline int Mf_free_bytes( const struct Matchfinder * const mf ) static inline bool Mf_enough_available_bytes( const struct Matchfinder * const mf ) { return ( mf->pos + after_size <= mf->stream_pos || - ( mf->at_stream_end && mf->pos < mf->stream_pos ) ); + ( Mf_flushing_or_end( mf ) && mf->pos < mf->stream_pos ) ); } +static inline const uint8_t * +Mf_ptr_to_current_pos( const struct Matchfinder * const mf ) + { return mf->buffer + mf->pos; } + static inline bool Mf_dec_pos( struct Matchfinder * const mf, const int ahead ) { @@ -321,7 +328,6 @@ static inline bool Mf_move_pos( struct Matchfinder * const mf ) return true; } -static void Mf_reset( struct Matchfinder * const mf ); static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pairs ); @@ -590,7 +596,7 @@ struct LZ_encoder Bit_model bm_align[dis_align_size]; struct Matchfinder * matchfinder; - struct Range_encoder range_encoder; + struct Range_encoder renc; struct Len_encoder match_len_encoder; struct Len_encoder rep_len_encoder; @@ -611,13 +617,12 @@ struct LZ_encoder static inline bool LZe_member_finished( const struct LZ_encoder * const encoder ) { - return ( encoder->member_finished && - !Cb_used_bytes( &encoder->range_encoder.cb ) ); + return ( encoder->member_finished && !Cb_used_bytes( &encoder->renc.cb ) ); } static inline void LZe_free( struct LZ_encoder * const encoder ) - { Re_free( &encoder->range_encoder ); } + { Re_free( &encoder->renc ); } static inline unsigned LZe_crc( const struct LZ_encoder * const encoder ) { return encoder->crc ^ 0xFFFFFFFFU; } @@ -702,13 +707,13 @@ static inline int LZe_price_matched( const struct LZ_encoder * const encoder, static inline void LZe_encode_literal( struct LZ_encoder * const encoder, uint8_t prev_byte, uint8_t symbol ) - { Re_encode_tree( &encoder->range_encoder, + { Re_encode_tree( &encoder->renc, encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } static inline void LZe_encode_matched( struct LZ_encoder * const encoder, uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { Re_encode_matched( &encoder->range_encoder, + { Re_encode_matched( &encoder->renc, encoder->bm_literal[get_lit_state(prev_byte)], symbol, match_byte ); } @@ -717,9 +722,8 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const int pos_state ) { const int dis_slot = get_slot( dis ); - Lee_encode( &encoder->match_len_encoder, &encoder->range_encoder, len, pos_state ); - Re_encode_tree( &encoder->range_encoder, - encoder->bm_dis_slot[get_dis_state(len)], + Lee_encode( &encoder->match_len_encoder, &encoder->renc, len, pos_state ); + Re_encode_tree( &encoder->renc, encoder->bm_dis_slot[get_dis_state(len)], dis_slot, dis_slot_bits ); if( dis_slot >= start_dis_model ) @@ -729,14 +733,14 @@ static inline void LZe_encode_pair( struct LZ_encoder * const encoder, const uint32_t direct_dis = dis - base; if( dis_slot < end_dis_model ) - Re_encode_tree_reversed( &encoder->range_encoder, + Re_encode_tree_reversed( &encoder->renc, encoder->bm_dis + base - dis_slot - 1, direct_dis, direct_bits ); else { - Re_encode( &encoder->range_encoder, direct_dis >> dis_align_bits, + Re_encode( &encoder->renc, direct_dis >> dis_align_bits, direct_bits - dis_align_bits ); - Re_encode_tree_reversed( &encoder->range_encoder, encoder->bm_align, + Re_encode_tree_reversed( &encoder->renc, encoder->bm_align, direct_dis, dis_align_bits ); --encoder->align_price_count; } @@ -1,4 +1,4 @@ -/* Lzcheck - A test program for the lzlib library +/* Lzcheck - Test program for the lzlib library Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you have unlimited permission @@ -37,7 +37,7 @@ uint8_t out_buffer[buffer_size]; int main( const int argc, const char * const argv[] ) { const int dictionary_size = 1 << 20; - const int match_len_limit = 36; + const int match_len_limit = 16; const unsigned long long member_size = INT64_MAX; struct LZ_Encoder * encoder; struct LZ_Decoder * decoder; @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -208,7 +208,7 @@ static inline uint8_t Fh_version( const File_header data ) { return data[4]; } static inline bool Fh_verify_version( const File_header data ) - { return ( data[4] <= 1 ); } + { return ( data[4] == 1 ); } static inline unsigned Fh_get_dictionary_size( const File_header data ) { @@ -218,15 +218,15 @@ static inline unsigned Fh_get_dictionary_size( const File_header data ) return sz; } -static inline bool Fh_set_dictionary_size( File_header data, const int sz ) +static inline bool Fh_set_dictionary_size( File_header data, const unsigned sz ) { if( sz >= min_dictionary_size && sz <= max_dictionary_size ) { data[5] = real_bits( sz - 1 ); if( sz > min_dictionary_size ) { - const int base_size = 1 << data[5]; - const int wedge = base_size / 16; + const unsigned base_size = 1 << data[5]; + const unsigned wedge = base_size / 16; int i; for( i = 7; i >= 1; --i ) if( base_size - ( i * wedge ) >= sz ) @@ -252,9 +252,6 @@ typedef uint8_t File_trailer[20]; enum { Ft_size = 20 }; -static inline int Ft_versioned_size( const int version ) - { return ( ( version >= 1 ) ? 20 : 12 ); } - static inline unsigned Ft_get_data_crc( const File_trailer data ) { unsigned tmp = 0; @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -199,7 +199,7 @@ int LZ_compress_close( struct LZ_Encoder * const e ) int LZ_compress_finish( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) || e->fatal ) return -1; - Mf_set_flushing( e->matchfinder, true ); + Mf_finish( e->matchfinder ); e->flush_pending = 0; /* if (open --> write --> finish) use same dictionary size as lzip. */ /* this does not save any memory. */ @@ -220,16 +220,13 @@ int LZ_compress_restart_member( struct LZ_Encoder * const e, { e->lz_errno = LZ_bad_argument; return -1; } e->partial_in_size += Mf_data_position( e->matchfinder ); - e->partial_out_size += Re_member_position( &e->lz_encoder->range_encoder ); + e->partial_out_size += Re_member_position( &e->lz_encoder->renc ); Mf_reset( e->matchfinder ); - LZe_free( e->lz_encoder ); free( e->lz_encoder ); - e->lz_encoder = (struct LZ_encoder *)malloc( sizeof (struct LZ_encoder) ); - if( !e->lz_encoder || - !LZe_init( e->lz_encoder, e->matchfinder, e->member_header, member_size ) ) + LZe_free( e->lz_encoder ); + if( !LZe_init( e->lz_encoder, e->matchfinder, e->member_header, member_size ) ) { - if( e->lz_encoder ) - { LZe_free( e->lz_encoder ); free( e->lz_encoder ); e->lz_encoder = 0; } + LZe_free( e->lz_encoder ); free( e->lz_encoder ); e->lz_encoder = 0; e->lz_errno = LZ_mem_error; e->fatal = true; return -1; } @@ -241,11 +238,11 @@ int LZ_compress_restart_member( struct LZ_Encoder * const e, int LZ_compress_sync_flush( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( !e->flush_pending && !e->matchfinder->at_stream_end ) + if( e->flush_pending <= 0 && !Mf_flushing_or_end( e->matchfinder ) ) { e->flush_pending = 2; /* 2 consecutive markers guarantee decoding */ Mf_set_flushing( e->matchfinder, true ); - if( !LZe_encode_member( e->lz_encoder, false ) ) + if( !LZe_encode_member( e->lz_encoder ) ) { e->lz_errno = LZ_library_error; e->fatal = true; return -1; } while( e->flush_pending > 0 && LZe_sync_flush( e->lz_encoder ) ) { if( --e->flush_pending <= 0 ) Mf_set_flushing( e->matchfinder, false ); } @@ -258,11 +255,11 @@ int LZ_compress_read( struct LZ_Encoder * const e, uint8_t * const buffer, const int size ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( !LZe_encode_member( e->lz_encoder, !e->flush_pending ) ) + if( !LZe_encode_member( e->lz_encoder ) ) { e->lz_errno = LZ_library_error; e->fatal = true; return -1; } while( e->flush_pending > 0 && LZe_sync_flush( e->lz_encoder ) ) { if( --e->flush_pending <= 0 ) Mf_set_flushing( e->matchfinder, false ); } - return Re_read_data( &e->lz_encoder->range_encoder, buffer, size ); + return Re_read_data( &e->lz_encoder->renc, buffer, size ); } @@ -270,7 +267,7 @@ int LZ_compress_write( struct LZ_Encoder * const e, const uint8_t * const buffer, const int size ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( e->flush_pending ) return 0; + if( e->flush_pending > 0 || size < 0 ) return 0; return Mf_write_data( e->matchfinder, buffer, size ); } @@ -278,7 +275,7 @@ int LZ_compress_write( struct LZ_Encoder * const e, int LZ_compress_write_size( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) || e->fatal ) return -1; - if( e->flush_pending ) return 0; + if( e->flush_pending > 0 ) return 0; return Mf_free_bytes( e->matchfinder ); } @@ -293,7 +290,7 @@ enum LZ_Errno LZ_compress_errno( struct LZ_Encoder * const e ) int LZ_compress_finished( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return -1; - return ( !e->flush_pending && Mf_finished( e->matchfinder ) && + return ( e->flush_pending <= 0 && Mf_finished( e->matchfinder ) && LZe_member_finished( e->lz_encoder ) ); } @@ -315,7 +312,7 @@ unsigned long long LZ_compress_data_position( struct LZ_Encoder * const e ) unsigned long long LZ_compress_member_position( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return 0; - return Re_member_position( &e->lz_encoder->range_encoder ); + return Re_member_position( &e->lz_encoder->renc ); } @@ -329,8 +326,7 @@ unsigned long long LZ_compress_total_in_size( struct LZ_Encoder * const e ) unsigned long long LZ_compress_total_out_size( struct LZ_Encoder * const e ) { if( !verify_encoder( e ) ) return 0; - return e->partial_out_size + - Re_member_position( &e->lz_encoder->range_encoder ); + return e->partial_out_size + Re_member_position( &e->lz_encoder->renc ); } @@ -1,4 +1,4 @@ -/* Lzlib - A compression library for lzip files +/* Lzlib - Compression library for lzip files Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify @@ -29,7 +29,7 @@ extern "C" { #endif -static const char * const LZ_version_string = "1.4"; +static const char * const LZ_version_string = "1.5-rc1"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -1,4 +1,4 @@ -/* Minilzip - A test program for the lzlib library +/* Minilzip - Test program for the lzlib library Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify @@ -147,7 +147,7 @@ static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg static void show_help( void ) { - printf( "%s - A test program for the lzlib library.\n", Program_name ); + printf( "%s - Test program for the lzlib library.\n", Program_name ); printf( "\nUsage: %s [options] [files]\n", invocation_name ); printf( "\nOptions:\n" " -h, --help display this help and exit\n" @@ -196,7 +196,7 @@ static void show_version( void ) } -void show_header( struct LZ_Decoder * const decoder ) +static void show_header( struct LZ_Decoder * const decoder ) { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; @@ -209,8 +209,6 @@ void show_header( struct LZ_Decoder * const decoder ) for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; np = ""; } - if( verbosity >= 4 ) - fprintf( stderr, "version %d, ", LZ_decompress_member_version( decoder ) ); fprintf( stderr, "dictionary size %s%4u %sB. ", np, num, p ); } @@ -323,12 +321,13 @@ static int open_instream( const char * const name, struct stat * const in_statsp const bool can_read = ( i == 0 && ( S_ISBLK( mode ) || S_ISCHR( mode ) || S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); - if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) + const bool no_ofile = to_stdout || program_mode == m_test; + if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || !no_ofile ) ) ) { if( verbosity >= 0 ) fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n", program_name, name, - ( can_read && !to_stdout ) ? + ( can_read && !no_ofile ) ? " and '--stdout' was not specified" : "" ); close( infd ); infd = -1; @@ -468,7 +467,7 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) /* Returns the number of bytes really read. If (returned value < size) and (errno == 0), means EOF was reached. */ -int readblock( const int fd, uint8_t * const buf, const int size ) +static int readblock( const int fd, uint8_t * const buf, const int size ) { int rest = size; errno = 0; @@ -487,7 +486,7 @@ int readblock( const int fd, uint8_t * const buf, const int size ) /* Returns the number of bytes really written. If (returned value < size), it is always an error. */ -int writeblock( const int fd, const uint8_t * const buf, const int size ) +static int writeblock( const int fd, const uint8_t * const buf, const int size ) { int rest = size; errno = 0; @@ -714,8 +713,8 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, if( lz_errno == LZ_header_error || ( first_member && out_size == 0 ) ) { if( !first_member ) break; /* trailing garbage */ - Pp_show_msg( pp, "Error reading member header" ); - return 1; + Pp_show_msg( pp, "Bad magic number (file not in lzip format)" ); + return 2; } if( lz_errno == LZ_mem_error ) { diff --git a/testsuite/check.sh b/testsuite/check.sh index a548def..64c0c93 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -24,21 +24,34 @@ mkdir tmp cd "${objdir}"/tmp cat "${testdir}"/test.txt > in || framework_failure +in_lz="${testdir}"/test.txt.lz fail=0 printf "testing lzlib-%s..." "$2" "${LZIP}" -cqs-1 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs0 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs4095 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqm274 in > /dev/null -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi - -"${LZIP}" -t "${testdir}"/test.txt.lz || fail=1 -"${LZIP}" -cd "${testdir}"/test.txt.lz > copy || fail=1 +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -tq in +if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -tq < in +if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cdq in +if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cdq < in +if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi +dd if="${in_lz}" bs=1 count=6 2> /dev/null | "${LZIP}" -tq +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi +dd if="${in_lz}" bs=1 count=20 2> /dev/null | "${LZIP}" -tq +if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi + +"${LZIP}" -t "${in_lz}" || fail=1 +"${LZIP}" -cd "${in_lz}" > copy || fail=1 cmp in copy || fail=1 printf . @@ -47,9 +60,9 @@ printf . cmp in copy || fail=1 printf . -"${LZIP}" -cfq "${testdir}"/test.txt.lz > out -if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi -"${LZIP}" -cF "${testdir}"/test.txt.lz > out || fail=1 +"${LZIP}" -cfq "${in_lz}" > out +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cF "${in_lz}" > out || fail=1 "${LZIP}" -cd out | "${LZIP}" -d > copy || fail=1 cmp in copy || fail=1 printf . @@ -60,30 +73,30 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do printf "garbage" >> copy.lz || fail=1 "${LZIP}" -df copy.lz || fail=1 cmp in copy || fail=1 - printf . done +printf . for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -c -$i in > out || fail=1 printf "g" >> out || fail=1 "${LZIP}" -cd out > copy || fail=1 cmp in copy || fail=1 - printf . done +printf . for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -$i < in > out || fail=1 "${LZIP}" -d < out > copy || fail=1 cmp in copy || fail=1 - printf . done +printf . for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1 cmp in copy || fail=1 - printf . done +printf . "${LZIP}" < in > anyothername || fail=1 "${LZIP}" -d anyothername || fail=1 @@ -97,7 +110,6 @@ cmp in2 copy2 || fail=1 printf . "${BBEXAMPLE}" in || fail=1 -printf . "${BBEXAMPLE}" out || fail=1 printf . |