From f9b371c0e53918679b5714ea501b2bee13d08bdf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 7 Nov 2015 15:01:05 +0100 Subject: Merging upstream version 1.6~pre2. Signed-off-by: Daniel Baumann --- ChangeLog | 8 +- INSTALL | 14 +- Makefile.in | 18 +- NEWS | 6 + README | 8 +- bbexample.c | 2 +- carg_parser.c | 2 +- carg_parser.h | 2 +- cbuffer.c | 2 +- configure | 8 +- decoder.c | 114 ++++---- decoder.h | 146 +++++----- doc/lzlib.info | 35 ++- doc/lzlib.texi | 812 +++++++++++++++++++++++++++++++++++++++++++++++++++++ doc/lzlib.texinfo | 810 ---------------------------------------------------- doc/minilzip.1 | 6 +- encoder.c | 605 +++++++++++++++++++-------------------- encoder.h | 262 ++++++++--------- lzcheck.c | 2 +- lzip.h | 31 +- lzlib.c | 5 +- lzlib.h | 4 +- main.c | 91 +++--- testsuite/check.sh | 20 +- 24 files changed, 1485 insertions(+), 1528 deletions(-) create mode 100644 doc/lzlib.texi delete mode 100644 doc/lzlib.texinfo diff --git a/ChangeLog b/ChangeLog index 43b33db..cfb81b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2014-01-30 Antonio Diaz Diaz + + * Version 1.6-pre2 released. + * main.c (close_and_set_permissions): Behave like 'cp -p'. + * lzlib.texinfo: Renamed to lzlib.texi. + 2013-10-07 Antonio Diaz Diaz * Version 1.6-pre1 released. @@ -140,7 +146,7 @@ * Version 0.1 released. -Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute and diff --git a/INSTALL b/INSTALL index b112152..0e778fe 100644 --- a/INSTALL +++ b/INSTALL @@ -43,12 +43,12 @@ the main archive. Another way ----------- -You can also compile lzlib into a separate directory. To do this, you -must use a version of 'make' that supports the 'VPATH' variable, such -as GNU 'make'. 'cd' to the directory where you want the object files -and executables to go and run the 'configure' script. 'configure' -automatically checks for the source code in '.', in '..' and in the -directory that 'configure' is in. +You can also compile lzlib into a separate directory. +To do this, you must use a version of 'make' that supports the 'VPATH' +variable, such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..' and +in the directory that 'configure' is in. 'configure' recognizes the option '--srcdir=DIR' to control where to look for the sources. Usually 'configure' can determine that directory @@ -58,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index 007f0cf..ebdef06 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2,8 +2,8 @@ DISTNAME = $(pkgname)-$(pkgversion) AR = ar INSTALL = install -INSTALL_PROGRAM = $(INSTALL) -p -m 755 -INSTALL_DATA = $(INSTALL) -p -m 644 +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 INSTALL_DIR = $(INSTALL) -d -m 755 LDCONFIG = /sbin/ldconfig SHELL = /bin/sh @@ -41,12 +41,12 @@ lzcheck : lzcheck.o lib$(libname).a main.o : main.c $(CC) $(CPPFLAGS) $(CFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< -%.o : %.c - $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< - lzlib_sh.o : lzlib.c $(CC) -fpic -fPIC $(CPPFLAGS) $(CFLAGS) -c -o $@ $< +%.o : %.c + $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< + lzdeps = lzlib.h lzip.h cbuffer.c decoder.h decoder.c encoder.h encoder.c $(objs) : Makefile @@ -62,8 +62,8 @@ doc : info man info : $(VPATH)/doc/$(pkgname).info -$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo - cd $(VPATH)/doc && makeinfo $(pkgname).texinfo +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi man : $(VPATH)/doc/$(progname).1 @@ -136,7 +136,7 @@ uninstall-man : dist : doc ln -sf $(VPATH) $(DISTNAME) - tar -cvf $(DISTNAME).tar \ + tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ $(DISTNAME)/AUTHORS \ $(DISTNAME)/COPYING \ $(DISTNAME)/ChangeLog \ @@ -147,7 +147,7 @@ dist : doc $(DISTNAME)/configure \ $(DISTNAME)/doc/$(progname).1 \ $(DISTNAME)/doc/$(pkgname).info \ - $(DISTNAME)/doc/$(pkgname).texinfo \ + $(DISTNAME)/doc/$(pkgname).texi \ $(DISTNAME)/testsuite/check.sh \ $(DISTNAME)/testsuite/test.txt \ $(DISTNAME)/testsuite/test.txt.lz \ diff --git a/NEWS b/NEWS index c67c9b5..b1d1c27 100644 --- a/NEWS +++ b/NEWS @@ -5,3 +5,9 @@ Improved portability to BSD systems: The configure script now accepts the option "--disable-ldconfig". "make install" now ignores any errors from ldconfig. + +Minilzip now copies file dates, permissions, and ownership like "cp -p". +(If the user ID or the group ID can't be duplicated, the file permission +bits S_ISUID and S_ISGID are cleared). + +"lzlib.texinfo" has been renamed to "lzlib.texi". diff --git a/README b/README index ea5ad55..2e66843 100644 --- a/README +++ b/README @@ -10,9 +10,9 @@ clean, provides very safe 4 factor integrity checking, and is backed by the recovery capabilities of lziprecover. The functions and variables forming the interface of the compression -library are declared in the file lzlib.h. Usage examples of the library -are given in the files main.c and bbexample.c from the source -distribution. +library are declared in the file 'lzlib.h'. Usage examples of the +library are given in the files 'main.c' and 'bbexample.c' from the +source distribution. Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data has been processed by the @@ -49,7 +49,7 @@ range encoding), Igor Pavlov (for putting all the above together in LZMA), and Julian Seward (for bzip2's CLI). -Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/bbexample.c b/bbexample.c index 4a924fc..7a8e08c 100644 --- a/bbexample.c +++ b/bbexample.c @@ -1,5 +1,5 @@ /* Buff to buff example - Test program for the lzlib library - Copyright (C) 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. diff --git a/carg_parser.c b/carg_parser.c index 378b5e3..1dfcb2b 100644 --- a/carg_parser.c +++ b/carg_parser.c @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/carg_parser.h b/carg_parser.h index 41aa7b3..b153da6 100644 --- a/carg_parser.h +++ b/carg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C version) - Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify diff --git a/cbuffer.c b/cbuffer.c index 8a9cbc0..71d4d6b 100644 --- a/cbuffer.c +++ b/cbuffer.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/configure b/configure index cbd6078..77f568e 100755 --- a/configure +++ b/configure @@ -1,18 +1,18 @@ #! /bin/sh # configure script for Lzlib - Compression library for lzip files -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. pkgname=lzlib -pkgversion=1.6-pre1 +pkgversion=1.6-pre2 soversion=1 progname=minilzip progname_shared= disable_ldconfig= libname=lz -srctrigger=doc/${pkgname}.texinfo +srctrigger=doc/${pkgname}.texi # clear some things potentially inherited from environment. LC_ALL=C @@ -183,7 +183,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - Compression library for lzip files -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission diff --git a/decoder.c b/decoder.c index e1b4c1d..96cccb1 100644 --- a/decoder.c +++ b/decoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,123 +25,120 @@ Public License. */ -static bool LZd_verify_trailer( struct LZ_decoder * const decoder ) +static bool LZd_verify_trailer( struct LZ_decoder * const d ) { File_trailer trailer; - const unsigned long long member_size = - decoder->rdec->member_position + Ft_size; + const unsigned long long member_size = d->rdec->member_position + Ft_size; - int size = Rd_read_data( decoder->rdec, trailer, Ft_size ); + int size = Rd_read_data( d->rdec, trailer, Ft_size ); if( size < Ft_size ) return false; - return ( decoder->rdec->code == 0 && - Ft_get_data_crc( trailer ) == LZd_crc( decoder ) && - Ft_get_data_size( trailer ) == LZd_data_position( decoder ) && + return ( d->rdec->code == 0 && + Ft_get_data_crc( trailer ) == LZd_crc( d ) && + Ft_get_data_size( trailer ) == LZd_data_position( d ) && Ft_get_member_size( trailer ) == member_size ); } /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF, 3 = trailer error, 4 = unknown marker found. */ -static int LZd_decode_member( struct LZ_decoder * const decoder ) +static int LZd_decode_member( struct LZ_decoder * const d ) { - struct Range_decoder * const rdec = decoder->rdec; - State * const state = &decoder->state; + struct Range_decoder * const rdec = d->rdec; + State * const state = &d->state; - if( decoder->member_finished ) return 0; + if( d->member_finished ) return 0; if( !Rd_try_reload( rdec, false ) ) return 0; - if( decoder->verify_trailer_pending ) + if( d->verify_trailer_pending ) { if( Rd_available_bytes( rdec ) < Ft_size && !rdec->at_stream_end ) return 0; - decoder->verify_trailer_pending = false; - decoder->member_finished = true; - if( LZd_verify_trailer( decoder ) ) return 0; else return 3; + d->verify_trailer_pending = false; + d->member_finished = true; + if( LZd_verify_trailer( d ) ) return 0; else return 3; } while( !Rd_finished( rdec ) ) { - const int pos_state = LZd_data_position( decoder ) & pos_state_mask; - if( !Rd_enough_available_bytes( rdec ) || - !LZd_enough_free_bytes( decoder ) ) + const int pos_state = LZd_data_position( d ) & pos_state_mask; + if( !Rd_enough_available_bytes( rdec ) || !LZd_enough_free_bytes( d ) ) return 0; - if( Rd_decode_bit( rdec, &decoder->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ + if( Rd_decode_bit( rdec, &d->bm_match[*state][pos_state] ) == 0 ) /* 1st bit */ { - const uint8_t prev_byte = LZd_get_prev_byte( decoder ); + const uint8_t prev_byte = LZd_get_prev_byte( d ); if( St_is_char( *state ) ) { *state -= ( *state < 4 ) ? *state : 3; - LZd_put_byte( decoder, Rd_decode_tree( rdec, - decoder->bm_literal[get_lit_state(prev_byte)], 8 ) ); + LZd_put_byte( d, Rd_decode_tree( rdec, + d->bm_literal[get_lit_state(prev_byte)], 8 ) ); } else { *state -= ( *state < 10 ) ? 3 : 6; - LZd_put_byte( decoder, Rd_decode_matched( rdec, - decoder->bm_literal[get_lit_state(prev_byte)], - LZd_get_byte( decoder, decoder->rep0 ) ) ); + LZd_put_byte( d, Rd_decode_matched( rdec, + d->bm_literal[get_lit_state(prev_byte)], + LZd_get_byte( d, d->rep0 ) ) ); } } else { int len; - if( Rd_decode_bit( rdec, &decoder->bm_rep[*state] ) != 0 ) /* 2nd bit */ + if( Rd_decode_bit( rdec, &d->bm_rep[*state] ) != 0 ) /* 2nd bit */ { - if( Rd_decode_bit( rdec, &decoder->bm_rep0[*state] ) != 0 ) /* 3rd bit */ + if( Rd_decode_bit( rdec, &d->bm_rep0[*state] ) != 0 ) /* 3rd bit */ { unsigned distance; - if( Rd_decode_bit( rdec, &decoder->bm_rep1[*state] ) == 0 ) /* 4th bit */ - distance = decoder->rep1; + if( Rd_decode_bit( rdec, &d->bm_rep1[*state] ) == 0 ) /* 4th bit */ + distance = d->rep1; else { - if( Rd_decode_bit( rdec, &decoder->bm_rep2[*state] ) == 0 ) /* 5th bit */ - distance = decoder->rep2; + if( Rd_decode_bit( rdec, &d->bm_rep2[*state] ) == 0 ) /* 5th bit */ + distance = d->rep2; else - { distance = decoder->rep3; decoder->rep3 = decoder->rep2; } - decoder->rep2 = decoder->rep1; + { distance = d->rep3; d->rep3 = d->rep2; } + d->rep2 = d->rep1; } - decoder->rep1 = decoder->rep0; - decoder->rep0 = distance; + d->rep1 = d->rep0; + d->rep0 = distance; } else { - if( Rd_decode_bit( rdec, &decoder->bm_len[*state][pos_state] ) == 0 ) /* 4th bit */ + if( Rd_decode_bit( rdec, &d->bm_len[*state][pos_state] ) == 0 ) /* 4th bit */ { *state = St_set_short_rep( *state ); - LZd_put_byte( decoder, LZd_get_byte( decoder, decoder->rep0 ) ); continue; } + LZd_put_byte( d, LZd_get_byte( d, d->rep0 ) ); continue; } } *state = St_set_rep( *state ); - len = min_match_len + Rd_decode_len( rdec, &decoder->rep_len_model, pos_state ); + len = min_match_len + Rd_decode_len( rdec, &d->rep_len_model, pos_state ); } else { int dis_slot; - const unsigned rep0_saved = decoder->rep0; - len = min_match_len + Rd_decode_len( rdec, &decoder->match_len_model, pos_state ); - dis_slot = Rd_decode_tree6( rdec, decoder->bm_dis_slot[get_len_state(len)] ); - if( dis_slot < start_dis_model ) decoder->rep0 = dis_slot; + const unsigned rep0_saved = d->rep0; + len = min_match_len + Rd_decode_len( rdec, &d->match_len_model, pos_state ); + dis_slot = Rd_decode_tree6( rdec, d->bm_dis_slot[get_len_state(len)] ); + if( dis_slot < start_dis_model ) d->rep0 = dis_slot; else { const int direct_bits = ( dis_slot >> 1 ) - 1; - decoder->rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + d->rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits; if( dis_slot < end_dis_model ) - decoder->rep0 += Rd_decode_tree_reversed( rdec, - decoder->bm_dis + decoder->rep0 - dis_slot - 1, - direct_bits ); + d->rep0 += Rd_decode_tree_reversed( rdec, + d->bm_dis + d->rep0 - dis_slot - 1, direct_bits ); else { - decoder->rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; - decoder->rep0 += Rd_decode_tree_reversed4( rdec, decoder->bm_align ); - if( decoder->rep0 == 0xFFFFFFFFU ) /* Marker found */ + d->rep0 += Rd_decode( rdec, direct_bits - dis_align_bits ) << dis_align_bits; + d->rep0 += Rd_decode_tree_reversed4( rdec, d->bm_align ); + if( d->rep0 == 0xFFFFFFFFU ) /* Marker found */ { - decoder->rep0 = rep0_saved; + d->rep0 = rep0_saved; Rd_normalize( rdec ); if( len == min_match_len ) /* End Of Stream marker */ { if( Rd_available_bytes( rdec ) < Ft_size && !rdec->at_stream_end ) - { decoder->verify_trailer_pending = true; return 0; } - decoder->member_finished = true; - if( LZd_verify_trailer( decoder ) ) return 0; else return 3; + { d->verify_trailer_pending = true; return 0; } + d->member_finished = true; + if( LZd_verify_trailer( d ) ) return 0; else return 3; } if( len == min_match_len + 1 ) /* Sync Flush marker */ { @@ -152,15 +149,12 @@ static int LZd_decode_member( struct LZ_decoder * const decoder ) } } } - decoder->rep3 = decoder->rep2; - decoder->rep2 = decoder->rep1; decoder->rep1 = rep0_saved; + d->rep3 = d->rep2; d->rep2 = d->rep1; d->rep1 = rep0_saved; *state = St_set_match( *state ); - if( decoder->rep0 >= (unsigned)decoder->dictionary_size || - ( decoder->rep0 >= (unsigned)decoder->cb.put && - !decoder->partial_data_pos ) ) + if( d->rep0 >= d->dictionary_size || d->rep0 >= LZd_data_position( d ) ) return 1; } - LZd_copy_block( decoder, decoder->rep0, len ); + LZd_copy_block( d, d->rep0, len ); } } return 2; diff --git a/decoder.h b/decoder.h index c4135ff..5caa5c3 100644 --- a/decoder.h +++ b/decoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -159,6 +159,7 @@ static bool Rd_try_reload( struct Range_decoder * const rdec, const bool force ) for( i = 0; i < 5; ++i ) rdec->code = (rdec->code << 8) | Rd_get_byte( rdec ); rdec->range = 0xFFFFFFFFU; + rdec->code &= rdec->range; /* make sure that first byte is discarded */ } return !rdec->reload_pending; } @@ -233,7 +234,7 @@ static inline int Rd_decode_tree6( struct Range_decoder * const rdec, symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); symbol = ( symbol << 1 ) | Rd_decode_bit( rdec, &bm[symbol] ); - return symbol - (1 << 6); + return symbol & 0x3F; } static inline int Rd_decode_tree_reversed( struct Range_decoder * const rdec, @@ -255,9 +256,9 @@ static inline int Rd_decode_tree_reversed4( struct Range_decoder * const rdec, Bit_model bm[] ) { int model = 1; - int symbol = 0; - int bit = Rd_decode_bit( rdec, &bm[model] ); - model = (model << 1) + bit; symbol |= bit; + int symbol = Rd_decode_bit( rdec, &bm[model] ); + int bit; + model = (model << 1) + symbol; bit = Rd_decode_bit( rdec, &bm[model] ); model = (model << 1) + bit; symbol |= (bit << 1); bit = Rd_decode_bit( rdec, &bm[model] ); @@ -271,8 +272,7 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, { Bit_model * const bm1 = bm + 0x100; int symbol = 1; - int i; - for( i = 7; i >= 0; --i ) + while( symbol < 0x100 ) { int match_bit, bit; match_byte <<= 1; @@ -286,7 +286,7 @@ static inline int Rd_decode_matched( struct Range_decoder * const rdec, break; } } - return symbol - 0x100; + return symbol & 0xFF; } static inline int Rd_decode_len( struct Range_decoder * const rdec, @@ -309,7 +309,8 @@ struct LZ_decoder { struct Circular_buffer cb; unsigned long long partial_data_pos; - int dictionary_size; + struct Range_decoder * rdec; + unsigned dictionary_size; uint32_t crc; bool member_finished; bool verify_trailer_pending; @@ -330,101 +331,98 @@ struct LZ_decoder Bit_model bm_dis[modeled_distances-end_dis_model]; Bit_model bm_align[dis_align_size]; - struct Range_decoder * rdec; struct Len_model match_len_model; struct Len_model rep_len_model; }; -static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const decoder ) - { return Cb_free_bytes( &decoder->cb ) >= lzd_min_free_bytes; } +static inline bool LZd_enough_free_bytes( const struct LZ_decoder * const d ) + { return Cb_free_bytes( &d->cb ) >= lzd_min_free_bytes; } -static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const decoder ) +static inline uint8_t LZd_get_prev_byte( const struct LZ_decoder * const d ) { - const int i = - ( ( decoder->cb.put > 0 ) ? decoder->cb.put : decoder->cb.buffer_size ) - 1; - return decoder->cb.buffer[i]; + const int i = ( ( d->cb.put > 0 ) ? d->cb.put : d->cb.buffer_size ) - 1; + return d->cb.buffer[i]; } -static inline uint8_t LZd_get_byte( const struct LZ_decoder * const decoder, +static inline uint8_t LZd_get_byte( const struct LZ_decoder * const d, const int distance ) { - int i = decoder->cb.put - distance - 1; - if( i < 0 ) i += decoder->cb.buffer_size; - return decoder->cb.buffer[i]; + int i = d->cb.put - distance - 1; + if( i < 0 ) i += d->cb.buffer_size; + return d->cb.buffer[i]; } -static inline void LZd_put_byte( struct LZ_decoder * const decoder, - const uint8_t b ) +static inline void LZd_put_byte( struct LZ_decoder * const d, const uint8_t b ) { - CRC32_update_byte( &decoder->crc, b ); - decoder->cb.buffer[decoder->cb.put] = b; - if( ++decoder->cb.put >= decoder->cb.buffer_size ) - { decoder->partial_data_pos += decoder->cb.put; decoder->cb.put = 0; } + CRC32_update_byte( &d->crc, b ); + d->cb.buffer[d->cb.put] = b; + if( ++d->cb.put >= d->cb.buffer_size ) + { d->partial_data_pos += d->cb.put; d->cb.put = 0; } } -static inline void LZd_copy_block( struct LZ_decoder * const decoder, +static inline void LZd_copy_block( struct LZ_decoder * const d, const int distance, int len ) { - int i = decoder->cb.put - distance - 1; - if( i < 0 ) i += decoder->cb.buffer_size; - if( len < decoder->cb.buffer_size - max( decoder->cb.put, i ) && - len <= abs( decoder->cb.put - i ) ) /* no wrap, no overlap */ + int i = d->cb.put - distance - 1; + if( i < 0 ) i += d->cb.buffer_size; + if( len < d->cb.buffer_size - max( d->cb.put, i ) && + len <= abs( d->cb.put - i ) ) /* no wrap, no overlap */ { - CRC32_update_buf( &decoder->crc, decoder->cb.buffer + i, len ); - memcpy( decoder->cb.buffer + decoder->cb.put, decoder->cb.buffer + i, len ); - decoder->cb.put += len; + CRC32_update_buf( &d->crc, d->cb.buffer + i, len ); + memcpy( d->cb.buffer + d->cb.put, d->cb.buffer + i, len ); + d->cb.put += len; } else for( ; len > 0; --len ) { - LZd_put_byte( decoder, decoder->cb.buffer[i] ); - if( ++i >= decoder->cb.buffer_size ) i = 0; + LZd_put_byte( d, d->cb.buffer[i] ); + if( ++i >= d->cb.buffer_size ) i = 0; } } -static inline bool LZd_init( struct LZ_decoder * const decoder, - const File_header header, - struct Range_decoder * const rde ) +static inline bool LZd_init( struct LZ_decoder * const d, + struct Range_decoder * const rde, + const int dict_size ) { - decoder->dictionary_size = Fh_get_dictionary_size( header ); - if( !Cb_init( &decoder->cb, max( 65536, decoder->dictionary_size ) + lzd_min_free_bytes ) ) + if( !Cb_init( &d->cb, max( 65536, dict_size ) + lzd_min_free_bytes ) ) return false; - decoder->partial_data_pos = 0; - decoder->crc = 0xFFFFFFFFU; - decoder->member_finished = false; - decoder->verify_trailer_pending = false; - decoder->rep0 = 0; - decoder->rep1 = 0; - decoder->rep2 = 0; - decoder->rep3 = 0; - decoder->state = 0; - - Bm_array_init( decoder->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( decoder->bm_match[0], states * pos_states ); - Bm_array_init( decoder->bm_rep, states ); - Bm_array_init( decoder->bm_rep0, states ); - Bm_array_init( decoder->bm_rep1, states ); - Bm_array_init( decoder->bm_rep2, states ); - Bm_array_init( decoder->bm_len[0], states * pos_states ); - Bm_array_init( decoder->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( decoder->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( decoder->bm_align, dis_align_size ); - - decoder->rdec = rde; - Lm_init( &decoder->match_len_model ); - Lm_init( &decoder->rep_len_model ); - decoder->cb.buffer[decoder->cb.buffer_size-1] = 0; /* prev_byte of first_byte */ + d->partial_data_pos = 0; + d->rdec = rde; + d->dictionary_size = dict_size; + d->crc = 0xFFFFFFFFU; + d->member_finished = false; + d->verify_trailer_pending = false; + d->rep0 = 0; + d->rep1 = 0; + d->rep2 = 0; + d->rep3 = 0; + d->state = 0; + + Bm_array_init( d->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( d->bm_match[0], states * pos_states ); + Bm_array_init( d->bm_rep, states ); + Bm_array_init( d->bm_rep0, states ); + Bm_array_init( d->bm_rep1, states ); + Bm_array_init( d->bm_rep2, states ); + Bm_array_init( d->bm_len[0], states * pos_states ); + Bm_array_init( d->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( d->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( d->bm_align, dis_align_size ); + + Lm_init( &d->match_len_model ); + Lm_init( &d->rep_len_model ); + d->cb.buffer[d->cb.buffer_size-1] = 0; /* prev_byte of first_byte */ return true; } -static inline void LZd_free( struct LZ_decoder * const decoder ) - { Cb_free( &decoder->cb ); } +static inline void LZd_free( struct LZ_decoder * const d ) + { Cb_free( &d->cb ); } -static inline bool LZd_member_finished( const struct LZ_decoder * const decoder ) - { return ( decoder->member_finished && !Cb_used_bytes( &decoder->cb ) ); } +static inline bool LZd_member_finished( const struct LZ_decoder * const d ) + { return ( d->member_finished && !Cb_used_bytes( &d->cb ) ); } -static inline unsigned LZd_crc( const struct LZ_decoder * const decoder ) - { return decoder->crc ^ 0xFFFFFFFFU; } +static inline unsigned LZd_crc( const struct LZ_decoder * const d ) + { return d->crc ^ 0xFFFFFFFFU; } static inline unsigned long long -LZd_data_position( const struct LZ_decoder * const decoder ) - { return decoder->partial_data_pos + decoder->cb.put; } +LZd_data_position( const struct LZ_decoder * const d ) + { return d->partial_data_pos + d->cb.put; } diff --git a/doc/lzlib.info b/doc/lzlib.info index dd67c12..1344ced 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -1,9 +1,8 @@ -This is lzlib.info, produced by makeinfo version 4.13+ from -lzlib.texinfo. +This is lzlib.info, produced by makeinfo version 4.13+ from lzlib.texi. INFO-DIR-SECTION Data Compression START-INFO-DIR-ENTRY -* Lzlib: (lzlib). Compression library for lzip files +* Lzlib: (lzlib). Compression library for the lzip format END-INFO-DIR-ENTRY  @@ -12,7 +11,7 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.6-pre1, 7 October 2013). +This manual is for Lzlib (version 1.6-pre2, 30 January 2014). * Menu: @@ -30,7 +29,7 @@ This manual is for Lzlib (version 1.6-pre1, 7 October 2013). * Concept index:: Index of concepts - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -736,19 +735,19 @@ Concept index  Tag Table: -Node: Top218 -Node: Introduction1326 -Node: Library version3907 -Node: Buffering4552 -Node: Parameter limits5675 -Node: Compression functions6634 -Node: Decompression functions12847 -Node: Error codes19008 -Node: Error messages20947 -Node: Data format21526 -Node: Examples24175 -Node: Problems28258 -Node: Concept index28830 +Node: Top220 +Node: Introduction1335 +Node: Library version3916 +Node: Buffering4561 +Node: Parameter limits5684 +Node: Compression functions6643 +Node: Decompression functions12856 +Node: Error codes19017 +Node: Error messages20956 +Node: Data format21535 +Node: Examples24184 +Node: Problems28267 +Node: Concept index28839  End Tag Table diff --git a/doc/lzlib.texi b/doc/lzlib.texi new file mode 100644 index 0000000..88f0b16 --- /dev/null +++ b/doc/lzlib.texi @@ -0,0 +1,812 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename lzlib.info +@documentencoding ISO-8859-15 +@settitle Lzlib Manual +@finalout +@c %**end of header + +@set UPDATED 30 January 2014 +@set VERSION 1.6-pre2 + +@dircategory Data Compression +@direntry +* Lzlib: (lzlib). Compression library for the lzip format +@end direntry + + +@ifnothtml +@titlepage +@title Lzlib +@subtitle Compression library for the lzip format +@subtitle for Lzlib version @value{VERSION}, @value{UPDATED} +@author by Antonio Diaz Diaz + +@page +@vskip 0pt plus 1filll +@end titlepage + +@contents +@end ifnothtml + +@node Top +@top + +This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). + +@menu +* Introduction:: Purpose and features of Lzlib +* Library version:: Checking library version +* Buffering:: Sizes of Lzlib's buffers +* Parameter limits:: Min / max values for some parameters +* Compression functions:: Descriptions of the compression functions +* Decompression functions:: Descriptions of the decompression functions +* Error codes:: Meaning of codes returned by functions +* Error messages:: Error messages corresponding to error codes +* Data format:: Detailed format of the compressed data +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts +@end menu + +@sp 1 +Copyright @copyright{} 2009, 2010, 2011, 2012, 2013, 2014 +Antonio Diaz Diaz. + +This manual is free documentation: you have unlimited permission +to copy, distribute and modify it. + + +@node Introduction +@chapter Introduction +@cindex introduction + +Lzlib is a data compression library providing in-memory LZMA compression +and decompression functions, including integrity checking of the +decompressed data. The compressed data format used by the library is the +lzip format. Lzlib is written in C. + +The lzip file format is designed for long-term data archiving. It is +clean, provides very safe 4 factor integrity checking, and is backed by +the recovery capabilities of lziprecover. + +The functions and variables forming the interface of the compression +library are declared in the file @samp{lzlib.h}. Usage examples of the +library are given in the files @samp{main.c} and @samp{bbexample.c} from +the source distribution. + +Compression/decompression is done by repeatedly calling a couple of +read/write functions until all the data has been processed by the +library. This interface is safer and less error prone than the +traditional zlib interface. + +Compression/decompression is done when the read function is called. This +means the value returned by the position functions will not be updated +until some data is read, even if you write a lot of data. If you want +the data to be compressed in advance, just call the read function with a +@var{size} equal to 0. + +Lzlib will correctly decompress a data stream which is the concatenation +of two or more compressed data streams. The result is the concatenation +of the corresponding decompressed data streams. Integrity testing of +concatenated compressed data streams is also supported. + +All the library functions are thread safe. The library does not install +any signal handler. The decoder checks the consistency of the compressed +data, so the library should never crash even in case of corrupted input. + +Lzlib implements a simplified version of the LZMA (Lempel-Ziv-Markov +chain-Algorithm) algorithm. The high compression of LZMA comes from +combining two basic, well-proven compression ideas: sliding dictionaries +(LZ77/78) and markov models (the thing used by every compression +algorithm that uses a range encoder or similar order-0 entropy coder as +its last stage) with segregation of contexts according to what the bits +are used for. + +The ideas embodied in lzlib are due to (at least) the following people: +Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for +the definition of Markov chains), G.N.N. Martin (for the definition of +range encoding), Igor Pavlov (for putting all the above together in +LZMA), and Julian Seward (for bzip2's CLI). + + +@node Library version +@chapter Library version +@cindex library version + +@deftypefun {const char *} LZ_version ( void ) +Returns the library version as a string. +@end deftypefun + +@deftypevr Constant {const char *} LZ_version_string +This constant is defined in the header file @samp{lzlib.h}. +@end deftypevr + +The application should compare LZ_version and LZ_version_string for +consistency. If the first character differs, the library code actually +used may be incompatible with the @samp{lzlib.h} header file used by the +application. + +@example +if( LZ_version()[0] != LZ_version_string[0] ) + error( "bad library version" ); +@end example + + +@node Buffering +@chapter Buffering +@cindex buffering + +Lzlib internal functions need access to a memory chunk at least as large +as the dictionary size (sliding window). For efficiency reasons, the +input buffer for compression is twice as large as the dictionary size. +Finally, for security reasons, lzlib uses two more internal buffers. + +These are the four buffers used by lzlib, and their guaranteed minimum +sizes: + +@itemize @bullet +@item Input compression buffer. Written to by the +@samp{LZ_compress_write} function. Its size is two times the dictionary +size set with the @samp{LZ_compress_open} function or 64 KiB, whichever +is larger. + +@item Output compression buffer. Read from by the +@samp{LZ_compress_read} function. Its size is 64 KiB. + +@item Input decompression buffer. Written to by the +@samp{LZ_decompress_write} function. Its size is 64 KiB. + +@item Output decompression buffer. Read from by the +@samp{LZ_decompress_read} function. Its size is the dictionary size set +in the header of the member currently being decompressed or 64 KiB, +whichever is larger. +@end itemize + + +@node Parameter limits +@chapter Parameter limits +@cindex parameter limits + +These functions provide minimum and maximum values for some parameters. +Current values are shown in square brackets. + +@deftypefun int LZ_min_dictionary_bits ( void ) +Returns the base 2 logarithm of the smallest valid dictionary size [12]. +@end deftypefun + +@deftypefun int LZ_min_dictionary_size ( void ) +Returns the smallest valid dictionary size [4 KiB]. +@end deftypefun + +@deftypefun int LZ_max_dictionary_bits ( void ) +Returns the base 2 logarithm of the largest valid dictionary size [29]. +@end deftypefun + +@deftypefun int LZ_max_dictionary_size ( void ) +Returns the largest valid dictionary size [512 MiB]. +@end deftypefun + +@deftypefun int LZ_min_match_len_limit ( void ) +Returns the smallest valid match length limit [5]. +@end deftypefun + +@deftypefun int LZ_max_match_len_limit ( void ) +Returns the largest valid match length limit [273]. +@end deftypefun + + +@node Compression functions +@chapter Compression functions +@cindex compression functions + +These are the functions used to compress data. In case of error, all of +them return -1 or 0, for signed and unsigned return values respectively, +except @samp{LZ_compress_open} whose return value must be verified by +calling @samp{LZ_compress_errno} before using it. + + +@deftypefun {struct LZ_Encoder *} LZ_compress_open ( const int @var{dictionary_size}, const int @var{match_len_limit}, const unsigned long long @var{member_size} ) +Initializes the internal stream state for compression and returns a +pointer that can only be used as the @var{encoder} argument for the +other LZ_compress functions, or a null pointer if the encoder could not +be allocated. + +The returned pointer must be verified by calling +@samp{LZ_compress_errno} before using it. If @samp{LZ_compress_errno} +does not return @samp{LZ_ok}, the returned pointer must not be used and +should be freed with @samp{LZ_compress_close} to avoid memory leaks. + +@var{dictionary_size} sets the dictionary size to be used, in bytes. +Valid values range from 4 KiB to 512 MiB. Note that dictionary sizes are +quantized. If the specified size does not match one of the valid sizes, +it will be rounded upwards by adding up to (@var{dictionary_size} / 16) +to it. + +@var{match_len_limit} sets the match length limit in bytes. Valid values +range from 5 to 273. Larger values usually give better compression +ratios but longer compression times. + +@var{member_size} sets the member size limit in bytes. Minimum member +size limit is 100 kB. Small member size may degrade compression ratio, so +use it only when needed. To produce a single-member data stream, give +@var{member_size} a value larger than the amount of data to be produced, +for example INT64_MAX. +@end deftypefun + + +@deftypefun int LZ_compress_close ( struct LZ_Encoder * const @var{encoder} ) +Frees all dynamically allocated data structures for this stream. This +function discards any unprocessed input and does not flush any pending +output. After a call to @samp{LZ_compress_close}, @var{encoder} can no +more be used as an argument to any LZ_compress function. +@end deftypefun + + +@deftypefun int LZ_compress_finish ( struct LZ_Encoder * const @var{encoder} ) +Use this function to tell @samp{lzlib} that all the data for this member +has already been written (with the @samp{LZ_compress_write} function). +After all the produced compressed data has been read with +@samp{LZ_compress_read} and @samp{LZ_compress_member_finished} returns +1, a new member can be started with @samp{LZ_compress_restart_member}. +@end deftypefun + + +@deftypefun int LZ_compress_restart_member ( struct LZ_Encoder * const @var{encoder}, const unsigned long long @var{member_size} ) +Use this function to start a new member, in a multi-member data stream. +Call this function only after @samp{LZ_compress_member_finished} +indicates that the current member has been fully read (with the +@samp{LZ_compress_read} function). +@end deftypefun + + +@deftypefun int LZ_compress_sync_flush ( struct LZ_Encoder * const @var{encoder} ) +Use this function to make available to @samp{LZ_compress_read} all the +data already written with the @samp{LZ_compress_write} function. +Repeated use of @samp{LZ_compress_sync_flush} may degrade compression +ratio, so use it only when needed. +@end deftypefun + + +@deftypefun int LZ_compress_read ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) +The @samp{LZ_compress_read} function reads up to @var{size} bytes from +the stream pointed to by @var{encoder}, storing the results in +@var{buffer}. + +The return value is the number of bytes actually read. This might be +less than @var{size}; for example, if there aren't that many bytes left +in the stream or if more bytes have to be yet written with the +@samp{LZ_compress_write} function. Note that reading less than +@var{size} bytes is not an error. +@end deftypefun + + +@deftypefun int LZ_compress_write ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) +The @samp{LZ_compress_write} function writes up to @var{size} bytes from +@var{buffer} to the stream pointed to by @var{encoder}. + +The return value is the number of bytes actually written. This might be +less than @var{size}. Note that writing less than @var{size} bytes is +not an error. +@end deftypefun + + +@deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} ) +The @samp{LZ_compress_write_size} function returns the maximum number of +bytes that can be immediately written through the @samp{LZ_compress_write} +function. + +It is guaranteed that an immediate call to @samp{LZ_compress_write} will +accept a @var{size} up to the returned number of bytes. +@end deftypefun + + +@deftypefun {enum LZ_Errno} LZ_compress_errno ( struct LZ_Encoder * const @var{encoder} ) +Returns the current error code for @var{encoder} (@pxref{Error codes}). +@end deftypefun + + +@deftypefun int LZ_compress_finished ( struct LZ_Encoder * const @var{encoder} ) +Returns 1 if all the data has been read and @samp{LZ_compress_close} can +be safely called. Otherwise it returns 0. +@end deftypefun + + +@deftypefun int LZ_compress_member_finished ( struct LZ_Encoder * const @var{encoder} ) +Returns 1 if the current member, in a multi-member data stream, has been +fully read and @samp{LZ_compress_restart_member} can be safely called. +Otherwise it returns 0. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_compress_data_position ( struct LZ_Encoder * const @var{encoder} ) +Returns the number of input bytes already compressed in the current +member. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_compress_member_position ( struct LZ_Encoder * const @var{encoder} ) +Returns the number of compressed bytes already produced, but perhaps not +yet read, in the current member. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_compress_total_in_size ( struct LZ_Encoder * const @var{encoder} ) +Returns the total number of input bytes already compressed. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_compress_total_out_size ( struct LZ_Encoder * const @var{encoder} ) +Returns the total number of compressed bytes already produced, but +perhaps not yet read. +@end deftypefun + + +@node Decompression functions +@chapter Decompression functions +@cindex decompression functions + +These are the functions used to decompress data. In case of error, all +of them return -1 or 0, for signed and unsigned return values +respectively, except @samp{LZ_decompress_open} whose return value must +be verified by calling @samp{LZ_decompress_errno} before using it. + + +@deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void ) +Initializes the internal stream state for decompression and returns a +pointer that can only be used as the @var{decoder} argument for the +other LZ_decompress functions, or a null pointer if the decoder could +not be allocated. + +The returned pointer must be verified by calling +@samp{LZ_decompress_errno} before using it. If +@samp{LZ_decompress_errno} does not return @samp{LZ_ok}, the returned +pointer must not be used and should be freed with +@samp{LZ_decompress_close} to avoid memory leaks. +@end deftypefun + + +@deftypefun int LZ_decompress_close ( struct LZ_Decoder * const @var{decoder} ) +Frees all dynamically allocated data structures for this stream. This +function discards any unprocessed input and does not flush any pending +output. After a call to @samp{LZ_decompress_close}, @var{decoder} can no +more be used as an argument to any LZ_decompress function. +@end deftypefun + + +@deftypefun int LZ_decompress_finish ( struct LZ_Decoder * const @var{decoder} ) +Use this function to tell @samp{lzlib} that all the data for this stream +has already been written (with the @samp{LZ_decompress_write} function). +@end deftypefun + + +@deftypefun int LZ_decompress_reset ( struct LZ_Decoder * const @var{decoder} ) +Resets the internal state of @var{decoder} as it was just after opening +it with the @samp{LZ_decompress_open} function. Data stored in the +internal buffers is discarded. Position counters are set to 0. +@end deftypefun + + +@deftypefun int LZ_decompress_sync_to_member ( struct LZ_Decoder * const @var{decoder} ) +Resets the error state of @var{decoder} and enters a search state that +lasts until a new member header (or the end of the stream) is found. +After a successful call to @samp{LZ_decompress_sync_to_member}, data +written with @samp{LZ_decompress_write} will be consumed and +@samp{LZ_decompress_read} will return 0 until a header is found. + +This function is useful to discard any data preceding the first member, +or to discard the rest of the current member, for example in case of a +data error. If the decoder is already at the beginning of a member, this +function does nothing. +@end deftypefun + + +@deftypefun int LZ_decompress_read ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) +The @samp{LZ_decompress_read} function reads up to @var{size} bytes from +the stream pointed to by @var{decoder}, storing the results in +@var{buffer}. + +The return value is the number of bytes actually read. This might be +less than @var{size}; for example, if there aren't that many bytes left +in the stream or if more bytes have to be yet written with the +@samp{LZ_decompress_write} function. Note that reading less than +@var{size} bytes is not an error. +@end deftypefun + + +@deftypefun int LZ_decompress_write ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) +The @samp{LZ_decompress_write} function writes up to @var{size} bytes from +@var{buffer} to the stream pointed to by @var{decoder}. + +The return value is the number of bytes actually written. This might be +less than @var{size}. Note that writing less than @var{size} bytes is +not an error. +@end deftypefun + + +@deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} ) +The @samp{LZ_decompress_write_size} function returns the maximum number +of bytes that can be immediately written through the +@samp{LZ_decompress_write} function. + +It is guaranteed that an immediate call to @samp{LZ_decompress_write} +will accept a @var{size} up to the returned number of bytes. +@end deftypefun + + +@deftypefun {enum LZ_Errno} LZ_decompress_errno ( struct LZ_Decoder * const @var{decoder} ) +Returns the current error code for @var{decoder} (@pxref{Error codes}). +@end deftypefun + + +@deftypefun int LZ_decompress_finished ( struct LZ_Decoder * const @var{decoder} ) +Returns 1 if all the data has been read and @samp{LZ_decompress_close} +can be safely called. Otherwise it returns 0. +@end deftypefun + + +@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} ) +Returns 1 if the previous call to @samp{LZ_decompress_read} finished +reading the current member, indicating that final values for member are +available through @samp{LZ_decompress_data_crc}, +@samp{LZ_decompress_data_position}, and +@samp{LZ_decompress_member_position}. Otherwise it returns 0. +@end deftypefun + + +@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} ) +Returns the version of current member from member header. +@end deftypefun + + +@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} ) +Returns the dictionary size of current member from member header. +@end deftypefun + + +@deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) +Returns the 32 bit Cyclic Redundancy Check of the data decompressed from +the current member. The returned value is valid only when +@samp{LZ_decompress_member_finished} returns 1. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} ) +Returns the number of decompressed bytes already produced, but perhaps +not yet read, in the current member. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_decompress_member_position ( struct LZ_Decoder * const @var{decoder} ) +Returns the number of input bytes already decompressed in the current +member. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_decompress_total_in_size ( struct LZ_Decoder * const @var{decoder} ) +Returns the total number of input bytes already decompressed. +@end deftypefun + + +@deftypefun {unsigned long long} LZ_decompress_total_out_size ( struct LZ_Decoder * const @var{decoder} ) +Returns the total number of decompressed bytes already produced, but +perhaps not yet read. +@end deftypefun + + +@node Error codes +@chapter Error codes +@cindex error codes + +Most library functions return -1 to indicate that they have failed. But +this return value only tells you that an error has occurred. To find out +what kind of error it was, you need to verify the error code by calling +@samp{LZ_(de)compress_errno}. + +Library functions do not change the value returned by +@samp{LZ_(de)compress_errno} when they succeed; thus, the value returned +by @samp{LZ_(de)compress_errno} after a successful call is not +necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno} +to determine whether a call failed. If the call failed, then you can +examine @samp{LZ_(de)compress_errno}. + +The error codes are defined in the header file @samp{lzlib.h}. + +@deftypevr Constant {enum LZ_Errno} LZ_ok +The value of this constant is 0 and is used to indicate that there is no +error. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_bad_argument +At least one of the arguments passed to the library function was +invalid. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_mem_error +No memory available. The system cannot allocate more virtual memory +because its capacity is full. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_sequence_error +A library function was called in the wrong order. For example +@samp{LZ_compress_restart_member} was called before +@samp{LZ_compress_member_finished} indicates that the current member is +finished. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_header_error +Reading of member header failed. If this happens at the end of the data +stream it may indicate trailing garbage. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_unexpected_eof +The end of the data stream was reached in the middle of a member. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_data_error +The data stream is corrupt. +@end deftypevr + +@deftypevr Constant {enum LZ_Errno} LZ_library_error +A bug was detected in the library. Please, report it (@pxref{Problems}). +@end deftypevr + + +@node Error messages +@chapter Error messages +@cindex error messages + +@deftypefun {const char *} LZ_strerror ( const enum LZ_Errno @var{lz_errno} ) +Returns the standard error message for a given error code. The messages +are fairly short; there are no multi-line messages or embedded newlines. +This function makes it easy for your program to report informative error +messages about the failure of a library call. + +The value of @var{lz_errno} normally comes from a call to +@samp{LZ_(de)compress_errno}. +@end deftypefun + + +@node Data format +@chapter Data format +@cindex data format + +Perfection is reached, not when there is no longer anything to add, but +when there is no longer anything to take away.@* +--- Antoine de Saint-Exupery + +@sp 1 +In the diagram below, a box like this: +@verbatim ++---+ +| | <-- the vertical bars might be missing ++---+ +@end verbatim + +represents one byte; a box like this: +@verbatim ++==============+ +| | ++==============+ +@end verbatim + +represents a variable number of bytes. + +@sp 1 +A lzip data stream consists of a series of "members" (compressed data +sets). The members simply appear one after another in the data stream, +with no additional information before, between, or after them. + +Each member has the following structure: +@verbatim ++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size | ++--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +@end verbatim + +All multibyte values are stored in little endian order. + +@table @samp +@item ID string +A four byte string, identifying the lzip format, with the value "LZIP" +(0x4C, 0x5A, 0x49, 0x50). + +@item VN (version number, 1 byte) +Just in case something needs to be modified in the future. 1 for now. + +@item DS (coded dictionary size, 1 byte) +Lzip divides the distance between any two powers of 2 into 8 equally +spaced intervals, named "wedges". The dictionary size is calculated by +taking a power of 2 (the base size) and substracting from it a number of +wedges between 0 and 7. The size of a wedge is (base_size / 16).@* +Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* +Bits 7-5 contain the number of wedges (0 to 7) to substract from the +base size to obtain the dictionary size.@* +Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* +Valid values for dictionary size range from 4 KiB to 512 MiB. + +@item Lzma stream +The lzma stream, finished by an end of stream marker. Uses default +values for encoder properties. See the lzip manual for a full +description.@* +Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib +also uses the LZMA marker @samp{3} ("Sync Flush" marker). + +@item CRC32 (4 bytes) +CRC of the uncompressed original data. + +@item Data size (8 bytes) +Size of the uncompressed original data. + +@item Member size (8 bytes) +Total size of the member, including header and trailer. This field acts +as a distributed index, allows the verification of stream integrity, and +facilitates safe recovery of undamaged members from multi-member files. + +@end table + + +@node Examples +@chapter A small tutorial with examples +@cindex examples + +This chapter shows the order in which the library functions should be +called depending on what kind of data stream you want to compress or +decompress. See the file @samp{bbexample.c} in the source distribution +for an example of how buffer-to-buffer compression/decompression can be +implemented using lzlib. + +Note that lzlib's interface is symmetrical. That is, the code for normal +compression and decompression is identical except because one calls +LZ_compress* functions while the other calls LZ_decompress* functions. + +@sp 1 +@noindent +Example 1: Normal compression (@var{member_size} > total output). + +@example +1) LZ_compress_open +2) LZ_compress_write +3) LZ_compress_read +4) go back to step 2 until all input data has been written +5) LZ_compress_finish +6) LZ_compress_read +7) go back to step 6 until LZ_compress_finished returns 1 +8) LZ_compress_close +@end example + +@sp 1 +@noindent +Example 2: Normal compression using LZ_compress_write_size. + +@example +1) LZ_compress_open +2) go to step 5 if LZ_compress_write_size returns 0 +3) LZ_compress_write +4) if no more data to write, call LZ_compress_finish +5) LZ_compress_read +6) go back to step 2 until LZ_compress_finished returns 1 +7) LZ_compress_close +@end example + +@sp 1 +@noindent +Example 3: Decompression. + +@example +1) LZ_decompress_open +2) LZ_decompress_write +3) LZ_decompress_read +4) go back to step 2 until all input data has been written +5) LZ_decompress_finish +6) LZ_decompress_read +7) go back to step 6 until LZ_decompress_finished returns 1 +8) LZ_decompress_close +@end example + +@sp 1 +@noindent +Example 4: Decompression using LZ_decompress_write_size. + +@example +1) LZ_decompress_open +2) go to step 5 if LZ_decompress_write_size returns 0 +3) LZ_decompress_write +4) if no more data to write, call LZ_decompress_finish +5) LZ_decompress_read +5a) optionally, if LZ_decompress_member_finished returns 1, read + final values for member with LZ_decompress_data_crc, etc. +6) go back to step 2 until LZ_decompress_finished returns 1 +7) LZ_decompress_close +@end example + +@sp 1 +@noindent +Example 5: Multi-member compression (@var{member_size} < total output). + +@example + 1) LZ_compress_open + 2) go to step 5 if LZ_compress_write_size returns 0 + 3) LZ_compress_write + 4) if no more data to write, call LZ_compress_finish + 5) LZ_compress_read + 6) go back to step 2 until LZ_compress_member_finished returns 1 + 7) go to step 10 if LZ_compress_finished() returns 1 + 8) LZ_compress_restart_member + 9) go back to step 2 +10) LZ_compress_close +@end example + +@sp 1 +@noindent +Example 6: Multi-member compression (user-restarted members). + +@example + 1) LZ_compress_open + 2) LZ_compress_write + 3) LZ_compress_read + 4) go back to step 2 until member termination is desired + 5) LZ_compress_finish + 6) LZ_compress_read + 7) go back to step 6 until LZ_compress_member_finished returns 1 + 8) verify that LZ_compress_finished returns 1 + 9) go to step 12 if all input data has been written +10) LZ_compress_restart_member +11) go back to step 2 +12) LZ_compress_close +@end example + +@sp 1 +@noindent +Example 7: Decompression with automatic removal of leading garbage. + +@example +1) LZ_decompress_open +2) LZ_decompress_sync_to_member +3) go to step 6 if LZ_decompress_write_size returns 0 +4) LZ_decompress_write +5) if no more data to write, call LZ_decompress_finish +6) LZ_decompress_read +7) go back to step 3 until LZ_decompress_finished returns 1 +8) LZ_decompress_close +@end example + +@sp 1 +@noindent +Example 8: Streamed decompression with automatic resynchronization to +next member in case of data error. + +@example +1) LZ_decompress_open +2) go to step 5 if LZ_decompress_write_size returns 0 +3) LZ_decompress_write +4) if no more data to write, call LZ_decompress_finish +5) if LZ_decompress_read produces LZ_header_error or LZ_data_error, + call LZ_decompress_sync_to_member +6) go back to step 2 until LZ_decompress_finished returns 1 +7) LZ_decompress_close +@end example + + +@node Problems +@chapter Reporting bugs +@cindex bugs +@cindex getting help + +There are probably bugs in Lzlib. There are certainly errors and +omissions in this manual. If you report them, they will get fixed. If +you don't, no one will ever know about them and they will remain unfixed +for all eternity, if not longer. + +If you find a bug in Lzlib, please send electronic mail to +@email{lzip-bug@@nongnu.org}. Include the version number, which you can +find by running @w{@samp{minilzip --version}} or in +@samp{LZ_version_string} from @samp{lzlib.h}. + + +@node Concept index +@unnumbered Concept index + +@printindex cp + +@bye diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo deleted file mode 100644 index 8fafc66..0000000 --- a/doc/lzlib.texinfo +++ /dev/null @@ -1,810 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header -@setfilename lzlib.info -@documentencoding ISO-8859-15 -@settitle Lzlib Manual -@finalout -@c %**end of header - -@set UPDATED 7 October 2013 -@set VERSION 1.6-pre1 - -@dircategory Data Compression -@direntry -* Lzlib: (lzlib). Compression library for lzip files -@end direntry - - -@ifnothtml -@titlepage -@title Lzlib -@subtitle Compression library for lzip files -@subtitle for Lzlib version @value{VERSION}, @value{UPDATED} -@author by Antonio Diaz Diaz - -@page -@vskip 0pt plus 1filll -@end titlepage - -@contents -@end ifnothtml - -@node Top -@top - -This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). - -@menu -* Introduction:: Purpose and features of Lzlib -* Library version:: Checking library version -* Buffering:: Sizes of Lzlib's buffers -* Parameter limits:: Min / max values for some parameters -* Compression functions:: Descriptions of the compression functions -* Decompression functions:: Descriptions of the decompression functions -* Error codes:: Meaning of codes returned by functions -* Error messages:: Error messages corresponding to error codes -* Data format:: Detailed format of the compressed data -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept index:: Index of concepts -@end menu - -@sp 1 -Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. - -This manual is free documentation: you have unlimited permission -to copy, distribute and modify it. - - -@node Introduction -@chapter Introduction -@cindex introduction - -Lzlib is a data compression library providing in-memory LZMA compression -and decompression functions, including integrity checking of the -decompressed data. The compressed data format used by the library is the -lzip format. Lzlib is written in C. - -The lzip file format is designed for long-term data archiving. It is -clean, provides very safe 4 factor integrity checking, and is backed by -the recovery capabilities of lziprecover. - -The functions and variables forming the interface of the compression -library are declared in the file @samp{lzlib.h}. Usage examples of the -library are given in the files @samp{main.c} and @samp{bbexample.c} from -the source distribution. - -Compression/decompression is done by repeatedly calling a couple of -read/write functions until all the data has been processed by the -library. This interface is safer and less error prone than the -traditional zlib interface. - -Compression/decompression is done when the read function is called. This -means the value returned by the position functions will not be updated -until some data is read, even if you write a lot of data. If you want -the data to be compressed in advance, just call the read function with a -@var{size} equal to 0. - -Lzlib will correctly decompress a data stream which is the concatenation -of two or more compressed data streams. The result is the concatenation -of the corresponding decompressed data streams. Integrity testing of -concatenated compressed data streams is also supported. - -All the library functions are thread safe. The library does not install -any signal handler. The decoder checks the consistency of the compressed -data, so the library should never crash even in case of corrupted input. - -Lzlib implements a simplified version of the LZMA (Lempel-Ziv-Markov -chain-Algorithm) algorithm. The high compression of LZMA comes from -combining two basic, well-proven compression ideas: sliding dictionaries -(LZ77/78) and markov models (the thing used by every compression -algorithm that uses a range encoder or similar order-0 entropy coder as -its last stage) with segregation of contexts according to what the bits -are used for. - -The ideas embodied in lzlib are due to (at least) the following people: -Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for -the definition of Markov chains), G.N.N. Martin (for the definition of -range encoding), Igor Pavlov (for putting all the above together in -LZMA), and Julian Seward (for bzip2's CLI). - - -@node Library version -@chapter Library version -@cindex library version - -@deftypefun {const char *} LZ_version ( void ) -Returns the library version as a string. -@end deftypefun - -@deftypevr Constant {const char *} LZ_version_string -This constant is defined in the header file @samp{lzlib.h}. -@end deftypevr - -The application should compare LZ_version and LZ_version_string for -consistency. If the first character differs, the library code actually -used may be incompatible with the @samp{lzlib.h} header file used by the -application. - -@example -if( LZ_version()[0] != LZ_version_string[0] ) - error( "bad library version" ); -@end example - - -@node Buffering -@chapter Buffering -@cindex buffering - -Lzlib internal functions need access to a memory chunk at least as large -as the dictionary size (sliding window). For efficiency reasons, the -input buffer for compression is twice as large as the dictionary size. -Finally, for security reasons, lzlib uses two more internal buffers. - -These are the four buffers used by lzlib, and their guaranteed minimum -sizes: - -@itemize @bullet -@item Input compression buffer. Written to by the -@samp{LZ_compress_write} function. Its size is two times the dictionary -size set with the @samp{LZ_compress_open} function or 64 KiB, whichever -is larger. - -@item Output compression buffer. Read from by the -@samp{LZ_compress_read} function. Its size is 64 KiB. - -@item Input decompression buffer. Written to by the -@samp{LZ_decompress_write} function. Its size is 64 KiB. - -@item Output decompression buffer. Read from by the -@samp{LZ_decompress_read} function. Its size is the dictionary size set -in the header of the member currently being decompressed or 64 KiB, -whichever is larger. -@end itemize - - -@node Parameter limits -@chapter Parameter limits -@cindex parameter limits - -These functions provide minimum and maximum values for some parameters. -Current values are shown in square brackets. - -@deftypefun int LZ_min_dictionary_bits ( void ) -Returns the base 2 logarithm of the smallest valid dictionary size [12]. -@end deftypefun - -@deftypefun int LZ_min_dictionary_size ( void ) -Returns the smallest valid dictionary size [4 KiB]. -@end deftypefun - -@deftypefun int LZ_max_dictionary_bits ( void ) -Returns the base 2 logarithm of the largest valid dictionary size [29]. -@end deftypefun - -@deftypefun int LZ_max_dictionary_size ( void ) -Returns the largest valid dictionary size [512 MiB]. -@end deftypefun - -@deftypefun int LZ_min_match_len_limit ( void ) -Returns the smallest valid match length limit [5]. -@end deftypefun - -@deftypefun int LZ_max_match_len_limit ( void ) -Returns the largest valid match length limit [273]. -@end deftypefun - - -@node Compression functions -@chapter Compression functions -@cindex compression functions - -These are the functions used to compress data. In case of error, all of -them return -1 or 0, for signed and unsigned return values respectively, -except @samp{LZ_compress_open} whose return value must be verified by -calling @samp{LZ_compress_errno} before using it. - - -@deftypefun {struct LZ_Encoder *} LZ_compress_open ( const int @var{dictionary_size}, const int @var{match_len_limit}, const unsigned long long @var{member_size} ) -Initializes the internal stream state for compression and returns a -pointer that can only be used as the @var{encoder} argument for the -other LZ_compress functions, or a null pointer if the encoder could not -be allocated. - -The returned pointer must be verified by calling -@samp{LZ_compress_errno} before using it. If @samp{LZ_compress_errno} -does not return @samp{LZ_ok}, the returned pointer must not be used and -should be freed with @samp{LZ_compress_close} to avoid memory leaks. - -@var{dictionary_size} sets the dictionary size to be used, in bytes. -Valid values range from 4 KiB to 512 MiB. Note that dictionary sizes are -quantized. If the specified size does not match one of the valid sizes, -it will be rounded upwards by adding up to (@var{dictionary_size} / 16) -to it. - -@var{match_len_limit} sets the match length limit in bytes. Valid values -range from 5 to 273. Larger values usually give better compression -ratios but longer compression times. - -@var{member_size} sets the member size limit in bytes. Minimum member -size limit is 100 kB. Small member size may degrade compression ratio, so -use it only when needed. To produce a single-member data stream, give -@var{member_size} a value larger than the amount of data to be produced, -for example INT64_MAX. -@end deftypefun - - -@deftypefun int LZ_compress_close ( struct LZ_Encoder * const @var{encoder} ) -Frees all dynamically allocated data structures for this stream. This -function discards any unprocessed input and does not flush any pending -output. After a call to @samp{LZ_compress_close}, @var{encoder} can no -more be used as an argument to any LZ_compress function. -@end deftypefun - - -@deftypefun int LZ_compress_finish ( struct LZ_Encoder * const @var{encoder} ) -Use this function to tell @samp{lzlib} that all the data for this member -has already been written (with the @samp{LZ_compress_write} function). -After all the produced compressed data has been read with -@samp{LZ_compress_read} and @samp{LZ_compress_member_finished} returns -1, a new member can be started with @samp{LZ_compress_restart_member}. -@end deftypefun - - -@deftypefun int LZ_compress_restart_member ( struct LZ_Encoder * const @var{encoder}, const unsigned long long @var{member_size} ) -Use this function to start a new member, in a multi-member data stream. -Call this function only after @samp{LZ_compress_member_finished} -indicates that the current member has been fully read (with the -@samp{LZ_compress_read} function). -@end deftypefun - - -@deftypefun int LZ_compress_sync_flush ( struct LZ_Encoder * const @var{encoder} ) -Use this function to make available to @samp{LZ_compress_read} all the -data already written with the @samp{LZ_compress_write} function. -Repeated use of @samp{LZ_compress_sync_flush} may degrade compression -ratio, so use it only when needed. -@end deftypefun - - -@deftypefun int LZ_compress_read ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The @samp{LZ_compress_read} function reads up to @var{size} bytes from -the stream pointed to by @var{encoder}, storing the results in -@var{buffer}. - -The return value is the number of bytes actually read. This might be -less than @var{size}; for example, if there aren't that many bytes left -in the stream or if more bytes have to be yet written with the -@samp{LZ_compress_write} function. Note that reading less than -@var{size} bytes is not an error. -@end deftypefun - - -@deftypefun int LZ_compress_write ( struct LZ_Encoder * const @var{encoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The @samp{LZ_compress_write} function writes up to @var{size} bytes from -@var{buffer} to the stream pointed to by @var{encoder}. - -The return value is the number of bytes actually written. This might be -less than @var{size}. Note that writing less than @var{size} bytes is -not an error. -@end deftypefun - - -@deftypefun int LZ_compress_write_size ( struct LZ_Encoder * const @var{encoder} ) -The @samp{LZ_compress_write_size} function returns the maximum number of -bytes that can be immediately written through the @samp{LZ_compress_write} -function. - -It is guaranteed that an immediate call to @samp{LZ_compress_write} will -accept a @var{size} up to the returned number of bytes. -@end deftypefun - - -@deftypefun {enum LZ_Errno} LZ_compress_errno ( struct LZ_Encoder * const @var{encoder} ) -Returns the current error code for @var{encoder} (@pxref{Error codes}). -@end deftypefun - - -@deftypefun int LZ_compress_finished ( struct LZ_Encoder * const @var{encoder} ) -Returns 1 if all the data has been read and @samp{LZ_compress_close} can -be safely called. Otherwise it returns 0. -@end deftypefun - - -@deftypefun int LZ_compress_member_finished ( struct LZ_Encoder * const @var{encoder} ) -Returns 1 if the current member, in a multi-member data stream, has been -fully read and @samp{LZ_compress_restart_member} can be safely called. -Otherwise it returns 0. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_compress_data_position ( struct LZ_Encoder * const @var{encoder} ) -Returns the number of input bytes already compressed in the current -member. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_compress_member_position ( struct LZ_Encoder * const @var{encoder} ) -Returns the number of compressed bytes already produced, but perhaps not -yet read, in the current member. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_compress_total_in_size ( struct LZ_Encoder * const @var{encoder} ) -Returns the total number of input bytes already compressed. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_compress_total_out_size ( struct LZ_Encoder * const @var{encoder} ) -Returns the total number of compressed bytes already produced, but -perhaps not yet read. -@end deftypefun - - -@node Decompression functions -@chapter Decompression functions -@cindex decompression functions - -These are the functions used to decompress data. In case of error, all -of them return -1 or 0, for signed and unsigned return values -respectively, except @samp{LZ_decompress_open} whose return value must -be verified by calling @samp{LZ_decompress_errno} before using it. - - -@deftypefun {struct LZ_Decoder *} LZ_decompress_open ( void ) -Initializes the internal stream state for decompression and returns a -pointer that can only be used as the @var{decoder} argument for the -other LZ_decompress functions, or a null pointer if the decoder could -not be allocated. - -The returned pointer must be verified by calling -@samp{LZ_decompress_errno} before using it. If -@samp{LZ_decompress_errno} does not return @samp{LZ_ok}, the returned -pointer must not be used and should be freed with -@samp{LZ_decompress_close} to avoid memory leaks. -@end deftypefun - - -@deftypefun int LZ_decompress_close ( struct LZ_Decoder * const @var{decoder} ) -Frees all dynamically allocated data structures for this stream. This -function discards any unprocessed input and does not flush any pending -output. After a call to @samp{LZ_decompress_close}, @var{decoder} can no -more be used as an argument to any LZ_decompress function. -@end deftypefun - - -@deftypefun int LZ_decompress_finish ( struct LZ_Decoder * const @var{decoder} ) -Use this function to tell @samp{lzlib} that all the data for this stream -has already been written (with the @samp{LZ_decompress_write} function). -@end deftypefun - - -@deftypefun int LZ_decompress_reset ( struct LZ_Decoder * const @var{decoder} ) -Resets the internal state of @var{decoder} as it was just after opening -it with the @samp{LZ_decompress_open} function. Data stored in the -internal buffers is discarded. Position counters are set to 0. -@end deftypefun - - -@deftypefun int LZ_decompress_sync_to_member ( struct LZ_Decoder * const @var{decoder} ) -Resets the error state of @var{decoder} and enters a search state that -lasts until a new member header (or the end of the stream) is found. -After a successful call to @samp{LZ_decompress_sync_to_member}, data -written with @samp{LZ_decompress_write} will be consumed and -@samp{LZ_decompress_read} will return 0 until a header is found. - -This function is useful to discard any data preceding the first member, -or to discard the rest of the current member, for example in case of a -data error. If the decoder is already at the beginning of a member, this -function does nothing. -@end deftypefun - - -@deftypefun int LZ_decompress_read ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The @samp{LZ_decompress_read} function reads up to @var{size} bytes from -the stream pointed to by @var{decoder}, storing the results in -@var{buffer}. - -The return value is the number of bytes actually read. This might be -less than @var{size}; for example, if there aren't that many bytes left -in the stream or if more bytes have to be yet written with the -@samp{LZ_decompress_write} function. Note that reading less than -@var{size} bytes is not an error. -@end deftypefun - - -@deftypefun int LZ_decompress_write ( struct LZ_Decoder * const @var{decoder}, uint8_t * const @var{buffer}, const int @var{size} ) -The @samp{LZ_decompress_write} function writes up to @var{size} bytes from -@var{buffer} to the stream pointed to by @var{decoder}. - -The return value is the number of bytes actually written. This might be -less than @var{size}. Note that writing less than @var{size} bytes is -not an error. -@end deftypefun - - -@deftypefun int LZ_decompress_write_size ( struct LZ_Decoder * const @var{decoder} ) -The @samp{LZ_decompress_write_size} function returns the maximum number -of bytes that can be immediately written through the -@samp{LZ_decompress_write} function. - -It is guaranteed that an immediate call to @samp{LZ_decompress_write} -will accept a @var{size} up to the returned number of bytes. -@end deftypefun - - -@deftypefun {enum LZ_Errno} LZ_decompress_errno ( struct LZ_Decoder * const @var{decoder} ) -Returns the current error code for @var{decoder} (@pxref{Error codes}). -@end deftypefun - - -@deftypefun int LZ_decompress_finished ( struct LZ_Decoder * const @var{decoder} ) -Returns 1 if all the data has been read and @samp{LZ_decompress_close} -can be safely called. Otherwise it returns 0. -@end deftypefun - - -@deftypefun int LZ_decompress_member_finished ( struct LZ_Decoder * const @var{decoder} ) -Returns 1 if the previous call to @samp{LZ_decompress_read} finished -reading the current member, indicating that final values for member are -available through @samp{LZ_decompress_data_crc}, -@samp{LZ_decompress_data_position}, and -@samp{LZ_decompress_member_position}. Otherwise it returns 0. -@end deftypefun - - -@deftypefun int LZ_decompress_member_version ( struct LZ_Decoder * const @var{decoder} ) -Returns the version of current member from member header. -@end deftypefun - - -@deftypefun int LZ_decompress_dictionary_size ( struct LZ_Decoder * const @var{decoder} ) -Returns the dictionary size of current member from member header. -@end deftypefun - - -@deftypefun {unsigned} LZ_decompress_data_crc ( struct LZ_Decoder * const @var{decoder} ) -Returns the 32 bit Cyclic Redundancy Check of the data decompressed from -the current member. The returned value is valid only when -@samp{LZ_decompress_member_finished} returns 1. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_decompress_data_position ( struct LZ_Decoder * const @var{decoder} ) -Returns the number of decompressed bytes already produced, but perhaps -not yet read, in the current member. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_decompress_member_position ( struct LZ_Decoder * const @var{decoder} ) -Returns the number of input bytes already decompressed in the current -member. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_decompress_total_in_size ( struct LZ_Decoder * const @var{decoder} ) -Returns the total number of input bytes already decompressed. -@end deftypefun - - -@deftypefun {unsigned long long} LZ_decompress_total_out_size ( struct LZ_Decoder * const @var{decoder} ) -Returns the total number of decompressed bytes already produced, but -perhaps not yet read. -@end deftypefun - - -@node Error codes -@chapter Error codes -@cindex error codes - -Most library functions return -1 to indicate that they have failed. But -this return value only tells you that an error has occurred. To find out -what kind of error it was, you need to verify the error code by calling -@samp{LZ_(de)compress_errno}. - -Library functions do not change the value returned by -@samp{LZ_(de)compress_errno} when they succeed; thus, the value returned -by @samp{LZ_(de)compress_errno} after a successful call is not -necessarily LZ_ok, and you should not use @samp{LZ_(de)compress_errno} -to determine whether a call failed. If the call failed, then you can -examine @samp{LZ_(de)compress_errno}. - -The error codes are defined in the header file @samp{lzlib.h}. - -@deftypevr Constant {enum LZ_Errno} LZ_ok -The value of this constant is 0 and is used to indicate that there is no -error. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_bad_argument -At least one of the arguments passed to the library function was -invalid. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_mem_error -No memory available. The system cannot allocate more virtual memory -because its capacity is full. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_sequence_error -A library function was called in the wrong order. For example -@samp{LZ_compress_restart_member} was called before -@samp{LZ_compress_member_finished} indicates that the current member is -finished. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_header_error -Reading of member header failed. If this happens at the end of the data -stream it may indicate trailing garbage. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_unexpected_eof -The end of the data stream was reached in the middle of a member. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_data_error -The data stream is corrupt. -@end deftypevr - -@deftypevr Constant {enum LZ_Errno} LZ_library_error -A bug was detected in the library. Please, report it (@pxref{Problems}). -@end deftypevr - - -@node Error messages -@chapter Error messages -@cindex error messages - -@deftypefun {const char *} LZ_strerror ( const enum LZ_Errno @var{lz_errno} ) -Returns the standard error message for a given error code. The messages -are fairly short; there are no multi-line messages or embedded newlines. -This function makes it easy for your program to report informative error -messages about the failure of a library call. - -The value of @var{lz_errno} normally comes from a call to -@samp{LZ_(de)compress_errno}. -@end deftypefun - - -@node Data format -@chapter Data format -@cindex data format - -Perfection is reached, not when there is no longer anything to add, but -when there is no longer anything to take away.@* ---- Antoine de Saint-Exupery - -@sp 1 -In the diagram below, a box like this: -@verbatim -+---+ -| | <-- the vertical bars might be missing -+---+ -@end verbatim - -represents one byte; a box like this: -@verbatim -+==============+ -| | -+==============+ -@end verbatim - -represents a variable number of bytes. - -@sp 1 -A lzip data stream consists of a series of "members" (compressed data -sets). The members simply appear one after another in the data stream, -with no additional information before, between, or after them. - -Each member has the following structure: -@verbatim -+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -| ID string | VN | DS | Lzma stream | CRC32 | Data size | Member size | -+--+--+--+--+----+----+=============+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ -@end verbatim - -All multibyte values are stored in little endian order. - -@table @samp -@item ID string -A four byte string, identifying the lzip format, with the value "LZIP" -(0x4C, 0x5A, 0x49, 0x50). - -@item VN (version number, 1 byte) -Just in case something needs to be modified in the future. 1 for now. - -@item DS (coded dictionary size, 1 byte) -Lzip divides the distance between any two powers of 2 into 8 equally -spaced intervals, named "wedges". The dictionary size is calculated by -taking a power of 2 (the base size) and substracting from it a number of -wedges between 0 and 7. The size of a wedge is (base_size / 16).@* -Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@* -Bits 7-5 contain the number of wedges (0 to 7) to substract from the -base size to obtain the dictionary size.@* -Example: 0xD3 = 2^19 - 6 * 2^15 = 512 KiB - 6 * 32 KiB = 320 KiB@* -Valid values for dictionary size range from 4 KiB to 512 MiB. - -@item Lzma stream -The lzma stream, finished by an end of stream marker. Uses default values -for encoder properties. See the lzip manual for a full description.@* -Lzip only uses the LZMA marker @samp{2} ("End Of Stream" marker). Lzlib -also uses the LZMA marker @samp{3} ("Sync Flush" marker). - -@item CRC32 (4 bytes) -CRC of the uncompressed original data. - -@item Data size (8 bytes) -Size of the uncompressed original data. - -@item Member size (8 bytes) -Total size of the member, including header and trailer. This field acts -as a distributed index, allows the verification of stream integrity, and -facilitates safe recovery of undamaged members from multi-member files. - -@end table - - -@node Examples -@chapter A small tutorial with examples -@cindex examples - -This chapter shows the order in which the library functions should be -called depending on what kind of data stream you want to compress or -decompress. See the file @samp{bbexample.c} in the source distribution -for an example of how buffer-to-buffer compression/decompression can be -implemented using lzlib. - -Note that lzlib's interface is symmetrical. That is, the code for normal -compression and decompression is identical except because one calls -LZ_compress* functions while the other calls LZ_decompress* functions. - -@sp 1 -@noindent -Example 1: Normal compression (@var{member_size} > total output). - -@example -1) LZ_compress_open -2) LZ_compress_write -3) LZ_compress_read -4) go back to step 2 until all input data has been written -5) LZ_compress_finish -6) LZ_compress_read -7) go back to step 6 until LZ_compress_finished returns 1 -8) LZ_compress_close -@end example - -@sp 1 -@noindent -Example 2: Normal compression using LZ_compress_write_size. - -@example -1) LZ_compress_open -2) go to step 5 if LZ_compress_write_size returns 0 -3) LZ_compress_write -4) if no more data to write, call LZ_compress_finish -5) LZ_compress_read -6) go back to step 2 until LZ_compress_finished returns 1 -7) LZ_compress_close -@end example - -@sp 1 -@noindent -Example 3: Decompression. - -@example -1) LZ_decompress_open -2) LZ_decompress_write -3) LZ_decompress_read -4) go back to step 2 until all input data has been written -5) LZ_decompress_finish -6) LZ_decompress_read -7) go back to step 6 until LZ_decompress_finished returns 1 -8) LZ_decompress_close -@end example - -@sp 1 -@noindent -Example 4: Decompression using LZ_decompress_write_size. - -@example -1) LZ_decompress_open -2) go to step 5 if LZ_decompress_write_size returns 0 -3) LZ_decompress_write -4) if no more data to write, call LZ_decompress_finish -5) LZ_decompress_read -5a) optionally, if LZ_decompress_member_finished returns 1, read - final values for member with LZ_decompress_data_crc, etc. -6) go back to step 2 until LZ_decompress_finished returns 1 -7) LZ_decompress_close -@end example - -@sp 1 -@noindent -Example 5: Multi-member compression (@var{member_size} < total output). - -@example - 1) LZ_compress_open - 2) go to step 5 if LZ_compress_write_size returns 0 - 3) LZ_compress_write - 4) if no more data to write, call LZ_compress_finish - 5) LZ_compress_read - 6) go back to step 2 until LZ_compress_member_finished returns 1 - 7) go to step 10 if LZ_compress_finished() returns 1 - 8) LZ_compress_restart_member - 9) go back to step 2 -10) LZ_compress_close -@end example - -@sp 1 -@noindent -Example 6: Multi-member compression (user-restarted members). - -@example - 1) LZ_compress_open - 2) LZ_compress_write - 3) LZ_compress_read - 4) go back to step 2 until member termination is desired - 5) LZ_compress_finish - 6) LZ_compress_read - 7) go back to step 6 until LZ_compress_member_finished returns 1 - 8) verify that LZ_compress_finished returns 1 - 9) go to step 12 if all input data has been written -10) LZ_compress_restart_member -11) go back to step 2 -12) LZ_compress_close -@end example - -@sp 1 -@noindent -Example 7: Decompression with automatic removal of leading garbage. - -@example -1) LZ_decompress_open -2) LZ_decompress_sync_to_member -3) go to step 6 if LZ_decompress_write_size returns 0 -4) LZ_decompress_write -5) if no more data to write, call LZ_decompress_finish -6) LZ_decompress_read -7) go back to step 3 until LZ_decompress_finished returns 1 -8) LZ_decompress_close -@end example - -@sp 1 -@noindent -Example 8: Streamed decompression with automatic resynchronization to -next member in case of data error. - -@example -1) LZ_decompress_open -2) go to step 5 if LZ_decompress_write_size returns 0 -3) LZ_decompress_write -4) if no more data to write, call LZ_decompress_finish -5) if LZ_decompress_read produces LZ_header_error or LZ_data_error, - call LZ_decompress_sync_to_member -6) go back to step 2 until LZ_decompress_finished returns 1 -7) LZ_decompress_close -@end example - - -@node Problems -@chapter Reporting bugs -@cindex bugs -@cindex getting help - -There are probably bugs in Lzlib. There are certainly errors and -omissions in this manual. If you report them, they will get fixed. If -you don't, no one will ever know about them and they will remain unfixed -for all eternity, if not longer. - -If you find a bug in Lzlib, please send electronic mail to -@email{lzip-bug@@nongnu.org}. Include the version number, which you can -find by running @w{@samp{minilzip --version}} or in -@samp{LZ_version_string} from @samp{lzlib.h}. - - -@node Concept index -@unnumbered Concept index - -@printindex cp - -@bye diff --git a/doc/minilzip.1 b/doc/minilzip.1 index 6217975..0535797 100644 --- a/doc/minilzip.1 +++ b/doc/minilzip.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1. -.TH MINILZIP "1" "October 2013" "Minilzip 1.6-pre1" "User Commands" +.TH MINILZIP "1" "January 2014" "Minilzip 1.6-pre2" "User Commands" .SH NAME Minilzip \- reduces the size of files .SH SYNOPSIS @@ -82,8 +82,8 @@ Report bugs to lzip\-bug@nongnu.org .br Lzlib home page: http://www.nongnu.org/lzip/lzlib.html .SH COPYRIGHT -Copyright \(co 2013 Antonio Diaz Diaz. -Using Lzlib 1.6\-pre1 +Copyright \(co 2014 Antonio Diaz Diaz. +Using Lzlib 1.6\-pre2 License GPLv3+: GNU GPL version 3 or later .br This is free software: you are free to change and redistribute it. diff --git a/encoder.c b/encoder.c index 84cf760..3a22a25 100644 --- a/encoder.c +++ b/encoder.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,16 +39,16 @@ static bool Mf_normalize_pos( struct Matchfinder * const mf ) mf->pos -= offset; mf->stream_pos -= offset; for( i = 0; i < mf->num_prev_positions; ++i ) - if( mf->prev_positions[i] >= 0 ) mf->prev_positions[i] -= offset; + mf->prev_positions[i] -= min( mf->prev_positions[i], offset ); for( i = 0; i < 2 * ( mf->dictionary_size + 1 ); ++i ) - if( mf->prev_pos_tree[i] >= 0 ) mf->prev_pos_tree[i] -= offset; + mf->prev_pos_tree[i] -= min( mf->prev_pos_tree[i], offset ); } return true; } -static bool Mf_init( struct Matchfinder * const mf, - const int dict_size, const int match_len_limit ) +static bool Mf_init( struct Matchfinder * const mf, const int dict_size, + const int match_len_limit ) { const int buffer_size_limit = ( 2 * dict_size ) + before_size + after_size; unsigned size; @@ -83,7 +83,7 @@ static bool Mf_init( struct Matchfinder * const mf, else mf->prev_positions = (int32_t *)malloc( size * sizeof (int32_t) ); if( !mf->prev_positions ) { free( mf->buffer ); return false; } mf->prev_pos_tree = mf->prev_positions + mf->num_prev_positions; - for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; + for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0; return true; } @@ -111,8 +111,8 @@ static void Mf_adjust_distionary_size( struct Matchfinder * const mf ) static void Mf_reset( struct Matchfinder * const mf ) { int i; - const int size = mf->stream_pos - mf->pos; - if( size > 0 ) memmove( mf->buffer, mf->buffer + mf->pos, size ); + if( mf->stream_pos > mf->pos ) + memmove( mf->buffer, mf->buffer + mf->pos, mf->stream_pos - mf->pos ); mf->partial_data_pos = 0; mf->stream_pos -= mf->pos; mf->pos = 0; @@ -120,7 +120,7 @@ static void Mf_reset( struct Matchfinder * const mf ) mf->at_stream_end = false; mf->been_flushed = false; mf->flushing = false; - for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = -1; + for( i = 0; i < mf->num_prev_positions; ++i ) mf->prev_positions[i] = 0; } @@ -130,10 +130,11 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair int32_t * ptr1 = ptr0 + 1; int32_t * newptr; int len = 0, len0 = 0, len1 = 0; - int maxlen = min_match_len - 1; + int maxlen = 0; int num_pairs = 0; - const int min_pos = (mf->pos > mf->dictionary_size) ? - mf->pos - mf->dictionary_size : 0; + const int pos1 = mf->pos + 1; + const int min_pos = + ( mf->pos > mf->dictionary_size ) ? mf->pos - mf->dictionary_size : 0; const uint8_t * const data = mf->buffer + mf->pos; int count, delta, key2, key3, key4, newpos; unsigned tmp; @@ -143,12 +144,12 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair { mf->been_flushed = true; len_limit = Mf_available_bytes( mf ); - if( len_limit < 4 ) { *ptr0 = *ptr1 = -1; return 0; } + if( len_limit < 4 ) { *ptr0 = *ptr1 = 0; return 0; } } tmp = crc32[data[0]] ^ data[1]; key2 = tmp & ( num_prev_positions2 - 1 ); - tmp ^= (uint32_t)data[2] << 8; + tmp ^= (unsigned)data[2] << 8; key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) ); key4 = num_prev_positions2 + num_prev_positions3 + ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & mf->key4_mask ); @@ -157,41 +158,41 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair { int np2 = mf->prev_positions[key2]; int np3 = mf->prev_positions[key3]; - if( np2 >= min_pos && mf->buffer[np2] == data[0] ) + if( np2 > min_pos && mf->buffer[np2-1] == data[0] ) { - pairs[0].dis = mf->pos - np2 - 1; + pairs[0].dis = mf->pos - np2; pairs[0].len = maxlen = 2; num_pairs = 1; } - if( np2 != np3 && np3 >= min_pos && mf->buffer[np3] == data[0] ) + if( np2 != np3 && np3 > min_pos && mf->buffer[np3-1] == data[0] ) { maxlen = 3; - pairs[num_pairs].dis = mf->pos - np3 - 1; - ++num_pairs; np2 = np3; + pairs[num_pairs].dis = mf->pos - np2; + ++num_pairs; } if( num_pairs > 0 ) { - delta = mf->pos - np2; + delta = pos1 - np2; while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] ) ++maxlen; pairs[num_pairs-1].len = maxlen; - if( maxlen >= len_limit ) pairs = 0; + if( maxlen >= len_limit ) pairs = 0; /* done. now just skip */ } if( maxlen < 3 ) maxlen = 3; } - mf->prev_positions[key2] = mf->pos; - mf->prev_positions[key3] = mf->pos; + mf->prev_positions[key2] = pos1; + mf->prev_positions[key3] = pos1; newpos = mf->prev_positions[key4]; - mf->prev_positions[key4] = mf->pos; + mf->prev_positions[key4] = pos1; for( count = mf->cycles; ; ) { - if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; } + if( newpos <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; } if( mf->been_flushed ) len = 0; - delta = mf->pos - newpos; + delta = pos1 - newpos; newptr = mf->prev_pos_tree + ( ( mf->cyclic_pos - delta + ( (mf->cyclic_pos >= delta) ? 0 : mf->dictionary_size + 1 ) ) << 1 ); @@ -230,86 +231,83 @@ static int Mf_get_match_pairs( struct Matchfinder * const mf, struct Pair * pair } -static void Lee_encode( struct Len_encoder * const len_encoder, +static void Lee_encode( struct Len_encoder * const le, struct Range_encoder * const renc, int symbol, const int pos_state ) { symbol -= min_match_len; if( symbol < len_low_symbols ) { - Re_encode_bit( renc, &len_encoder->lm.choice1, 0 ); - Re_encode_tree( renc, len_encoder->lm.bm_low[pos_state], symbol, len_low_bits ); + Re_encode_bit( renc, &le->lm.choice1, 0 ); + Re_encode_tree( renc, le->lm.bm_low[pos_state], symbol, len_low_bits ); } else { - Re_encode_bit( renc, &len_encoder->lm.choice1, 1 ); + Re_encode_bit( renc, &le->lm.choice1, 1 ); if( symbol < len_low_symbols + len_mid_symbols ) { - Re_encode_bit( renc, &len_encoder->lm.choice2, 0 ); - Re_encode_tree( renc, len_encoder->lm.bm_mid[pos_state], + Re_encode_bit( renc, &le->lm.choice2, 0 ); + Re_encode_tree( renc, le->lm.bm_mid[pos_state], symbol - len_low_symbols, len_mid_bits ); } else { - Re_encode_bit( renc, &len_encoder->lm.choice2, 1 ); - Re_encode_tree( renc, len_encoder->lm.bm_high, + Re_encode_bit( renc, &le->lm.choice2, 1 ); + Re_encode_tree( renc, le->lm.bm_high, symbol - len_low_symbols - len_mid_symbols, len_high_bits ); } } - if( --len_encoder->counters[pos_state] <= 0 ) - Lee_update_prices( len_encoder, pos_state ); + if( --le->counters[pos_state] <= 0 ) Lee_update_prices( le, pos_state ); } /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */ -static bool LZe_full_flush( struct LZ_encoder * const encoder, const State state ) +static bool LZe_full_flush( struct LZ_encoder * const e, const State state ) { int i; - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; + const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; File_trailer trailer; - if( encoder->member_finished || - Cb_free_bytes( &encoder->renc.cb ) < max_marker_size + Ft_size ) + if( e->member_finished || + Cb_free_bytes( &e->renc.cb ) < max_marker_size + Ft_size ) return false; - Re_encode_bit( &encoder->renc, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], 0 ); - LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len, pos_state ); - Re_flush( &encoder->renc ); - Ft_set_data_crc( trailer, LZe_crc( encoder ) ); - Ft_set_data_size( trailer, Mf_data_position( encoder->matchfinder ) ); - Ft_set_member_size( trailer, Re_member_position( &encoder->renc ) + Ft_size ); + Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 ); + Re_encode_bit( &e->renc, &e->bm_rep[state], 0 ); + LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len, pos_state ); + Re_flush( &e->renc ); + Ft_set_data_crc( trailer, LZe_crc( e ) ); + Ft_set_data_size( trailer, Mf_data_position( e->matchfinder ) ); + Ft_set_member_size( trailer, Re_member_position( &e->renc ) + Ft_size ); for( i = 0; i < Ft_size; ++i ) - Cb_put_byte( &encoder->renc.cb, trailer[i] ); + Cb_put_byte( &e->renc.cb, trailer[i] ); return true; } /* Sync Flush mark => (dis == 0xFFFFFFFFU, len == min_match_len + 1) */ -static bool LZe_sync_flush( struct LZ_encoder * const encoder ) +static bool LZe_sync_flush( struct LZ_encoder * const e ) { - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - const State state = encoder->state; - if( encoder->member_finished || - Cb_free_bytes( &encoder->renc.cb ) < max_marker_size ) + const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; + const State state = e->state; + if( e->member_finished || Cb_free_bytes( &e->renc.cb ) < max_marker_size ) return false; - Re_encode_bit( &encoder->renc, &encoder->bm_match[state][pos_state], 1 ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep[state], 0 ); - LZe_encode_pair( encoder, 0xFFFFFFFFU, min_match_len + 1, pos_state ); - Re_flush( &encoder->renc ); + Re_encode_bit( &e->renc, &e->bm_match[state][pos_state], 1 ); + Re_encode_bit( &e->renc, &e->bm_rep[state], 0 ); + LZe_encode_pair( e, 0xFFFFFFFFU, min_match_len + 1, pos_state ); + Re_flush( &e->renc ); return true; } -static void LZe_fill_align_prices( struct LZ_encoder * const encoder ) +static void LZe_fill_align_prices( struct LZ_encoder * const e ) { int i; for( i = 0; i < dis_align_size; ++i ) - encoder->align_prices[i] = - price_symbol_reversed( encoder->bm_align, i, dis_align_bits ); - encoder->align_price_count = dis_align_size; + e->align_prices[i] = price_symbol_reversed( e->bm_align, i, dis_align_bits ); + e->align_price_count = dis_align_size; } -static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) +static void LZe_fill_distance_prices( struct LZ_encoder * const e ) { int dis, len_state; for( dis = start_dis_model; dis < modeled_distances; ++dis ) @@ -317,22 +315,21 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) const int dis_slot = dis_slots[dis]; const int direct_bits = ( dis_slot >> 1 ) - 1; const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - const int price = - price_symbol_reversed( encoder->bm_dis + base - dis_slot - 1, - dis - base, direct_bits ); + const int price = price_symbol_reversed( e->bm_dis + base - dis_slot - 1, + dis - base, direct_bits ); for( len_state = 0; len_state < len_states; ++len_state ) - encoder->dis_prices[len_state][dis] = price; + e->dis_prices[len_state][dis] = price; } for( len_state = 0; len_state < len_states; ++len_state ) { - int * const dsp = encoder->dis_slot_prices[len_state]; - int * const dp = encoder->dis_prices[len_state]; - const Bit_model * const bmds = encoder->bm_dis_slot[len_state]; + int * const dsp = e->dis_slot_prices[len_state]; + int * const dp = e->dis_prices[len_state]; + const Bit_model * const bmds = e->bm_dis_slot[len_state]; int slot = 0; - for( ; slot < end_dis_model && slot < encoder->num_dis_slots; ++slot ) + for( ; slot < end_dis_model; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ); - for( ; slot < encoder->num_dis_slots; ++slot ) + for( ; slot < e->num_dis_slots; ++slot ) dsp[slot] = price_symbol( bmds, slot, dis_slot_bits ) + (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits ); @@ -344,51 +341,49 @@ static void LZe_fill_distance_prices( struct LZ_encoder * const encoder ) } -static bool LZe_init( struct LZ_encoder * const encoder, +static bool LZe_init( struct LZ_encoder * const e, struct Matchfinder * const mf, const File_header header, const unsigned long long member_size ) { int i; - encoder->member_size_limit = member_size - Ft_size - max_marker_size; - encoder->pending_num_pairs = 0; - encoder->crc = 0xFFFFFFFFU; - - Bm_array_init( encoder->bm_literal[0], (1 << literal_context_bits) * 0x300 ); - Bm_array_init( encoder->bm_match[0], states * pos_states ); - Bm_array_init( encoder->bm_rep, states ); - Bm_array_init( encoder->bm_rep0, states ); - Bm_array_init( encoder->bm_rep1, states ); - Bm_array_init( encoder->bm_rep2, states ); - Bm_array_init( encoder->bm_len[0], states * pos_states ); - Bm_array_init( encoder->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); - Bm_array_init( encoder->bm_dis, modeled_distances - end_dis_model ); - Bm_array_init( encoder->bm_align, dis_align_size ); - - encoder->matchfinder = mf; - if( !Re_init( &encoder->renc ) ) return false; - Lee_init( &encoder->match_len_encoder, encoder->matchfinder->match_len_limit ); - Lee_init( &encoder->rep_len_encoder, encoder->matchfinder->match_len_limit ); - encoder->num_dis_slots = - 2 * real_bits( encoder->matchfinder->dictionary_size - 1 ); - - for( i = 0; i < num_rep_distances; ++i ) encoder->rep_distances[i] = 0; - encoder->align_price_count = 0; - encoder->fill_counter = 0; - encoder->state = 0; - encoder->member_finished = false; + e->member_size_limit = member_size - Ft_size - max_marker_size; + e->pending_num_pairs = 0; + e->crc = 0xFFFFFFFFU; + + Bm_array_init( e->bm_literal[0], (1 << literal_context_bits) * 0x300 ); + Bm_array_init( e->bm_match[0], states * pos_states ); + Bm_array_init( e->bm_rep, states ); + Bm_array_init( e->bm_rep0, states ); + Bm_array_init( e->bm_rep1, states ); + Bm_array_init( e->bm_rep2, states ); + Bm_array_init( e->bm_len[0], states * pos_states ); + Bm_array_init( e->bm_dis_slot[0], len_states * (1 << dis_slot_bits) ); + Bm_array_init( e->bm_dis, modeled_distances - end_dis_model ); + Bm_array_init( e->bm_align, dis_align_size ); + + e->matchfinder = mf; + if( !Re_init( &e->renc ) ) return false; + Lee_init( &e->match_len_encoder, mf->match_len_limit ); + Lee_init( &e->rep_len_encoder, mf->match_len_limit ); + for( i = 0; i < num_rep_distances; ++i ) e->reps[i] = 0; + e->align_price_count = 0; + e->num_dis_slots = 2 * real_bits( mf->dictionary_size - 1 ); + e->fill_counter = 0; + e->state = 0; + e->member_finished = false; for( i = 0; i < Fh_size; ++i ) - Cb_put_byte( &encoder->renc.cb, header[i] ); + Cb_put_byte( &e->renc.cb, header[i] ); return true; } /* Return value == number of bytes advanced (ahead). trials[0]..trials[ahead-1] contain the steps to encode. - ( trials[0].dis == -1 && trials[0].price == 1 ) means literal. + ( trials[0].dis == -1 ) means literal. */ -static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, +static int LZe_sequence_optimizer( struct LZ_encoder * const e, const int reps[num_rep_distances], const State state ) { @@ -396,111 +391,108 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, int replens[num_rep_distances]; int rep_index = 0; - if( encoder->pending_num_pairs > 0 ) /* from previous call */ + if( e->pending_num_pairs > 0 ) /* from previous call */ { - num_pairs = encoder->pending_num_pairs; - encoder->pending_num_pairs = 0; + num_pairs = e->pending_num_pairs; + e->pending_num_pairs = 0; } else - num_pairs = LZe_read_match_distances( encoder ); - main_len = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0; + num_pairs = LZe_read_match_distances( e ); + main_len = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; for( i = 0; i < num_rep_distances; ++i ) { replens[i] = - Mf_true_match_len( encoder->matchfinder, 0, reps[i] + 1, max_match_len ); + Mf_true_match_len( e->matchfinder, 0, reps[i] + 1, max_match_len ); if( replens[i] > replens[rep_index] ) rep_index = i; } - if( replens[rep_index] >= encoder->matchfinder->match_len_limit ) + if( replens[rep_index] >= e->matchfinder->match_len_limit ) { - encoder->trials[0].dis = rep_index; - encoder->trials[0].price = replens[rep_index]; - if( !LZe_move_pos( encoder, replens[rep_index] ) ) return 0; + e->trials[0].dis = rep_index; + e->trials[0].price = replens[rep_index]; + if( !LZe_move_pos( e, replens[rep_index] ) ) return 0; return replens[rep_index]; } - if( main_len >= encoder->matchfinder->match_len_limit ) + if( main_len >= e->matchfinder->match_len_limit ) { - encoder->trials[0].dis = encoder->pairs[num_pairs-1].dis + num_rep_distances; - encoder->trials[0].price = main_len; - if( !LZe_move_pos( encoder, main_len ) ) return 0; + e->trials[0].dis = e->pairs[num_pairs-1].dis + num_rep_distances; + e->trials[0].price = main_len; + if( !LZe_move_pos( e, main_len ) ) return 0; return main_len; } { - const int pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - const int match_price = price1( encoder->bm_match[state][pos_state] ); - const int rep_match_price = match_price + price1( encoder->bm_rep[state] ); - const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -1 ); - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, 0 ); - const uint8_t match_byte = Mf_peek( encoder->matchfinder, -reps[0]-1 ); - - encoder->trials[0].state = state; - encoder->trials[1].dis = -1; - encoder->trials[1].price = price0( encoder->bm_match[state][pos_state] ); + const int pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; + const int match_price = price1( e->bm_match[state][pos_state] ); + const int rep_match_price = match_price + price1( e->bm_rep[state] ); + const uint8_t prev_byte = Mf_peek( e->matchfinder, 1 ); + const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); + const uint8_t match_byte = Mf_peek( e->matchfinder, reps[0] + 1 ); + + e->trials[0].state = state; + e->trials[1].dis = -1; /* literal */ + e->trials[1].price = price0( e->bm_match[state][pos_state] ); if( St_is_char( state ) ) - encoder->trials[1].price += - LZe_price_literal( encoder, prev_byte, cur_byte ); + e->trials[1].price += LZe_price_literal( e, prev_byte, cur_byte ); else - encoder->trials[1].price += - LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); + e->trials[1].price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); if( match_byte == cur_byte ) - Tr_update( &encoder->trials[1], rep_match_price + - LZe_price_rep_len1( encoder, state, pos_state ), 0, 0 ); + Tr_update( &e->trials[1], rep_match_price + + LZe_price_shortrep( e, state, pos_state ), 0, 0 ); num_trials = max( main_len, replens[rep_index] ); if( num_trials < min_match_len ) { - encoder->trials[0].dis = encoder->trials[1].dis; - encoder->trials[0].price = 1; - if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + e->trials[0].dis = e->trials[1].dis; + e->trials[0].price = 1; + if( !Mf_move_pos( e->matchfinder ) ) return 0; return 1; } for( i = 0; i < num_rep_distances; ++i ) - encoder->trials[0].reps[i] = reps[i]; - encoder->trials[1].prev_index = 0; - encoder->trials[1].prev_index2 = single_step_trial; + e->trials[0].reps[i] = reps[i]; + e->trials[1].prev_index = 0; + e->trials[1].prev_index2 = single_step_trial; for( len = min_match_len; len <= num_trials; ++len ) - encoder->trials[len].price = infinite_price; + e->trials[len].price = infinite_price; for( rep = 0; rep < num_rep_distances; ++rep ) { int price; if( replens[rep] < min_match_len ) continue; - price = rep_match_price + LZe_price_rep( encoder, rep, state, pos_state ); + price = rep_match_price + LZe_price_rep( e, rep, state, pos_state ); for( len = min_match_len; len <= replens[rep]; ++len ) - Tr_update( &encoder->trials[len], price + - Lee_price( &encoder->rep_len_encoder, len, pos_state ), - rep, 0 ); + Tr_update( &e->trials[len], price + + Lee_price( &e->rep_len_encoder, len, pos_state ), rep, 0 ); } if( main_len > replens[0] ) { - const int normal_match_price = match_price + price0( encoder->bm_rep[state] ); + const int normal_match_price = match_price + price0( e->bm_rep[state] ); i = 0, len = max( replens[0] + 1, min_match_len ); - while( len > encoder->pairs[i].len ) ++i; + while( len > e->pairs[i].len ) ++i; while( true ) { - const int dis = encoder->pairs[i].dis; - Tr_update( &encoder->trials[len], normal_match_price + - LZe_price_pair( encoder, dis, len, pos_state ), + const int dis = e->pairs[i].dis; + Tr_update( &e->trials[len], normal_match_price + + LZe_price_pair( e, dis, len, pos_state ), dis + num_rep_distances, 0 ); - if( ++len > encoder->pairs[i].len && ++i >= num_pairs ) break; + if( ++len > e->pairs[i].len && ++i >= num_pairs ) break; } } } - if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + if( !Mf_move_pos( e->matchfinder ) ) return 0; while( true ) /* price optimization loop */ { struct Trial *cur_trial, *next_trial; - int newlen, pos_state, prev_index, prev_index2, available_bytes, len_limit; + int newlen, pos_state, available_bytes, len_limit; int start_len = min_match_len; int next_price, match_price, rep_match_price; State cur_state; @@ -508,120 +500,105 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, if( ++cur >= num_trials ) /* no more initialized trials */ { - LZe_backward( encoder, cur ); + LZe_backward( e, cur ); return cur; } - num_pairs = LZe_read_match_distances( encoder ); - newlen = ( num_pairs > 0 ) ? encoder->pairs[num_pairs-1].len : 0; - if( newlen >= encoder->matchfinder->match_len_limit ) + num_pairs = LZe_read_match_distances( e ); + newlen = ( num_pairs > 0 ) ? e->pairs[num_pairs-1].len : 0; + if( newlen >= e->matchfinder->match_len_limit ) { - encoder->pending_num_pairs = num_pairs; - LZe_backward( encoder, cur ); + e->pending_num_pairs = num_pairs; + LZe_backward( e, cur ); return cur; } /* give final values to current trial */ - cur_trial = &encoder->trials[cur]; - prev_index = cur_trial->prev_index; - prev_index2 = cur_trial->prev_index2; + cur_trial = &e->trials[cur]; + { + int dis = cur_trial->dis; + int prev_index = cur_trial->prev_index; + const int prev_index2 = cur_trial->prev_index2; - if( prev_index2 != single_step_trial ) + if( prev_index2 == single_step_trial ) { - --prev_index; - if( prev_index2 >= 0 ) + cur_state = e->trials[prev_index].state; + if( prev_index + 1 == cur ) /* len == 1 */ { - cur_state = encoder->trials[prev_index2].state; - if( cur_trial->dis2 < num_rep_distances ) - cur_state = St_set_rep( cur_state ); - else - cur_state = St_set_match( cur_state ); + if( dis == 0 ) cur_state = St_set_short_rep( cur_state ); + else cur_state = St_set_char( cur_state ); /* literal */ } - else - cur_state = encoder->trials[prev_index].state; - cur_state = St_set_char( cur_state ); + else if( dis < num_rep_distances ) cur_state = St_set_rep( cur_state ); + else cur_state = St_set_match( cur_state ); } - else - cur_state = encoder->trials[prev_index].state; - - if( prev_index == cur - 1 ) + else if( prev_index2 == dual_step_trial ) /* dis == 0 */ { - if( cur_trial->dis == 0 ) - cur_state = St_set_short_rep( cur_state ); - else - cur_state = St_set_char( cur_state ); - for( i = 0; i < num_rep_distances; ++i ) - cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; + --prev_index; + cur_state = e->trials[prev_index].state; + cur_state = St_set_char( cur_state ); + cur_state = St_set_rep( cur_state ); } - else + else /* if( prev_index2 >= 0 ) */ { - int dis; - if( prev_index2 >= 0 ) - { - dis = cur_trial->dis2; - prev_index = prev_index2; - cur_state = St_set_rep( cur_state ); - } - else - { - dis = cur_trial->dis; - if( dis < num_rep_distances ) - cur_state = St_set_rep( cur_state ); - else - cur_state = St_set_match( cur_state ); - } - for( i = 0; i < num_rep_distances; ++i ) - cur_trial->reps[i] = encoder->trials[prev_index].reps[i]; - LZe_mtf_reps( dis, cur_trial->reps ); + prev_index = prev_index2; + cur_state = e->trials[prev_index].state; + if( dis < num_rep_distances ) cur_state = St_set_rep( cur_state ); + else cur_state = St_set_match( cur_state ); + cur_state = St_set_char( cur_state ); + cur_state = St_set_rep( cur_state ); } cur_trial->state = cur_state; + for( i = 0; i < num_rep_distances; ++i ) + cur_trial->reps[i] = e->trials[prev_index].reps[i]; + mtf_reps( dis, cur_trial->reps ); + } - pos_state = Mf_data_position( encoder->matchfinder ) & pos_state_mask; - prev_byte = Mf_peek( encoder->matchfinder, -1 ); - cur_byte = Mf_peek( encoder->matchfinder, 0 ); - match_byte = Mf_peek( encoder->matchfinder, -cur_trial->reps[0]-1 ); + pos_state = Mf_data_position( e->matchfinder ) & pos_state_mask; + prev_byte = Mf_peek( e->matchfinder, 1 ); + cur_byte = Mf_peek( e->matchfinder, 0 ); + match_byte = Mf_peek( e->matchfinder, cur_trial->reps[0] + 1 ); + if( !Mf_move_pos( e->matchfinder ) ) return 0; next_price = cur_trial->price + - price0( encoder->bm_match[cur_state][pos_state] ); + price0( e->bm_match[cur_state][pos_state] ); if( St_is_char( cur_state ) ) - next_price += LZe_price_literal( encoder, prev_byte, cur_byte ); + next_price += LZe_price_literal( e, prev_byte, cur_byte ); else - next_price += LZe_price_matched( encoder, prev_byte, cur_byte, match_byte ); - if( !Mf_move_pos( encoder->matchfinder ) ) return 0; + next_price += LZe_price_matched( e, prev_byte, cur_byte, match_byte ); /* try last updates to next trial */ - next_trial = &encoder->trials[cur+1]; + next_trial = &e->trials[cur+1]; - Tr_update( next_trial, next_price, -1, cur ); + Tr_update( next_trial, next_price, -1, cur ); /* literal */ - match_price = cur_trial->price + price1( encoder->bm_match[cur_state][pos_state] ); - rep_match_price = match_price + price1( encoder->bm_rep[cur_state] ); + match_price = cur_trial->price + price1( e->bm_match[cur_state][pos_state] ); + rep_match_price = match_price + price1( e->bm_rep[cur_state] ); - if( match_byte == cur_byte && next_trial->dis != 0 ) + if( match_byte == cur_byte && next_trial->dis != 0 && + next_trial->prev_index2 == single_step_trial ) { const int price = rep_match_price + - LZe_price_rep_len1( encoder, cur_state, pos_state ); + LZe_price_shortrep( e, cur_state, pos_state ); if( price <= next_trial->price ) { next_trial->price = price; next_trial->dis = 0; next_trial->prev_index = cur; - next_trial->prev_index2 = single_step_trial; } } - available_bytes = min( Mf_available_bytes( encoder->matchfinder ) + 1, + available_bytes = min( Mf_available_bytes( e->matchfinder ) + 1, max_num_trials - 1 - cur ); if( available_bytes < min_match_len ) continue; - len_limit = min( encoder->matchfinder->match_len_limit, available_bytes ); + len_limit = min( e->matchfinder->match_len_limit, available_bytes ); /* try literal + rep0 */ if( match_byte != cur_byte && next_trial->prev_index != cur ) { - const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; const int dis = cur_trial->reps[0] + 1; - const int limit = min( encoder->matchfinder->match_len_limit + 1, + const int limit = min( e->matchfinder->match_len_limit + 1, available_bytes ); len = 1; while( len < limit && data[len-dis] == data[len] ) ++len; @@ -630,40 +607,38 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, const int pos_state2 = ( pos_state + 1 ) & pos_state_mask; const State state2 = St_set_char( cur_state ); const int price = next_price + - price1( encoder->bm_match[state2][pos_state2] ) + - price1( encoder->bm_rep[state2] ) + - LZe_price_rep0_len( encoder, len, state2, pos_state2 ); + price1( e->bm_match[state2][pos_state2] ) + + price1( e->bm_rep[state2] ) + + LZe_price_rep0_len( e, len, state2, pos_state2 ); while( num_trials < cur + 1 + len ) - encoder->trials[++num_trials].price = infinite_price; - Tr_update2( &encoder->trials[cur+1+len], price, 0, cur + 1 ); + e->trials[++num_trials].price = infinite_price; + Tr_update2( &e->trials[cur+1+len], price, cur + 1 ); } } /* try rep distances */ for( rep = 0; rep < num_rep_distances; ++rep ) { - const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; int price; const int dis = cur_trial->reps[rep] + 1; - if( data[-dis] != data[0] || data[1-dis] != data[1] ) continue; + if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue; for( len = min_match_len; len < len_limit; ++len ) if( data[len-dis] != data[len] ) break; while( num_trials < cur + len ) - encoder->trials[++num_trials].price = infinite_price; - price = rep_match_price + - LZe_price_rep( encoder, rep, cur_state, pos_state ); + e->trials[++num_trials].price = infinite_price; + price = rep_match_price + LZe_price_rep( e, rep, cur_state, pos_state ); for( i = min_match_len; i <= len; ++i ) - Tr_update( &encoder->trials[cur+i], price + - Lee_price( &encoder->rep_len_encoder, i, pos_state ), - rep, cur ); + Tr_update( &e->trials[cur+i], price + + Lee_price( &e->rep_len_encoder, i, pos_state ), rep, cur ); if( rep == 0 ) start_len = len + 1; /* discard shorter matches */ /* try rep + literal + rep0 */ { int len2 = len + 1, pos_state2; - const int limit = min( encoder->matchfinder->match_len_limit + len2, + const int limit = min( e->matchfinder->match_len_limit + len2, available_bytes ); State state2; while( len2 < limit && data[len2-dis] == data[len2] ) ++len2; @@ -672,18 +647,17 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, pos_state2 = ( pos_state + len ) & pos_state_mask; state2 = St_set_rep( cur_state ); - price += Lee_price( &encoder->rep_len_encoder, len, pos_state ) + - price0( encoder->bm_match[state2][pos_state2] ) + - LZe_price_matched( encoder, data[len-1], data[len], data[len-dis] ); + price += Lee_price( &e->rep_len_encoder, len, pos_state ) + + price0( e->bm_match[state2][pos_state2] ) + + LZe_price_matched( e, data[len-1], data[len], data[len-dis] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; state2 = St_set_char( state2 ); - price += price1( encoder->bm_match[state2][pos_state2] ) + - price1( encoder->bm_rep[state2] ) + - LZe_price_rep0_len( encoder, len2, state2, pos_state2 ); + price += price1( e->bm_match[state2][pos_state2] ) + + price1( e->bm_rep[state2] ) + + LZe_price_rep0_len( e, len2, state2, pos_state2 ); while( num_trials < cur + len + 1 + len2 ) - encoder->trials[++num_trials].price = infinite_price; - Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, cur + len + 1, - rep, cur ); + e->trials[++num_trials].price = infinite_price; + Tr_update3( &e->trials[cur+len+1+len2], price, rep, cur + len + 1, cur ); } } @@ -692,28 +666,27 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { int dis; const int normal_match_price = match_price + - price0( encoder->bm_rep[cur_state] ); + price0( e->bm_rep[cur_state] ); while( num_trials < cur + newlen ) - encoder->trials[++num_trials].price = infinite_price; + e->trials[++num_trials].price = infinite_price; i = 0; - while( start_len > encoder->pairs[i].len ) ++i; - dis = encoder->pairs[i].dis; + while( start_len > e->pairs[i].len ) ++i; + dis = e->pairs[i].dis; for( len = start_len; ; ++len ) { - int price = normal_match_price + - LZe_price_pair( encoder, dis, len, pos_state ); + int price = normal_match_price + LZe_price_pair( e, dis, len, pos_state ); - Tr_update( &encoder->trials[cur+len], price, dis + num_rep_distances, cur ); + Tr_update( &e->trials[cur+len], price, dis + num_rep_distances, cur ); /* try match + literal + rep0 */ - if( len == encoder->pairs[i].len ) + if( len == e->pairs[i].len ) { - const uint8_t * const data = Mf_ptr_to_current_pos( encoder->matchfinder ) - 1; + const uint8_t * const data = Mf_ptr_to_current_pos( e->matchfinder ) - 1; const int dis2 = dis + 1; int len2 = len + 1; - const int limit = min( encoder->matchfinder->match_len_limit + len2, + const int limit = min( e->matchfinder->match_len_limit + len2, available_bytes ); while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2; len2 -= len + 1; @@ -721,21 +694,21 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, { int pos_state2 = ( pos_state + len ) & pos_state_mask; State state2 = St_set_match( cur_state ); - price += price0( encoder->bm_match[state2][pos_state2] ) + - LZe_price_matched( encoder, data[len-1], data[len], data[len-dis2] ); + price += price0( e->bm_match[state2][pos_state2] ) + + LZe_price_matched( e, data[len-1], data[len], data[len-dis2] ); pos_state2 = ( pos_state2 + 1 ) & pos_state_mask; state2 = St_set_char( state2 ); - price += price1( encoder->bm_match[state2][pos_state2] ) + - price1( encoder->bm_rep[state2] ) + - LZe_price_rep0_len( encoder, len2, state2, pos_state2 ); + price += price1( e->bm_match[state2][pos_state2] ) + + price1( e->bm_rep[state2] ) + + LZe_price_rep0_len( e, len2, state2, pos_state2 ); while( num_trials < cur + len + 1 + len2 ) - encoder->trials[++num_trials].price = infinite_price; - Tr_update3( &encoder->trials[cur+len+1+len2], price, 0, - cur + len + 1, dis + num_rep_distances, cur ); + e->trials[++num_trials].price = infinite_price; + Tr_update3( &e->trials[cur+len+1+len2], price, + dis + num_rep_distances, cur + len + 1, cur ); } if( ++i >= num_pairs ) break; - dis = encoder->pairs[i].dis; + dis = e->pairs[i].dis; } } } @@ -743,118 +716,112 @@ static int LZe_sequence_optimizer( struct LZ_encoder * const encoder, } -static bool LZe_encode_member( struct LZ_encoder * const encoder ) +static bool LZe_encode_member( struct LZ_encoder * const e ) { - const int fill_count = - ( encoder->matchfinder->match_len_limit > 12 ) ? 128 : 512; + const int fill_count = ( e->matchfinder->match_len_limit > 12 ) ? 128 : 512; int ahead, i; - State * const state = &encoder->state; + State * const state = &e->state; - if( encoder->member_finished ) return true; - if( Re_member_position( &encoder->renc ) >= encoder->member_size_limit ) + if( e->member_finished ) return true; + if( Re_member_position( &e->renc ) >= e->member_size_limit ) { - if( LZe_full_flush( encoder, *state ) ) encoder->member_finished = true; + if( LZe_full_flush( e, *state ) ) e->member_finished = true; return true; } - if( Mf_data_position( encoder->matchfinder ) == 0 && - !Mf_finished( encoder->matchfinder ) ) /* encode first byte */ + if( Mf_data_position( e->matchfinder ) == 0 && + !Mf_finished( e->matchfinder ) ) /* encode first byte */ { const uint8_t prev_byte = 0; - uint8_t cur_byte; - if( Mf_available_bytes( encoder->matchfinder ) < max_match_len && - !Mf_flushing_or_end( encoder->matchfinder ) ) - return true; - cur_byte = Mf_peek( encoder->matchfinder, 0 ); - Re_encode_bit( &encoder->renc, &encoder->bm_match[*state][0], 0 ); - LZe_encode_literal( encoder, prev_byte, cur_byte ); - CRC32_update_byte( &encoder->crc, cur_byte ); - Mf_get_match_pairs( encoder->matchfinder, 0 ); - if( !Mf_move_pos( encoder->matchfinder ) ) return false; + const uint8_t cur_byte = Mf_peek( e->matchfinder, 0 ); + if( !Mf_enough_available_bytes( e->matchfinder ) || + !Re_enough_free_bytes( &e->renc ) ) return true; + CRC32_update_byte( &e->crc, cur_byte ); + Re_encode_bit( &e->renc, &e->bm_match[*state][0], 0 ); + LZe_encode_literal( e, prev_byte, cur_byte ); + Mf_get_match_pairs( e->matchfinder, 0 ); + if( !Mf_move_pos( e->matchfinder ) ) return false; } - while( !Mf_finished( encoder->matchfinder ) ) + while( !Mf_finished( e->matchfinder ) ) { - if( !Mf_enough_available_bytes( encoder->matchfinder ) || - !Re_enough_free_bytes( &encoder->renc ) ) return true; - if( encoder->pending_num_pairs == 0 ) + if( !Mf_enough_available_bytes( e->matchfinder ) || + !Re_enough_free_bytes( &e->renc ) ) return true; + if( e->pending_num_pairs == 0 ) { - if( encoder->fill_counter <= 0 ) - { LZe_fill_distance_prices( encoder ); encoder->fill_counter = fill_count; } - if( encoder->align_price_count <= 0 ) - LZe_fill_align_prices( encoder ); + if( e->fill_counter <= 0 ) + { LZe_fill_distance_prices( e ); e->fill_counter = fill_count; } + if( e->align_price_count <= 0 ) LZe_fill_align_prices( e ); } - ahead = LZe_sequence_optimizer( encoder, encoder->rep_distances, *state ); + ahead = LZe_sequence_optimizer( e, e->reps, *state ); if( ahead <= 0 ) return false; /* can't happen */ - for( i = 0; ; ) + for( i = 0; ahead > 0; ) { const int pos_state = - ( Mf_data_position( encoder->matchfinder ) - ahead ) & pos_state_mask; - const int dis = encoder->trials[i].dis; - const int len = encoder->trials[i].price; + ( Mf_data_position( e->matchfinder ) - ahead ) & pos_state_mask; + const int dis = e->trials[i].dis; + const int len = e->trials[i].price; - bool bit = ( dis < 0 && len == 1 ); - Re_encode_bit( &encoder->renc, - &encoder->bm_match[*state][pos_state], !bit ); + bool bit = ( dis < 0 ); + Re_encode_bit( &e->renc, &e->bm_match[*state][pos_state], !bit ); if( bit ) /* literal byte */ { - const uint8_t prev_byte = Mf_peek( encoder->matchfinder, -ahead-1 ); - const uint8_t cur_byte = Mf_peek( encoder->matchfinder, -ahead ); - CRC32_update_byte( &encoder->crc, cur_byte ); + const uint8_t prev_byte = Mf_peek( e->matchfinder, ahead + 1 ); + const uint8_t cur_byte = Mf_peek( e->matchfinder, ahead ); + CRC32_update_byte( &e->crc, cur_byte ); if( St_is_char( *state ) ) - LZe_encode_literal( encoder, prev_byte, cur_byte ); + LZe_encode_literal( e, prev_byte, cur_byte ); else { const uint8_t match_byte = - Mf_peek( encoder->matchfinder, -ahead-encoder->rep_distances[0]-1 ); - LZe_encode_matched( encoder, prev_byte, cur_byte, match_byte ); + Mf_peek( e->matchfinder, ahead + e->reps[0] + 1 ); + LZe_encode_matched( e, prev_byte, cur_byte, match_byte ); } *state = St_set_char( *state ); } else /* match or repeated match */ { - CRC32_update_buf( &encoder->crc, Mf_ptr_to_current_pos( encoder->matchfinder ) - ahead, len ); - LZe_mtf_reps( dis, encoder->rep_distances ); + CRC32_update_buf( &e->crc, Mf_ptr_to_current_pos( e->matchfinder ) - ahead, len ); + mtf_reps( dis, e->reps ); bit = ( dis < num_rep_distances ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep[*state], bit ); - if( bit ) + Re_encode_bit( &e->renc, &e->bm_rep[*state], bit ); + if( bit ) /* repeated match */ { bit = ( dis == 0 ); - Re_encode_bit( &encoder->renc, &encoder->bm_rep0[*state], !bit ); + Re_encode_bit( &e->renc, &e->bm_rep0[*state], !bit ); if( bit ) - Re_encode_bit( &encoder->renc, &encoder->bm_len[*state][pos_state], len > 1 ); + Re_encode_bit( &e->renc, &e->bm_len[*state][pos_state], len > 1 ); else { - Re_encode_bit( &encoder->renc, &encoder->bm_rep1[*state], dis > 1 ); + Re_encode_bit( &e->renc, &e->bm_rep1[*state], dis > 1 ); if( dis > 1 ) - Re_encode_bit( &encoder->renc, &encoder->bm_rep2[*state], dis > 2 ); + Re_encode_bit( &e->renc, &e->bm_rep2[*state], dis > 2 ); } if( len == 1 ) *state = St_set_short_rep( *state ); else { - Lee_encode( &encoder->rep_len_encoder, &encoder->renc, len, pos_state ); + Lee_encode( &e->rep_len_encoder, &e->renc, len, pos_state ); *state = St_set_rep( *state ); } } - else + else /* match */ { - LZe_encode_pair( encoder, dis - num_rep_distances, len, pos_state ); - --encoder->fill_counter; + LZe_encode_pair( e, dis - num_rep_distances, len, pos_state ); + --e->fill_counter; *state = St_set_match( *state ); } } ahead -= len; i += len; - if( Re_member_position( &encoder->renc ) >= encoder->member_size_limit ) + if( Re_member_position( &e->renc ) >= e->member_size_limit ) { - if( !Mf_dec_pos( encoder->matchfinder, ahead ) ) return false; - if( LZe_full_flush( encoder, *state ) ) encoder->member_finished = true; + if( !Mf_dec_pos( e->matchfinder, ahead ) ) return false; + if( LZe_full_flush( e, *state ) ) e->member_finished = true; return true; } - if( ahead <= 0 ) break; } } - if( LZe_full_flush( encoder, *state ) ) encoder->member_finished = true; + if( LZe_full_flush( e, *state ) ) e->member_finished = true; return true; } diff --git a/encoder.h b/encoder.h index ac98e6a..f458bd9 100644 --- a/encoder.h +++ b/encoder.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -96,7 +96,7 @@ static const uint8_t dis_slots[1<<10] = 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19 }; -static inline uint8_t get_slot( const uint32_t dis ) +static inline uint8_t get_slot( const unsigned dis ) { if( dis < (1 << 10) ) return dis_slots[dis]; if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18; @@ -186,15 +186,15 @@ static inline int price_symbol_reversed( const Bit_model bm[], int symbol, } -static inline int price_matched( const Bit_model bm[], unsigned symbol, - unsigned match_byte ) +static inline int price_matched( const Bit_model bm[], int symbol, + int match_byte ) { int price = 0; - unsigned mask = 0x100; - symbol |= 0x100; + int mask = 0x100; + symbol |= mask; do { - unsigned bit, match_bit; + int match_bit, bit; match_byte <<= 1; match_bit = match_byte & mask; symbol <<= 1; @@ -225,17 +225,17 @@ struct Matchfinder { unsigned long long partial_data_pos; uint8_t * buffer; /* input buffer */ - int32_t * prev_positions; /* last seen position of key */ + int32_t * prev_positions; /* 1 + last seen position of key. else 0 */ int32_t * prev_pos_tree; /* previous positions of key */ int match_len_limit; int buffer_size; int dictionary_size; /* bytes to keep in buffer before pos */ int pos; /* current pos in buffer */ int cyclic_pos; /* cycles through [0, dictionary_size] */ - int pos_limit; /* when reached, a new block must be read */ int stream_pos; /* first byte not yet read from file */ + int pos_limit; /* when reached, a new block must be read */ int cycles; - unsigned key4_mask; + int key4_mask; int num_prev_positions; /* size of prev_positions */ bool at_stream_end; /* stream_pos shows real end of file */ bool been_flushed; @@ -262,8 +262,9 @@ static inline void Mf_free( struct Matchfinder * const mf ) free( mf->buffer ); } -static inline uint8_t Mf_peek( const struct Matchfinder * const mf, const int i ) - { return mf->buffer[mf->pos+i]; } +static inline uint8_t Mf_peek( const struct Matchfinder * const mf, + const int distance ) + { return mf->buffer[mf->pos-distance]; } static inline int Mf_available_bytes( const struct Matchfinder * const mf ) { return mf->stream_pos - mf->pos; } @@ -339,7 +340,7 @@ struct Range_encoder uint64_t low; unsigned long long partial_member_pos; uint32_t range; - int ff_count; + unsigned ff_count; uint8_t cache; }; @@ -457,14 +458,14 @@ static inline void Re_encode_tree_reversed( struct Range_encoder * const renc, } static inline void Re_encode_matched( struct Range_encoder * const renc, - Bit_model bm[], unsigned symbol, - unsigned match_byte ) + Bit_model bm[], int symbol, + int match_byte ) { - unsigned mask = 0x100; - symbol |= 0x100; + int mask = 0x100; + symbol |= mask; do { - unsigned bit, match_bit; + int match_bit, bit; match_byte <<= 1; match_bit = match_byte & mask; symbol <<= 1; @@ -484,44 +485,43 @@ struct Len_encoder int counters[pos_states]; }; -static void Lee_update_prices( struct Len_encoder * const len_encoder, +static void Lee_update_prices( struct Len_encoder * const le, const int pos_state ) { - int * const pps = len_encoder->prices[pos_state]; - int tmp = price0( len_encoder->lm.choice1 ); + int * const pps = le->prices[pos_state]; + int tmp = price0( le->lm.choice1 ); int len = 0; - for( ; len < len_low_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + - price_symbol( len_encoder->lm.bm_low[pos_state], len, len_low_bits ); - tmp = price1( len_encoder->lm.choice1 ); - for( ; len < len_low_symbols + len_mid_symbols && len < len_encoder->len_symbols; ++len ) - pps[len] = tmp + price0( len_encoder->lm.choice2 ) + - price_symbol( len_encoder->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); - for( ; len < len_encoder->len_symbols; ++len ) + for( ; len < len_low_symbols && len < le->len_symbols; ++len ) + pps[len] = tmp + price_symbol( le->lm.bm_low[pos_state], len, len_low_bits ); + tmp = price1( le->lm.choice1 ); + for( ; len < len_low_symbols + len_mid_symbols && len < le->len_symbols; ++len ) + pps[len] = tmp + price0( le->lm.choice2 ) + + price_symbol( le->lm.bm_mid[pos_state], len - len_low_symbols, len_mid_bits ); + for( ; len < le->len_symbols; ++len ) /* using 4 slots per value makes "Lee_price" faster */ - len_encoder->prices[3][len] = len_encoder->prices[2][len] = - len_encoder->prices[1][len] = len_encoder->prices[0][len] = - tmp + price1( len_encoder->lm.choice2 ) + - price_symbol( len_encoder->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); - len_encoder->counters[pos_state] = len_encoder->len_symbols; + le->prices[3][len] = le->prices[2][len] = + le->prices[1][len] = le->prices[0][len] = + tmp + price1( le->lm.choice2 ) + + price_symbol( le->lm.bm_high, len - len_low_symbols - len_mid_symbols, len_high_bits ); + le->counters[pos_state] = le->len_symbols; } -static void Lee_init( struct Len_encoder * const len_encoder, +static void Lee_init( struct Len_encoder * const le, const int match_len_limit ) { int i; - Lm_init( &len_encoder->lm ); - len_encoder->len_symbols = match_len_limit + 1 - min_match_len; - for( i = 0; i < pos_states; ++i ) Lee_update_prices( len_encoder, i ); + Lm_init( &le->lm ); + le->len_symbols = match_len_limit + 1 - min_match_len; + for( i = 0; i < pos_states; ++i ) Lee_update_prices( le, i ); } -static void Lee_encode( struct Len_encoder * const len_encoder, +static void Lee_encode( struct Len_encoder * const le, struct Range_encoder * const renc, int symbol, const int pos_state ); -static inline int Lee_price( const struct Len_encoder * const len_encoder, +static inline int Lee_price( const struct Len_encoder * const le, const int symbol, const int pos_state ) - { return len_encoder->prices[pos_state][symbol - min_match_len]; } + { return le->prices[pos_state][symbol - min_match_len]; } enum { infinite_price = 0x0FFFFFFF, @@ -534,46 +534,42 @@ struct Trial { State state; int price; /* dual use var; cumulative price, match length */ - int dis; /* rep index or match distance */ + int dis; /* rep index or match distance. (-1 for literal) */ int prev_index; /* index of prev trial in trials[] */ - int dis2; int prev_index2; /* -2 trial is single step */ /* -1 literal + rep0 */ - /* >= 0 rep or match + literal + rep0 */ + /* >= 0 ( rep or match ) + literal + rep0 */ int reps[num_rep_distances]; }; static inline void Tr_update( struct Trial * const trial, const int pr, - const int d, const int p_i ) + const int distance, const int p_i ) { if( pr < trial->price ) { - trial->price = pr; - trial->dis = d; trial->prev_index = p_i; + trial->price = pr; trial->dis = distance; trial->prev_index = p_i; trial->prev_index2 = single_step_trial; } } static inline void Tr_update2( struct Trial * const trial, const int pr, - const int d, const int p_i ) + const int p_i ) { if( pr < trial->price ) { - trial->price = pr; - trial->dis = d; trial->prev_index = p_i; + trial->price = pr; trial->dis = 0; trial->prev_index = p_i; trial->prev_index2 = dual_step_trial; } } static inline void Tr_update3( struct Trial * const trial, const int pr, - const int d, const int p_i, - const int d2, const int p_i2 ) + const int distance, const int p_i, + const int p_i2 ) { if( pr < trial->price ) { - trial->price = pr; - trial->dis = d; trial->prev_index = p_i; - trial->dis2 = d2; trial->prev_index2 = p_i2; + trial->price = pr; trial->dis = distance; trial->prev_index = p_i; + trial->prev_index2 = p_i2; } } @@ -600,35 +596,35 @@ struct LZ_encoder struct Len_encoder match_len_encoder; struct Len_encoder rep_len_encoder; - int num_dis_slots; - int rep_distances[num_rep_distances]; struct Pair pairs[max_match_len+1]; struct Trial trials[max_num_trials]; + int reps[num_rep_distances]; int dis_slot_prices[len_states][2*max_dictionary_bits]; int dis_prices[len_states][modeled_distances]; int align_prices[dis_align_size]; int align_price_count; + int num_dis_slots; int fill_counter; State state; bool member_finished; }; -static inline bool LZe_member_finished( const struct LZ_encoder * const encoder ) +static inline bool LZe_member_finished( const struct LZ_encoder * const e ) { - return ( encoder->member_finished && !Cb_used_bytes( &encoder->renc.cb ) ); + return ( e->member_finished && !Cb_used_bytes( &e->renc.cb ) ); } -static inline void LZe_free( struct LZ_encoder * const encoder ) - { Re_free( &encoder->renc ); } +static inline void LZe_free( struct LZ_encoder * const e ) + { Re_free( &e->renc ); } -static inline unsigned LZe_crc( const struct LZ_encoder * const encoder ) - { return encoder->crc ^ 0xFFFFFFFFU; } +static inline unsigned LZe_crc( const struct LZ_encoder * const e ) + { return e->crc ^ 0xFFFFFFFFU; } - /* move-to-front dis in/into reps */ -static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) + /* move-to-front dis in/into reps if( dis > 0 ) */ +static inline void mtf_reps( const int dis, int reps[num_rep_distances] ) { int i; if( dis >= num_rep_distances ) @@ -644,156 +640,146 @@ static inline void LZe_mtf_reps( const int dis, int reps[num_rep_distances] ) } } -static inline int LZe_price_rep_len1( const struct LZ_encoder * const encoder, +static inline int LZe_price_shortrep( const struct LZ_encoder * const e, const State state, const int pos_state ) { - return price0( encoder->bm_rep0[state] ) + - price0( encoder->bm_len[state][pos_state] ); + return price0( e->bm_rep0[state] ) + price0( e->bm_len[state][pos_state] ); } -static inline int LZe_price_rep( const struct LZ_encoder * const encoder, +static inline int LZe_price_rep( const struct LZ_encoder * const e, const int rep, const State state, const int pos_state ) { int price; - if( rep == 0 ) return price0( encoder->bm_rep0[state] ) + - price1( encoder->bm_len[state][pos_state] ); - price = price1( encoder->bm_rep0[state] ); + if( rep == 0 ) return price0( e->bm_rep0[state] ) + + price1( e->bm_len[state][pos_state] ); + price = price1( e->bm_rep0[state] ); if( rep == 1 ) - price += price0( encoder->bm_rep1[state] ); + price += price0( e->bm_rep1[state] ); else { - price += price1( encoder->bm_rep1[state] ); - price += price_bit( encoder->bm_rep2[state], rep - 2 ); + price += price1( e->bm_rep1[state] ); + price += price_bit( e->bm_rep2[state], rep - 2 ); } return price; } -static inline int LZe_price_rep0_len( const struct LZ_encoder * const encoder, +static inline int LZe_price_rep0_len( const struct LZ_encoder * const e, const int len, const State state, const int pos_state ) { - return LZe_price_rep( encoder, 0, state, pos_state ) + - Lee_price( &encoder->rep_len_encoder, len, pos_state ); - } - -static inline int LZe_price_dis( const struct LZ_encoder * const encoder, - const int dis, const int len_state ) - { - if( dis < modeled_distances ) - return encoder->dis_prices[len_state][dis]; - else - return encoder->dis_slot_prices[len_state][get_slot( dis )] + - encoder->align_prices[dis & (dis_align_size - 1)]; + return LZe_price_rep( e, 0, state, pos_state ) + + Lee_price( &e->rep_len_encoder, len, pos_state ); } -static inline int LZe_price_pair( const struct LZ_encoder * const encoder, +static inline int LZe_price_pair( const struct LZ_encoder * const e, const int dis, const int len, const int pos_state ) { - return Lee_price( &encoder->match_len_encoder, len, pos_state ) + - LZe_price_dis( encoder, dis, get_len_state( len ) ); + const int price = Lee_price( &e->match_len_encoder, len, pos_state ); + const int len_state = get_len_state( len ); + if( dis < modeled_distances ) + return price + e->dis_prices[len_state][dis]; + else + return price + e->dis_slot_prices[len_state][get_slot( dis )] + + e->align_prices[dis & (dis_align_size - 1)]; } -static inline int LZe_price_literal( const struct LZ_encoder * const encoder, - uint8_t prev_byte, uint8_t symbol ) - { return price_symbol( encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } +static inline int LZe_price_literal( const struct LZ_encoder * const e, + uint8_t prev_byte, uint8_t symbol ) + { return price_symbol( e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } -static inline int LZe_price_matched( const struct LZ_encoder * const encoder, +static inline int LZe_price_matched( const struct LZ_encoder * const e, uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { return price_matched( encoder->bm_literal[get_lit_state(prev_byte)], - symbol, match_byte ); } + { return price_matched( e->bm_literal[get_lit_state(prev_byte)], symbol, + match_byte ); } -static inline void LZe_encode_literal( struct LZ_encoder * const encoder, +static inline void LZe_encode_literal( struct LZ_encoder * const e, uint8_t prev_byte, uint8_t symbol ) - { Re_encode_tree( &encoder->renc, - encoder->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } + { Re_encode_tree( &e->renc, + e->bm_literal[get_lit_state(prev_byte)], symbol, 8 ); } -static inline void LZe_encode_matched( struct LZ_encoder * const encoder, +static inline void LZe_encode_matched( struct LZ_encoder * const e, uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) - { Re_encode_matched( &encoder->renc, - encoder->bm_literal[get_lit_state(prev_byte)], + { Re_encode_matched( &e->renc, e->bm_literal[get_lit_state(prev_byte)], symbol, match_byte ); } -static inline void LZe_encode_pair( struct LZ_encoder * const encoder, - const uint32_t dis, const int len, +static inline void LZe_encode_pair( struct LZ_encoder * const e, + const unsigned dis, const int len, const int pos_state ) { const int dis_slot = get_slot( dis ); - Lee_encode( &encoder->match_len_encoder, &encoder->renc, len, pos_state ); - Re_encode_tree( &encoder->renc, encoder->bm_dis_slot[get_len_state(len)], - dis_slot, dis_slot_bits ); + Lee_encode( &e->match_len_encoder, &e->renc, len, pos_state ); + Re_encode_tree( &e->renc, e->bm_dis_slot[get_len_state(len)], dis_slot, + dis_slot_bits ); if( dis_slot >= start_dis_model ) { const int direct_bits = ( dis_slot >> 1 ) - 1; - const uint32_t base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - const uint32_t direct_dis = dis - base; + const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + const unsigned direct_dis = dis - base; if( dis_slot < end_dis_model ) - Re_encode_tree_reversed( &encoder->renc, - encoder->bm_dis + base - dis_slot - 1, + Re_encode_tree_reversed( &e->renc, e->bm_dis + base - dis_slot - 1, direct_dis, direct_bits ); else { - Re_encode( &encoder->renc, direct_dis >> dis_align_bits, + Re_encode( &e->renc, direct_dis >> dis_align_bits, direct_bits - dis_align_bits ); - Re_encode_tree_reversed( &encoder->renc, encoder->bm_align, - direct_dis, dis_align_bits ); - --encoder->align_price_count; + Re_encode_tree_reversed( &e->renc, e->bm_align, direct_dis, dis_align_bits ); + --e->align_price_count; } } } -static inline int LZe_read_match_distances( struct LZ_encoder * const encoder ) +static inline int LZe_read_match_distances( struct LZ_encoder * const e ) { - const int num_pairs = - Mf_get_match_pairs( encoder->matchfinder, encoder->pairs ); + const int num_pairs = Mf_get_match_pairs( e->matchfinder, e->pairs ); if( num_pairs > 0 ) { - int len = encoder->pairs[num_pairs-1].len; - if( len == encoder->matchfinder->match_len_limit && len < max_match_len ) + int len = e->pairs[num_pairs-1].len; + if( len == e->matchfinder->match_len_limit && len < max_match_len ) { - len += Mf_true_match_len( encoder->matchfinder, len, - encoder->pairs[num_pairs-1].dis + 1, + len += Mf_true_match_len( e->matchfinder, len, + e->pairs[num_pairs-1].dis + 1, max_match_len - len ); - encoder->pairs[num_pairs-1].len = len; + e->pairs[num_pairs-1].len = len; } } return num_pairs; } -static inline bool LZe_move_pos( struct LZ_encoder * const encoder, int n ) +static inline bool LZe_move_pos( struct LZ_encoder * const e, int n ) { - if( --n >= 0 && !Mf_move_pos( encoder->matchfinder ) ) return false; - while( --n >= 0 ) + while( true ) { - Mf_get_match_pairs( encoder->matchfinder, 0 ); - if( !Mf_move_pos( encoder->matchfinder ) ) return false; + if( !Mf_move_pos( e->matchfinder ) ) return false; + if( --n <= 0 ) break; + Mf_get_match_pairs( e->matchfinder, 0 ); } return true; } -static inline void LZe_backward( struct LZ_encoder * const encoder, int cur ) +static inline void LZe_backward( struct LZ_encoder * const e, int cur ) { - int * const dis = &encoder->trials[cur].dis; + int * const dis = &e->trials[cur].dis; while( cur > 0 ) { - const int prev_index = encoder->trials[cur].prev_index; - struct Trial * const prev_trial = &encoder->trials[prev_index]; + const int prev_index = e->trials[cur].prev_index; + struct Trial * const prev_trial = &e->trials[prev_index]; - if( encoder->trials[cur].prev_index2 != single_step_trial ) + if( e->trials[cur].prev_index2 != single_step_trial ) { prev_trial->dis = -1; prev_trial->prev_index = prev_index - 1; prev_trial->prev_index2 = single_step_trial; - if( encoder->trials[cur].prev_index2 >= 0 ) + if( e->trials[cur].prev_index2 >= 0 ) { - struct Trial * const prev_trial2 = &encoder->trials[prev_index-1]; - prev_trial2->dis = encoder->trials[cur].dis2; - prev_trial2->prev_index = encoder->trials[cur].prev_index2; + struct Trial * const prev_trial2 = &e->trials[prev_index-1]; + prev_trial2->dis = *dis; *dis = 0; + prev_trial2->prev_index = e->trials[cur].prev_index2; prev_trial2->prev_index2 = single_step_trial; } } diff --git a/lzcheck.c b/lzcheck.c index 8de3055..aa9195e 100644 --- a/lzcheck.c +++ b/lzcheck.c @@ -1,5 +1,5 @@ /* Lzcheck - Test program for the lzlib library - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. diff --git a/lzip.h b/lzip.h index aac0bd8..92e015e 100644 --- a/lzip.h +++ b/lzip.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -56,7 +56,7 @@ static inline State St_set_short_rep( const State st ) enum { min_dictionary_bits = 12, - min_dictionary_size = 1 << min_dictionary_bits, + min_dictionary_size = 1 << min_dictionary_bits, /* >= modeled_distances */ max_dictionary_bits = 29, max_dictionary_size = 1 << max_dictionary_bits, literal_context_bits = 3, @@ -174,7 +174,8 @@ static inline void CRC32_update_byte( uint32_t * const crc, const uint8_t byte ) { *crc = crc32[(*crc^byte)&0xFF] ^ ( *crc >> 8 ); } static inline void CRC32_update_buf( uint32_t * const crc, - const uint8_t * const buffer, const int size ) + const uint8_t * const buffer, + const int size ) { int i; for( i = 0; i < size; ++i ) @@ -254,41 +255,29 @@ enum { Ft_size = 20 }; static inline unsigned Ft_get_data_crc( const File_trailer data ) { unsigned tmp = 0; - int i; - for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + int i; for( i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } static inline void Ft_set_data_crc( File_trailer data, unsigned crc ) - { - int i; - for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } - } + { int i; for( i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } } static inline unsigned long long Ft_get_data_size( const File_trailer data ) { unsigned long long tmp = 0; - int i; - for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + int i; for( i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } static inline void Ft_set_data_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } + { int i; for( i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } static inline unsigned long long Ft_get_member_size( const File_trailer data ) { unsigned long long tmp = 0; - int i; - for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + int i; for( i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } return tmp; } static inline void Ft_set_member_size( File_trailer data, unsigned long long sz ) - { - int i; - for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } - } + { int i; for( i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } } diff --git a/lzlib.c b/lzlib.c index a69a9d2..48ef29e 100644 --- a/lzlib.c +++ b/lzlib.c @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -436,7 +436,8 @@ int LZ_decompress_read( struct LZ_Decoder * const d, return -1; } d->lz_decoder = (struct LZ_decoder *)malloc( sizeof (struct LZ_decoder) ); - if( !d->lz_decoder || !LZd_init( d->lz_decoder, d->member_header, d->rdec ) ) + if( !d->lz_decoder || !LZd_init( d->lz_decoder, d->rdec, + Fh_get_dictionary_size( d->member_header ) ) ) { /* not enough free memory */ if( d->lz_decoder ) { LZd_free( d->lz_decoder ); free( d->lz_decoder ); d->lz_decoder = 0; } diff --git a/lzlib.h b/lzlib.h index fd4bfd5..e1e91a0 100644 --- a/lzlib.h +++ b/lzlib.h @@ -1,5 +1,5 @@ /* Lzlib - Compression library for lzip files - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ extern "C" { #endif -static const char * const LZ_version_string = "1.6-pre1"; +static const char * const LZ_version_string = "1.6-pre2"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, diff --git a/main.c b/main.c index 4435c01..ef12c33 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* Minilzip - Test program for the lzlib library - Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,7 +76,7 @@ void internal_error( const char * const msg ); const char * const Program_name = "Minilzip"; const char * const program_name = "minilzip"; -const char * const program_year = "2013"; +const char * const program_year = "2014"; const char * invocation_name = 0; struct { const char * from; const char * to; } const known_extensions[] = { @@ -105,10 +105,30 @@ struct Pretty_print { const char * name; const char * stdin_name; - int longest_name; + unsigned longest_name; bool first_post; }; +static void Pp_init( struct Pretty_print * const pp, + const char * const filenames[], const int num_filenames ) + { + unsigned stdin_name_len; + int i; + pp->name = 0; + pp->stdin_name = "(stdin)"; + pp->longest_name = 0; + pp->first_post = false; + stdin_name_len = strlen( pp->stdin_name ); + + for( i = 0; i < num_filenames; ++i ) + { + const char * const s = filenames[i]; + const unsigned len = (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ); + if( len > pp->longest_name ) pp->longest_name = len; + } + if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; + } + static inline void Pp_set_name( struct Pretty_print * const pp, const char * const filename ) { @@ -118,11 +138,9 @@ static inline void Pp_set_name( struct Pretty_print * const pp, pp->first_post = true; } - static inline void Pp_reset( struct Pretty_print * const pp ) { if( pp->name && pp->name[0] ) pp->first_post = true; } - static void Pp_show_msg( struct Pretty_print * const pp, const char * const msg ) { if( verbosity >= 0 ) @@ -193,14 +211,14 @@ static void show_version( void ) } -static void show_header( struct LZ_Decoder * const decoder ) +static void show_header( const unsigned dictionary_size ) { const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; enum { factor = 1024 }; const char * p = ""; const char * np = " "; - unsigned num = LZ_decompress_dictionary_size( decoder ), i; + unsigned num = dictionary_size, i; bool exact = ( num % factor == 0 ); for( i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) @@ -304,7 +322,8 @@ static int open_instream( const char * const name, struct stat * const in_statsp } else { - infd = open( name, O_RDONLY | O_BINARY ); + do infd = open( name, O_RDONLY | O_BINARY ); + while( infd < 0 && errno == EINTR ); if( infd < 0 ) { if( verbosity >= 0 ) @@ -388,7 +407,8 @@ static bool open_outstream( const bool force ) int flags = O_CREAT | O_WRONLY | O_BINARY; if( force ) flags |= O_TRUNC; else flags |= O_EXCL; - outfd = open( output_filename, flags, outfd_mode ); + do outfd = open( output_filename, flags, outfd_mode ); + while( outfd < 0 && errno == EINTR ); if( outfd < 0 && verbosity >= 0 ) { if( errno == EEXIST ) @@ -441,10 +461,14 @@ static void close_and_set_permissions( const struct stat * const in_statsp ) bool warning = false; if( in_statsp ) { + const mode_t mode = in_statsp->st_mode; /* fchown will in many cases return with EPERM, which can be safely ignored. */ - if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && - errno != EPERM ) || - fchmod( outfd, in_statsp->st_mode ) != 0 ) warning = true; + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; } if( close( outfd ) != 0 ) cleanup_and_fail( 1 ); outfd = -1; @@ -515,8 +539,8 @@ static bool next_filename( void ) static int do_compress( struct LZ_Encoder * const encoder, const unsigned long long member_size, - const unsigned long long volume_size, const int infd, - struct Pretty_print * const pp, + const unsigned long long volume_size, + const int infd, struct Pretty_print * const pp, const struct stat * const in_statsp ) { unsigned long long partial_volume_size = 0; @@ -688,7 +712,8 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, const unsigned long long data_position = LZ_decompress_data_position( decoder ); const unsigned long long member_size = LZ_decompress_member_position( decoder ); Pp_show_msg( pp, 0 ); - if( verbosity >= 3 ) show_header( decoder ); + if( verbosity >= 3 ) + show_header( LZ_decompress_dictionary_size( decoder ) ); if( verbosity >= 2 && data_position > 0 && member_size > 0 ) fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", (double)data_position / member_size, @@ -714,10 +739,7 @@ int do_decompress( struct LZ_Decoder * const decoder, const int infd, return 2; } if( lz_errno == LZ_mem_error ) - { - Pp_show_msg( pp, "Not enough memory. Find a machine with more memory" ); - return 1; - } + { Pp_show_msg( pp, "Not enough memory" ); return 1; } if( verbosity >= 0 ) { Pp_show_msg( pp, 0 ); @@ -746,10 +768,7 @@ int decompress( const int infd, struct Pretty_print * const pp, int retval; if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) - { - Pp_show_msg( pp, "Not enough memory. Find a machine with more memory" ); - retval = 1; - } + { Pp_show_msg( pp, "Not enough memory" ); retval = 1; } else retval = do_decompress( decoder, infd, pp, testing ); LZ_decompress_close( decoder ); @@ -773,27 +792,6 @@ static void set_signals( void ) } -static void Pp_init( struct Pretty_print * const pp, - const char * const filenames[], const int num_filenames ) - { - unsigned stdin_name_len; - int i; - pp->name = 0; - pp->stdin_name = "(stdin)"; - pp->longest_name = 0; - pp->first_post = false; - stdin_name_len = strlen( pp->stdin_name ); - - for( i = 0; i < num_filenames; ++i ) - { - const char * const s = filenames[i]; - const int len = ( (strcmp( s, "-" ) == 0) ? stdin_name_len : strlen( s ) ); - if( len > pp->longest_name ) pp->longest_name = len; - } - if( pp->longest_name == 0 ) pp->longest_name = stdin_name_len; - } - - void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity >= 0 ) @@ -896,7 +894,7 @@ int main( const int argc, const char * const argv[] ) internal_error( "bad library version_string" ); if( !ap_init( &parser, argc, argv, options, 0 ) ) - { show_error( "Memory exhausted.", 0, false ); return 1; } + { show_error( "Not enough memory.", 0, false ); return 1; } if( ap_error( &parser ) ) /* bad option */ { show_error( ap_error( &parser ), 0, true ); return 1; } @@ -907,8 +905,7 @@ int main( const int argc, const char * const argv[] ) if( !code ) break; /* no more options */ switch( code ) { - case '0': - case '1': case '2': case '3': case '4': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': encoder_options = option_mapping[code-'0']; break; case 'b': member_size = getnum( arg, 100000, max_member_size ); break; diff --git a/testsuite/check.sh b/testsuite/check.sh index d7d7d0a..6665469 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2014 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -29,13 +29,17 @@ fail=0 printf "testing lzlib-%s..." "$2" +"${LZIP}" -cqm4 in > /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi +"${LZIP}" -cqm274 in > /dev/null +if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs-1 in > /dev/null if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs0 in > /dev/null if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -cqs4095 in > /dev/null if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi -"${LZIP}" -cqm274 in > /dev/null +"${LZIP}" -cqs513MiB in > /dev/null if [ $? = 1 ] ; then printf . ; else fail=1 ; printf - ; fi "${LZIP}" -tq in if [ $? = 2 ] ; then printf . ; else fail=1 ; printf - ; fi @@ -104,8 +108,16 @@ cmp in anyothername.out || fail=1 printf . cat in in > in2 || framework_failure -"${LZIP}" < in2 > out2 || fail=1 -"${LZIP}" -d < out2 > copy2 || fail=1 +"${LZIP}" -o copy2 < in2 || fail=1 +"${LZIP}" -t copy2.lz || fail=1 +printf . +"${LZIP}" -cd copy2.lz > copy2 || fail=1 +cmp in2 copy2 || fail=1 +printf . + +printf "garbage" >> copy2.lz || framework_failure +printf "to be overwritten" > copy2 || framework_failure +"${LZIP}" -df copy2.lz || fail=1 cmp in2 copy2 || fail=1 printf . -- cgit v1.2.3