diff options
-rw-r--r-- | ChangeLog | 35 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | Makefile.in | 41 | ||||
-rw-r--r-- | NEWS | 22 | ||||
-rw-r--r-- | README | 17 | ||||
-rw-r--r-- | bbexample.cc | 210 | ||||
-rwxr-xr-x | configure | 8 | ||||
-rw-r--r-- | decoder.cc | 36 | ||||
-rw-r--r-- | decoder.h | 33 | ||||
-rw-r--r-- | doc/lzlib.info | 69 | ||||
-rw-r--r-- | doc/lzlib.texinfo | 49 | ||||
-rw-r--r-- | encoder.cc | 87 | ||||
-rw-r--r-- | encoder.h | 77 | ||||
-rw-r--r-- | lzcheck.cc | 36 | ||||
-rw-r--r-- | lzip.h | 105 | ||||
-rw-r--r-- | lzlib.cc | 32 | ||||
-rw-r--r-- | lzlib.h | 4 | ||||
-rw-r--r-- | main.cc | 186 | ||||
-rwxr-xr-x | testsuite/check.sh | 47 | ||||
-rw-r--r-- | testsuite/test.txt (renamed from testsuite/test1) | 0 | ||||
-rw-r--r-- | testsuite/test_sync.lz | bin | 0 -> 11658 bytes | |||
-rw-r--r-- | testsuite/test_v0.lz (renamed from testsuite/test1.lz) | bin | 11540 -> 11540 bytes | |||
-rw-r--r-- | testsuite/test_v1.lz | bin | 0 -> 11548 bytes |
23 files changed, 714 insertions, 384 deletions
@@ -1,3 +1,16 @@ +2011-01-03 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 1.1 released. + * Compression time has been reduced by 2%. + * All declarations not belonging to the API have been + encapsulated in the namespace `Lzlib'. + * testsuite: `test1' renamed to `test.txt'. Added new tests. + * Match length limits set by options -1 to -9 of minilzip have + been changed to match those of lzip 1.11. + * main.cc: Set stdin/stdout in binary mode on OS2. + * bbexample.cc: New file containing example functions for + buffer-to-buffer compression/decompression. + 2010-05-08 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 1.0 released. @@ -5,7 +18,7 @@ * Added new function LZ_decompress_member_version. * Added new function LZ_decompress_dictionary_size. * Added new function LZ_decompress_data_crc. - * Variables declared "extern" have been encapsulated in a + * Variables declared `extern' have been encapsulated in a namespace. * main.cc: Fixed warning about fchown's return value being ignored. * decoder.h: Input_buffer integrated in Range_decoder. @@ -14,7 +27,7 @@ * Version 0.9 released. * Compression time has been reduced by 8%. - * main.cc: New constant "o_binary". + * main.cc: New constant `o_binary'. 2010-01-17 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -23,18 +36,18 @@ * Added new function LZ_decompress_sync_to_member. * Added new function LZ_decompress_write_size. * Added new function LZ_strerror. - * lzlib.h: API change. Replaced "enum" with functions for values + * lzlib.h: API change. Replaced `enum' with functions for values of dictionary size limits to make interface names consistent. - * lzlib.h: API change. "LZ_errno" replaced with "LZ_Errno". - * lzlib.h: API change. Replaced "void *" with "struct LZ_Encoder *" - and "struct LZ_Decoder *" to make interface type safe. + * lzlib.h: API change. `LZ_errno' replaced with `LZ_Errno'. + * lzlib.h: API change. Replaced `void *' with `struct LZ_Encoder *' + and `struct LZ_Decoder *' to make interface type safe. * decoder.cc: Truncated member trailer is now correctly detected. * encoder.cc: Matchfinder::reset now also clears at_stream_end_, allowing LZ_compress_restart_member to restart a finished stream. * lzlib.cc: Accept only query or close operations after a fatal error has occurred. * Shared version of lzlib is no more built by default. - * testsuite/check.sh: Use "test1" instead of "COPYING" for testing. + * testsuite/check.sh: Use `test1' instead of `COPYING' for testing. 2009-10-20 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -60,7 +73,7 @@ * Added new function LZ_compress_sync_flush. * Added new function LZ_compress_write_size. * Decompression speed has been improved. - * Added chapter "Buffering" to the manual. + * Added chapter `Buffering' to the manual. 2009-05-03 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -78,8 +91,8 @@ * Version 0.1 released. -Copyright (C) 2009, 2010 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, -but just in case, I give you unlimited permission to copy, distribute -and modify it. +but just in case, you have unlimited permission to copy, distribute and +modify it. @@ -1,7 +1,7 @@ Requirements ------------ You will need a C++ compiler. -I use gcc 4.3.4 and 3.3.6, but the code should compile with any +I use gcc 4.3.5 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -50,7 +50,7 @@ After running `configure', you can run `make' and `make install' as explained above. -Copyright (C) 2009, 2010 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. diff --git a/Makefile.in b/Makefile.in index a68698a..94e0114 100644 --- a/Makefile.in +++ b/Makefile.in @@ -20,22 +20,25 @@ objs = arg_parser.o main.o all : $(progname) $(progname_shared) lib$(libname).a: $(lib_objs) - $(AR) -rcs lib$(libname).a $(lib_objs) + $(AR) -rcs $@ $^ lib$(libname).so.$(pkgversion) : $(sh_lib_objs) - $(CXX) -shared -Wl,--soname=lib$(libname).so.$(soversion) -o lib$(libname).so.$(pkgversion) $(sh_lib_objs) + $(CXX) -shared -Wl,--soname=lib$(libname).so.$(soversion) -o $@ $^ $(progname) : $(objs) lib$(libname).a - $(CXX) $(LDFLAGS) -o $(progname) $(objs) lib$(libname).a + $(CXX) $(LDFLAGS) -o $@ $^ $(progname)_shared : $(objs) lib$(libname).so.$(pkgversion) - $(CXX) $(LDFLAGS) -o $(progname)_shared $(objs) lib$(libname).so.$(pkgversion) + $(CXX) $(LDFLAGS) -o $@ $^ $(progname)_profiled : $(objs) lib$(libname).a - $(CXX) $(LDFLAGS) -pg -o $(progname)_profiled $(objs) lib$(libname).a + $(CXX) $(LDFLAGS) -pg -o $@ $^ + +bbexample : bbexample.o lib$(libname).a + $(CXX) $(LDFLAGS) -o $@ $^ lzcheck : lzcheck.o lib$(libname).a - $(CXX) $(LDFLAGS) -o lzcheck lzcheck.o lib$(libname).a + $(CXX) $(LDFLAGS) -o $@ $^ main.o : main.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< @@ -44,13 +47,13 @@ main.o : main.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< sh_decoder.o : decoder.cc - $(CXX) -fpic -fPIC $(CPPFLAGS) $(CXXFLAGS) -c -o sh_decoder.o $< + $(CXX) -fpic -fPIC $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< sh_encoder.o : encoder.cc - $(CXX) -fpic -fPIC $(CPPFLAGS) $(CXXFLAGS) -c -o sh_encoder.o $< + $(CXX) -fpic -fPIC $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< sh_lzlib.o : lzlib.cc - $(CXX) -fpic -fPIC $(CPPFLAGS) $(CXXFLAGS) -c -o sh_lzlib.o $< + $(CXX) -fpic -fPIC $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< $(lib_objs) : Makefile lzlib.h lzip.h $(sh_lib_objs) : Makefile lzlib.h lzip.h @@ -63,6 +66,7 @@ sh_decoder.o : decoder.h sh_encoder.o : encoder.h sh_lzlib.o : decoder.h encoder.h main.o : arg_parser.h lzlib.h +bbexample.o : Makefile lzlib.h lzcheck.o : Makefile lzlib.h @@ -76,14 +80,14 @@ $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texinfo man : $(VPATH)/doc/$(progname).1 $(VPATH)/doc/$(progname).1 : $(progname) - help2man -n 'test program for the lzlib library' \ - -o $(VPATH)/doc/$(progname).1 --no-info ./$(progname) + help2man -n 'reduces the size of files' \ + -o $@ --no-info ./$(progname) Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status -check : all lzcheck - @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite +check : all bbexample lzcheck + @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) install : all install-info if [ ! -d "$(DESTDIR)$(includedir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(includedir)" ; fi @@ -112,7 +116,7 @@ install-man : install-strip : all $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install -uninstall : uninstall-info +uninstall : uninstall-info uninstall-man -rm -f "$(DESTDIR)$(includedir)/$(libname)lib.h" -rm -f "$(DESTDIR)$(libdir)/lib$(libname).a" -rm -f "$(DESTDIR)$(libdir)/lib$(libname).so.$(soversion)" @@ -139,17 +143,18 @@ dist : doc $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texinfo \ $(DISTNAME)/testsuite/check.sh \ - $(DISTNAME)/testsuite/test1 \ - $(DISTNAME)/testsuite/test1.lz \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test_sync.lz \ + $(DISTNAME)/testsuite/test_v[01].lz \ $(DISTNAME)/*.h \ $(DISTNAME)/*.cc rm -f $(DISTNAME) lzip -v -9 $(DISTNAME).tar clean : - -rm -f $(progname) $(progname)_profiled $(objs) $(lib_objs) *.a + -rm -f $(progname) $(progname)_profiled $(objs) -rm -f $(progname)_shared $(sh_lib_objs) *.so.$(pkgversion) - -rm -f lzcheck.o + -rm -f bbexample bbexample.o lzcheck lzcheck.o $(lib_objs) *.a distclean : clean -rm -f Makefile config.status *.tar *.tar.lz @@ -1,14 +1,16 @@ -Changes in version 1.0: +Changes in version 1.1: -New functions: - LZ_decompress_member_finished. - LZ_decompress_member_version. - LZ_decompress_dictionary_size. - LZ_decompress_data_crc. +Compression time has been reduced by 2%. -Variables declared "extern" have been encapsulated in a namespace. +All declarations not belonging to the API have been encapsulated in the +namespace "Lzlib". -A warning about fchown's return value being ignored has been fixed. +New tests have been added to the testsuite. -Input_buffer has been integrated in Range_decoder, simplifying the code -and making decompression slightly faster. +Match length limits set by options -1 to -9 of minilzip have been +changed to match those of lzip 1.11. + +Minilzip now sets stdin and stdout in binary mode on OS2. + +The file bbexample.cc, containing example functions for buffer-to-buffer +compression/decompression, has been added. @@ -2,21 +2,22 @@ Description Lzlib is a data compression library providing in-memory LZMA compression and decompression functions, including integrity checking of the -uncompressed data. The compressed data format used by the library is the +decompressed data. The compressed data format used by the library is the lzip format. The functions and variables forming the interface of the compression -library are declared in the file lzlib.h. An usage example of the -library is given in the file main.cc. +library are declared in the file lzlib.h. Usage examples of the library +are given in the files main.cc and bbexample.cc from the source +distribution. -Compression/decompression is done by repeteadly calling a couple of +Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data has been processed by the library. This interface is safer and less error prone than the traditional zlib interface. Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation -of the corresponding uncompressed data streams. Integrity testing of +of the corresponding decompressed data streams. Integrity testing of concatenated compressed data streams is also supported. All the library functions are thread safe. The library does not install @@ -29,11 +30,11 @@ Igor Pavlov. For a description of the LZMA algorithm, see the Lzip manual. -Copyright (C) 2009, 2010 Antonio Diaz Diaz. +Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute and modify it. The file Makefile.in is a data file used by configure to produce the -Makefile. It has the same copyright owner and permissions that this -file. +Makefile. It has the same copyright owner and permissions that configure +itself. diff --git a/bbexample.cc b/bbexample.cc new file mode 100644 index 0000000..df91300 --- /dev/null +++ b/bbexample.cc @@ -0,0 +1,210 @@ +/* Buff to buff example - A test program for the lzlib library + Copyright (C) 2010, 2011 Antonio Diaz Diaz. + + This program is free software: you have unlimited permission + to copy, distribute and modify it. + + Usage is: + bbexample filename + + This program is an example of how buffer-to-buffer + compression/decompression can be implemented using lzlib. +*/ + +#ifndef __cplusplus +#include <stdbool.h> +#endif +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> + +#include "lzlib.h" + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1LL) +#endif +#ifndef ULLONG_MAX +#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL +#endif + + +// Compresses `size' bytes from `data'. Returns the address of a +// malloc'd buffer containing the compressed data and its size in +// `*out_sizep'. +// In case of error, returns 0 and does not modify `*out_sizep'. + +uint8_t * bbcompress( const uint8_t * const data, const int size, + int * const out_sizep ) + { + int dict_size = 8 << 20; // 8 MiB + const int match_len_limit = 36; + const long long member_size = LLONG_MAX; + if( dict_size > size ) dict_size = size; + if( dict_size < LZ_min_dictionary_size() ) + dict_size = LZ_min_dictionary_size(); + struct LZ_Encoder * encoder = + LZ_compress_open( dict_size, match_len_limit, member_size ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { LZ_compress_close( encoder ); return 0; } + + const int delta_size = (size < 256) ? 64 : size / 4; // size may be zero + int new_data_size = delta_size; // initial size + uint8_t * new_data = (uint8_t *)malloc( new_data_size ); + if( !new_data ) + { LZ_compress_close( encoder ); return 0; } + + int new_pos = 0; + int written = 0; + bool error = false; + while( true ) + { + if( LZ_compress_write_size( encoder ) > 0 ) + { + if( written < size ) + { + const int wr = LZ_compress_write( encoder, data + written, + size - written ); + if( wr < 0 ) { error = true; break; } + written += wr; + } + if( written >= size ) LZ_compress_finish( encoder ); + } + const int rd = LZ_compress_read( encoder, new_data + new_pos, + new_data_size - new_pos ); + if( rd < 0 ) { error = true; break; } + new_pos += rd; + if( LZ_compress_finished( encoder ) == 1 ) break; + if( new_pos >= new_data_size ) + { + void * const tmp = realloc( new_data, new_data_size + delta_size ); + if( !tmp ) { error = true; break; } + new_data = (uint8_t *)tmp; + new_data_size += delta_size; + } + } + + if( LZ_compress_close( encoder ) < 0 ) error = true; + if( error ) { free( new_data ); return 0; } + *out_sizep = new_pos; + return new_data; + } + + +// Decompresses `size' bytes from `data'. Returns the address of a +// malloc'd buffer containing the decompressed data and its size in +// `*out_sizep'. +// In case of error, returns 0 and does not modify `*out_sizep'. + +uint8_t * bbdecompress( const uint8_t * const data, const int size, + int * const out_sizep ) + { + struct LZ_Decoder * decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { LZ_decompress_close( decoder ); return 0; } + + const int delta_size = size; + int new_data_size = delta_size; // initial size + uint8_t * new_data = (uint8_t *)malloc( new_data_size ); + if( !new_data ) + { LZ_decompress_close( decoder ); return 0; } + + int new_pos = 0; + int written = 0; + bool error = false; + while( true ) + { + if( LZ_decompress_write_size( decoder ) > 0 ) + { + if( written < size ) + { + const int wr = LZ_decompress_write( decoder, data + written, + size - written ); + if( wr < 0 ) { error = true; break; } + written += wr; + } + if( written >= size ) LZ_decompress_finish( decoder ); + } + const int rd = LZ_decompress_read( decoder, new_data + new_pos, + new_data_size - new_pos ); + if( rd < 0 ) { error = true; break; } + new_pos += rd; + if( LZ_decompress_finished( decoder ) == 1 ) break; + if( new_pos >= new_data_size ) + { + void * const tmp = realloc( new_data, new_data_size + delta_size ); + if( !tmp ) { error = true; break; } + new_data = (uint8_t *)tmp; + new_data_size += delta_size; + } + } + + if( LZ_decompress_close( decoder ) < 0 ) error = true; + if( error ) { free( new_data ); return 0; } + *out_sizep = new_pos; + return new_data; + } + + +int main( const int argc, const char * const argv[] ) + { + if( argc < 2 ) + { + fprintf( stderr, "Usage: bbexample filename\n" ); + return 1; + } + + FILE *file = fopen( argv[1], "rb" ); + if( !file ) + { + fprintf( stderr, "bbexample: Can't open file `%s' for reading\n", argv[1] ); + return 1; + } + + const int in_buffer_size = 1 << 20; + uint8_t * const in_buffer = (uint8_t *)malloc( in_buffer_size ); + if( !in_buffer ) + { + fprintf( stderr, "bbexample: Not enough memory.\n" ); + return 1; + } + const int in_size = fread( in_buffer, 1, in_buffer_size, file ); + if( in_size >= in_buffer_size ) + { + fprintf( stderr, "bbexample: Input file `%s' is too big.\n", argv[1] ); + return 1; + } + fclose( file ); + + int mid_size = 0; + uint8_t * const mid_buffer = bbcompress( in_buffer, in_size, &mid_size ); + if( !mid_buffer ) + { + fprintf( stderr, "bbexample: Not enough memory or compress error.\n" ); + return 1; + } + + int out_size = 0; + uint8_t * const out_buffer = bbdecompress( mid_buffer, mid_size, &out_size ); + if( !out_buffer ) + { + fprintf( stderr, "bbexample: Not enough memory or decompress error.\n" ); + return 1; + } + + if( in_size != out_size || + ( out_size > 0 && memcmp( in_buffer, out_buffer, out_size ) ) ) + { + fprintf( stderr, "bbexample: Decompressed data differs from original.\n" ); + return 1; + } + + free( out_buffer ); + free( mid_buffer ); + free( in_buffer ); + return 0; + } @@ -1,16 +1,16 @@ #! /bin/sh # configure script for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute and modify it. # -# Date of this version: 2010-05-08 +# Date of this version: 2011-01-03 args= no_create= pkgname=lzlib -pkgversion=1.0 +pkgversion=1.1 soversion=1 progname=minilzip progname_shared= @@ -183,7 +183,7 @@ echo "LDFLAGS = ${LDFLAGS}" rm -f Makefile cat > Makefile << EOF # Makefile for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. # This file was generated automatically by configure. Do not edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,7 +38,9 @@ #include "decoder.h" -const CRC32 Lzlib_namespace::crc32; +namespace Lzlib { + +const CRC32 crc32; // Seeks a member header and updates `get'. // Returns true if it finds a valid header. @@ -98,7 +100,11 @@ bool LZ_decoder::verify_trailer() { if( !range_decoder.finished() ) trailer.data[i] = range_decoder.get_byte(); - else { error = true; for( ; i < trailer_size; ++i ) trailer.data[i] = 0; } + else + { + error = true; + for( ; i < trailer_size; ++i ) trailer.data[i] = 0; + } } if( member_version == 0 ) trailer.member_size( member_size ); if( !range_decoder.code_is_zero() ) error = true; @@ -132,7 +138,7 @@ int LZ_decoder::decode_member() const int pos_state = data_position() & pos_state_mask; if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 ) { - const uint8_t prev_byte = get_byte( 0 ); + const uint8_t prev_byte = get_prev_byte(); if( state.is_char() ) put_byte( literal_decoder.decode( range_decoder, prev_byte ) ); else @@ -146,12 +152,7 @@ int LZ_decoder::decode_member() if( range_decoder.decode_bit( bm_rep[state()] ) == 1 ) { len = 0; - if( range_decoder.decode_bit( bm_rep0[state()] ) == 0 ) - { - if( range_decoder.decode_bit( bm_len[state()][pos_state] ) == 0 ) - { len = 1; state.set_short_rep(); } - } - else + if( range_decoder.decode_bit( bm_rep0[state()] ) == 1 ) { unsigned int distance; if( range_decoder.decode_bit( bm_rep1[state()] ) == 0 ) @@ -166,15 +167,20 @@ int LZ_decoder::decode_member() rep1 = rep0; rep0 = distance; } + else + { + if( range_decoder.decode_bit( bm_len[state()][pos_state] ) == 0 ) + { state.set_short_rep(); len = 1; } + } if( len == 0 ) { - len = min_match_len + rep_match_len_decoder.decode( range_decoder, pos_state ); state.set_rep(); + len = min_match_len + rep_match_len_decoder.decode( range_decoder, pos_state ); } } else { - unsigned int rep0_saved = rep0; + const unsigned int rep0_saved = rep0; len = min_match_len + len_decoder.decode( range_decoder, pos_state ); const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits ); if( dis_slot < start_dis_model ) rep0 = dis_slot; @@ -207,11 +213,13 @@ int LZ_decoder::decode_member() } return 4; } - if( rep0 >= (unsigned int)dictionary_size ) return 1; } } rep3 = rep2; rep2 = rep1; rep1 = rep0_saved; state.set_match(); + if( rep0 >= (unsigned int)dictionary_size || + ( rep0 >= (unsigned int)put && !partial_data_pos ) ) + return 1; } copy_block( rep0, len ); } @@ -277,3 +285,5 @@ int Circular_buffer::write_data( const uint8_t * const in_buffer, const int in_s } return size; } + +} // end namespace Lzlib @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,8 @@ Public License. */ +namespace Lzlib { + class Range_decoder : public Circular_buffer { enum { min_available_bytes = 8 }; @@ -51,7 +53,8 @@ public: bool finished() const throw() { return at_stream_end_ && !used_bytes(); } int free_bytes() const throw() { if( at_stream_end_ ) return 0; return Circular_buffer::free_bytes(); } - long long member_position() const throw() { return member_pos; } + long long member_position() const throw() + { return member_pos; } void purge() throw() { at_stream_end_ = true; Circular_buffer::reset(); } void reset() throw() { at_stream_end_ = false; Circular_buffer::reset(); } @@ -211,7 +214,8 @@ public: uint8_t decode_matched( Range_decoder & range_decoder, const uint8_t prev_byte, const uint8_t match_byte ) - { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], match_byte ); } + { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)], + match_byte ); } }; @@ -219,9 +223,9 @@ class LZ_decoder : public Circular_buffer { enum { min_free_bytes = max_match_len }; long long partial_data_pos; - const int member_version; const int dictionary_size; uint32_t crc_; + const int member_version; bool member_finished_; bool verify_trailer_pending; unsigned int rep0; // rep[0-3] latest four distances @@ -237,7 +241,7 @@ class LZ_decoder : public Circular_buffer Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; Bit_model bm_align[dis_align_size]; Range_decoder & range_decoder; @@ -245,6 +249,14 @@ class LZ_decoder : public Circular_buffer Len_decoder rep_match_len_decoder; Literal_decoder literal_decoder; + bool verify_trailer(); + + uint8_t get_prev_byte() const throw() + { + const int i = ( ( put > 0 ) ? put : buffer_size ) - 1; + return buffer[i]; + } + uint8_t get_byte( const int distance ) const throw() { int i = put - distance - 1; @@ -278,16 +290,14 @@ class LZ_decoder : public Circular_buffer } } - bool verify_trailer(); - public: LZ_decoder( const File_header & header, Range_decoder & rdec ) : Circular_buffer( std::max( 65536, header.dictionary_size() ) + min_free_bytes ), partial_data_pos( 0 ), - member_version( header.version() ), dictionary_size( header.dictionary_size() ), crc_( 0xFFFFFFFFU ), + member_version( header.version() ), member_finished_( false ), verify_trailer_pending( false ), rep0( 0 ), @@ -301,12 +311,13 @@ public: { return free_bytes() >= min_free_bytes; } uint32_t crc() const throw() { return crc_ ^ 0xFFFFFFFFU; } - int decode_member(); bool member_finished() const throw() { return ( member_finished_ && !used_bytes() ); } - long long member_position() const throw() - { return range_decoder.member_position(); } long long data_position() const throw() { return partial_data_pos + put; } + + int decode_member(); }; + +} // end namespace Lzlib diff --git a/doc/lzlib.info b/doc/lzlib.info index af9f67b..4fddce8 100644 --- a/doc/lzlib.info +++ b/doc/lzlib.info @@ -12,25 +12,25 @@ File: lzlib.info, Node: Top, Next: Introduction, Up: (dir) Lzlib Manual ************ -This manual is for Lzlib (version 1.0, 8 May 2010). +This manual is for Lzlib (version 1.1, 3 January 2011). * Menu: -* Introduction:: Purpose and features of Lzlib -* Library Version:: Checking library version -* Buffering:: Sizes of Lzlib's buffers -* Parameter Limits:: Min / max values for some parameters -* Compression Functions:: Descriptions of the compression functions -* Decompression Functions:: Descriptions of the decompression functions -* Error Codes:: Meaning of codes returned by functions -* Error Messages:: Error messages corresponding to error codes -* Data Format:: Detailed format of the compressed data -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Introduction:: Purpose and features of Lzlib +* Library Version:: Checking library version +* Buffering:: Sizes of Lzlib's buffers +* Parameter Limits:: Min / max values for some parameters +* Compression Functions:: Descriptions of the compression functions +* Decompression Functions:: Descriptions of the decompression functions +* Error Codes:: Meaning of codes returned by functions +* Error Messages:: Error messages corresponding to error codes +* Data Format:: Detailed format of the compressed data +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept Index:: Index of concepts - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -43,14 +43,15 @@ File: lzlib.info, Node: Introduction, Next: Library Version, Prev: Top, Up: Lzlib is a data compression library providing in-memory LZMA compression and decompression functions, including integrity checking of the -uncompressed data. The compressed data format used by the library is the +decompressed data. The compressed data format used by the library is the lzip format. The functions and variables forming the interface of the compression -library are declared in the file `lzlib.h'. An usage example of the -library is given in the file main.cc. +library are declared in the file `lzlib.h'. Usage examples of the +library are given in the files `main.cc' and `bbexample.cc' from the +source distribution. - Compression/decompression is done by repeteadly calling a couple of + Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data has been processed by the library. This interface is safer and less error prone than the traditional zlib interface. @@ -63,7 +64,7 @@ function with a SIZE equal to 0. Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the -concatenation of the corresponding uncompressed data streams. Integrity +concatenation of the corresponding decompressed data streams. Integrity testing of concatenated compressed data streams is also supported. All the library functions are thread safe. The library does not @@ -550,9 +551,11 @@ File: lzlib.info, Node: Examples, Next: Problems, Prev: Data Format, Up: Top 10 A small tutorial with examples ********************************* -This chaper shows the order in which the library functions should be +This chapter shows the order in which the library functions should be called depending on what kind of data stream you want to compress or -decompress. +decompress. See the file `bbexample.cc' in the source distribution for +an example of how buffer-to-buffer compression/decompression can be +implemented using lzlib. Example 1: Normal compression (MEMBER_SIZE > total output). @@ -700,17 +703,17 @@ Concept Index Tag Table: Node: Top219 -Node: Introduction1152 -Node: Library Version2927 -Node: Buffering3572 -Node: Parameter Limits4692 -Node: Compression Functions5649 -Node: Decompression Functions11695 -Node: Error Codes17766 -Node: Error Messages19705 -Node: Data Format20284 -Node: Examples22254 -Node: Problems25967 -Node: Concept Index26539 +Node: Introduction1310 +Node: Library Version3135 +Node: Buffering3780 +Node: Parameter Limits4900 +Node: Compression Functions5857 +Node: Decompression Functions11903 +Node: Error Codes17974 +Node: Error Messages19913 +Node: Data Format20492 +Node: Examples22462 +Node: Problems26328 +Node: Concept Index26900 End Tag Table diff --git a/doc/lzlib.texinfo b/doc/lzlib.texinfo index ef46af9..d03d817 100644 --- a/doc/lzlib.texinfo +++ b/doc/lzlib.texinfo @@ -5,8 +5,8 @@ @finalout @c %**end of header -@set UPDATED 8 May 2010 -@set VERSION 1.0 +@set UPDATED 3 January 2011 +@set VERSION 1.1 @dircategory Data Compression @direntry @@ -14,6 +14,7 @@ @end direntry +@ifnothtml @titlepage @title Lzlib @subtitle A compression library for lzip files @@ -25,6 +26,7 @@ @end titlepage @contents +@end ifnothtml @node Top @top @@ -32,22 +34,22 @@ This manual is for Lzlib (version @value{VERSION}, @value{UPDATED}). @menu -* Introduction:: Purpose and features of Lzlib -* Library Version:: Checking library version -* Buffering:: Sizes of Lzlib's buffers -* Parameter Limits:: Min / max values for some parameters -* Compression Functions:: Descriptions of the compression functions -* Decompression Functions:: Descriptions of the decompression functions -* Error Codes:: Meaning of codes returned by functions -* Error Messages:: Error messages corresponding to error codes -* Data Format:: Detailed format of the compressed data -* Examples:: A small tutorial with examples -* Problems:: Reporting bugs -* Concept Index:: Index of concepts +* Introduction:: Purpose and features of Lzlib +* Library Version:: Checking library version +* Buffering:: Sizes of Lzlib's buffers +* Parameter Limits:: Min / max values for some parameters +* Compression Functions:: Descriptions of the compression functions +* Decompression Functions:: Descriptions of the decompression functions +* Error Codes:: Meaning of codes returned by functions +* Error Messages:: Error messages corresponding to error codes +* Data Format:: Detailed format of the compressed data +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept Index:: Index of concepts @end menu @sp 1 -Copyright @copyright{} 2009, 2010 Antonio Diaz Diaz. +Copyright @copyright{} 2009, 2010, 2011 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute and modify it. @@ -59,14 +61,15 @@ to copy, distribute and modify it. Lzlib is a data compression library providing in-memory LZMA compression and decompression functions, including integrity checking of the -uncompressed data. The compressed data format used by the library is the +decompressed data. The compressed data format used by the library is the lzip format. The functions and variables forming the interface of the compression -library are declared in the file @samp{lzlib.h}. An usage example of the -library is given in the file main.cc. +library are declared in the file @samp{lzlib.h}. Usage examples of the +library are given in the files @samp{main.cc} and @samp{bbexample.cc} +from the source distribution. -Compression/decompression is done by repeteadly calling a couple of +Compression/decompression is done by repeatedly calling a couple of read/write functions until all the data has been processed by the library. This interface is safer and less error prone than the traditional zlib interface. @@ -79,7 +82,7 @@ the data to be compressed in advance, just call the read function with a Lzlib will correctly decompress a data stream which is the concatenation of two or more compressed data streams. The result is the concatenation -of the corresponding uncompressed data streams. Integrity testing of +of the corresponding decompressed data streams. Integrity testing of concatenated compressed data streams is also supported. All the library functions are thread safe. The library does not install @@ -617,9 +620,11 @@ safe recovery of undamaged members from multimember files. @chapter A small tutorial with examples @cindex examples -This chaper shows the order in which the library functions should be +This chapter shows the order in which the library functions should be called depending on what kind of data stream you want to compress or -decompress. +decompress. See the file @samp{bbexample.cc} in the source distribution +for an example of how buffer-to-buffer compression/decompression can be +implemented using lzlib. @sp 1 @noindent @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,8 +38,10 @@ #include "encoder.h" -const Dis_slots Lzlib_namespace::dis_slots; -const Prob_prices Lzlib_namespace::prob_prices; +namespace Lzlib { + +const Dis_slots dis_slots; +const Prob_prices prob_prices; int Matchfinder::write_data( const uint8_t * const in_buffer, const int in_size ) throw() @@ -62,16 +64,17 @@ Matchfinder::Matchfinder( const int dict_size, const int len_limit ) buffer_size( ( 2 * std::max( 65536, dictionary_size_ ) ) + before_size + after_size ), buffer( new( std::nothrow ) uint8_t[buffer_size] ), + prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ), + prev_pos_tree( new( std::nothrow ) int32_t[2*dictionary_size_] ), pos( 0 ), cyclic_pos( 0 ), stream_pos( 0 ), pos_limit( buffer_size - after_size ), match_len_limit_( len_limit ), - prev_positions( new( std::nothrow ) int32_t[num_prev_positions] ), + cycles( ( len_limit < max_match_len ) ? 16 + ( len_limit / 2 ) : 256 ), at_stream_end_( false ), been_flushed( false ) { - prev_pos_tree = new( std::nothrow ) int32_t[2*dictionary_size_]; if( !buffer || !prev_positions || !prev_pos_tree ) { if( prev_pos_tree ) delete[] prev_pos_tree; @@ -102,7 +105,8 @@ bool Matchfinder::move_pos() throw() if( ++cyclic_pos >= dictionary_size_ ) cyclic_pos = 0; if( ++pos >= pos_limit ) { - if( pos > stream_pos ) { pos = stream_pos; return false; } + if( pos > stream_pos ) + { pos = stream_pos; return false; } else { const int offset = pos - dictionary_size_ - before_size; @@ -123,15 +127,14 @@ bool Matchfinder::move_pos() throw() int Matchfinder::longest_match_len( int * const distances ) throw() { - int idx0 = cyclic_pos << 1; - int idx1 = idx0 + 1; + int32_t * ptr0 = prev_pos_tree + ( cyclic_pos << 1 ); + int32_t * ptr1 = ptr0 + 1; int len_limit = match_len_limit_; if( len_limit > available_bytes() ) { been_flushed = true; len_limit = available_bytes(); - if( len_limit < 4 ) - { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; return 0; } + if( len_limit < 4 ) { *ptr0 = *ptr1 = -1; return 0; } } int maxlen = min_match_len - 1; @@ -166,10 +169,9 @@ int Matchfinder::longest_match_len( int * const distances ) throw() int len = 0, len0 = 0, len1 = 0; - for( int count = 16 + ( match_len_limit_ / 2 ); ; ) + for( int count = cycles; ; ) { - if( newpos < min_pos || --count < 0 ) - { prev_pos_tree[idx0] = prev_pos_tree[idx1] = -1; break; } + if( newpos < min_pos || --count < 0 ) { *ptr0 = *ptr1 = -1; break; } const uint8_t * const newdata = buffer + newpos; if( been_flushed ) len = 0; while( len < len_limit && newdata[len] == data[len] ) ++len; @@ -177,30 +179,31 @@ int Matchfinder::longest_match_len( int * const distances ) throw() const int delta = pos - newpos; if( distances ) while( maxlen < len ) distances[++maxlen] = delta - 1; - const int newidx = ( cyclic_pos - delta + - ( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ ) ) << 1; + int32_t * const newptr = prev_pos_tree + + ( ( cyclic_pos - delta + + ( ( cyclic_pos >= delta ) ? 0 : dictionary_size_ ) ) << 1 ); if( len < len_limit ) { if( newdata[len] < data[len] ) { - prev_pos_tree[idx0] = newpos; - idx0 = newidx + 1; - newpos = prev_pos_tree[idx0]; + *ptr0 = newpos; + ptr0 = newptr + 1; + newpos = *ptr0; len0 = len; if( len1 < len ) len = len1; } else { - prev_pos_tree[idx1] = newpos; - idx1 = newidx; - newpos = prev_pos_tree[idx1]; + *ptr1 = newpos; + ptr1 = newptr; + newpos = *ptr1; len1 = len; if( len0 < len ) len = len0; } } else { - prev_pos_tree[idx0] = prev_pos_tree[newidx]; - prev_pos_tree[idx1] = prev_pos_tree[newidx+1]; + *ptr0 = newptr[0]; + *ptr1 = newptr[1]; break; } } @@ -250,6 +253,17 @@ void LZ_encoder::fill_align_prices() throw() void LZ_encoder::fill_distance_prices() throw() { + for( int dis = start_dis_model; dis < modeled_distances; ++dis ) + { + const int dis_slot = dis_slots.table( dis ); + const int direct_bits = ( dis_slot >> 1 ) - 1; + const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; + const int price = + price_symbol_reversed( bm_dis + base - dis_slot, dis - base, direct_bits ); + for( int dis_state = 0; dis_state < max_dis_states; ++dis_state ) + dis_prices[dis_state][dis] = price; + } + for( int dis_state = 0; dis_state < max_dis_states; ++dis_state ) { int * const dsp = dis_slot_prices[dis_state]; @@ -266,13 +280,7 @@ void LZ_encoder::fill_distance_prices() throw() for( ; dis < start_dis_model; ++dis ) dp[dis] = dsp[dis]; for( ; dis < modeled_distances; ++dis ) - { - const int dis_slot = dis_slots[dis]; - const int direct_bits = ( dis_slot >> 1 ) - 1; - const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits; - dp[dis] = dsp[dis_slot] + - price_symbol_reversed( bm_dis + base - dis_slot, dis - base, direct_bits ); - } + dp[dis] += dsp[dis_slots.table( dis )]; } } @@ -415,7 +423,8 @@ int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances], const uint8_t cur_byte = matchfinder[0]; const uint8_t match_byte = matchfinder[-cur_trial.reps[0]-1]; - int next_price = cur_trial.price + price0( bm_match[cur_trial.state()][pos_state] ); + int next_price = cur_trial.price + + price0( bm_match[cur_trial.state()][pos_state] ); if( cur_trial.state.is_char() ) next_price += literal_encoder.price_symbol( prev_byte, cur_byte ); else @@ -519,7 +528,7 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, matchfinder( mf ), len_encoder( matchfinder.match_len_limit() ), rep_match_len_encoder( matchfinder.match_len_limit() ), - num_dis_slots( 2 * File_header::real_bits( matchfinder.dictionary_size() - 1 ) ), + num_dis_slots( 2 * real_bits( matchfinder.dictionary_size() - 1 ) ), fill_counter( 0 ), member_finished_( false ) { @@ -533,6 +542,7 @@ LZ_encoder::LZ_encoder( Matchfinder & mf, const File_header & header, bool LZ_encoder::encode_member( const bool finish ) { + const int fill_count = ( matchfinder.match_len_limit() > 12 ) ? 512 : 2048; State & state = main_state; if( member_finished_ ) return true; if( range_encoder.member_position() >= member_size_limit ) @@ -543,9 +553,9 @@ bool LZ_encoder::encode_member( const bool finish ) { if( matchfinder.available_bytes() < 4 && !matchfinder.at_stream_end() ) return true; - range_encoder.encode_bit( bm_match[state()][0], 0 ); const uint8_t prev_byte = 0; const uint8_t cur_byte = matchfinder[0]; + range_encoder.encode_bit( bm_match[state()][0], 0 ); literal_encoder.encode( range_encoder, prev_byte, cur_byte ); crc32.update( crc_, cur_byte ); if( !move_pos( 1 ) ) return false; @@ -560,7 +570,8 @@ bool LZ_encoder::encode_member( const bool finish ) } if( !matchfinder.enough_available_bytes() || !range_encoder.enough_free_bytes() ) return true; - if( fill_counter <= 0 ) { fill_distance_prices(); fill_counter = 512; } + if( fill_counter <= 0 ) + { fill_distance_prices(); fill_counter = fill_count; } int ahead = sequence_optimizer( rep_distances, state ); if( ahead <= 0 ) return false; @@ -568,7 +579,8 @@ bool LZ_encoder::encode_member( const bool finish ) for( int i = 0; ; ) { - const int pos_state = ( matchfinder.data_position() - ahead ) & pos_state_mask; + const int pos_state = + ( matchfinder.data_position() - ahead ) & pos_state_mask; const int dis = trials[i].dis; const int len = trials[i].price; @@ -584,7 +596,8 @@ bool LZ_encoder::encode_member( const bool finish ) else { const uint8_t match_byte = matchfinder[-ahead-rep_distances[0]-1]; - literal_encoder.encode_matched( range_encoder, prev_byte, match_byte, cur_byte ); + literal_encoder.encode_matched( range_encoder, + prev_byte, cur_byte, match_byte ); } state.set_char(); } @@ -630,3 +643,5 @@ bool LZ_encoder::encode_member( const bool finish ) } } } + +} // end namespace Lzlib @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,8 +25,10 @@ Public License. */ -const int max_num_trials = 1 << 12; -const int price_shift = 6; +namespace Lzlib { + +enum { max_num_trials = 1 << 12, + price_shift = 6 }; class Dis_slots { @@ -45,6 +47,8 @@ public: } } + unsigned char table( const int dis ) const throw() { return data[dis]; } + int operator[]( const uint32_t dis ) const throw() { if( dis < (1 << 12) ) return data[dis]; @@ -53,8 +57,7 @@ public: } }; -namespace Lzlib_namespace { extern const Dis_slots dis_slots; } -using Lzlib_namespace::dis_slots; +extern const Dis_slots dis_slots; class Prob_prices @@ -65,13 +68,13 @@ public: Prob_prices() { const int num_bits = ( bit_model_total_bits - 2 ); - for( int i = num_bits - 1; i >= 0; --i ) + int j = 1, end = 2; + data[0] = bit_model_total_bits << price_shift; + for( int i = num_bits - 1; i >= 0; --i, end <<= 1 ) { - int start = 1 << ( num_bits - i - 1 ); - int end = 1 << ( num_bits - i); - for( int j = start; j < end; ++j ) - data[j] = (i << price_shift) + - ( ((end - j) << price_shift) >> (num_bits - i - 1) ); + for( ; j < end; ++j ) + data[j] = ( i << price_shift ) + + ( ( (end - j) << price_shift ) >> ( num_bits - i - 1 ) ); } } @@ -79,8 +82,7 @@ public: { return data[probability >> 2]; } }; -namespace Lzlib_namespace { extern const Prob_prices prob_prices; } -using Lzlib_namespace::prob_prices; +extern const Prob_prices prob_prices; inline int price0( const Bit_model & bm ) throw() @@ -95,8 +97,8 @@ inline int price_bit( const Bit_model & bm, const int bit ) throw() inline int price_symbol( const Bit_model bm[], int symbol, const int num_bits ) throw() { - symbol |= ( 1 << num_bits ); int price = 0; + symbol |= ( 1 << num_bits ); while( symbol > 1 ) { const int bit = symbol & 1; @@ -165,14 +167,15 @@ class Matchfinder long long partial_data_pos; const int dictionary_size_; // bytes to keep in buffer before pos const int buffer_size; - uint8_t * const buffer; - int pos; - int cyclic_pos; + uint8_t * const buffer; // input buffer + int32_t * const prev_positions; // last seen position of key + int32_t * const prev_pos_tree; + int pos; // current pos in buffer + int cyclic_pos; // current pos in dictionary int stream_pos; // first byte not yet read from file const int pos_limit; // when reached, a new block must be read const int match_len_limit_; - int32_t * const prev_positions; // last seen position of key - int32_t * prev_pos_tree; + const int cycles; bool at_stream_end_; // stream_pos shows real end of file bool been_flushed; @@ -187,8 +190,8 @@ public: int available_bytes() const throw() { return stream_pos - pos; } long long data_position() const throw() { return partial_data_pos + pos; } int dictionary_size() const throw() { return dictionary_size_; } - void flushing( const bool b ) throw() { at_stream_end_ = b; } bool finished() const throw() { return at_stream_end_ && pos >= stream_pos; } + void flushing( const bool b ) throw() { at_stream_end_ = b; } int free_bytes() const throw() { if( at_stream_end_ ) return 0; return buffer_size - stream_pos; } int match_len_limit() const throw() { return match_len_limit_; } @@ -258,6 +261,9 @@ public: ff_count( 0 ), cache( 0 ) {} + long long member_position() const throw() + { return partial_member_pos + used_bytes() + ff_count; } + bool enough_free_bytes() const throw() { return free_bytes() >= min_free_bytes; } @@ -277,9 +283,6 @@ public: cache = 0; } - long long member_position() const throw() - { return partial_member_pos + used_bytes() + ff_count; } - void encode( const int symbol, const int num_bits ) { for( int i = num_bits - 1; i >= 0; --i ) @@ -408,24 +411,28 @@ class Literal_encoder { return ( prev_byte >> ( 8 - literal_context_bits ) ); } public: - void encode( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t symbol ) + void encode( Range_encoder & range_encoder, + uint8_t prev_byte, uint8_t symbol ) { range_encoder.encode_tree( bm_literal[lstate(prev_byte)], symbol, 8 ); } - void encode_matched( Range_encoder & range_encoder, uint8_t prev_byte, uint8_t match_byte, uint8_t symbol ) - { range_encoder.encode_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); } - - int price_matched( uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) const throw() - { return ::price_matched( bm_literal[lstate(prev_byte)], symbol, match_byte ); } + void encode_matched( Range_encoder & range_encoder, + uint8_t prev_byte, uint8_t symbol, uint8_t match_byte ) + { range_encoder.encode_matched( bm_literal[lstate(prev_byte)], + symbol, match_byte ); } int price_symbol( uint8_t prev_byte, uint8_t symbol ) const throw() - { return ::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); } + { return Lzlib::price_symbol( bm_literal[lstate(prev_byte)], symbol, 8 ); } + + int price_matched( uint8_t prev_byte, uint8_t symbol, + uint8_t match_byte ) const throw() + { return Lzlib::price_matched( bm_literal[lstate(prev_byte)], + symbol, match_byte ); } }; class LZ_encoder { - enum { dis_align_mask = dis_align_size - 1, - infinite_price = 0x0FFFFFFF, + enum { infinite_price = 0x0FFFFFFF, max_marker_size = 16, num_rep_distances = 4 }; // must be 4 @@ -451,7 +458,7 @@ class LZ_encoder Bit_model bm_rep2[State::states]; Bit_model bm_len[State::states][pos_states]; Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits]; - Bit_model bm_dis[modeled_distances-end_dis_model]; + Bit_model bm_dis[modeled_distances-end_dis_model+1]; Bit_model bm_align[dis_align_size]; Matchfinder & matchfinder; @@ -525,7 +532,7 @@ class LZ_encoder price += dis_prices[dis_state][dis]; else price += dis_slot_prices[dis_state][dis_slots[dis]] + - align_prices[dis & dis_align_mask]; + align_prices[dis & (dis_align_size - 1)]; return price; } @@ -604,3 +611,5 @@ public: long long member_position() const throw() { return range_encoder.member_position(); } }; + +} // end namespace Lzlib @@ -1,5 +1,5 @@ /* Lzcheck - A test program for the lzlib library - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you have unlimited permission to copy, distribute and modify it. @@ -39,7 +39,7 @@ uint8_t mid_buffer[buffer_size]; uint8_t out_buffer[buffer_size]; -int main( const int argc, const char * argv[] ) +int main( const int argc, const char * const argv[] ) { if( argc < 2 ) { @@ -50,13 +50,14 @@ int main( const int argc, const char * argv[] ) FILE *file = std::fopen( argv[1], "rb" ); if( !file ) { - std::fprintf( stderr, "Can't open file `%s' for reading\n", argv[1] ); + std::fprintf( stderr, + "lzcheck: Can't open file `%s' for reading\n", argv[1] ); return 1; } -// std::fprintf( stderr, "lzcheck: testing file `%s'\n", argv[1] ); +// std::fprintf( stderr, "lzcheck: Testing file `%s'\n", argv[1] ); const int dictionary_size = 1 << 20; - const int match_len_limit = 80; + const int match_len_limit = 36; const long long member_size = LLONG_MAX; LZ_Encoder * encoder = LZ_compress_open( dictionary_size, match_len_limit, member_size ); @@ -66,10 +67,11 @@ int main( const int argc, const char * argv[] ) LZ_compress_close( encoder ); if( mem_error ) { - std::fprintf( stderr, "not enough memory.\n" ); + std::fprintf( stderr, "lzcheck: Not enough memory.\n" ); return 1; } - std::fprintf( stderr, "internal error: invalid argument to encoder.\n" ); + std::fprintf( stderr, + "lzcheck: internal error: Invalid argument to encoder.\n" ); return 3; } @@ -77,7 +79,7 @@ int main( const int argc, const char * argv[] ) if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { LZ_decompress_close( decoder ); - std::fprintf( stderr, "not enough memory.\n" ); + std::fprintf( stderr, "lzcheck: Not enough memory.\n" ); return 1; } @@ -96,7 +98,7 @@ int main( const int argc, const char * argv[] ) const int mid_size = LZ_compress_read( encoder, mid_buffer, buffer_size ); if( mid_size < 0 ) { - std::fprintf( stderr, "LZ_compress_read error: %s.\n", + std::fprintf( stderr, "lzcheck: LZ_compress_read error: %s.\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } @@ -104,14 +106,14 @@ int main( const int argc, const char * argv[] ) const int out_size = LZ_decompress_read( decoder, out_buffer, buffer_size ); if( out_size < 0 ) { - std::fprintf( stderr, "LZ_decompress_read error: %s.\n", + std::fprintf( stderr, "lzcheck: LZ_decompress_read error: %s.\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; break; } if( out_size != in_size || std::memcmp( in_buffer + l, out_buffer, out_size ) ) { - std::fprintf( stderr, "sync error at pos %d. in_size = %d, out_size = %d\n", + std::fprintf( stderr, "lzcheck: Sync error at pos %d. in_size = %d, out_size = %d\n", l, in_size, out_size ); for( int i = 0; i < in_size; ++i ) std::fputc( in_buffer[l+i], stderr ); @@ -133,7 +135,7 @@ int main( const int argc, const char * argv[] ) LZ_decompress_read( decoder, out_buffer, buffer_size ) != 0 || LZ_compress_restart_member( encoder, member_size ) < 0 ) { - std::fprintf( stderr, "can't finish member: %s.\n", + std::fprintf( stderr, "lzcheck: Can't finish member: %s.\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; } @@ -158,7 +160,7 @@ int main( const int argc, const char * argv[] ) buffer_size - leading_garbage ); if( mid_size < 0 ) { - std::fprintf( stderr, "LZ_compress_read error: %s.\n", + std::fprintf( stderr, "lzcheck: LZ_compress_read error: %s.\n", LZ_strerror( LZ_compress_errno( encoder ) ) ); retval = 3; break; } @@ -174,7 +176,7 @@ int main( const int argc, const char * argv[] ) } if( out_size < 0 ) { - std::fprintf( stderr, "LZ_decompress_read error: %s.\n", + std::fprintf( stderr, "lzcheck: LZ_decompress_read error: %s.\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; break; } @@ -182,7 +184,7 @@ int main( const int argc, const char * argv[] ) if( out_size != in_size || std::memcmp( in_buffer + l, out_buffer, out_size ) ) { - std::fprintf( stderr, "sync error at pos %d. in_size = %d, out_size = %d\n", + std::fprintf( stderr, "lzcheck: Sync error at pos %d. in_size = %d, out_size = %d\n", l, in_size, out_size ); for( int i = 0; i < in_size; ++i ) std::fputc( in_buffer[l+i], stderr ); @@ -202,7 +204,7 @@ int main( const int argc, const char * argv[] ) LZ_decompress_reset( decoder ) < 0 || LZ_compress_restart_member( encoder, member_size ) < 0 ) { - std::fprintf( stderr, "can't restart member: %s.\n", + std::fprintf( stderr, "lzcheck: Can't restart member: %s.\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; break; } @@ -215,7 +217,7 @@ int main( const int argc, const char * argv[] ) LZ_decompress_sync_to_member( decoder ) < 0 || LZ_compress_restart_member( encoder, member_size ) < 0 ) { - std::fprintf( stderr, "can't seek to next member: %s.\n", + std::fprintf( stderr, "lzcheck: Can't seek to next member: %s.\n", LZ_strerror( LZ_decompress_errno( decoder ) ) ); retval = 3; break; } @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,8 @@ Public License. */ +namespace Lzlib { + class State { unsigned char st; @@ -37,56 +39,64 @@ public: void set_char() throw() { - static const unsigned char next[states] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; + static const unsigned char next[states] = + {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; st = next[st]; } + void set_match() throw() { - static const unsigned char next[states] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; + static const unsigned char next[states] = + {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; st = next[st]; } + void set_rep() throw() { - static const unsigned char next[states] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; + static const unsigned char next[states] = + {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; st = next[st]; } + void set_short_rep() throw() { - static const unsigned char next[states] = {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; + static const unsigned char next[states] = + {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; st = next[st]; } }; -const int min_dictionary_bits = 12; -const int min_dictionary_size = 1 << min_dictionary_bits; -const int max_dictionary_bits = 29; -const int max_dictionary_size = 1 << max_dictionary_bits; -const int literal_context_bits = 3; -const int pos_state_bits = 2; -const int pos_states = 1 << pos_state_bits; -const int pos_state_mask = pos_states - 1; - -const int dis_slot_bits = 6; -const int start_dis_model = 4; -const int end_dis_model = 14; -const int modeled_distances = 1 << (end_dis_model / 2); -const int dis_align_bits = 4; -const int dis_align_size = 1 << dis_align_bits; - -const int len_low_bits = 3; -const int len_mid_bits = 3; -const int len_high_bits = 8; -const int len_low_symbols = 1 << len_low_bits; -const int len_mid_symbols = 1 << len_mid_bits; -const int len_high_symbols = 1 << len_high_bits; -const int max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols; - -const int min_match_len = 2; // must be 2 -const int max_match_len = min_match_len + max_len_symbols - 1; // 273 -const int min_match_len_limit = 5; - -const int max_dis_states = 4; +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + literal_context_bits = 3, + pos_state_bits = 2, + pos_states = 1 << pos_state_bits, + pos_state_mask = pos_states - 1, + + dis_slot_bits = 6, + start_dis_model = 4, + end_dis_model = 14, + modeled_distances = 1 << (end_dis_model / 2), + dis_align_bits = 4, + dis_align_size = 1 << dis_align_bits, + + len_low_bits = 3, + len_mid_bits = 3, + len_high_bits = 8, + len_low_symbols = 1 << len_low_bits, + len_mid_symbols = 1 << len_mid_bits, + len_high_symbols = 1 << len_high_bits, + max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols, + + min_match_len = 2, // must be 2 + max_match_len = min_match_len + max_len_symbols - 1, // 273 + min_match_len_limit = 5, + + max_dis_states = 4 }; inline int get_dis_state( int len ) throw() { @@ -96,9 +106,9 @@ inline int get_dis_state( int len ) throw() } -const int bit_model_move_bits = 5; -const int bit_model_total_bits = 11; -const int bit_model_total = 1 << bit_model_total_bits; +enum { bit_model_move_bits = 5, + bit_model_total_bits = 11, + bit_model_total = 1 << bit_model_total_bits }; struct Bit_model { @@ -133,10 +143,17 @@ public: } }; -namespace Lzlib_namespace { extern const CRC32 crc32; } -using Lzlib_namespace::crc32; +extern const CRC32 crc32; +inline int real_bits( const int value ) throw() + { + int bits = 0; + for( int i = 1, mask = 1; mask > 0; ++i, mask <<= 1 ) + if( value & mask ) bits = i; + return bits; + } + const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' }; struct File_header @@ -162,14 +179,6 @@ struct File_header dictionary_size() <= max_dictionary_size ); } - static int real_bits( const int value ) throw() - { - int bits = 0; - for( int i = 1, mask = 1; mask > 0; ++i, mask <<= 1 ) - if( value & mask ) bits = i; - return bits; - } - int dictionary_size() const throw() { int sz = ( 1 << ( data[5] & 0x1F ) ); @@ -284,3 +293,5 @@ public: int read_data( uint8_t * const out_buffer, const int out_size ) throw(); int write_data( const uint8_t * const in_buffer, const int in_size ) throw(); }; + +} // end namespace Lzlib @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,8 @@ #include "encoder.h" +using namespace Lzlib; + struct LZ_Encoder { long long partial_in_size; @@ -60,15 +62,6 @@ struct LZ_Encoder }; -bool verify_encoder( LZ_Encoder * const encoder ) - { - if( !encoder ) return false; - if( !encoder->matchfinder || !encoder->lz_encoder ) - { encoder->lz_errno = LZ_bad_argument; return false; } - return true; - } - - struct LZ_Decoder { long long partial_in_size; @@ -95,6 +88,17 @@ struct LZ_Decoder }; +namespace Lzlib { + +bool verify_encoder( LZ_Encoder * const encoder ) + { + if( !encoder ) return false; + if( !encoder->matchfinder || !encoder->lz_encoder ) + { encoder->lz_errno = LZ_bad_argument; return false; } + return true; + } + + bool verify_decoder( struct LZ_Decoder * const decoder ) { if( !decoder ) return false; @@ -103,6 +107,8 @@ bool verify_decoder( struct LZ_Decoder * const decoder ) return true; } +} // end namespace Lzlib + const char * LZ_version() { return LZ_version_string; } @@ -387,7 +393,7 @@ int LZ_decompress_read( struct LZ_Decoder * const decoder, if( d.seeking ) return 0; if( d.lz_decoder && d.lz_decoder->member_finished() ) { - d.partial_in_size += d.lz_decoder->member_position(); + d.partial_in_size += d.rdec->member_position(); d.partial_out_size += d.lz_decoder->data_position(); delete d.lz_decoder; d.lz_decoder = 0; @@ -511,7 +517,7 @@ long long LZ_decompress_member_position( struct LZ_Decoder * const decoder ) { if( !verify_decoder( decoder ) ) return -1; if( decoder->lz_decoder ) - return decoder->lz_decoder->member_position(); + return decoder->rdec->member_position(); else return 0; } @@ -520,7 +526,7 @@ long long LZ_decompress_total_in_size( struct LZ_Decoder * const decoder ) { if( !verify_decoder( decoder ) ) return -1; if( decoder->lz_decoder ) - return decoder->partial_in_size + decoder->lz_decoder->member_position(); + return decoder->partial_in_size + decoder->rdec->member_position(); return decoder->partial_in_size; } @@ -1,5 +1,5 @@ /* Lzlib - A compression library for lzip files - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This library is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ extern "C" { #endif -const char * const LZ_version_string = "1.0"; +const char * const LZ_version_string = "1.1"; enum LZ_Errno { LZ_ok = 0, LZ_bad_argument, LZ_mem_error, LZ_sequence_error, LZ_header_error, LZ_unexpected_eof, @@ -1,5 +1,5 @@ /* Minilzip - A test program for the lzlib library - Copyright (C) 2009, 2010 Antonio Diaz Diaz. + Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -63,10 +63,10 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) throw( namespace { +const char * const Program_name = "Minilzip"; +const char * const program_name = "minilzip"; +const char * const program_year = "2011"; const char * invocation_name = 0; -const char * const Program_name = "Minilzip"; -const char * const program_name = "minilzip"; -const char * const program_year = "2010"; #ifdef O_BINARY const int o_binary = O_BINARY; @@ -85,27 +85,26 @@ struct Lzma_options int match_len_limit; // 5..273 }; -enum Mode { m_compress = 0, m_decompress, m_test }; +enum Mode { m_compress, m_decompress, m_test }; std::string output_filename; int outfd = -1; -mode_t outfd_mode = S_IRUSR | S_IWUSR; int verbosity = 0; +mode_t outfd_mode = S_IRUSR | S_IWUSR; bool delete_output_on_interrupt = false; class Pretty_print { const char * const stdin_name; - const unsigned int stdin_name_len; unsigned int longest_name; std::string name_; mutable bool first_post; public: Pretty_print( const std::vector< std::string > & filenames ) - : stdin_name( "(stdin)" ), stdin_name_len( std::strlen( stdin_name ) ), - longest_name( 0 ), first_post( false ) + : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false ) { + const unsigned int stdin_name_len = std::strlen( stdin_name ); for( unsigned int i = 0; i < filenames.size(); ++i ) { const std::string & s = filenames[i]; @@ -140,7 +139,7 @@ void show_help() throw() std::printf( " -d, --decompress decompress\n" ); std::printf( " -f, --force overwrite existing output files\n" ); std::printf( " -k, --keep keep (don't delete) input files\n" ); - std::printf( " -m, --match-length=<n> set match length limit in bytes [80]\n" ); + std::printf( " -m, --match-length=<n> set match length limit in bytes [36]\n" ); std::printf( " -o, --output=<file> if reading stdin, place the output into <file>\n" ); std::printf( " -q, --quiet suppress all messages\n" ); std::printf( " -s, --dictionary-size=<n> set dictionary size limit in bytes [8MiB]\n" ); @@ -169,26 +168,18 @@ void show_version() throw() } -const char * format_num( long long num, long long limit = 9999, - const int set_prefix = 0 ) throw() +const char * format_num( long long num ) throw() { - const char * const si_prefix[8] = - { "k", "M", "G", "T", "P", "E", "Z", "Y" }; - const char * const binary_prefix[8] = + const char * const prefix[8] = { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" }; - static bool si = false; - static char buf[16]; - - if( set_prefix ) si = ( set_prefix > 0 ); - const int factor = ( si ) ? 1000 : 1024; - const char * const *prefix = ( si ) ? si_prefix : binary_prefix; + enum { buf_size = 16, factor = 1024 }; + static char buf[buf_size]; const char *p = ""; - limit = std::max( 999LL, std::min( 999999LL, limit ) ); - for( int i = 0; i < 8 && ( llabs( num ) > limit || + for( int i = 0; i < 8 && ( llabs( num ) > 9999 || ( llabs( num ) >= factor && num % factor == 0 ) ); ++i ) { num /= factor; p = prefix[i]; } - snprintf( buf, sizeof buf, "%lld %s", num, p ); + snprintf( buf, buf_size, "%lld %s", num, p ); return buf; } @@ -202,7 +193,7 @@ long long getnum( const char * const ptr, const int bs = 0, long long result = strtoll( ptr, &tail, 0 ); if( tail == ptr ) { - show_error( "bad or missing numerical argument", 0, true ); + show_error( "Bad or missing numerical argument.", 0, true ); std::exit( 1 ); } @@ -232,7 +223,7 @@ long long getnum( const char * const ptr, const int bs = 0, } if( bad_multiplier ) { - show_error( "bad multiplier in numerical argument", 0, true ); + show_error( "Bad multiplier in numerical argument.", 0, true ); std::exit( 1 ); } for( int i = 0; i < exponent; ++i ) @@ -244,7 +235,7 @@ long long getnum( const char * const ptr, const int bs = 0, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "numerical argument out of limits" ); + show_error( "Numerical argument out of limits." ); std::exit( 1 ); } return result; @@ -283,7 +274,7 @@ int open_instream( const std::string & name, struct stat * const in_statsp, if( program_mode == m_compress && !force && eindex >= 0 ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: input file `%s' already has `%s' suffix.\n", + std::fprintf( stderr, "%s: Input file `%s' already has `%s' suffix.\n", program_name, name.c_str(), known_extensions[eindex].from ); } @@ -300,14 +291,16 @@ int open_instream( const std::string & name, struct stat * const in_statsp, { const int i = fstat( infd, in_statsp ); const mode_t & mode = in_statsp->st_mode; - if( i < 0 || !( S_ISREG( mode ) || ( to_stdout && - ( S_ISFIFO( mode ) || S_ISSOCK( mode ) || - S_ISBLK( mode ) || S_ISCHR( mode ) ) ) ) ) + const bool can_read = ( i == 0 && + ( S_ISBLK( mode ) || S_ISCHR( mode ) || + S_ISFIFO( mode ) || S_ISSOCK( mode ) ) ); + if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: input file `%s' is not a regular file%s.\n", + std::fprintf( stderr, "%s: Input file `%s' is not a regular file%s.\n", program_name, name.c_str(), - to_stdout ? "" : " and `--stdout' was not specified" ); + ( can_read && !to_stdout ) ? + " and `--stdout' was not specified" : "" ); close( infd ); infd = -1; } @@ -339,7 +332,7 @@ void set_d_outname( const std::string & name, const int i ) throw() } output_filename = name; output_filename += ".out"; if( verbosity >= 0 ) - std::fprintf( stderr, "%s: can't guess original name for `%s' -- using `%s'.\n", + std::fprintf( stderr, "%s: Can't guess original name for `%s' -- using `%s'.\n", program_name, name.c_str(), output_filename.c_str() ); } @@ -350,18 +343,14 @@ bool open_outstream( const bool force ) throw() if( force ) flags |= O_TRUNC; else flags |= O_EXCL; outfd = open( output_filename.c_str(), flags, outfd_mode ); - if( outfd < 0 ) + if( outfd < 0 && verbosity >= 0 ) { - if( errno == EEXIST ) outfd = -2; else outfd = -1; - if( verbosity >= 0 ) - { - if( outfd == -2 ) - std::fprintf( stderr, "%s: Output file %s already exists, skipping.\n", - program_name, output_filename.c_str() ); - else - std::fprintf( stderr, "%s: Can't create output file `%s': %s.\n", - program_name, output_filename.c_str(), std::strerror( errno ) ); - } + if( errno == EEXIST ) + std::fprintf( stderr, "%s: Output file `%s' already exists, skipping.\n", + program_name, output_filename.c_str() ); + else + std::fprintf( stderr, "%s: Can't create output file `%s': %s.\n", + program_name, output_filename.c_str(), std::strerror( errno ) ); } return ( outfd >= 0 ); } @@ -369,7 +358,7 @@ bool open_outstream( const bool force ) throw() bool check_tty( const int infd, const Mode program_mode ) throw() { - if( program_mode == m_compress && isatty( outfd ) ) + if( program_mode == m_compress && outfd >= 0 && isatty( outfd ) ) { show_error( "I won't write compressed data to a terminal.", 0, true ); return false; @@ -391,7 +380,7 @@ void cleanup_and_fail( const int retval ) throw() delete_output_on_interrupt = false; if( verbosity >= 0 ) std::fprintf( stderr, "%s: Deleting output file `%s', if it exists.\n", - program_name, output_filename.c_str() ); + program_name, output_filename.c_str() ); if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( std::remove( output_filename.c_str() ) != 0 ) show_error( "WARNING: deletion of output file (apparently) failed." ); @@ -406,9 +395,10 @@ void close_and_set_permissions( const struct stat * const in_statsp ) bool error = false; if( in_statsp ) { - if( fchmod( outfd, in_statsp->st_mode ) != 0 || - ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && - errno != EPERM ) ) error = true; + if( ( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 && + errno != EPERM ) || + fchmod( outfd, in_statsp->st_mode ) != 0 ) + error = true; // fchown will in many cases return with EPERM, which can be safely ignored. } if( close( outfd ) == 0 ) outfd = -1; @@ -424,7 +414,7 @@ void close_and_set_permissions( const struct stat * const in_statsp ) } if( error ) { - show_error( "I can't change output file attributes." ); + show_error( "Can't change output file attributes." ); cleanup_and_fail( 1 ); } } @@ -461,10 +451,11 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, buffer_size ); const int rd = readblock( infd, buffer, size ); if( rd != size && errno ) - { pp(); show_error( "read error", errno ); return 1; } + { pp(); show_error( "Read error", errno ); return 1; } if( rd > 0 && rd != LZ_compress_write( encoder, buffer, rd ) ) internal_error( "library error (LZ_compress_write)" ); if( rd < size ) LZ_compress_finish( encoder ); +// else LZ_compress_sync_flush( encoder ); in_size += rd; } const int out_size = LZ_compress_read( encoder, buffer, buffer_size ); @@ -472,15 +463,15 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, { pp(); if( verbosity >= 0 ) - std::fprintf( stderr, "LZ_compress_read error: %s.\n", - LZ_strerror( LZ_compress_errno( encoder ) ) ); + std::fprintf( stderr, "%s: LZ_compress_read error: %s.\n", + program_name, LZ_strerror( LZ_compress_errno( encoder ) ) ); return 1; } else if( out_size > 0 ) { const int wr = writeblock( outfd, buffer, out_size ); if( wr != out_size ) - { pp(); show_error( "write error", errno ); return 1; } + { pp(); show_error( "Write error", errno ); return 1; } } else if( in_size == 0 ) internal_error( "library error (LZ_compress_read)" ); if( LZ_compress_member_finished( encoder ) ) @@ -494,7 +485,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, { close_and_set_permissions( in_statsp ); if( !next_filename() ) - { pp(); show_error( "too many volume files" ); return 1; } + { pp( "Too many volume files." ); return 1; } if( !open_outstream( true ) ) return 1; delete_output_on_interrupt = true; } @@ -505,8 +496,8 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, { pp(); if( verbosity >= 0 ) - std::fprintf( stderr, "LZ_compress_restart_member error: %s.\n", - LZ_strerror( LZ_compress_errno( encoder ) ) ); + std::fprintf( stderr, "%s: LZ_compress_restart_member error: %s.\n", + program_name, LZ_strerror( LZ_compress_errno( encoder ) ) ); return 1; } } @@ -517,7 +508,7 @@ int do_compress( LZ_Encoder * const encoder, const long long member_size, const long long in_size = LZ_compress_total_in_size( encoder ); const long long out_size = LZ_compress_total_out_size( encoder ); if( in_size <= 0 || out_size <= 0 ) - std::fprintf( stderr, "no data compressed.\n" ); + std::fprintf( stderr, "No data compressed.\n" ); else std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, " "%5.2f%% saved, %lld in, %lld out.\n", @@ -543,7 +534,7 @@ int compress( const long long member_size, const long long volume_size, if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) { if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) - pp( "not enough memory. Try a smaller dictionary size" ); + pp( "Not enough memory. Try a smaller dictionary size" ); else internal_error( "invalid argument to encoder" ); retval = 1; @@ -569,7 +560,7 @@ int do_decompress( LZ_Decoder * const decoder, const int infd, const int max_in_size = in_size; in_size = readblock( infd, buffer, max_in_size ); if( in_size != max_in_size && errno ) - { pp(); show_error( "read error", errno ); return 1; } + { pp(); show_error( "Read error", errno ); return 1; } if( in_size > 0 && in_size != LZ_decompress_write( decoder, buffer, in_size ) ) internal_error( "library error (LZ_decompress_write)" ); if( in_size < max_in_size ) LZ_decompress_finish( decoder ); @@ -585,22 +576,28 @@ int do_decompress( LZ_Decoder * const decoder, const int infd, { const int wr = writeblock( outfd, buffer, rd ); if( wr != rd ) - { pp(); show_error( "write error", errno ); return 1; } + { pp(); show_error( "Write error", errno ); return 1; } } } else if( rd < 0 ) { out_size = rd; break; } if( verbosity >= 1 && LZ_decompress_member_finished( decoder ) == 1 ) { + const long long data_position = LZ_decompress_data_position( decoder ); + const long long member_size = LZ_decompress_member_position( decoder ); pp(); if( verbosity >= 2 ) std::fprintf( stderr, "version %d, dictionary size %7sB. ", LZ_decompress_member_version( decoder ), format_num( LZ_decompress_dictionary_size( decoder ) ) ); + if( verbosity >= 4 && data_position > 0 && member_size > 0 ) + std::fprintf( stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ", + (double)data_position / member_size, + ( 8.0 * member_size ) / data_position, + 100.0 * ( 1.0 - ( (double)member_size / data_position ) ) ); if( verbosity >= 3 ) - std::fprintf( stderr, "data crc %08X, data size %9lld, member size %8lld. ", + std::fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ", LZ_decompress_data_crc( decoder ), - LZ_decompress_data_position( decoder ), - LZ_decompress_member_position( decoder ) ); + data_position, member_size ); if( testing ) std::fprintf( stderr, "ok\n" ); else std::fprintf( stderr, "done\n" ); pp.reset(); @@ -614,19 +611,19 @@ int do_decompress( LZ_Decoder * const decoder, const int infd, { if( LZ_decompress_total_out_size( decoder ) > 0 ) break; // trailing garbage - pp( "error reading member header" ); + pp( "Error reading member header" ); return 1; } if( lz_errno == LZ_mem_error ) { - pp( "not enough memory. Find a machine with more memory" ); + pp( "Not enough memory. Find a machine with more memory" ); return 1; } pp(); if( lz_errno == LZ_unexpected_eof ) { if( verbosity >= 0 ) - std::fprintf( stderr, "file ends unexpectedly at pos %lld\n", + std::fprintf( stderr, "File ends unexpectedly at pos %lld\n", LZ_decompress_total_in_size( decoder ) ); return 2; } @@ -651,7 +648,7 @@ int decompress( const int infd, const Pretty_print & pp, if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) { - pp( "not enough memory. Find a machine with more memory" ); + pp( "Not enough memory. Find a machine with more memory" ); retval = 1; } else retval = do_decompress( decoder, infd, pp, testing ); @@ -699,22 +696,24 @@ void show_error( const char * const msg, const int errcode, const bool help ) th { if( verbosity >= 0 ) { - if( msg && msg[0] != 0 ) + if( msg && msg[0] ) { std::fprintf( stderr, "%s: %s", program_name, msg ); - if( errcode > 0 ) std::fprintf( stderr, ": %s", std::strerror( errcode ) ); + if( errcode > 0 ) + std::fprintf( stderr, ": %s", std::strerror( errcode ) ); std::fprintf( stderr, "\n" ); } - if( help && invocation_name && invocation_name[0] != 0 ) - std::fprintf( stderr, "Try `%s --help' for more information.\n", invocation_name ); + if( help && invocation_name && invocation_name[0] ) + std::fprintf( stderr, "Try `%s --help' for more information.\n", + invocation_name ); } } void internal_error( const char * const msg ) { - std::string s( "internal error: " ); s += msg; - show_error( s.c_str() ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s.\n", program_name, msg ); std::exit( 3 ); } @@ -762,15 +761,15 @@ int main( const int argc, const char * const argv[] ) // to the corresponding LZMA compression modes. const Lzma_options option_mapping[] = { - { 1 << 16, 5 }, // -0 - { 1 << 20, 10 }, // -1 - { 3 << 19, 12 }, // -2 - { 1 << 21, 17 }, // -3 - { 3 << 20, 26 }, // -4 - { 1 << 22, 44 }, // -5 - { 1 << 23, 80 }, // -6 - { 1 << 24, 108 }, // -7 - { 3 << 23, 163 }, // -8 + { 1 << 20, 5 }, // -0 + { 1 << 20, 5 }, // -1 + { 3 << 19, 6 }, // -2 + { 1 << 21, 8 }, // -3 + { 3 << 20, 12 }, // -4 + { 1 << 22, 20 }, // -5 + { 1 << 23, 36 }, // -6 + { 1 << 24, 68 }, // -7 + { 3 << 23, 132 }, // -8 { 1 << 25, 273 } }; // -9 Lzma_options encoder_options = option_mapping[6]; // default = "-6" long long member_size = LLONG_MAX; @@ -793,8 +792,8 @@ int main( const int argc, const char * const argv[] ) const Arg_parser::Option options[] = { - { '0', 0, Arg_parser::no }, - { '1', "fast", Arg_parser::no }, + { '0', "fast", Arg_parser::no }, + { '1', 0, Arg_parser::no }, { '2', 0, Arg_parser::no }, { '3', 0, Arg_parser::no }, { '4', 0, Arg_parser::no }, @@ -820,7 +819,7 @@ int main( const int argc, const char * const argv[] ) { 'V', "version", Arg_parser::no }, { 0 , 0, Arg_parser::no } }; - Arg_parser parser( argc, argv, options ); + const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option { show_error( parser.error().c_str(), 0, true ); return 1; } @@ -838,7 +837,7 @@ int main( const int argc, const char * const argv[] ) case 'b': member_size = getnum( arg, 0, 100000, LLONG_MAX / 2 ); break; case 'c': to_stdout = true; break; case 'd': program_mode = m_decompress; break; - case 'e': break; + case 'e': break; // ignored by now case 'f': force = true; break; case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; @@ -855,7 +854,12 @@ int main( const int argc, const char * const argv[] ) case 'V': show_version(); return 0; default : internal_error( "uncaught option" ); } - } + } // end process options + +#if defined(__OS2__) + _fsetmode( stdin, "b" ); + _fsetmode( stdout, "b" ); +#endif bool filenames_given = false; for( ; argind < parser.arguments(); ++argind ) @@ -954,9 +958,7 @@ int main( const int argc, const char * const argv[] ) } if( outfd >= 0 && close( outfd ) != 0 ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't close stdout: %s.\n", - program_name, std::strerror( errno ) ); + show_error( "Can't close stdout", errno ); if( retval < 1 ) retval = 1; } return retval; diff --git a/testsuite/check.sh b/testsuite/check.sh index 69060bd..be6204b 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Lzlib - A compression library for lzip files -# Copyright (C) 2009, 2010 Antonio Diaz Diaz. +# Copyright (C) 2009, 2010, 2011 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute and modify it. @@ -10,6 +10,7 @@ export LC_ALL objdir=`pwd` testdir=`cd "$1" ; pwd` LZIP="${objdir}"/minilzip +BBEXAMPLE="${objdir}"/bbexample LZCHECK="${objdir}"/lzcheck framework_failure() { echo "failure in testing framework" ; exit 1 ; } @@ -20,17 +21,31 @@ fi if [ -d tmp ] ; then rm -rf tmp ; fi mkdir tmp -printf "testing lzlib..." +printf "testing lzlib-%s..." "$2" cd "${objdir}"/tmp -cat "${testdir}"/test1 > in || framework_failure +cat "${testdir}"/test.txt > in || framework_failure fail=0 -"${LZIP}" -t "${testdir}"/test1.lz || fail=1 -"${LZIP}" -cd "${testdir}"/test1.lz > copy || fail=1 +"${LZIP}" -t "${testdir}"/test_v0.lz || fail=1 +printf . +"${LZIP}" -cd "${testdir}"/test_v0.lz > copy || fail=1 +cmp in copy || fail=1 +printf . + +"${LZIP}" -t "${testdir}"/test_v1.lz || fail=1 +printf . +"${LZIP}" -cd "${testdir}"/test_v1.lz > copy || fail=1 cmp in copy || fail=1 +printf . -for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do +"${LZIP}" -t "${testdir}"/test_sync.lz || fail=1 +printf . +"${LZIP}" -cd "${testdir}"/test_sync.lz > copy || fail=1 +cmp in copy || fail=1 +printf . + +for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -k -$i in || fail=1 mv -f in.lz copy.lz || fail=1 printf "garbage" >> copy.lz || fail=1 @@ -39,7 +54,7 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do printf . done -for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do +for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -c -$i in > out || fail=1 printf "g" >> out || fail=1 "${LZIP}" -cd out > copy || fail=1 @@ -47,21 +62,31 @@ for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do printf . done -for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do +for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do "${LZIP}" -$i < in > out || fail=1 "${LZIP}" -d < out > copy || fail=1 cmp in copy || fail=1 printf . done -for i in s4Ki 0 1 2 3 4 5 6 7 8 9s16 ; do - "${LZIP}" -fe -$i -o out < in || fail=1 +for i in s4Ki 0 1 2 3 4 5 6 7 8s16 9s16 ; do + "${LZIP}" -f -$i -o out < in || fail=1 "${LZIP}" -df -o copy < out.lz || fail=1 cmp in copy || fail=1 printf . done -"${LZCHECK}" in 2>/dev/null || fail=1 +"${LZIP}" -$i < in > anyothername || fail=1 +"${LZIP}" -dq anyothername || fail=1 +cmp in anyothername.out || fail=1 +printf . + +"${BBEXAMPLE}" in || fail=1 +printf . +"${BBEXAMPLE}" out || fail=1 +printf . + +"${LZCHECK}" in || fail=1 printf . echo diff --git a/testsuite/test1 b/testsuite/test.txt index 5b244d5..5b244d5 100644 --- a/testsuite/test1 +++ b/testsuite/test.txt diff --git a/testsuite/test_sync.lz b/testsuite/test_sync.lz Binary files differnew file mode 100644 index 0000000..419fa97 --- /dev/null +++ b/testsuite/test_sync.lz diff --git a/testsuite/test1.lz b/testsuite/test_v0.lz Binary files differindex a09b1e8..a09b1e8 100644 --- a/testsuite/test1.lz +++ b/testsuite/test_v0.lz diff --git a/testsuite/test_v1.lz b/testsuite/test_v1.lz Binary files differnew file mode 100644 index 0000000..f1c79eb --- /dev/null +++ b/testsuite/test_v1.lz |