summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 11
-rw-r--r-- INSTALL 13
-rw-r--r-- Makefile.in 40
-rw-r--r-- NEWS 24
-rw-r--r-- README 7
-rw-r--r-- arg_parser.cc 8
-rw-r--r-- arg_parser.h 2
-rwxr-xr-x configure 39
-rw-r--r-- decoder.cc 90
-rw-r--r-- decoder.h 124
-rw-r--r-- doc/lziprecover.1 4
-rw-r--r-- doc/lziprecover.info 51
-rw-r--r-- doc/lziprecover.texinfo 35
-rw-r--r-- file_index.cc 134
-rw-r--r-- file_index.h 82
-rw-r--r-- lzip.h 165
-rw-r--r-- main.cc 218
-rw-r--r-- merge.cc 77
-rw-r--r-- range_dec.cc 245
-rw-r--r-- repair.cc 44
-rw-r--r-- split.cc 22
-rwxr-xr-x testsuite/check.sh 11
-rw-r--r-- testsuite/unzcrash.cc 31
23 files changed, 817 insertions, 660 deletions
diff --git a/ChangeLog b/ChangeLog
index 6c199ca..91fa7c9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2013-02-27 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 1.14-rc1 released.
+ * Option '-l, --list' now accepts more than one file.
+ * Decompression time has been reduced by 12%.
+ * Makefile.in: Added new target 'install-as-lzip'.
+ * Makefile.in: Added new target 'install-bin'.
+ * main.cc: Use 'setmode' instead of '_setmode' on Windows and OS/2.
+
2012-02-24 Antonio Diaz Diaz <ant_diaz@teleline.es>
* Version 1.13 released.
@@ -60,7 +69,7 @@
* testsuite/unzcrash.cc: Test all 1-byte errors.
-Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This file is a collection of facts, and thus it is not copyrightable,
but just in case, you have unlimited permission to copy, distribute and
diff --git a/INSTALL b/INSTALL
index c2839d6..11d7b69 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,7 +1,7 @@
Requirements
------------
You will need a C++ compiler.
-I use gcc 4.3.5 and 3.3.6, but the code should compile with any
+I use gcc 4.7.2 and 3.3.6, but the code should compile with any
standards compliant compiler.
Gcc is available at http://gcc.gnu.org.
@@ -30,9 +30,16 @@ from the main archive.
4. Optionally, type 'make check' to run the tests that come with
lziprecover.
-5. Type 'make install' to install the programs and any data files and
+5. Type 'make install' to install the program and any data files and
documentation.
+ You can install only the program, the info manual or the man page by
+ typing 'make install-bin', 'make install-info' or 'make install-man'
+ respectively.
+
+5a. Type 'make install-as-lzip' to install the program and any data
+ files and documentation, and link the program to the name 'lzip'.
+
Another way
-----------
@@ -51,7 +58,7 @@ After running 'configure', you can run 'make' and 'make install' as
explained above.
-Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/Makefile.in b/Makefile.in
index 08df8ab..4e619e4 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -6,12 +6,13 @@ INSTALL_DATA = $(INSTALL) -p -m 644
INSTALL_DIR = $(INSTALL) -d -m 755
SHELL = /bin/sh
-objs = arg_parser.o decoder.o merge.o range_dec.o repair.o split.o main.o
+objs = arg_parser.o file_index.o merge.o range_dec.o repair.o split.o \
+ decoder.o main.o
unzobjs = arg_parser.o unzcrash.o
-.PHONY : all install install-info install-man install-strip \
- uninstall uninstall-info uninstall-man \
+.PHONY : all install install-bin install-info install-man install-strip \
+ install-as-lzip uninstall uninstall-bin uninstall-info uninstall-man \
doc info man check dist clean distclean
all : $(progname)
@@ -34,15 +35,16 @@ unzcrash.o : testsuite/unzcrash.cc
%.o : %.cc
$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
-$(objs) : Makefile
-arg_parser.o : arg_parser.h
-decoder.o : lzip.h decoder.h
-main.o : arg_parser.h lzip.h decoder.h
-merge.o : lzip.h decoder.h
-range_dec.o : lzip.h decoder.h
-repair.o : lzip.h
-split.o : lzip.h
-unzcrash.o : arg_parser.h Makefile
+$(objs) : Makefile
+arg_parser.o : arg_parser.h
+decoder.o : lzip.h decoder.h
+file_index.o : lzip.h file_index.h
+main.o : arg_parser.h lzip.h decoder.h
+merge.o : lzip.h decoder.h file_index.h
+range_dec.o : lzip.h decoder.h file_index.h
+repair.o : lzip.h
+split.o : lzip.h
+unzcrash.o : arg_parser.h Makefile
doc : info man
@@ -64,14 +66,16 @@ Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
check : all
@$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
-install : all install-info install-man
+install : install-bin install-info install-man
+
+install-bin : all
if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
$(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
install-info :
if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
$(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
- -install-info --info-dir="$(DESTDIR)$(infodir)" $(DESTDIR)$(infodir)/$(pkgname).info
+ -install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info"
install-man :
if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
@@ -80,7 +84,13 @@ install-man :
install-strip : all
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
-uninstall : uninstall-info uninstall-man
+install-as-lzip : install
+ -rm -f "$(DESTDIR)$(bindir)/lzip"
+ cd "$(DESTDIR)$(bindir)" && ln -s $(progname) lzip
+
+uninstall : uninstall-bin uninstall-info uninstall-man
+
+uninstall-bin :
-rm -f "$(DESTDIR)$(bindir)/$(progname)"
uninstall-info :
diff --git a/NEWS b/NEWS
index ae35955..73ab113 100644
--- a/NEWS
+++ b/NEWS
@@ -1,23 +1,9 @@
-Changes in version 1.13:
+Changes in version 1.14:
-Lziprecover is now distributed in its own package. Until version 1.12 it
-was included in the lzip package.
+Option "-l, --list" now accepts more than one file.
-Decompressor options (-c, -d, -k, -t) have been implemented in
-lziprecover so that a external decompressor is not needed for recovery
-nor for "make check".
+Decompression time has been reduced by 12%.
-The new option "-D, --range-decompress" which extracts a range of bytes
-decompressing only the members containing the desired data, has been
-added.
+The target "install-as-lzip" has been added to the Makefile.
-The new option "-l, --list" which prints correct total file sizes and
-ratios even for multi-member files, has been added.
-
-"--merge" and "--repair" now remove the output file if recovery fails.
-
-Quote characters in messages have been changed as advised by GNU Coding
-Standards.
-
-Configure option "--datadir" has been renamed to "--datarootdir" to
-follow GNU Standards.
+The target "install-bin" has been added to the Makefile.
diff --git a/README b/README
index 628144d..e5b3641 100644
--- a/README
+++ b/README
@@ -40,8 +40,13 @@ If the cause of file corruption is damaged media, the combination
GNU ddrescue + lziprecover is the best option for recovering data from
multiple damaged copies.
+This package also includes unzcrash, a program written to test
+robustness to decompression of corrupted data, inspired by unzcrash.c
+from Julian Seward's bzip2. Type 'make unzcrash' in the lziprecover
+directory to build it. Then try 'unzcrash --help'.
-Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+
+Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This file is free documentation: you have unlimited permission to copy,
distribute and modify it.
diff --git a/arg_parser.cc b/arg_parser.cc
index b3fd48d..a28d2ba 100644
--- a/arg_parser.cc
+++ b/arg_parser.cc
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
@@ -36,7 +36,7 @@
bool Arg_parser::parse_long_option( const char * const opt, const char * const arg,
const Option options[], int & argind )
{
- unsigned int len;
+ unsigned len;
int index = -1;
bool exact = false, ambig = false;
@@ -44,7 +44,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a
// Test all long options for either exact match or abbreviated matches.
for( int i = 0; options[i].code != 0; ++i )
- if( options[i].name && !std::strncmp( options[i].name, &opt[2], len ) )
+ if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 )
{
if( std::strlen( options[i].name ) == len ) // Exact match found
{ index = i; exact = true; break; }
@@ -178,7 +178,7 @@ Arg_parser::Arg_parser( const int argc, const char * const argv[],
if( error_.size() ) data.clear();
else
{
- for( unsigned int i = 0; i < non_options.size(); ++i )
+ for( unsigned i = 0; i < non_options.size(); ++i )
{ data.push_back( Record() ); data.back().argument.swap( non_options[i] ); }
while( argind < argc )
{ data.push_back( Record() ); data.back().argument = argv[argind++]; }
diff --git a/arg_parser.h b/arg_parser.h
index 4fbd1af..5248cb1 100644
--- a/arg_parser.h
+++ b/arg_parser.h
@@ -1,5 +1,5 @@
/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
- Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
+ Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Antonio Diaz Diaz.
This library is free software: you can redistribute it and/or modify
diff --git a/configure b/configure
index ac52bf7..d869772 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# configure script for Lziprecover - Data recovery tool for lzipped files
-# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
#
# This configure script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -8,9 +8,9 @@
args=
no_create=
pkgname=lziprecover
-pkgversion=1.13
+pkgversion=1.14-rc1
progname=lziprecover
-srctrigger=lzip.h
+srctrigger=doc/lziprecover.texinfo
# clear some things potentially inherited from environment.
LC_ALL=C
@@ -22,11 +22,19 @@ bindir='$(exec_prefix)/bin'
datarootdir='$(prefix)/share'
infodir='$(datarootdir)/info'
mandir='$(datarootdir)/man'
-CXX=
+CXX=g++
CPPFLAGS=
CXXFLAGS='-Wall -W -O2'
LDFLAGS=
+# checking whether we are using GNU C++.
+if [ ! -x /bin/g++ ] &&
+ [ ! -x /usr/bin/g++ ] &&
+ [ ! -x /usr/local/bin/g++ ] ; then
+ CXX=c++
+ CXXFLAGS='-W -O2'
+fi
+
# Loop over all args
while [ -n "$1" ] ; do
@@ -91,14 +99,14 @@ done
srcdirtext=
if [ -z "${srcdir}" ] ; then
srcdirtext="or . or .." ; srcdir=.
- if [ ! -r ${srcdir}/${srctrigger} ] ; then srcdir=.. ; fi
- if [ ! -r ${srcdir}/${srctrigger} ] ; then
+ if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
+ if [ ! -r "${srcdir}/${srctrigger}" ] ; then
## the sed command below emulates the dirname command
srcdir=`echo $0 | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
fi
fi
-if [ ! -r ${srcdir}/${srctrigger} ] ; then
+if [ ! -r "${srcdir}/${srctrigger}" ] ; then
exec 1>&2
echo
echo "configure: Can't find sources in ${srcdir} ${srcdirtext}"
@@ -107,18 +115,7 @@ if [ ! -r ${srcdir}/${srctrigger} ] ; then
fi
# Set srcdir to . if that's what it is.
-if [ "`pwd`" = "`cd ${srcdir} ; pwd`" ] ; then srcdir=. ; fi
-
-# checking whether we are using GNU C++.
-if [ -z "${CXX}" ] ; then # Let the user override the test.
- if [ -x /bin/g++ ] ||
- [ -x /usr/bin/g++ ] ||
- [ -x /usr/local/bin/g++ ] ; then
- CXX="g++"
- else
- CXX="c++"
- fi
-fi
+if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi
echo
if [ -z "${no_create}" ] ; then
@@ -152,7 +149,7 @@ echo "LDFLAGS = ${LDFLAGS}"
rm -f Makefile
cat > Makefile << EOF
# Makefile for Lziprecover - Data recovery tool for lzipped files
-# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
# This file was generated automatically by configure. Do not edit.
#
# This Makefile is free software: you have unlimited permission
@@ -173,6 +170,6 @@ CPPFLAGS = ${CPPFLAGS}
CXXFLAGS = ${CXXFLAGS}
LDFLAGS = ${LDFLAGS}
EOF
-cat ${srcdir}/Makefile.in >> Makefile
+cat "${srcdir}/Makefile.in" >> Makefile
echo "OK. Now you can run make."
diff --git a/decoder.cc b/decoder.cc
index fdb4ff9..5e26504 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -34,19 +34,19 @@
const CRC32 crc32;
-void Pretty_print::operator()( const char * const msg ) const
+void Pretty_print::operator()( const char * const msg, FILE * const f ) const
{
if( verbosity_ >= 0 )
{
if( first_post )
{
first_post = false;
- std::fprintf( stderr, " %s: ", name_.c_str() );
- for( unsigned int i = 0; i < longest_name - name_.size(); ++i )
- std::fprintf( stderr, " " );
- if( !msg ) std::fflush( stderr );
+ std::fprintf( f, " %s: ", name_.c_str() );
+ for( unsigned i = 0; i < longest_name - name_.size(); ++i )
+ std::fprintf( f, " " );
+ if( !msg ) std::fflush( f );
}
- if( msg ) std::fprintf( stderr, "%s.\n", msg );
+ if( msg ) std::fprintf( f, "%s.\n", msg );
}
}
@@ -60,13 +60,13 @@ int readblock( const int fd, uint8_t * const buf, const int size )
errno = 0;
while( rest > 0 )
{
- errno = 0;
const int n = read( fd, buf + size - rest, rest );
if( n > 0 ) rest -= n;
- else if( n == 0 ) break;
+ else if( n == 0 ) break; // EOF
else if( errno != EINTR && errno != EAGAIN ) break;
+ errno = 0;
}
- return ( rest > 0 ) ? size - rest : size;
+ return size - rest;
}
@@ -79,12 +79,12 @@ int writeblock( const int fd, const uint8_t * const buf, const int size )
errno = 0;
while( rest > 0 )
{
- errno = 0;
const int n = write( fd, buf + size - rest, rest );
if( n > 0 ) rest -= n;
else if( n < 0 && errno != EINTR && errno != EAGAIN ) break;
+ errno = 0;
}
- return ( rest > 0 ) ? size - rest : size;
+ return size - rest;
}
@@ -110,9 +110,10 @@ void LZ_decoder::flush_data()
crc32.update( crc_, buffer + stream_pos, size );
if( outfd >= 0 )
{
- const long long i = std::max( 0LL, outskip - stream_position() );
+ const unsigned long long sp = stream_position();
+ const long long i = positive_diff( outskip, sp );
const long long s =
- std::min( outend - stream_position(), (long long)size ) - i;
+ std::min( positive_diff( outend, sp ), (unsigned long long)size ) - i;
if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s )
throw Error( "Write error" );
}
@@ -126,10 +127,11 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{
File_trailer trailer;
const int trailer_size = File_trailer::size( member_version );
- const long long member_size = range_decoder.member_position() + trailer_size;
+ const unsigned long long member_size =
+ range_decoder.member_position() + trailer_size;
bool error = false;
- const int size = range_decoder.read( trailer.data, trailer_size );
+ int size = range_decoder.read_data( trailer.data, trailer_size );
if( size < trailer_size )
{
error = true;
@@ -139,9 +141,11 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
std::fprintf( stderr, "Trailer truncated at trailer position %d;"
" some checks may fail.\n", size );
}
- for( int i = size; i < trailer_size; ++i ) trailer.data[i] = 0;
+ while( size < trailer_size ) trailer.data[size++] = 0;
}
+
if( member_version == 0 ) trailer.member_size( member_size );
+
if( !range_decoder.code_is_zero() )
{
error = true;
@@ -154,7 +158,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
{
pp();
std::fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X.\n",
- (unsigned int)trailer.data_crc(), (unsigned int)crc() );
+ trailer.data_crc(), crc() );
}
}
if( trailer.data_size() != data_position() )
@@ -163,7 +167,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
if( pp.verbosity() >= 0 )
{
pp();
- std::fprintf( stderr, "Data size mismatch; trailer says %lld, data size is %lld (0x%llX).\n",
+ std::fprintf( stderr, "Data size mismatch; trailer says %llu, data size is %llu (0x%llX).\n",
trailer.data_size(), data_position(), data_position() );
}
}
@@ -173,7 +177,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
if( pp.verbosity() >= 0 )
{
pp();
- std::fprintf( stderr, "Member size mismatch; trailer says %lld, member size is %lld (0x%llX).\n",
+ std::fprintf( stderr, "Member size mismatch; trailer says %llu, member size is %llu (0x%llX).\n",
trailer.member_size(), member_size, member_size );
}
}
@@ -183,9 +187,8 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
( 8.0 * member_size ) / data_position(),
100.0 * ( 1.0 - ( (double)member_size / data_position() ) ) );
if( !error && pp.verbosity() >= 4 )
- std::fprintf( stderr, "data CRC %08X, data size %9lld, member size %8lld. ",
- (unsigned int)trailer.data_crc(), trailer.data_size(),
- trailer.member_size() );
+ std::fprintf( stderr, "data CRC %08X, data size %9llu, member size %8llu. ",
+ trailer.data_crc(), trailer.data_size(), trailer.member_size() );
return !error;
}
@@ -194,6 +197,7 @@ bool LZ_decoder::verify_trailer( const Pretty_print & pp ) const
// 3 = trailer error, 4 = unknown marker found.
int LZ_decoder::decode_member( const Pretty_print & pp )
{
+ Bit_model bm_literal[1<<literal_context_bits][0x300];
Bit_model bm_match[State::states][pos_states];
Bit_model bm_rep[State::states];
Bit_model bm_rep0[State::states];
@@ -201,32 +205,30 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
Bit_model bm_rep2[State::states];
Bit_model bm_len[State::states][pos_states];
Bit_model bm_dis_slot[max_dis_states][1<<dis_slot_bits];
- Bit_model bm_dis[modeled_distances-end_dis_model+1];
+ Bit_model bm_dis[modeled_distances-end_dis_model];
Bit_model bm_align[dis_align_size];
-
- unsigned int rep0 = 0; // rep[0-3] latest four distances
- unsigned int rep1 = 0; // used for efficient coding of
- unsigned int rep2 = 0; // repeated distances
- unsigned int rep3 = 0;
-
Len_decoder len_decoder;
Len_decoder rep_match_len_decoder;
- Literal_decoder literal_decoder;
+
+ unsigned rep0 = 0; // rep[0-3] latest four distances
+ unsigned rep1 = 0; // used for efficient coding of
+ unsigned rep2 = 0; // repeated distances
+ unsigned rep3 = 0;
+
State state;
range_decoder.load();
- while( true )
+ while( !range_decoder.finished() )
{
- if( range_decoder.finished() ) { flush_data(); return 2; }
const int pos_state = data_position() & pos_state_mask;
if( range_decoder.decode_bit( bm_match[state()][pos_state] ) == 0 )
{
const uint8_t prev_byte = get_prev_byte();
if( state.is_char() )
- put_byte( literal_decoder.decode( range_decoder, prev_byte ) );
+ put_byte( range_decoder.decode_tree( bm_literal[get_lit_state(prev_byte)], 8 ) );
else
- put_byte( literal_decoder.decode_matched( range_decoder, prev_byte,
- get_byte( rep0 ) ) );
+ put_byte( range_decoder.decode_matched( bm_literal[get_lit_state(prev_byte)],
+ get_byte( rep0 ) ) );
state.set_char();
}
else
@@ -237,7 +239,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
len = 0;
if( range_decoder.decode_bit( bm_rep0[state()] ) == 1 )
{
- unsigned int distance;
+ unsigned distance;
if( range_decoder.decode_bit( bm_rep1[state()] ) == 0 )
distance = rep1;
else
@@ -263,20 +265,20 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
}
else
{
- const unsigned int rep0_saved = rep0;
+ const unsigned rep0_saved = rep0;
len = min_match_len + len_decoder.decode( range_decoder, pos_state );
- const int dis_slot = range_decoder.decode_tree( bm_dis_slot[get_dis_state(len)], dis_slot_bits );
+ const int dis_slot = range_decoder.decode_tree6( bm_dis_slot[get_dis_state(len)] );
if( dis_slot < start_dis_model ) rep0 = dis_slot;
else
{
const int direct_bits = ( dis_slot >> 1 ) - 1;
rep0 = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
if( dis_slot < end_dis_model )
- rep0 += range_decoder.decode_tree_reversed( bm_dis + rep0 - dis_slot, direct_bits );
+ rep0 += range_decoder.decode_tree_reversed( bm_dis + rep0 - dis_slot - 1, direct_bits );
else
{
rep0 += range_decoder.decode( direct_bits - dis_align_bits ) << dis_align_bits;
- rep0 += range_decoder.decode_tree_reversed( bm_align, dis_align_bits );
+ rep0 += range_decoder.decode_tree_reversed4( bm_align );
if( rep0 == 0xFFFFFFFFU ) // Marker found
{
rep0 = rep0_saved;
@@ -301,11 +303,13 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
}
rep3 = rep2; rep2 = rep1; rep1 = rep0_saved;
state.set_match();
- if( rep0 >= (unsigned int)dictionary_size ||
- ( rep0 >= (unsigned int)pos && !partial_data_pos ) )
+ if( rep0 >= (unsigned)dictionary_size ||
+ ( rep0 >= (unsigned)pos && !partial_data_pos ) )
{ flush_data(); return 1; }
}
copy_block( rep0, len );
}
}
+ flush_data();
+ return 2;
}
diff --git a/decoder.h b/decoder.h
index 92da2b3..c446802 100644
--- a/decoder.h
+++ b/decoder.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@
class Range_decoder
{
enum { buffer_size = 16384 };
- long long partial_member_pos;
+ unsigned long long partial_member_pos;
uint8_t * const buffer; // input buffer
int pos; // current pos in buffer
int stream_pos; // when reached, a new block must be read
@@ -42,22 +42,23 @@ public:
code( 0 ),
range( 0xFFFFFFFFU ),
infd( ifd ),
- at_stream_end( false ) {}
+ at_stream_end( false )
+ {}
~Range_decoder() { delete[] buffer; }
bool code_is_zero() const { return ( code == 0 ); }
bool finished() { return pos >= stream_pos && !read_block(); }
- long long member_position() const { return partial_member_pos + pos; }
+ unsigned long long member_position() const { return partial_member_pos + pos; }
void reset_member_position() { partial_member_pos = -pos; }
uint8_t get_byte()
{
- if( finished() ) return 0x55; // make code != 0
+ if( finished() ) return 0xAA; // make code != 0
return buffer[pos++];
}
- int read( uint8_t * const outbuf, const int size )
+ int read_data( uint8_t * const outbuf, const int size )
{
int rest = size;
while( rest > 0 && !finished() )
@@ -67,14 +68,14 @@ public:
pos += rd;
rest -= rd;
}
- return ( rest > 0 ) ? size - rest : size;
+ return size - rest;
}
void load()
{
code = 0;
- range = 0xFFFFFFFFU;
for( int i = 0; i < 5; ++i ) code = (code << 8) | get_byte();
+ range = 0xFFFFFFFFU;
}
void normalize()
@@ -88,17 +89,14 @@ public:
int symbol = 0;
for( int i = num_bits; i > 0; --i )
{
- symbol <<= 1;
- if( range <= 0x00FFFFFFU )
- {
- range <<= 7; code = (code << 8) | get_byte();
- if( code >= range ) { code -= range; symbol |= 1; }
- }
- else
- {
- range >>= 1;
- if( code >= range ) { code -= range; symbol |= 1; }
- }
+ normalize();
+ range >>= 1;
+// symbol <<= 1;
+// if( code >= range ) { code -= range; symbol |= 1; }
+ const uint32_t mask = 0U - (code < range);
+ code -= range;
+ code += range & mask;
+ symbol = (symbol << 1) + (mask + 1);
}
return symbol;
}
@@ -130,36 +128,63 @@ public:
return model - (1 << num_bits);
}
+ int decode_tree6( Bit_model bm[] )
+ {
+ int model = 1;
+ model = ( model << 1 ) | decode_bit( bm[model] );
+ model = ( model << 1 ) | decode_bit( bm[model] );
+ model = ( model << 1 ) | decode_bit( bm[model] );
+ model = ( model << 1 ) | decode_bit( bm[model] );
+ model = ( model << 1 ) | decode_bit( bm[model] );
+ model = ( model << 1 ) | decode_bit( bm[model] );
+ return model - (1 << 6);
+ }
+
int decode_tree_reversed( Bit_model bm[], const int num_bits )
{
int model = 1;
int symbol = 0;
for( int i = 0; i < num_bits; ++i )
{
- const int bit = decode_bit( bm[model] );
+ const bool bit = decode_bit( bm[model] );
model <<= 1;
- if( bit ) { model |= 1; symbol |= (1 << i); }
+ if( bit ) { ++model; symbol |= (1 << i); }
}
return symbol;
}
- int decode_matched( Bit_model bm[], const int match_byte )
+ int decode_tree_reversed4( Bit_model bm[] )
+ {
+ int model = 1;
+ int symbol = 0;
+ int bit = decode_bit( bm[model] );
+ model = (model << 1) + bit; symbol |= bit;
+ bit = decode_bit( bm[model] );
+ model = (model << 1) + bit; symbol |= (bit << 1);
+ bit = decode_bit( bm[model] );
+ model = (model << 1) + bit; symbol |= (bit << 2);
+ if( decode_bit( bm[model] ) ) symbol |= 8;
+ return symbol;
+ }
+
+ int decode_matched( Bit_model bm[], int match_byte )
{
Bit_model * const bm1 = bm + 0x100;
int symbol = 1;
for( int i = 7; i >= 0; --i )
{
- const int match_bit = ( match_byte >> i ) & 1;
- const int bit = decode_bit( bm1[(match_bit<<8)+symbol] );
- symbol = ( symbol << 1 ) | bit;
- if( match_bit != bit )
+ match_byte <<= 1;
+ const int match_bit = match_byte & 0x100;
+ const int bit = decode_bit( bm1[match_bit+symbol] );
+ symbol = ( symbol << 1 ) + bit;
+ if( match_bit != bit << 8 )
{
- while( --i >= 0 )
- symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
+ while( symbol < 0x100 )
+ symbol = ( symbol << 1 ) + decode_bit( bm[symbol] );
break;
}
}
- return symbol & 0xFF;
+ return symbol - 0x100;
}
};
@@ -186,29 +211,12 @@ public:
};
-class Literal_decoder
- {
- Bit_model bm_literal[1<<literal_context_bits][0x300];
-
- int lstate( const uint8_t prev_byte ) const
- { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
-
-public:
- uint8_t decode( Range_decoder & range_decoder, const uint8_t prev_byte )
- { return range_decoder.decode_tree( bm_literal[lstate(prev_byte)], 8 ); }
-
- uint8_t decode_matched( Range_decoder & range_decoder,
- const uint8_t prev_byte, const uint8_t match_byte )
- { return range_decoder.decode_matched( bm_literal[lstate(prev_byte)],
- match_byte ); }
- };
-
-
class LZ_decoder
{
- const long long outskip;
- const long long outend;
- long long partial_data_pos;
+ const unsigned long long outskip;
+ const unsigned long long outend;
+ unsigned long long partial_data_pos;
+ Range_decoder & range_decoder;
const int dictionary_size;
const int buffer_size;
uint8_t * const buffer; // output buffer
@@ -217,9 +225,8 @@ class LZ_decoder
uint32_t crc_;
const int outfd; // output file descriptor
const int member_version;
- Range_decoder & range_decoder;
- long long stream_position() const { return partial_data_pos + stream_pos; }
+ unsigned long long stream_position() const { return partial_data_pos + stream_pos; }
void flush_data();
bool verify_trailer( const Pretty_print & pp ) const;
@@ -248,7 +255,7 @@ class LZ_decoder
if( i < 0 ) i += buffer_size;
if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) )
{
- std::memcpy( buffer + pos, buffer + i, len );
+ std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap
pos += len;
}
else for( ; len > 0; --len )
@@ -264,11 +271,13 @@ class LZ_decoder
public:
LZ_decoder( const File_header & header, Range_decoder & rdec, const int ofd,
- const long long oskip = 0, const long long oend = LLONG_MAX )
+ const unsigned long long oskip = 0,
+ const unsigned long long oend = -1ULL )
:
outskip( oskip ),
outend( oend ),
partial_data_pos( 0 ),
+ range_decoder( rdec ),
dictionary_size( header.dictionary_size() ),
buffer_size( std::max( 65536, dictionary_size ) ),
buffer( new uint8_t[buffer_size] ),
@@ -276,15 +285,14 @@ public:
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
outfd( ofd ),
- member_version( header.version() ),
- range_decoder( rdec )
+ member_version( header.version() )
{ buffer[buffer_size-1] = 0; } // prev_byte of first_byte
~LZ_decoder() { delete[] buffer; }
- uint32_t crc() const { return crc_ ^ 0xFFFFFFFFU; }
+ unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
- long long data_position() const { return partial_data_pos + pos; }
+ unsigned long long data_position() const { return partial_data_pos + pos; }
int decode_member( const Pretty_print & pp );
};
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index 862cbe0..d63f6ab 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.37.1.
-.TH LZIPRECOVER "1" "February 2012" "Lziprecover 1.13" "User Commands"
+.TH LZIPRECOVER "1" "February 2013" "Lziprecover 1.14-rc1" "User Commands"
.SH NAME
Lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@@ -61,7 +61,7 @@ Report bugs to lzip\-bug@nongnu.org
.br
Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html
.SH COPYRIGHT
-Copyright \(co 2012 Antonio Diaz Diaz.
+Copyright \(co 2013 Antonio Diaz Diaz.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
.br
This is free software: you are free to change and redistribute it.
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index 73830cf..7b24dcb 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
-This manual is for Lziprecover (version 1.13, 24 February 2012).
+This manual is for Lziprecover (version 1.14-rc1, 27 February 2013).
* Menu:
@@ -24,7 +24,7 @@ This manual is for Lziprecover (version 1.13, 24 February 2012).
* Concept Index:: Index of concepts
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission to
copy, distribute and modify it.
@@ -132,7 +132,8 @@ The format for running lziprecover is:
`-l'
`--list'
Print total file sizes and ratios. The values produced are correct
- even for multi-member files.
+ even for multi-member files. Use it together with `-v' to see
+ information about the members in the file.
`-m'
`--merge'
@@ -221,7 +222,12 @@ File: lziprecover.info, Node: File Format, Next: Examples, Prev: Invoking Lzi
3 File Format
*************
-In the diagram below, a box like this:
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.
+-- Antoine de Saint-Exupery
+
+
+ In the diagram below, a box like this:
+---+
| | <-- the vertical bars might be missing
+---+
@@ -250,15 +256,18 @@ additional information before, between, or after them.
"LZIP".
`VN (version number, 1 byte)'
- Just in case something needs to be modified in the future. Valid
- values are 0 and 1. Version 0 files are deprecated. They can
- contain only one member and lack the `Member size' field.
+ Just in case something needs to be modified in the future. 1 for
+ now.
`DS (coded dictionary size, 1 byte)'
- Bits 4-0 contain the base 2 logarithm of the base dictionary size.
- Bits 7-5 contain the number of "wedges" to substract from the base
- dictionary size to obtain the dictionary size. The size of a wedge
- is (base dictionary size / 16).
+ Lzip divides the distance between any two powers of 2 into 8
+ equally spaced intervals, named "wedges". The dictionary size is
+ calculated by taking a power of 2 (the base size) and substracting
+ from it a number of wedges between 0 and 7. The size of a wedge is
+ (base_size / 16).
+ Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).
+ Bits 7-5 contain the number of wedges (0 to 7) to substract from
+ the base size to obtain the dictionary size.
Valid values for dictionary size range from 4KiB to 512MiB.
`Lzma stream'
@@ -272,9 +281,9 @@ additional information before, between, or after them.
Size of the uncompressed original data.
`Member size (8 bytes)'
- Total size of the member, including header and trailer. This
- facilitates safe recovery of undamaged members from multi-member
- files.
+ Total size of the member, including header and trailer. This field
+ acts as a distributed index, and facilitates safe recovery of
+ undamaged members from multi-member files.

@@ -399,13 +408,13 @@ Concept Index

Tag Table:
Node: Top231
-Node: Introduction900
-Node: Invoking Lziprecover2937
-Node: File Format7982
-Node: Examples9989
-Ref: ddrescue-example11207
-Node: Problems13038
-Node: Concept Index13588
+Node: Introduction910
+Node: Invoking Lziprecover2947
+Node: File Format8073
+Node: Examples10394
+Ref: ddrescue-example11612
+Node: Problems13443
+Node: Concept Index13993

End Tag Table
diff --git a/doc/lziprecover.texinfo b/doc/lziprecover.texinfo
index 22eea8a..872abb4 100644
--- a/doc/lziprecover.texinfo
+++ b/doc/lziprecover.texinfo
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 24 February 2012
-@set VERSION 1.13
+@set UPDATED 27 February 2013
+@set VERSION 1.14-rc1
@dircategory Data Compression
@direntry
@@ -44,7 +44,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
@end menu
@sp 1
-Copyright @copyright{} 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+Copyright @copyright{} 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This manual is free documentation: you have unlimited permission
to copy, distribute and modify it.
@@ -155,7 +155,9 @@ Keep (don't delete) input files during decompression.
@item -l
@itemx --list
Print total file sizes and ratios. The values produced are correct even
-for multi-member files.
+for multi-member files. Use it together with @samp{-v} to see
+information about the members in the file.
+
@item -m
@itemx --merge
@@ -245,6 +247,11 @@ Table of SI and binary prefixes (unit multipliers):
@chapter File Format
@cindex file format
+Perfection is reached, not when there is no longer anything to add, but
+when there is no longer anything to take away.@*
+--- Antoine de Saint-Exupery
+
+@sp 1
In the diagram below, a box like this:
@verbatim
+---+
@@ -280,15 +287,16 @@ All multibyte values are stored in little endian order.
A four byte string, identifying the lzip format, with the value "LZIP".
@item VN (version number, 1 byte)
-Just in case something needs to be modified in the future. Valid values
-are 0 and 1. Version 0 files are deprecated. They can contain only one
-member and lack the @samp{Member size} field.
+Just in case something needs to be modified in the future. 1 for now.
@item DS (coded dictionary size, 1 byte)
-Bits 4-0 contain the base 2 logarithm of the base dictionary size.@*
-Bits 7-5 contain the number of "wedges" to substract from the base
-dictionary size to obtain the dictionary size. The size of a wedge is
-(base dictionary size / 16).@*
+Lzip divides the distance between any two powers of 2 into 8 equally
+spaced intervals, named "wedges". The dictionary size is calculated by
+taking a power of 2 (the base size) and subtracting from it a number of
+wedges between 0 and 7. The size of a wedge is (base_size / 16).@*
+Bits 4-0 contain the base 2 logarithm of the base size (12 to 29).@*
+Bits 7-5 contain the number of wedges (0 to 7) to subtract from the
+base size to obtain the dictionary size.@*
Valid values for dictionary size range from 4KiB to 512MiB.
@item Lzma stream
@@ -302,8 +310,9 @@ CRC of the uncompressed original data.
Size of the uncompressed original data.
@item Member size (8 bytes)
-Total size of the member, including header and trailer. This facilitates
-safe recovery of undamaged members from multi-member files.
+Total size of the member, including header and trailer. This field acts
+as a distributed index, and facilitates safe recovery of undamaged
+members from multi-member files.
@end table
diff --git a/file_index.cc b/file_index.cc
new file mode 100644
index 0000000..41bee41
--- /dev/null
+++ b/file_index.cc
@@ -0,0 +1,134 @@
+/* Lziprecover - Data recovery tool for lzipped files
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "lzip.h"
+#include "file_index.h"
+
+
+const char * format_num( unsigned long long num, // value to format as text
+ unsigned long long limit, // num is scaled down while it exceeds limit
+ const int set_prefix ) // nonzero selects the prefixes: > 0 SI, < 0 binary
+ {
+ const char * const si_prefix[8] =
+ { "k", "M", "G", "T", "P", "E", "Z", "Y" };
+ const char * const binary_prefix[8] =
+ { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+ static bool si = true; // static: the selection persists across calls
+ static char buf[32]; // static result buffer, overwritten by every call
+
+ if( set_prefix ) si = ( set_prefix > 0 );
+ const unsigned factor = ( si ? 1000 : 1024 );
+ const char * const * prefix = ( si ? si_prefix : binary_prefix );
+ const char * p = ""; // stays empty unless num is scaled below
+ bool exact = ( num % factor == 0 ); // num is a whole multiple of factor
+
+ for( int i = 0; i < 8 && ( num > limit || ( exact && num >= factor ) ); ++i ) // exact multiples are scaled too
+ { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
+ snprintf( buf, sizeof buf, "%llu %s", num, p ); // the space is written even when p is empty
+ return buf; // pointer to the static buffer
+ }
+
+
+File_index::File_index( const int infd ) : retval_( 0 ) // index the members found in infd
+ {
+ const long long isize = lseek( infd, 0, SEEK_END ); // offset of the end of file
+ if( isize < 0 )
+ { error_ = "Input file is not seekable :";
+ error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( isize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+ long long pos = isize; // always points to a header or EOF
+ File_header header;
+ File_trailer trailer;
+
+ if( isize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ if( seek_read( infd, header.data, File_header::size, 0 ) != File_header::size ) // header at offset 0
+ { error_ = "Error reading member header :";
+ error_ += std::strerror( errno ); retval_ = 1; return; }
+ if( !header.verify_magic() )
+ { error_ = "Bad magic number (file not in lzip format).";
+ retval_ = 2; return; }
+ if( !header.verify_version() )
+ { error_ = "Version "; error_ += format_num( header.version() );
+ error_ += "member format not supported."; retval_ = 2; return; }
+
+ while( pos >= min_member_size ) // scan backwards from the end of file
+ {
+ if( seek_read( infd, trailer.data, File_trailer::size(),
+ pos - File_trailer::size() ) != File_trailer::size() ) // trailer ending at pos
+ { error_ = "Error reading member trailer :";
+ error_ += std::strerror( errno ); retval_ = 1; break; }
+ const long long member_size = trailer.member_size(); // size stored in the trailer
+ if( member_size < min_member_size || member_size > pos ) // too small, or larger than the bytes before pos
+ {
+ if( member_vector.size() == 0 ) // maybe trailing garbage
+ { --pos; continue; } // retry 1 byte earlier
+ error_ = "Member size in trailer is corrupt at pos ";
+ error_ += format_num( pos - 8 ); retval_ = 2; break;
+ }
+ if( seek_read( infd, header.data, File_header::size,
+ pos - member_size ) != File_header::size ) // where this member should begin
+ { error_ = "Error reading member header :";
+ error_ += std::strerror( errno ); retval_ = 1; break; }
+ if( !header.verify_magic() || !header.verify_version() )
+ {
+ if( member_vector.size() == 0 ) // maybe trailing garbage
+ { --pos; continue; } // retry 1 byte earlier
+ error_ = "Bad header at pos ";
+ error_ += format_num( pos - member_size ); retval_ = 2; break;
+ }
+ if( member_vector.size() == 0 && isize - pos > File_header::size && // checked only before any member is accepted
+ seek_read( infd, header.data, File_header::size, pos ) == File_header::size && // bytes right after pos
+ header.verify_magic() && header.verify_version() )
+ { // last trailer is corrupt
+ error_ = "Member size in trailer is corrupt at pos ";
+ error_ += format_num( isize - 8 ); retval_ = 2; break;
+ }
+ pos -= member_size; // pos is now the start of this member
+ member_vector.push_back( Member( 0, trailer.data_size(), // data pos 0 for now; set after the loop
+ pos, member_size ) );
+ }
+ if( pos != 0 || member_vector.size() == 0 ) // the scan must reach offset 0 and find a member
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ std::reverse( member_vector.begin(), member_vector.end() ); // members were collected last to first
+ for( unsigned i = 0; i < member_vector.size() - 1; ++i ) // each data block starts where the previous one ends
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ member_vector[i+1].dblock.pos( end );
+ }
+ }
diff --git a/file_index.h b/file_index.h
new file mode 100644
index 0000000..2f055b1
--- /dev/null
+++ b/file_index.h
@@ -0,0 +1,82 @@
+/* Lziprecover - Data recovery tool for lzipped files
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef INT64_MAX
+#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
+#endif
+
+
+class Block // a position and a size, both long long
+ {
+ long long pos_, size_; // pos + size <= INT64_MAX
+
+public:
+ Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
+
+ long long pos() const { return pos_; }
+ long long size() const { return size_; }
+ long long end() const { return pos_ + size_; } // first position past the block
+
+ void pos( const long long p ) { pos_ = p; }
+ void size( const long long s ) { size_ = s; }
+
+ bool overlaps( const Block & b ) const // true if each block starts before the other ends
+ { return ( pos_ < b.end() && b.pos_ < end() ); }
+ void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; } // grow this block by 1; b loses its first position
+ };
+
+
+class File_index // positions and sizes of the members of a file
+ {
+ struct Member // one entry per member
+ {
+ Block dblock, mblock; // data block, member block
+
+ Member( const long long dp, const long long ds,
+ const long long mp, const long long ms )
+ : dblock( dp, ds ), mblock( mp, ms ) {}
+ };
+
+ std::vector< Member > member_vector; // filled by the constructor
+ std::string error_; // message set by the constructor on failure
+ int retval_; // 0, or the value 1 or 2 set by the constructor on failure
+
+public:
+ File_index( const int infd );
+
+ const std::string & error() const { return error_; }
+ int retval() const { return retval_; }
+
+ long long data_end() const // end of the last data block, or 0 if there are no members
+ { if( member_vector.size() ) return member_vector.back().dblock.end();
+ else return 0; }
+
+ long long file_end() const // end of the last member block, or 0 if there are no members
+ { if( member_vector.size() ) return member_vector.back().mblock.end();
+ else return 0; }
+
+ const Block & dblock( const int i ) const // i is not range checked
+ { return member_vector[i].dblock; }
+ const Block & mblock( const int i ) const // i is not range checked
+ { return member_vector[i].mblock; }
+ int members() const { return (int)member_vector.size(); } // number of members in the index
+ };
+
+
+const char * format_num( unsigned long long num, // value to format as text
+ unsigned long long limit = -1ULL, // default is the largest value, which num can never exceed
+ const int set_prefix = 0 ); // default 0 leaves the prefix selection unchanged
diff --git a/lzip.h b/lzip.h
index d1c1753..ac3c002 100644
--- a/lzip.h
+++ b/lzip.h
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,41 +17,23 @@
class State
{
- unsigned char st;
+ int st;
public:
enum { states = 12 };
State() : st( 0 ) {}
- unsigned char operator()() const { return st; }
+ int operator()() const { return st; }
bool is_char() const { return st < 7; }
void set_char()
{
- static const unsigned char next[states] =
- { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
+ static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
st = next[st];
}
- void set_match()
- {
- static const unsigned char next[states] =
- { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
- st = next[st];
- }
-
- void set_rep()
- {
- static const unsigned char next[states] =
- { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
- st = next[st];
- }
-
- void set_short_rep()
- {
- static const unsigned char next[states] =
- { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
- st = next[st];
- }
+ void set_match() { st = ( ( st < 7 ) ? 7 : 10 ); }
+ void set_rep() { st = ( ( st < 7 ) ? 8 : 11 ); }
+ void set_short_rep() { st = ( ( st < 7 ) ? 9 : 11 ); }
};
@@ -69,7 +51,7 @@ enum {
dis_slot_bits = 6,
start_dis_model = 4,
end_dis_model = 14,
- modeled_distances = 1 << (end_dis_model / 2),
+ modeled_distances = 1 << (end_dis_model / 2), // 128
dis_align_bits = 4,
dis_align_size = 1 << dis_align_bits,
@@ -87,12 +69,11 @@ enum {
max_dis_states = 4 };
-inline int get_dis_state( int len )
- {
- len -= min_match_len;
- if( len >= max_dis_states ) len = max_dis_states - 1;
- return len;
- }
+inline int get_dis_state( const int len ) // len - min_match_len, capped at max_dis_states - 1
+ { return std::min( len - min_match_len, max_dis_states - 1 ); }
+
+inline int get_lit_state( const uint8_t prev_byte ) // the literal_context_bits most significant bits of prev_byte
+ { return ( prev_byte >> ( 8 - literal_context_bits ) ); }
enum { bit_model_move_bits = 5,
@@ -101,17 +82,17 @@ enum { bit_model_move_bits = 5,
struct Bit_model
{
- unsigned int probability;
+ int probability;
Bit_model() : probability( bit_model_total / 2 ) {}
};
class Pretty_print
{
+ std::string name_;
const char * const stdin_name;
- unsigned int longest_name;
+ unsigned longest_name;
const int verbosity_;
- std::string name_;
mutable bool first_post;
public:
@@ -119,11 +100,11 @@ public:
: stdin_name( "(stdin)" ), longest_name( 0 ), verbosity_( v ),
first_post( false )
{
- const unsigned int stdin_name_len = std::strlen( stdin_name );
- for( unsigned int i = 0; i < filenames.size(); ++i )
+ const unsigned stdin_name_len = std::strlen( stdin_name );
+ for( unsigned i = 0; i < filenames.size(); ++i )
{
const std::string & s = filenames[i];
- const unsigned int len = ( ( s == "-" ) ? stdin_name_len : s.size() );
+ const unsigned len = ( ( s == "-" ) ? stdin_name_len : s.size() );
if( len > longest_name ) longest_name = len;
}
if( longest_name == 0 ) longest_name = stdin_name_len;
@@ -132,7 +113,7 @@ public:
Pretty_print( const std::string & filename, const int v )
: stdin_name( "(stdin)" ), verbosity_( v ), first_post( false )
{
- const unsigned int stdin_name_len = std::strlen( stdin_name );
+ const unsigned stdin_name_len = std::strlen( stdin_name );
longest_name = ( ( filename == "-" ) ? stdin_name_len : filename.size() );
if( longest_name == 0 ) longest_name = stdin_name_len;
set_name( filename );
@@ -148,7 +129,7 @@ public:
void reset() const { if( name_.size() ) first_post = true; }
const char * name() const { return name_.c_str(); }
int verbosity() const { return verbosity_; }
- void operator()( const char * const msg = 0 ) const;
+ void operator()( const char * const msg = 0, FILE * const f = stderr ) const;
};
@@ -159,9 +140,9 @@ class CRC32
public:
CRC32()
{
- for( unsigned int n = 0; n < 256; ++n )
+ for( unsigned n = 0; n < 256; ++n )
{
- unsigned int c = n;
+ unsigned c = n;
for( int k = 0; k < 8; ++k )
{ if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
data[n] = c;
@@ -169,8 +150,10 @@ public:
}
uint32_t operator[]( const uint8_t byte ) const { return data[byte]; }
+
void update( uint32_t & crc, const uint8_t byte ) const
{ crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
+
void update( uint32_t & crc, const uint8_t * const buffer, const int size ) const
{
for( int i = 0; i < size; ++i )
@@ -181,16 +164,15 @@ public:
extern const CRC32 crc32;
-inline int real_bits( const unsigned int value )
+inline int real_bits( unsigned value )
{
- int bits = 0, i = 1;
- unsigned int mask = 1;
- for( ; mask > 0; ++i, mask <<= 1 ) if( value & mask ) bits = i;
+ int bits = 0;
+ while( value > 0 ) { value >>= 1; ++bits; }
return bits;
}
-const uint8_t magic_string[4] = { 'L', 'Z', 'I', 'P' };
+const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
struct File_header
{
@@ -206,11 +188,11 @@ struct File_header
uint8_t version() const { return data[4]; }
bool verify_version() const { return ( data[4] <= 1 ); }
- int dictionary_size() const
+ unsigned dictionary_size() const
{
- int sz = ( 1 << ( data[5] & 0x1F ) );
- if( sz > min_dictionary_size && sz <= max_dictionary_size )
- sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 0x07 );
+ unsigned sz = ( 1 << ( data[5] & 0x1F ) );
+ if( sz > min_dictionary_size )
+ sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
return sz;
}
@@ -243,36 +225,36 @@ struct File_trailer
static int size( const int version = 1 )
{ return ( ( version >= 1 ) ? 20 : 12 ); }
- uint32_t data_crc() const
+ unsigned data_crc() const
{
- uint32_t tmp = 0;
+ unsigned tmp = 0;
for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
- void data_crc( uint32_t crc )
+ void data_crc( unsigned crc )
{ for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }
- long long data_size() const
+ unsigned long long data_size() const
{
- long long tmp = 0;
+ unsigned long long tmp = 0;
for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
- void data_size( long long sz )
+ void data_size( unsigned long long sz )
{
for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
}
- long long member_size() const
+ unsigned long long member_size() const
{
- long long tmp = 0;
+ unsigned long long tmp = 0;
for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
return tmp;
}
- void member_size( long long sz )
+ void member_size( unsigned long long sz )
{
for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; }
}
@@ -286,35 +268,9 @@ struct Error
};
-#ifndef LLONG_MAX
-#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
-#endif
-#ifndef LLONG_MIN
-#define LLONG_MIN (-LLONG_MAX - 1LL)
-#endif
-#ifndef ULLONG_MAX
-#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
-#endif
-
-
-class Block
- {
- long long pos_, size_; // pos + size <= LLONG_MAX
-
-public:
- Block( const long long p, const long long s ) : pos_( p ), size_( s ) {}
-
- long long pos() const { return pos_; }
- long long size() const { return size_; }
- long long end() const { return pos_ + size_; }
-
- void pos( const long long p ) { pos_ = p; }
- void size( const long long s ) { size_ = s; }
-
- bool overlaps( const Block & b ) const
- { return ( pos_ < b.end() && b.pos_ < end() ); }
- void shift( Block & b ) { ++size_; ++b.pos_; --b.size_; }
- };
+inline unsigned long long positive_diff( const unsigned long long x, // returns x - y, or 0 if x <= y
+ const unsigned long long y )
+ { return ( ( x > y ) ? x - y : 0 ); } // never wraps around below 0
// defined in decoder.cc
@@ -322,13 +278,11 @@ int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
// defined in main.cc
-extern int verbosity;
-const char * format_num( long long num, long long limit = LLONG_MAX,
- const int set_prefix = 0 );
int open_instream( const std::string & name, struct stat * const in_statsp,
const bool to_stdout, const bool reg_only = false );
int open_outstream_rw( const std::string & output_filename,
const bool force );
+void show_header( const File_header & header );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void internal_error( const char * const msg );
@@ -337,25 +291,32 @@ void internal_error( const char * const msg );
void cleanup_and_fail( const std::string & output_filename,
const int outfd, const int retval );
bool copy_file( const int infd, const int outfd,
- const long long size = LLONG_MAX );
-bool try_decompress( const int fd, const long long file_size,
+ const long long max_size = -1 );
+bool try_decompress( const int fd, const unsigned long long file_size,
long long * failure_posp = 0 );
-bool verify_header( const File_header & header );
-bool verify_single_member( const int fd, const long long file_size );
+bool verify_header( const File_header & header, const int verbosity );
+bool verify_single_member( const int fd, const long long file_size,
+ const int verbosity );
int merge_files( const std::vector< std::string > & filenames,
- const std::string & output_filename, const bool force );
+ const std::string & output_filename, const int verbosity,
+ const bool force );
// defined in range_dec.cc
-int list_file( const std::string & input_filename );
+int list_files( const std::vector< std::string > & filenames,
+ const int verbosity );
int range_decompress( const std::string & input_filename,
const std::string & default_output_filename,
- const std::string & range_string,
- const bool to_stdout, const bool force );
+ const std::string & range_string, const int verbosity,
+ const bool force, const bool to_stdout );
// defined in repair.cc
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos );
int repair_file( const std::string & input_filename,
- const std::string & output_filename, const bool force );
+ const std::string & output_filename, const int verbosity,
+ const bool force );
// defined in split.cc
int split_file( const std::string & input_filename,
- const std::string & default_output_filename, const bool force );
+ const std::string & default_output_filename,
+ const int verbosity, const bool force );
diff --git a/main.cc b/main.cc
index 7480038..0b7a88a 100644
--- a/main.cc
+++ b/main.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -38,6 +38,7 @@
#include <utime.h>
#include <sys/stat.h>
#if defined(__MSVCRT__)
+#include <io.h>
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define SIGHUP SIGTERM
@@ -47,6 +48,9 @@
#define S_IROTH 0
#define S_IWOTH 0
#endif
+#if defined(__OS2__)
+#include <io.h>
+#endif
#include "arg_parser.h"
#include "lzip.h"
@@ -61,7 +65,7 @@ namespace {
const char * const Program_name = "Lziprecover";
const char * const program_name = "lziprecover";
-const char * const program_year = "2012";
+const char * const program_year = "2013";
const char * invocation_name = 0;
#ifdef O_BINARY
@@ -80,6 +84,7 @@ enum Mode { m_none, m_decompress, m_generate, m_list, m_merge, m_range,
std::string output_filename;
int outfd = -1;
+int verbosity = 0;
const mode_t usr_rw = S_IRUSR | S_IWUSR;
const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
mode_t outfd_mode = usr_rw;
@@ -125,10 +130,30 @@ void show_version()
"There is NO WARRANTY, to the extent permitted by law.\n" );
}
+} // end namespace
+
+void show_header( const File_header & header ) // print version and dictionary size to stderr
+ {
+ const char * const prefix[8] =
+ { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
+ enum { factor = 1024 };
+ const char * p = ""; // binary prefix; stays empty if num is not scaled
+ const char * np = " "; // padding printed only when there is no prefix
+ unsigned num = header.dictionary_size();
+ bool exact = ( num % factor == 0 ); // num is a whole multiple of factor
+
+ for( int i = 0; i < 8 && ( num > 9999 || ( exact && num >= factor ) ); ++i ) // scale values above 9999 and exact multiples
+ { num /= factor; if( num % factor != 0 ) exact = false;
+ p = prefix[i]; np = ""; }
+ std::fprintf( stderr, "version %d, dictionary size %s%4u %sB. ", // no newline is written
+ header.version(), np, num, p );
+ }
+
+namespace {
-void one_file( const int argind, const int arguments )
+void one_file( const int files )
{
- if( argind + 1 != arguments )
+ if( files != 1 )
{
show_error( "You must specify exactly 1 file.", 0, true );
std::exit( 1 );
@@ -159,6 +184,40 @@ int extension_index( const std::string & name )
return -1;
}
+} // end namespace
+
+int open_instream( const std::string & name, struct stat * const in_statsp, // open name read-only and check its file type
+ const bool to_stdout, const bool reg_only )
+ {
+ int infd = open( name.c_str(), O_RDONLY | o_binary );
+ if( infd < 0 )
+ {
+ if( verbosity >= 0 ) // no message when verbosity is negative
+ std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
+ program_name, name.c_str(), std::strerror( errno ) );
+ }
+ else
+ {
+ const int i = fstat( infd, in_statsp ); // result is stored in *in_statsp
+ const mode_t mode = in_statsp->st_mode;
+ const bool can_read = ( i == 0 && !reg_only && // block/char device, FIFO or socket, and reg_only not set
+ ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
+ S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
+ if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) ) // non-regular files need to_stdout and can_read
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
+ program_name, name.c_str(),
+ ( can_read && !to_stdout ) ?
+ " and '--stdout' was not specified" : "" );
+ close( infd );
+ infd = -1;
+ }
+ }
+ return infd; // the open descriptor, or a negative value on failure
+ }
+
+namespace {
void set_d_outname( const std::string & name, const int i )
{
@@ -300,16 +359,17 @@ void show_trailing_garbage( const uint8_t * const data, const int size,
int decompress( const int infd, const Pretty_print & pp, const bool testing )
{
+ const char * const ok_msg = ( testing ? "ok\n" : "done\n" );
int retval = 0;
try {
+ unsigned long long partial_file_pos = 0;
Range_decoder rdec( infd );
- long long partial_file_pos = 0;
- for( bool first_member = true; ; first_member = false, pp.reset() )
+ for( bool first_member = true; ; first_member = false )
{
File_header header;
rdec.reset_member_position();
- const int size = rdec.read( header.data, File_header::size );
+ const int size = rdec.read_data( header.data, File_header::size );
if( rdec.finished() ) // End Of File
{
if( first_member )
@@ -339,13 +399,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
{ pp( "Invalid dictionary size in member header" ); retval = 2; break; }
if( verbosity >= 2 || ( verbosity == 1 && first_member ) )
- {
- pp();
- if( verbosity >= 2 )
- std::fprintf( stderr, "version %d, dictionary size %7sB. ",
- header.version(),
- format_num( header.dictionary_size(), 9999, -1 ) );
- }
+ { pp(); if( verbosity >= 2 ) show_header( header ); }
LZ_decoder decoder( header, rdec, outfd );
const int result = decoder.decode_member( pp );
@@ -356,17 +410,15 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
{
pp();
if( result == 2 )
- std::fprintf( stderr, "File ends unexpectedly at pos %lld\n",
+ std::fprintf( stderr, "File ends unexpectedly at pos %llu\n",
partial_file_pos );
else
- std::fprintf( stderr, "Decoder error at pos %lld\n",
+ std::fprintf( stderr, "Decoder error at pos %llu\n",
partial_file_pos );
}
retval = 2; break;
}
- if( verbosity >= 2 )
- { if( testing ) std::fprintf( stderr, "ok\n" );
- else std::fprintf( stderr, "done\n" ); }
+ if( verbosity >= 2 ) { std::fprintf( stderr, ok_msg ); pp.reset(); }
}
}
catch( std::bad_alloc )
@@ -375,9 +427,7 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
retval = 1;
}
catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; }
- if( verbosity == 1 && retval == 0 )
- { if( testing ) std::fprintf( stderr, "ok\n" );
- else std::fprintf( stderr, "done\n" ); }
+ if( verbosity == 1 && retval == 0 ) std::fprintf( stderr, ok_msg );
return retval;
}
@@ -399,65 +449,6 @@ void set_signals()
} // end namespace
-int verbosity = 0;
-
-
-const char * format_num( long long num, long long limit,
- const int set_prefix )
- {
- const char * const si_prefix[8] =
- { "k", "M", "G", "T", "P", "E", "Z", "Y" };
- const char * const binary_prefix[8] =
- { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
- static bool si = true;
- static char buf[32];
-
- if( set_prefix ) si = ( set_prefix > 0 );
- const int factor = ( si ? 1000 : 1024 );
- const char * const * prefix = ( si ? si_prefix : binary_prefix );
- const char * p = "";
- bool exact = ( num % factor == 0 );
-
- for( int i = 0; i < 8 && ( llabs( num ) > limit ||
- ( exact && llabs( num ) >= factor ) ); ++i )
- { num /= factor; if( num % factor != 0 ) exact = false; p = prefix[i]; }
- snprintf( buf, sizeof buf, "%lld %s", num, p );
- return buf;
- }
-
-
-int open_instream( const std::string & name, struct stat * const in_statsp,
- const bool to_stdout, const bool reg_only )
- {
- int infd = open( name.c_str(), O_RDONLY | o_binary );
- if( infd < 0 )
- {
- if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Can't open input file '%s': %s.\n",
- program_name, name.c_str(), std::strerror( errno ) );
- }
- else
- {
- const int i = fstat( infd, in_statsp );
- const mode_t & mode = in_statsp->st_mode;
- const bool can_read = ( i == 0 && !reg_only &&
- ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
- S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
- if( i != 0 || ( !S_ISREG( mode ) && ( !to_stdout || !can_read ) ) )
- {
- if( verbosity >= 0 )
- std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
- program_name, name.c_str(),
- ( can_read && !to_stdout ) ?
- " and '--stdout' was not specified" : "" );
- close( infd );
- infd = -1;
- }
- }
- return infd;
- }
-
-
int open_outstream_rw( const std::string & output_filename,
const bool force )
{
@@ -490,7 +481,7 @@ void show_error( const char * const msg, const int errcode, const bool help )
std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fprintf( stderr, "\n" );
}
- if( help && invocation_name && invocation_name[0] )
+ if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name );
}
@@ -507,15 +498,15 @@ void internal_error( const char * const msg )
int main( const int argc, const char * const argv[] )
{
+ std::string input_filename;
+ std::string default_output_filename;
+ std::string range_string;
+ std::vector< std::string > filenames;
int infd = -1;
Mode program_mode = m_none;
bool force = false;
bool keep_input_files = false;
bool to_stdout = false;
- std::string input_filename;
- std::string default_output_filename;
- std::string range_string;
- std::vector< std::string > filenames;
invocation_name = argv[0];
const Arg_parser::Option options[] =
@@ -546,7 +537,7 @@ int main( const int argc, const char * const argv[] )
{
const int code = parser.code( argind );
if( !code ) break; // no more options
- const std::string & arg = parser.argument( argind ).c_str();
+ const std::string & arg = parser.argument( argind );
switch( code )
{
case 'c': to_stdout = true; break;
@@ -570,8 +561,8 @@ int main( const int argc, const char * const argv[] )
} // end process options
#if defined(__MSVCRT__) || defined(__OS2__)
- _fsetmode( stdin, "b" );
- _fsetmode( stdout, "b" );
+ setmode( STDIN_FILENO, O_BINARY );
+ setmode( STDOUT_FILENO, O_BINARY );
#endif
if( program_mode == m_none )
@@ -580,6 +571,13 @@ int main( const int argc, const char * const argv[] )
return 1;
}
+ bool filenames_given = false;
+ for( ; argind < parser.arguments(); ++argind )
+ {
+ filenames.push_back( parser.argument( argind ) );
+ if( filenames.back() != "-" ) filenames_given = true;
+ }
+
switch( program_mode )
{
case m_generate:
@@ -588,29 +586,27 @@ int main( const int argc, const char * const argv[] )
case m_none: internal_error( "invalid operation" ); break;
case m_decompress: break;
case m_list:
- one_file( argind, parser.arguments() );
- return list_file( parser.argument( argind ) );
+ if( filenames.size() < 1 )
+ { show_error( "You must specify at least 1 file.", 0, true ); return 1; }
+ return list_files( filenames, verbosity );
case m_merge:
- for( ; argind < parser.arguments(); ++argind )
- filenames.push_back( parser.argument( argind ) );
if( filenames.size() < 2 )
{ show_error( "You must specify at least 2 files.", 0, true ); return 1; }
if( !default_output_filename.size() )
default_output_filename = insert_fixed( filenames[0] );
- return merge_files( filenames, default_output_filename, force );
+ return merge_files( filenames, default_output_filename, verbosity, force );
case m_range:
- one_file( argind, parser.arguments() );
- return range_decompress( parser.argument( argind ),
- default_output_filename, range_string,
- to_stdout, force );
+ one_file( filenames.size() );
+ return range_decompress( filenames[0], default_output_filename,
+ range_string, verbosity, force, to_stdout );
case m_repair:
- one_file( argind, parser.arguments() );
+ one_file( filenames.size() );
if( !default_output_filename.size() )
- default_output_filename = insert_fixed( parser.argument( argind ) );
- return repair_file( parser.argument( argind ), default_output_filename, force );
+ default_output_filename = insert_fixed( filenames[0] );
+ return repair_file( filenames[0], default_output_filename, verbosity, force );
case m_split:
- one_file( argind, parser.arguments() );
- return split_file( parser.argument( argind ), default_output_filename, force );
+ one_file( filenames.size() );
+ return split_file( filenames[0], default_output_filename, verbosity, force );
case m_test: break;
}
@@ -619,13 +615,6 @@ int main( const int argc, const char * const argv[] )
else if( program_mode != m_decompress )
internal_error( "invalid decompressor operation" );
- bool filenames_given = false;
- for( ; argind < parser.arguments(); ++argind )
- {
- if( parser.argument( argind ) != "-" ) filenames_given = true;
- filenames.push_back( parser.argument( argind ) );
- }
-
if( filenames.empty() ) filenames.push_back("-");
if( !to_stdout && program_mode != m_test &&
( filenames_given || default_output_filename.size() ) )
@@ -634,7 +623,7 @@ int main( const int argc, const char * const argv[] )
Pretty_print pp( filenames, verbosity );
int retval = 0;
- for( unsigned int i = 0; i < filenames.size(); ++i )
+ for( unsigned i = 0; i < filenames.size(); ++i )
{
struct stat in_stats;
output_filename.clear();
@@ -653,7 +642,7 @@ int main( const int argc, const char * const argv[] )
outfd_mode = all_rw;
if( !open_outstream( force ) )
{
- if( outfd == -1 && retval < 1 ) retval = 1;
+ if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
continue;
}
@@ -663,7 +652,6 @@ int main( const int argc, const char * const argv[] )
else
{
input_filename = filenames[i];
- const int eindex = extension_index( input_filename );
infd = open_instream( input_filename, &in_stats, to_stdout );
if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
if( program_mode != m_test )
@@ -671,11 +659,11 @@ int main( const int argc, const char * const argv[] )
if( to_stdout ) outfd = STDOUT_FILENO;
else
{
- set_d_outname( input_filename, eindex );
+ set_d_outname( input_filename, extension_index( input_filename ) );
outfd_mode = usr_rw;
if( !open_outstream( force ) )
{
- if( outfd == -1 && retval < 1 ) retval = 1;
+ if( retval < 1 ) retval = 1;
close( infd ); infd = -1;
continue;
}
diff --git a/merge.cc b/merge.cc
index fe40c63..7ef0455 100644
--- a/merge.cc
+++ b/merge.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,6 +30,7 @@
#include "lzip.h"
#include "decoder.h"
+#include "file_index.h"
namespace {
@@ -39,7 +40,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector,
{
const int buffer_size = 65536;
std::vector< uint8_t * > buffer_vector( infd_vector.size() );
- for( unsigned int i = 0; i < infd_vector.size(); ++i )
+ for( unsigned i = 0; i < infd_vector.size(); ++i )
buffer_vector[i] = new uint8_t[buffer_size];
Block b( 0, 0 );
long long partial_pos = 0;
@@ -53,7 +54,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector,
{ show_error( "Error reading input file", errno ); error = true; break; }
if( rd > 0 )
{
- for( unsigned int i = 1; i < infd_vector.size(); ++i )
+ for( unsigned i = 1; i < infd_vector.size(); ++i )
if( readblock( infd_vector[i], buffer_vector[i], rd ) != rd )
{ show_error( "Error reading input file", errno );
error = true; break; }
@@ -66,7 +67,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector,
{
while( i < rd && b.pos() == 0 )
{
- for( unsigned int j = 1; j < infd_vector.size(); ++j )
+ for( unsigned j = 1; j < infd_vector.size(); ++j )
if( buffer_vector[0][i] != buffer_vector[j][i] )
{ b.pos( partial_pos + i ); break; } // begin block
++i;
@@ -74,7 +75,7 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector,
while( i < rd && b.pos() > 0 )
{
++equal_bytes;
- for( unsigned int j = 1; j < infd_vector.size(); ++j )
+ for( unsigned j = 1; j < infd_vector.size(); ++j )
if( buffer_vector[0][i] != buffer_vector[j][i] )
{ equal_bytes = 0; break; }
if( equal_bytes >= 2 ) // end block
@@ -96,18 +97,18 @@ bool copy_and_diff_file( const std::vector< int > & infd_vector,
b.size( partial_pos - b.pos() );
block_vector.push_back( b );
}
- for( unsigned int i = 0; i < infd_vector.size(); ++i )
+ for( unsigned i = 0; i < infd_vector.size(); ++i )
delete[] buffer_vector[i];
return !error;
}
-int ipow( const unsigned int base, const unsigned int exponent )
+int ipow( const unsigned base, const unsigned exponent )
{
- int result = 1;
- for( unsigned int i = 0; i < exponent; ++i )
+ unsigned result = 1;
+ for( unsigned i = 0; i < exponent; ++i )
{
- if( INT_MAX / base >= (unsigned int)result ) result *= base;
+ if( INT_MAX / base >= result ) result *= base;
else { result = INT_MAX; break; }
}
return result;
@@ -115,14 +116,15 @@ int ipow( const unsigned int base, const unsigned int exponent )
int open_input_files( const std::vector< std::string > & filenames,
- std::vector< int > & infd_vector, long long & isize )
+ std::vector< int > & infd_vector, long long & isize,
+ const int verbosity )
{
bool identical = false;
- for( unsigned int i = 1; i < filenames.size(); ++i )
+ for( unsigned i = 1; i < filenames.size(); ++i )
if( filenames[0] == filenames[i] )
{ identical = true; break; }
if( !identical )
- for( unsigned int i = 0; i < filenames.size(); ++i )
+ for( unsigned i = 0; i < filenames.size(); ++i )
{
struct stat in_stats;
ino_t st_ino0 = 0;
@@ -136,7 +138,7 @@ int open_input_files( const std::vector< std::string > & filenames,
if( identical ) { show_error( "Two input files are the same." ); return 1; }
isize = 0;
- for( unsigned int i = 0; i < filenames.size(); ++i )
+ for( unsigned i = 0; i < filenames.size(); ++i )
{
const long long tmp = lseek( infd_vector[i], 0, SEEK_END );
if( tmp < 0 )
@@ -155,11 +157,11 @@ int open_input_files( const std::vector< std::string > & filenames,
{ show_error( "Sizes of input files are different." ); return 1; }
}
- for( unsigned int i = 0; i < filenames.size(); ++i )
- if( !verify_single_member( infd_vector[i], isize ) )
+ for( unsigned i = 0; i < filenames.size(); ++i )
+ if( !verify_single_member( infd_vector[i], isize, verbosity ) )
return 2;
- for( unsigned int i = 0; i < filenames.size(); ++i )
+ for( unsigned i = 0; i < filenames.size(); ++i )
{
if( lseek( infd_vector[i], 0, SEEK_SET ) < 0 )
{ show_error( "Seek error in input file", errno ); return 1; }
@@ -189,19 +191,21 @@ void cleanup_and_fail( const std::string & output_filename,
}
-bool copy_file( const int infd, const int outfd, const long long size )
+// max_size < 0 means no size limit.
+bool copy_file( const int infd, const int outfd, const long long max_size )
{
- long long rest = size;
const int buffer_size = 65536;
+ // remaining number of bytes to copy
+ long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size );
uint8_t * const buffer = new uint8_t[buffer_size];
bool error = false;
- while( true )
+ while( rest > 0 )
{
- const int block_size = std::min( (long long)buffer_size, rest );
- if( block_size <= 0 ) break;
- const int rd = readblock( infd, buffer, block_size );
- if( rd != block_size && errno )
+ const int size = std::min( (long long)buffer_size, rest );
+ if( max_size >= 0 ) rest -= size;
+ const int rd = readblock( infd, buffer, size );
+ if( rd != size && errno )
{ show_error( "Error reading input file", errno ); error = true; break; }
if( rd > 0 )
{
@@ -209,24 +213,21 @@ bool copy_file( const int infd, const int outfd, const long long size )
if( wr != rd )
{ show_error( "Error writing output file", errno );
error = true; break; }
- rest -= rd;
}
- if( rd < block_size ) break; // EOF
+ if( rd < size ) break; // EOF
}
delete[] buffer;
return !error;
}
-bool try_decompress( const int fd, const long long file_size,
+bool try_decompress( const int fd, const unsigned long long file_size,
long long * failure_posp )
{
try {
Range_decoder rdec( fd );
File_header header;
- rdec.reset_member_position();
- for( int i = 0; i < File_header::size; ++i )
- header.data[i] = rdec.get_byte();
+ rdec.read_data( header.data, File_header::size );
if( !rdec.finished() && // End Of File
header.verify_magic() &&
header.version() == 1 &&
@@ -251,7 +252,7 @@ bool try_decompress( const int fd, const long long file_size,
}
-bool verify_header( const File_header & header )
+bool verify_header( const File_header & header, const int verbosity )
{
if( !header.verify_magic() )
{
@@ -274,13 +275,14 @@ bool verify_header( const File_header & header )
}
-bool verify_single_member( const int fd, const long long file_size )
+bool verify_single_member( const int fd, const long long file_size,
+ const int verbosity )
{
File_header header;
if( lseek( fd, 0, SEEK_SET ) < 0 ||
readblock( fd, header.data, File_header::size ) != File_header::size )
{ show_error( "Error reading member header", errno ); return false; }
- if( !verify_header( header ) ) return false;
+ if( !verify_header( header, verbosity ) ) return false;
File_trailer trailer;
if( lseek( fd, -File_trailer::size(), SEEK_END ) < 0 ||
@@ -292,7 +294,7 @@ bool verify_single_member( const int fd, const long long file_size )
if( member_size < file_size &&
lseek( fd, -member_size, SEEK_END ) > 0 &&
readblock( fd, header.data, File_header::size ) == File_header::size &&
- verify_header( header ) )
+ verify_header( header, verbosity ) )
show_error( "Input file has more than 1 member. Split it first." );
else
show_error( "Member size in input file trailer is corrupt." );
@@ -303,11 +305,12 @@ bool verify_single_member( const int fd, const long long file_size )
int merge_files( const std::vector< std::string > & filenames,
- const std::string & output_filename, const bool force )
+ const std::string & output_filename, const int verbosity,
+ const bool force )
{
std::vector< int > infd_vector( filenames.size() );
long long isize = 0;
- const int retval = open_input_files( filenames, infd_vector, isize );
+ const int retval = open_input_files( filenames, infd_vector, isize, verbosity );
if( retval >= 0 ) return retval;
const int outfd = open_outstream_rw( output_filename, force );
@@ -353,7 +356,7 @@ int merge_files( const std::vector< std::string > & filenames,
std::fflush( stdout );
}
int tmp = var;
- for( unsigned int i = 0; i < block_vector.size(); ++i )
+ for( unsigned i = 0; i < block_vector.size(); ++i )
{
const int infd = infd_vector[tmp % filenames.size()];
tmp /= filenames.size();
diff --git a/range_dec.cc b/range_dec.cc
index d8e171a..d056271 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,7 +19,6 @@
#include <algorithm>
#include <cerrno>
-#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
@@ -31,83 +30,11 @@
#include "lzip.h"
#include "decoder.h"
+#include "file_index.h"
namespace {
-class Member
- {
- Block dblock_, mblock_; // data block, member block
-
-public:
- Member( const long long dp, const long long ds,
- const long long mp, const long long ms )
- : dblock_( dp, ds ), mblock_( mp, ms ) {}
-
- const Block & dblock() const { return dblock_; }
- Block & dblock() { return dblock_; }
- const Block & mblock() const { return mblock_; }
- Block & mblock() { return mblock_; }
- };
-
-
-int seek_read( const int fd, uint8_t * const buf, const int size,
- const long long pos )
- {
- if( lseek( fd, pos, SEEK_SET ) == pos )
- return readblock( fd, buf, size );
- return 0;
- }
-
-
-class Member_index
- {
- std::vector< Member > member_vector;
-
-public:
- Member_index( const int infd, const long long isize )
- {
- long long pos = isize; // always points to a header or EOF
- File_header header;
- File_trailer trailer;
- while( pos >= min_member_size )
- {
- if( seek_read( infd, trailer.data, File_trailer::size(),
- pos - File_trailer::size() ) != File_trailer::size() )
- { show_error( "Read error", errno ); std::exit( 1 ); }
- const long long member_size = trailer.member_size();
- if( member_size < min_member_size || pos < member_size ) break;
- if( seek_read( infd, header.data, File_header::size,
- pos - member_size ) != File_header::size )
- { show_error( "Read error", errno ); std::exit( 1 ); }
- if( !header.verify_magic() || !header.verify_version() ) break;
- pos -= member_size;
- member_vector.push_back( Member( 0, trailer.data_size(),
- pos, member_size ) );
- }
- if( pos != 0 || member_vector.size() == 0 )
- {
- show_error( "Member size in input file trailer is corrupt." );
- std::exit( 1 );
- }
- std::reverse( member_vector.begin(), member_vector.end() );
- for( unsigned int i = 0; i < member_vector.size() - 1; ++i )
- member_vector[i+1].dblock().pos( member_vector[i].dblock().end() );
- }
-
- long long data_end() const
- { if( member_vector.size() ) return member_vector.back().dblock().end();
- else return 0; }
-
- const Member & member( const int i ) const { return member_vector[i]; }
- const Block & dblock( const int i ) const
- { return member_vector[i].dblock(); }
- const Block & mblock( const int i ) const
- { return member_vector[i].mblock(); }
- int members() const { return (int)member_vector.size(); }
- };
-
-
// Returns the number of chars read, or 0 if error.
//
int parse_long_long( const char * const ptr, long long & value )
@@ -115,7 +42,7 @@ int parse_long_long( const char * const ptr, long long & value )
char * tail;
errno = 0;
value = strtoll( ptr, &tail, 0 );
- if( tail == ptr || errno ) return 0;
+ if( tail == ptr || errno || value < 0 ) return 0;
int c = tail - ptr;
if( ptr[c] )
@@ -141,7 +68,7 @@ int parse_long_long( const char * const ptr, long long & value )
if( ptr[c] == 'B' ) ++c;
for( int i = 0; i < exponent; ++i )
{
- if( LLONG_MAX / factor >= llabs( value ) ) value *= factor;
+ if( INT64_MAX / factor >= value ) value *= factor;
else return 0;
}
}
@@ -156,17 +83,17 @@ void parse_range( const char * const ptr, Block & range )
{
long long value = 0;
int c = parse_long_long( ptr, value ); // pos
- if( c && value >= 0 && value < LLONG_MAX &&
+ if( c && value >= 0 && value < INT64_MAX &&
( ptr[c] == 0 || ptr[c] == ',' || ptr[c] == '-' ) )
{
range.pos( value );
- if( ptr[c] == 0 ) { range.size( LLONG_MAX - value ); return; }
+ if( ptr[c] == 0 ) { range.size( INT64_MAX - value ); return; }
const bool issize = ( ptr[c] == ',' );
c = parse_long_long( ptr + c + 1, value ); // size
if( c && value > 0 && ( issize || value > range.pos() ) )
{
if( !issize ) value -= range.pos();
- if( LLONG_MAX - range.pos() >= value ) { range.size( value ); return; }
+ if( INT64_MAX - range.pos() >= value ) { range.size( value ); return; }
}
}
show_error( "Bad decompression range.", 0, true );
@@ -182,132 +109,138 @@ bool safe_seek( const int fd, const long long pos )
int decompress_member( const int infd, const int outfd,
- const Pretty_print & pp, const Member & member,
- const long long outskip, const long long outend )
+ const Pretty_print & pp,
+ const unsigned long long mpos,
+ const unsigned long long outskip,
+ const unsigned long long outend )
{
- int retval = 0;
-
try {
Range_decoder rdec( infd );
File_header header;
- int size;
- for( size = 0; size < File_header::size && !rdec.finished(); ++size )
- header.data[size] = rdec.get_byte();
+ rdec.read_data( header.data, File_header::size );
if( rdec.finished() ) // End Of File
- { pp( "Error reading member header" ); retval = 1; }
+ { pp( "Error reading member header" ); return 1; }
if( !header.verify_magic() )
- { pp( "Bad magic number (file not in lzip format)" ); retval = 2; }
+ { pp( "Bad magic number (file not in lzip format)" ); return 2; }
if( !header.verify_version() )
{
- if( verbosity >= 0 )
+ if( pp.verbosity() >= 0 )
{ pp();
std::fprintf( stderr, "Version %d member format not supported.\n",
header.version() ); }
- retval = 2;
+ return 2;
}
if( header.dictionary_size() < min_dictionary_size ||
header.dictionary_size() > max_dictionary_size )
- { pp( "Invalid dictionary size in member header" ); retval = 2; }
+ { pp( "Invalid dictionary size in member header" ); return 2; }
- if( pp.verbosity() >= 2 )
- {
- pp();
- std::fprintf( stderr, "version %d, dictionary size %7sB. ",
- header.version(),
- format_num( header.dictionary_size(), 9999, -1 ) );
- }
- LZ_decoder decoder( header, rdec, outfd, outskip, outend );
+ if( pp.verbosity() >= 2 ) { pp(); show_header( header ); }
+ LZ_decoder decoder( header, rdec, outfd, outskip, outend );
const int result = decoder.decode_member( pp );
if( result != 0 )
{
- if( verbosity >= 0 && result <= 2 )
+ if( pp.verbosity() >= 0 && result <= 2 )
{
pp();
if( result == 2 )
- std::fprintf( stderr, "File ends unexpectedly at pos %lld\n",
- member.mblock().pos() + rdec.member_position() );
+ std::fprintf( stderr, "File ends unexpectedly at pos %llu\n",
+ mpos + rdec.member_position() );
else
- std::fprintf( stderr, "Decoder error at pos %lld\n",
- member.mblock().pos() + rdec.member_position() );
+ std::fprintf( stderr, "Decoder error at pos %llu\n",
+ mpos + rdec.member_position() );
}
- retval = 2;
+ return 2;
}
if( pp.verbosity() >= 2 ) std::fprintf( stderr, "done\n" );
}
catch( std::bad_alloc )
{
pp( "Not enough memory. Find a machine with more memory" );
- retval = 1;
+ return 1;
}
- catch( Error e ) { pp(); show_error( e.msg, errno ); retval = 1; }
- return retval;
+ catch( Error e ) { pp(); show_error( e.msg, errno ); return 1; }
+ return 0;
}
-} // end namespace
-
-int list_file( const std::string & input_filename )
+int list_file( const std::string & input_filename, const Pretty_print & pp )
{
struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
- const long long isize = lseek( infd, 0, SEEK_END );
- if( isize < 0 )
- { show_error( "Input file is not seekable", errno ); return 1; }
- if( isize < min_member_size )
- { show_error( "Input file is too short." ); return 2; }
- Member_index member_index( infd, isize );
+ File_index file_index( infd );
+ close( infd );
+ if( file_index.retval() != 0 )
+ { show_error( file_index.error().c_str() ); return file_index.retval(); }
- if( verbosity >= 0 )
+ if( pp.verbosity() >= 0 )
{
- if( verbosity >= 1 )
+ const unsigned long long data_size = file_index.data_end();
+ const unsigned long long file_size = file_index.file_end();
+ pp( 0, stdout );
+ if( data_size > 0 && file_size > 0 )
+ std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
+ (double)data_size / file_size,
+ ( 8.0 * file_size ) / data_size,
+ 100.0 * ( 1.0 - ( (double)file_size / data_size ) ) );
+ std::printf( "decompressed size %9llu, compressed size %8llu.\n",
+ data_size, file_size );
+
+ if( pp.verbosity() >= 1 && file_index.members() > 1 )
{
- std::printf( "Total members in file = %d.\n", member_index.members() );
- for( int i = 0; i < member_index.members(); ++i )
- {
- const Block & db = member_index.dblock( i );
- const Block & mb = member_index.mblock( i );
- std::printf( "Member %3d data pos %9lld data size %7lld "
- "member pos %9lld member size %7lld.\n", i,
- db.pos(), db.size(), mb.pos(), mb.size() );
- }
+ std::printf( "Total members in file = %d.\n", file_index.members() );
+ if( pp.verbosity() >= 2 )
+ for( int i = 0; i < file_index.members(); ++i )
+ {
+ const Block & db = file_index.dblock( i );
+ const Block & mb = file_index.mblock( i );
+ std::printf( "Member %3d data pos %9llu data size %7llu "
+ "member pos %9llu member size %7llu.\n", i + 1,
+ db.pos(), db.size(), mb.pos(), mb.size() );
+ }
std::printf( "\n" );
}
-
- const long long data_size = member_index.data_end();
- if( data_size > 0 && isize > 0 )
- std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.\n",
- (double)data_size / isize,
- ( 8.0 * isize ) / data_size,
- 100.0 * ( 1.0 - ( (double)isize / data_size ) ) );
- std::printf( "decompressed size %9lld, compressed size %8lld.\n",
- data_size, isize );
}
return 0;
}
+} // end namespace
+
+
+int list_files( const std::vector< std::string > & filenames,
+ const int verbosity )
+ {
+ Pretty_print pp( filenames, verbosity );
+ int retval = 0;
+ for( unsigned i = 0; i < filenames.size(); ++i )
+ {
+ pp.set_name( filenames[i] );
+ const int tmp = list_file( filenames[i], pp );
+ if( tmp > retval ) retval = tmp;
+ }
+ return retval;
+ }
+
int range_decompress( const std::string & input_filename,
const std::string & output_filename,
- const std::string & range_string,
- const bool to_stdout, const bool force )
+ const std::string & range_string, const int verbosity,
+ const bool force, const bool to_stdout )
{
Block range( 0, 0 );
parse_range( range_string.c_str(), range );
struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true );
if( infd < 0 ) return 1;
- const long long isize = lseek( infd, 0, SEEK_END );
- if( isize < 0 )
- { show_error( "Input file is not seekable", errno ); return 1; }
- if( isize < min_member_size )
- { show_error( "Input file is too short." ); return 2; }
-
- Member_index member_index( infd, isize );
- if( range.end() > member_index.data_end() )
- range.size( std::max( 0LL, member_index.data_end() - range.pos() ) );
+
+ File_index file_index( infd );
+ if( file_index.retval() != 0 )
+ { show_error( file_index.error().c_str() ); return file_index.retval(); }
+
+ if( range.end() > file_index.data_end() )
+ range.size( std::max( 0LL, file_index.data_end() - range.pos() ) );
if( range.size() <= 0 )
{ if( verbosity >= 1 ) show_error( "Nothing to do." ); return 0; }
@@ -315,7 +248,7 @@ int range_decompress( const std::string & input_filename,
{
if( verbosity >= 2 )
std::fprintf( stderr, "Decompressed file size = %sB\n",
- format_num( member_index.data_end() ) );
+ format_num( file_index.data_end() ) );
std::fprintf( stderr, "Decompressing range %sB", format_num( range.pos() ) );
std::fprintf( stderr, " to %sB ", format_num( range.pos() + range.size() ) );
std::fprintf( stderr, "(%sBytes)\n", format_num( range.size() ) );
@@ -329,23 +262,23 @@ int range_decompress( const std::string & input_filename,
if( outfd < 0 ) return 1; }
Pretty_print pp( input_filename, 0 );
int retval = 0;
- for( int i = 0; i < member_index.members(); ++i )
+ for( int i = 0; i < file_index.members(); ++i )
{
- const Block & db = member_index.dblock( i );
+ const Block & db = file_index.dblock( i );
if( range.overlaps( db ) )
{
if( verbosity >= 3 )
std::fprintf( stderr, "Decompressing member %3d\n", i );
const long long outskip = std::max( 0LL, range.pos() - db.pos() );
const long long outend = std::min( db.end(), range.end() - db.pos() );
- if( !safe_seek( infd, member_index.mblock( i ).pos() ) )
- { retval = 1; break; }
- retval = decompress_member( infd, outfd, pp, member_index.member( i ),
- outskip, outend );
+ const long long mpos = file_index.mblock( i ).pos();
+ if( !safe_seek( infd, mpos ) ) { retval = 1; break; }
+ retval = decompress_member( infd, outfd, pp, mpos, outskip, outend );
if( retval ) cleanup_and_fail( output_filename, outfd, retval );
pp.reset();
}
}
+ close( infd );
if( close( outfd ) != 0 )
{
show_error( "Error closing output file", errno );
diff --git a/repair.cc b/repair.cc
index b065814..e9cef61 100644
--- a/repair.cc
+++ b/repair.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,8 +30,27 @@
#include "lzip.h"
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos )
+ return readblock( fd, buf, size );
+ return 0;
+ }
+
+
+int seek_write( const int fd, const uint8_t * const buf, const int size,
+ const long long pos )
+ {
+ if( lseek( fd, pos, SEEK_SET ) == pos )
+ return writeblock( fd, buf, size );
+ return 0;
+ }
+
+
int repair_file( const std::string & input_filename,
- const std::string & output_filename, const bool force )
+ const std::string & output_filename, const int verbosity,
+ const bool force )
{
struct stat in_stats;
const int infd = open_instream( input_filename, &in_stats, true, true );
@@ -41,7 +60,7 @@ int repair_file( const std::string & input_filename,
{ show_error( "Input file is not seekable", errno ); return 1; }
if( isize < min_member_size )
{ show_error( "Input file is too short." ); return 2; }
- if( !verify_single_member( infd, isize ) ) return 2;
+ if( !verify_single_member( infd, isize, verbosity ) ) return 2;
if( lseek( infd, 0, SEEK_SET ) < 0 )
{ show_error( "Seek error in input file", errno ); return 1; }
@@ -67,35 +86,28 @@ int repair_file( const std::string & input_filename,
const long long min_pos =
std::max( (long long)File_header::size, failure_pos - 1000 );
bool done = false;
- for( long long pos = failure_pos; pos >= min_pos; --pos )
+ for( long long pos = failure_pos; pos >= min_pos && !done ; --pos )
{
if( verbosity >= 1 )
{
- std::printf( "Trying position %lld \r", pos );
+ std::printf( "Trying position %llu \r", pos );
std::fflush( stdout );
}
uint8_t byte;
- if( lseek( outfd, pos, SEEK_SET ) < 0 ||
- readblock( outfd, &byte, 1 ) != 1 )
+ if( seek_read( outfd, &byte, 1, pos ) != 1 )
{ show_error( "Error reading output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
- for( int i = 0; i < 255; ++i )
+ for( int i = 0; i < 256; ++i )
{
++byte;
- if( lseek( outfd, pos, SEEK_SET ) < 0 ||
- writeblock( outfd, &byte, 1 ) != 1 ||
+ if( seek_write( outfd, &byte, 1, pos ) != 1 ||
lseek( outfd, 0, SEEK_SET ) < 0 )
{ show_error( "Error writing output file", errno );
cleanup_and_fail( output_filename, outfd, 1 ); }
+ if( i == 255 ) break;
if( try_decompress( outfd, isize ) )
{ done = true; break; }
}
- if( done ) break;
- ++byte;
- if( lseek( outfd, pos, SEEK_SET ) < 0 ||
- writeblock( outfd, &byte, 1 ) != 1 )
- { show_error( "Error writing output file", errno );
- cleanup_and_fail( output_filename, outfd, 1 ); }
}
if( verbosity >= 1 ) std::printf( "\n" );
diff --git a/split.cc b/split.cc
index 786d6e9..88cdbfa 100644
--- a/split.cc
+++ b/split.cc
@@ -1,5 +1,5 @@
/* Lziprecover - Data recovery tool for lzipped files
- Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -83,7 +83,8 @@ int find_magic( const uint8_t * const buffer, const int pos, const int size )
int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
- const std::string & default_output_filename, const bool force )
+ const std::string & default_output_filename,
+ const int verbosity, const bool force )
{
const int hsize = File_header::size;
const int tsize = File_trailer::size();
@@ -99,16 +100,16 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
bool at_stream_end = ( size < buffer_size );
if( size != buffer_size && errno )
{ show_error( "Read error", errno ); return 1; }
- if( size <= tsize )
+ if( size < min_member_size )
{ show_error( "Input file is too short." ); return 2; }
- if( !verify_header( *(File_header *)buffer ) ) return 2;
+ if( !verify_header( *(File_header *)buffer, verbosity ) ) return 2;
std::string output_filename;
first_filename( input_filename, default_output_filename, output_filename );
int outfd = open_outstream_rw( output_filename, force );
if( outfd < 0 ) { close( infd ); return 1; }
- long long partial_member_size = 0;
+ unsigned long long partial_member_size = 0;
while( true )
{
int pos = 0;
@@ -117,10 +118,8 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
newpos = find_magic( buffer, newpos, size + 4 - newpos );
if( newpos <= size )
{
- long long member_size = 0;
- for( int i = 1; i <= 8; ++i )
- { member_size <<= 8; member_size += base_buffer[tsize+newpos-i]; }
- if( partial_member_size + newpos - pos == member_size )
+ const File_trailer & trailer = *(File_trailer *)(base_buffer + newpos);
+ if( partial_member_size + newpos - pos == trailer.member_size() )
{ // header found
const int wr = writeblock( outfd, buffer + pos, newpos - pos );
if( wr != newpos - pos )
@@ -167,11 +166,12 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
int split_file( const std::string & input_filename,
- const std::string & default_output_filename, const bool force )
+ const std::string & default_output_filename,
+ const int verbosity, const bool force )
{
uint8_t * base_buffer;
const int retval = do_split_file( input_filename, base_buffer,
- default_output_filename, force );
+ default_output_filename, verbosity, force );
delete[] base_buffer;
return retval;
}
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 92dcfdd..bd77f02 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -1,6 +1,6 @@
#! /bin/sh
# check script for Lziprecover - Data recovery tool for lzipped files
-# Copyright (C) 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+# Copyright (C) 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
#
# This script is free software: you have unlimited permission
# to copy, distribute and modify it.
@@ -53,6 +53,15 @@ printf .
cmp ${in} copy || fail=1
printf .
+"${LZIPRECOVER}" -lq
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIPRECOVER}" -mq ${bad1_lz}
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIPRECOVER}" -Rq
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+"${LZIPRECOVER}" -sq
+if [ $? != 1 ] ; then fail=1 ; printf - ; else printf . ; fi
+
"${LZIPRECOVER}" -D 921-1921 -fo copy ${in_lz} || fail=1
cmp ${inD} copy || fail=1
printf .
diff --git a/testsuite/unzcrash.cc b/testsuite/unzcrash.cc
index 4c49035..abf61bb 100644
--- a/testsuite/unzcrash.cc
+++ b/testsuite/unzcrash.cc
@@ -1,7 +1,7 @@
/* Unzcrash - A test program written to test robustness to
decompression of corrupted data.
Inspired by unzcrash.c from Julian Seward's bzip2.
- Copyright (C) 2008, 2009, 2010, 2011, 2012 Antonio Diaz Diaz.
+ Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -34,22 +34,12 @@
#error "Environments where CHAR_BIT != 8 are not supported."
#endif
-#ifndef LLONG_MAX
-#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
-#endif
-#ifndef LLONG_MIN
-#define LLONG_MIN (-LLONG_MAX - 1LL)
-#endif
-#ifndef ULLONG_MAX
-#define ULLONG_MAX 0xFFFFFFFFFFFFFFFFULL
-#endif
-
namespace {
const char * const Program_name = "Unzcrash";
const char * const program_name = "unzcrash";
-const char * const program_year = "2012";
+const char * const program_year = "2013";
const char * invocation_name = 0;
int verbosity = 0;
@@ -67,11 +57,12 @@ void show_help()
"\nOptions:\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
- " -b, --bits=<n>[,<n>]... test <n>-bit errors instead of full byte\n"
+ " -b, --bits=<range> test N-bit errors instead of full byte\n"
" -p, --position=<bytes> first byte position to test\n"
" -q, --quiet suppress all messages\n"
" -s, --size=<bytes> number of byte positions to test\n"
" -v, --verbose be verbose (a 2nd -v gives more)\n"
+ "Examples of <range>: 1 1,2,3 1-4 1,3-5,8\n"
"\nReport bugs to lzip-bug@nongnu.org\n"
"Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
}
@@ -99,7 +90,7 @@ void show_error( const char * const msg, const int errcode = 0,
std::fprintf( stderr, ": %s", std::strerror( errcode ) );
std::fprintf( stderr, "\n" );
}
- if( help && invocation_name && invocation_name[0] )
+ if( help )
std::fprintf( stderr, "Try '%s --help' for more information.\n",
invocation_name );
}
@@ -114,13 +105,13 @@ void internal_error( const char * const msg )
}
-long long getnum( const char * const ptr,
- const long long llimit = LLONG_MIN + 1,
- const long long ulimit = LLONG_MAX )
+unsigned long long getnum( const char * const ptr,
+ const unsigned long long llimit,
+ const unsigned long long ulimit )
{
errno = 0;
- char *tail;
- long long result = strtoll( ptr, &tail, 0 );
+ char * tail;
+ unsigned long long result = strtoull( ptr, &tail, 0 );
if( tail == ptr )
{
show_error( "Bad or missing numerical argument.", 0, true );
@@ -155,7 +146,7 @@ long long getnum( const char * const ptr,
}
for( int i = 0; i < exponent; ++i )
{
- if( LLONG_MAX / factor >= llabs( result ) ) result *= factor;
+ if( ulimit / factor >= result ) result *= factor;
else { errno = ERANGE; break; }
}
}