summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 11:53:03 +0000
committer Daniel Baumann <mail@daniel-baumann.ch> 2015-11-07 11:53:03 +0000
commit 585fba50b00b5716bbde7a1b05cbab114af8cdb0 (patch)
tree df705039ddf6d248ae0755a2a80c439b7b2aeee6
parent Adding upstream version 1.18~pre1. (diff)
download lziprecover-585fba50b00b5716bbde7a1b05cbab114af8cdb0.tar.xz
lziprecover-585fba50b00b5716bbde7a1b05cbab114af8cdb0.zip
Adding upstream version 1.18~pre2. upstream/1.18_pre2
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
Diffstat (limited to '')
-rw-r--r-- ChangeLog 12
-rw-r--r-- NEWS 16
-rwxr-xr-x configure 2
-rw-r--r-- decoder.cc 22
-rw-r--r-- decoder.h 51
-rw-r--r-- doc/lziprecover.1 5
-rw-r--r-- doc/lziprecover.info 129
-rw-r--r-- doc/lziprecover.texi 101
-rw-r--r-- file_index.cc 11
-rw-r--r-- file_index.h 13
-rw-r--r-- lzip.h 24
-rw-r--r-- main.cc 86
-rw-r--r-- mtester.cc 12
-rw-r--r-- mtester.h 47
-rw-r--r-- range_dec.cc 20
-rw-r--r-- repair.cc 3
-rw-r--r-- split.cc 38
-rwxr-xr-x testsuite/check.sh 55
18 files changed, 427 insertions, 220 deletions
diff --git a/ChangeLog b/ChangeLog
index 75abd30..835dec0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2015-09-16 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 1.18-pre2 released.
+ * main.cc: Added new option '-a, --trailing-error'.
+ * Decompression time has been reduced by 2%.
+ * main.cc (decompress): Print up to 6 bytes of trailing data
+ when '-tvvvv' is specified.
+ * range_dec.cc (list_file): Show dictionary size and size of
+ trailing data (if any) with '-lv'.
+ * lziprecover.texi: Added chapter 'Trailing data'.
+ * testsuite/check.sh: Don't check error messages.
+
2015-06-30 Antonio Diaz Diaz <antonio@gnu.org>
* Version 1.18-pre1 released.
diff --git a/NEWS b/NEWS
index c46157d..4e36544 100644
--- a/NEWS
+++ b/NEWS
@@ -7,5 +7,21 @@ attempting to repair it.
errors in these bytes sometimes can't be detected until the end of the
member.
+The option "-a, --trailing-error", which makes lziprecover exit with error
+status 2 if any remaining input is detected after decompressing the last
+member, has been added.
+
The new option "-x, --show-packets", which shows the LZMA packets
(coding sequences) coded in a given file, has been added.
+
+Decompression time has been reduced by 2%.
+
+Up to 6 bytes of trailing data are printed when "-tvvvv" is specified.
+
+Dictionary size and size of trailing data (if any) are printed when
+"-lv" is specified.
+
+The new chapter "Trailing data" has been added to the manual.
+
+Fixed a harmless check failure on Windows caused by the failed
+comparison of a message in text mode.
diff --git a/configure b/configure
index aad4fc1..a76d668 100755
--- a/configure
+++ b/configure
@@ -6,7 +6,7 @@
# to copy, distribute and modify it.
pkgname=lziprecover
-pkgversion=1.18-pre1
+pkgversion=1.18-pre2
progname=lziprecover
srctrigger=doc/${pkgname}.texi
diff --git a/decoder.cc b/decoder.cc
index 5de3a6f..895bd9c 100644
--- a/decoder.cc
+++ b/decoder.cc
@@ -42,7 +42,7 @@ void Pretty_print::operator()( const char * const msg, FILE * const f ) const
{
first_post = false;
std::fprintf( f, " %s: ", name_.c_str() );
- for( unsigned i = 0; i < longest_name - name_.size(); ++i )
+ for( unsigned i = name_.size(); i < longest_name; ++i )
std::fputc( ' ', f );
if( !msg ) std::fflush( f );
}
@@ -62,7 +62,7 @@ long readblock( const int fd, uint8_t * const buf, const long size )
{
const int n = read( fd, buf + sz, std::min( 1L << 20, size - sz ) );
if( n > 0 ) sz += n;
- else if( n == 0 ) break; /* EOF */
+ else if( n == 0 ) break; // EOF
else if( errno != EINTR ) break;
errno = 0;
}
@@ -117,7 +117,7 @@ void LZ_decoder::flush_data()
if( s > 0 && writeblock( outfd, buffer + stream_pos + i, s ) != s )
throw Error( "Write error" );
}
- if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; }
+ if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
stream_pos = pos;
}
}
@@ -206,9 +206,9 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
Bit_model bm_align[dis_align_size];
Len_model match_len_model;
Len_model rep_len_model;
- unsigned rep0 = 0; /* rep[0-3] latest four distances */
- unsigned rep1 = 0; /* used for efficient coding of */
- unsigned rep2 = 0; /* repeated distances */
+ unsigned rep0 = 0; // rep[0-3] latest four distances
+ unsigned rep1 = 0; // used for efficient coding of
+ unsigned rep2 = 0; // repeated distances
unsigned rep3 = 0;
State state;
@@ -231,7 +231,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
peek( rep0 ) ) );
}
}
- else /* match or repeated match */
+ else // match or repeated match
{
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
@@ -260,7 +260,7 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
state.set_rep();
len = min_match_len + rdec.decode_len( rep_len_model, pos_state );
}
- else /* match */
+ else // match
{
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
@@ -277,16 +277,16 @@ int LZ_decoder::decode_member( const Pretty_print & pp )
{
rep0 += rdec.decode( direct_bits - dis_align_bits ) << dis_align_bits;
rep0 += rdec.decode_tree_reversed4( bm_align );
- if( rep0 == 0xFFFFFFFFU ) /* marker found */
+ if( rep0 == 0xFFFFFFFFU ) // marker found
{
rep0 = rep0_saved;
rdec.normalize();
flush_data();
- if( len == min_match_len ) /* End Of Stream marker */
+ if( len == min_match_len ) // End Of Stream marker
{
if( verify_trailer( pp ) ) return 0; else return 3;
}
- if( len == min_match_len + 1 ) /* Sync Flush marker */
+ if( len == min_match_len + 1 ) // Sync Flush marker
{
rdec.load(); continue;
}
diff --git a/decoder.h b/decoder.h
index 5e6e16c..5ffc0be 100644
--- a/decoder.h
+++ b/decoder.h
@@ -19,12 +19,12 @@ class Range_decoder
{
enum { buffer_size = 16384 };
unsigned long long partial_member_pos;
- uint8_t * const buffer; /* input buffer */
- int pos; /* current pos in buffer */
- int stream_pos; /* when reached, a new block must be read */
+ uint8_t * const buffer; // input buffer
+ int pos; // current pos in buffer
+ int stream_pos; // when reached, a new block must be read
uint32_t code;
uint32_t range;
- const int infd; /* input file descriptor */
+ const int infd; // input file descriptor
bool at_stream_end;
bool read_block();
@@ -214,12 +214,11 @@ class LZ_decoder
unsigned long long partial_data_pos;
Range_decoder & rdec;
const unsigned dictionary_size;
- const int buffer_size;
- uint8_t * const buffer; /* output buffer */
- int pos; /* current pos in buffer */
- int stream_pos; /* first byte not yet written to file */
+ uint8_t * const buffer; // output buffer
+ unsigned pos; // current pos in buffer
+ unsigned stream_pos; // first byte not yet written to file
uint32_t crc_;
- const int outfd; /* output file descriptor */
+ const int outfd; // output file descriptor
unsigned long long stream_position() const
{ return partial_data_pos + stream_pos; }
@@ -228,37 +227,42 @@ class LZ_decoder
uint8_t peek_prev() const
{
- const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1;
+ const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1;
return buffer[i];
}
- uint8_t peek( const int distance ) const
+ uint8_t peek( const unsigned distance ) const
{
- int i = pos - distance - 1;
- if( i < 0 ) i += buffer_size;
+ unsigned i = pos - distance - 1;
+ if( pos <= distance ) i += dictionary_size;
return buffer[i];
}
void put_byte( const uint8_t b )
{
buffer[pos] = b;
- if( ++pos >= buffer_size ) flush_data();
+ if( ++pos >= dictionary_size ) flush_data();
}
- void copy_block( const int distance, int len )
+ void copy_block( const unsigned distance, unsigned len )
{
- int i = pos - distance - 1;
- if( i < 0 ) i += buffer_size;
- if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) )
+ unsigned i = pos - distance - 1;
+ bool fast;
+ if( pos <= distance )
+ { i += dictionary_size;
+ fast = ( len <= dictionary_size - i && len <= i - pos ); }
+ else
+ fast = ( len < dictionary_size - pos && len <= pos - i );
+ if( fast ) // no wrap, no overlap
{
- std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap
+ std::memcpy( buffer + pos, buffer + i, len );
pos += len;
}
else for( ; len > 0; --len )
{
buffer[pos] = buffer[i];
- if( ++pos >= buffer_size ) flush_data();
- if( ++i >= buffer_size ) i = 0;
+ if( ++pos >= dictionary_size ) flush_data();
+ if( ++i >= dictionary_size ) i = 0;
}
}
@@ -275,13 +279,12 @@ public:
partial_data_pos( 0 ),
rdec( rde ),
dictionary_size( header.dictionary_size() ),
- buffer_size( std::max( 65536U, dictionary_size ) ),
- buffer( new uint8_t[buffer_size] ),
+ buffer( new uint8_t[dictionary_size] ),
pos( 0 ),
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
outfd( ofd )
- { buffer[buffer_size-1] = 0; } // prev_byte of first byte
+ { buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_decoder() { delete[] buffer; }
diff --git a/doc/lziprecover.1 b/doc/lziprecover.1
index 99b61dd..87c0598 100644
--- a/doc/lziprecover.1
+++ b/doc/lziprecover.1
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.46.1.
-.TH LZIPRECOVER "1" "June 2015" "lziprecover 1.18-pre1" "User Commands"
+.TH LZIPRECOVER "1" "September 2015" "lziprecover 1.18-pre2" "User Commands"
.SH NAME
lziprecover \- recovers data from damaged lzip files
.SH SYNOPSIS
@@ -23,6 +23,9 @@ display this help and exit
\fB\-V\fR, \fB\-\-version\fR
output version information and exit
.TP
+\fB\-a\fR, \fB\-\-trailing\-error\fR
+exit with error status if trailing data
+.TP
\fB\-c\fR, \fB\-\-stdout\fR
send decompressed output to standard output
.TP
diff --git a/doc/lziprecover.info b/doc/lziprecover.info
index 654e60c..8d7bc66 100644
--- a/doc/lziprecover.info
+++ b/doc/lziprecover.info
@@ -12,7 +12,7 @@ File: lziprecover.info, Node: Top, Next: Introduction, Up: (dir)
Lziprecover Manual
******************
-This manual is for Lziprecover (version 1.18-pre1, 30 June 2015).
+This manual is for Lziprecover (version 1.18-pre2, 16 September 2015).
* Menu:
@@ -23,6 +23,7 @@ This manual is for Lziprecover (version 1.18-pre1, 30 June 2015).
* Merging files:: Fixing several damaged copies
* File names:: Names of the files produced by lziprecover
* File format:: Detailed format of the compressed file
+* Trailing data:: Extra data appended to the file
* Examples:: A small tutorial with examples
* Unzcrash:: Testing the robustness of decompressors
* Problems:: Reporting bugs
@@ -54,7 +55,7 @@ availability:
recovery means. The lziprecover program can repair bit-flip errors
(one of the most common forms of data corruption) in lzip files,
and provides data recovery capabilities, including error-checked
- merging of damaged copies of a file.
+ merging of damaged copies of a file. *Note Data safety::.
* The lzip format is as simple as possible (but not simpler). The
lzip manual provides the code of a simple decompressor along with
@@ -125,6 +126,13 @@ The format for running lziprecover is:
Print the version number of lziprecover on the standard output and
exit.
+'-a'
+'--trailing-error'
+ Exit with error status 2 if any remaining input is detected after
+ decompressing the last member. Such remaining input is usually
+ trailing garbage that can be safely ignored. *Note
+ concat-example::.
+
'-c'
'--stdout'
Decompress to standard output. Needed when reading from a named
@@ -133,7 +141,9 @@ The format for running lziprecover is:
'-d'
'--decompress'
- Decompress.
+ Decompress the specified file(s). If a file fails to decompress,
+ lziprecover exits immediately without decompressing the rest of the
+ files.
'-D RANGE'
'--range-decompress=RANGE'
@@ -220,7 +230,8 @@ The format for running lziprecover is:
Check integrity of the specified file(s), but don't decompress
them. This really performs a trial decompression and throws away
the result. Use it together with '-v' to see information about
- the file.
+ the file(s). If a file fails the test, lziprecover continues
+ checking the rest of the files.
'-v'
'--verbose'
@@ -228,7 +239,7 @@ The format for running lziprecover is:
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary
size, trailer contents (CRC, data size, member size), and up to 6
- bytes of trailing garbage (if any).
+ bytes of trailing data (if any).
Numbers given as arguments to options may be followed by a multiplier
@@ -387,7 +398,7 @@ original file name ends with one of the extensions '.tar.lz', '.lz' or
'.tlz', the string '_fixed' is inserted before the extension.

-File: lziprecover.info, Node: File format, Next: Examples, Prev: File names, Up: Top
+File: lziprecover.info, Node: File format, Next: Trailing data, Prev: File names, Up: Top
7 File format
*************
@@ -421,7 +432,7 @@ additional information before, between, or after them.
All multibyte values are stored in little endian order.
-'ID string'
+'ID string (the "magic" bytes)'
A four byte string, identifying the lzip format, with the value
"LZIP" (0x4C, 0x5A, 0x49, 0x50).
@@ -458,9 +469,42 @@ additional information before, between, or after them.

-File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: File format, Up: Top
+File: lziprecover.info, Node: Trailing data, Next: Examples, Prev: File format, Up: Top
+
+8 Extra data appended to the file
+*********************************
+
+Sometimes extra data is found appended to a lzip file after the last
+member. Such trailing data may be:
+
+ * Padding added to make the file size a multiple of some block size,
+ for example when writing to a tape.
+
+ * Garbage added by some not totally successful copy operation.
+
+ * Useful data added by the user; a cryptographically secure hash, a
+ description of file contents, etc.
+
+ * Malicious data added to the file in order to make its total size
+ and hash value (for a chosen hash) coincide with those of another
+ file.
+
+ * In very rare cases, trailing data could be the corrupt header of
+ another member. In multi-member or concatenated files the
+ probability of corruption happening in the magic bytes is 5 times
+ smaller than the probability of getting a false positive caused by
+ the corruption of the integrity information itself. Therefore it
+ can be considered to be below the noise level.
+
+ Trailing data can be safely ignored in most cases. In some cases,
+like user-added data, it is expected to be ignored. In those cases
+where a file containing trailing data must be rejected, the option
+'--trailing-error' can be used. *Note --trailing-error::.
+
+
+File: lziprecover.info, Node: Examples, Next: Unzcrash, Prev: Trailing data, Up: Top
-8 A small tutorial with examples
+9 A small tutorial with examples
********************************
Example 1: Restore a regular file from its compressed version
@@ -475,29 +519,38 @@ show status.
lziprecover -tv file.lz
-Example 3: Decompress 'file.lz' partially until 10 KiB of decompressed
+Example 3: The right way of concatenating compressed files. *Note
+Trailing data::.
+
+ Don't do this
+ cat file1.lz file2.lz file3.lz | lziprecover -d
+ Do this instead
+ lziprecover -cd file1.lz file2.lz file3.lz
+
+
+Example 4: Decompress 'file.lz' partially until 10 KiB of decompressed
data are produced.
lziprecover -D 0,10KiB file.lz
-Example 4: Decompress 'file.lz' partially from decompressed byte 10000
+Example 5: Decompress 'file.lz' partially from decompressed byte 10000
to decompressed byte 15000 (5000 bytes are produced).
lziprecover -D 10000-15000 file.lz
-Example 5: Repair small errors in the file 'file.lz'. (Indented lines
+Example 6: Repair small errors in the file 'file.lz'. (Indented lines
are abridged diagnostic messages from lziprecover).
lziprecover -v -R file.lz
Copy of input file repaired successfully.
lziprecover -tv file_fixed.lz
- ok
+ file_fixed.lz: ok
mv file_fixed.lz file.lz
-Example 6: Split the multi-member file 'file.lz' and write each member
+Example 7: Split the multi-member file 'file.lz' and write each member
in its own 'recXXXfile.lz' file. Then use 'lziprecover -t' to test the
integrity of the resulting files.
@@ -505,26 +558,26 @@ integrity of the resulting files.
lziprecover -tv rec*file.lz
-Example 7: Recover a compressed backup from two copies on CD-ROM with
-error-checked merging of copies (*Note GNU ddrescue manual:
+Example 8: Recover a compressed backup from two copies on CD-ROM with
+error-checked merging of copies. (*Note GNU ddrescue manual:
(ddrescue)Top, for details about ddrescue).
- ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 logfile1
+ ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage
cp /mnt/cdimage/backup.tar.lz rescued1.tar.lz
umount /mnt/cdimage
(insert second copy in the CD drive)
- ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 logfile2
+ ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 mapfile2
mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage
cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz
umount /mnt/cdimage
lziprecover -m -v -o backup.tar.lz rescued1.tar.lz rescued2.tar.lz
Input files merged successfully.
lziprecover -tv backup.tar.lz
- ok
+ backup.tar.lz: ok
-Example 8: Recover the first volume of those created with the command
+Example 9: Recover the first volume of those created with the command
'lzip -b 32MiB -S 650MB big_db' from two copies, 'big_db1_00001.lz' and
'big_db2_00001.lz', with member 07 damaged in the first copy, member 18
damaged in the second copy, and member 12 damaged in both copies. The
@@ -533,13 +586,13 @@ correct file produced is saved in 'big_db_00001.lz'.
lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
Input files merged successfully.
lziprecover -tv big_db_00001.lz
- ok
+ big_db_00001.lz: ok

File: lziprecover.info, Node: Unzcrash, Next: Problems, Prev: Examples, Up: Top
-9 Testing the robustness of decompressors
-*****************************************
+10 Testing the robustness of decompressors
+******************************************
The lziprecover package also includes unzcrash, a program written to
test robustness to decompression of corrupted data, inspired by
@@ -615,7 +668,7 @@ caused unzcrash to panic.

File: lziprecover.info, Node: Problems, Next: Concept index, Prev: Unzcrash, Up: Top
-10 Reporting bugs
+11 Reporting bugs
*****************
There are probably bugs in lziprecover. There are certainly errors and
@@ -646,24 +699,28 @@ Concept index
* invoking: Invoking lziprecover. (line 6)
* merging files: Merging files. (line 6)
* repairing files: Repairing files. (line 6)
+* trailing data: Trailing data. (line 6)
* unzcrash: Unzcrash. (line 6)

Tag Table:
Node: Top231
-Node: Introduction1214
-Node: Invoking lziprecover4310
-Node: Data safety9743
-Node: Repairing files11667
-Node: Merging files13569
-Node: File names15410
-Node: File format15874
-Node: Examples18278
-Ref: ddrescue-example19524
-Node: Unzcrash20780
-Node: Problems23334
-Node: Concept index23886
+Node: Introduction1278
+Node: Invoking lziprecover4395
+Ref: --trailing-error4860
+Node: Data safety10294
+Node: Repairing files12218
+Node: Merging files14120
+Node: File names15961
+Node: File format16425
+Node: Trailing data18854
+Node: Examples20230
+Ref: concat-example20661
+Ref: ddrescue-example21725
+Node: Unzcrash23015
+Node: Problems25571
+Node: Concept index26123

End Tag Table
diff --git a/doc/lziprecover.texi b/doc/lziprecover.texi
index 29045e7..e29a59f 100644
--- a/doc/lziprecover.texi
+++ b/doc/lziprecover.texi
@@ -6,8 +6,8 @@
@finalout
@c %**end of header
-@set UPDATED 30 June 2015
-@set VERSION 1.18-pre1
+@set UPDATED 16 September 2015
+@set VERSION 1.18-pre2
@dircategory Data Compression
@direntry
@@ -42,6 +42,7 @@ This manual is for Lziprecover (version @value{VERSION}, @value{UPDATED}).
* Merging files:: Fixing several damaged copies
* File names:: Names of the files produced by lziprecover
* File format:: Detailed format of the compressed file
+* Trailing data:: Extra data appended to the file
* Examples:: A small tutorial with examples
* Unzcrash:: Testing the robustness of decompressors
* Problems:: Reporting bugs
@@ -75,7 +76,7 @@ The lzip format provides very safe integrity checking and some data
recovery means. The lziprecover program can repair bit-flip errors (one
of the most common forms of data corruption) in lzip files, and provides
data recovery capabilities, including error-checked merging of damaged
-copies of a file.
+copies of a file. @xref{Data safety}.
@item
The lzip format is as simple as possible (but not simpler). The lzip
@@ -152,6 +153,13 @@ Print an informative help message describing the options and exit.
@itemx --version
Print the version number of lziprecover on the standard output and exit.
+@anchor{--trailing-error}
+@item -a
+@itemx --trailing-error
+Exit with error status 2 if any remaining input is detected after
+decompressing the last member. Such remaining input is usually trailing
+garbage that can be safely ignored. @xref{concat-example}.
+
@item -c
@itemx --stdout
Decompress to standard output. Needed when reading from a named pipe
@@ -160,7 +168,9 @@ data as possible when decompressing a corrupt file.
@item -d
@itemx --decompress
-Decompress.
+Decompress the specified file(s). If a file fails to decompress,
+lziprecover exits immediately without decompressing the rest of the
+files.
@item -D @var{range}
@itemx --range-decompress=@var{range}
@@ -246,7 +256,9 @@ on the number of members in @samp{@var{file}}.
@itemx --test
Check integrity of the specified file(s), but don't decompress them.
This really performs a trial decompression and throws away the result.
-Use it together with @samp{-v} to see information about the file.
+Use it together with @samp{-v} to see information about the file(s). If
+a file fails the test, lziprecover continues checking the rest of the
+files.
@item -v
@itemx --verbose
@@ -254,7 +266,7 @@ Verbose mode.@*
When decompressing or testing, further -v's (up to 4) increase the
verbosity level, showing status, compression ratio, dictionary size,
trailer contents (CRC, data size, member size), and up to 6 bytes of
-trailing garbage (if any).
+trailing data (if any).
@end table
@@ -456,7 +468,7 @@ Each member has the following structure:
All multibyte values are stored in little endian order.
@table @samp
-@item ID string
+@item ID string (the "magic" bytes)
A four byte string, identifying the lzip format, with the value "LZIP"
(0x4C, 0x5A, 0x49, 0x50).
@@ -499,6 +511,44 @@ facilitates safe recovery of undamaged members from multi-member files.
@end table
+@node Trailing data
+@chapter Extra data appended to the file
+@cindex trailing data
+
+Sometimes extra data is found appended to a lzip file after the last
+member. Such trailing data may be:
+
+@itemize @bullet
+@item
+Padding added to make the file size a multiple of some block size, for
+example when writing to a tape.
+
+@item
+Garbage added by some not totally successful copy operation.
+
+@item
+Useful data added by the user; a cryptographically secure hash, a
+description of file contents, etc.
+
+@item
+Malicious data added to the file in order to make its total size and
+hash value (for a chosen hash) coincide with those of another file.
+
+@item
+In very rare cases, trailing data could be the corrupt header of another
+member. In multi-member or concatenated files the probability of
+corruption happening in the magic bytes is 5 times smaller than the
+probability of getting a false positive caused by the corruption of the
+integrity information itself. Therefore it can be considered to be below
+the noise level.
+@end itemize
+
+Trailing data can be safely ignored in most cases. In some cases, like
+user-added data, it is expected to be ignored. In those cases where a
+file containing trailing data must be rejected, the option
+@samp{--trailing-error} can be used. @xref{--trailing-error}.
+
+
@node Examples
@chapter A small tutorial with examples
@cindex examples
@@ -521,8 +571,21 @@ lziprecover -tv file.lz
@end example
@sp 1
+@anchor{concat-example}
+@noindent
+Example 3: The right way of concatenating compressed files.
+@xref{Trailing data}.
+
+@example
+Don't do this
+ cat file1.lz file2.lz file3.lz | lziprecover -d
+Do this instead
+ lziprecover -cd file1.lz file2.lz file3.lz
+@end example
+
+@sp 1
@noindent
-Example 3: Decompress @samp{file.lz} partially until 10 KiB of
+Example 4: Decompress @samp{file.lz} partially until 10 KiB of
decompressed data are produced.
@example
@@ -531,7 +594,7 @@ lziprecover -D 0,10KiB file.lz
@sp 1
@noindent
-Example 4: Decompress @samp{file.lz} partially from decompressed byte
+Example 5: Decompress @samp{file.lz} partially from decompressed byte
10000 to decompressed byte 15000 (5000 bytes are produced).
@example
@@ -540,20 +603,20 @@ lziprecover -D 10000-15000 file.lz
@sp 1
@noindent
-Example 5: Repair small errors in the file @samp{file.lz}. (Indented
+Example 6: Repair small errors in the file @samp{file.lz}. (Indented
lines are abridged diagnostic messages from lziprecover).
@example
lziprecover -v -R file.lz
Copy of input file repaired successfully.
lziprecover -tv file_fixed.lz
- ok
+ file_fixed.lz: ok
mv file_fixed.lz file.lz
@end example
@sp 1
@noindent
-Example 6: Split the multi-member file @samp{file.lz} and write each
+Example 7: Split the multi-member file @samp{file.lz} and write each
member in its own @samp{recXXXfile.lz} file. Then use
@w{@samp{lziprecover -t}} to test the integrity of the resulting files.
@@ -565,8 +628,8 @@ lziprecover -tv rec*file.lz
@sp 1
@anchor{ddrescue-example}
@noindent
-Example 7: Recover a compressed backup from two copies on CD-ROM with
-error-checked merging of copies
+Example 8: Recover a compressed backup from two copies on CD-ROM with
+error-checked merging of copies.
@ifnothtml
(@xref{Top,GNU ddrescue manual,,ddrescue},
@end ifnothtml
@@ -577,24 +640,24 @@ error-checked merging of copies
for details about ddrescue).
@example
-ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 logfile1
+ddrescue -d -r1 -b2048 /dev/cdrom cdimage1 mapfile1
mount -t iso9660 -o loop,ro cdimage1 /mnt/cdimage
cp /mnt/cdimage/backup.tar.lz rescued1.tar.lz
umount /mnt/cdimage
(insert second copy in the CD drive)
-ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 logfile2
+ddrescue -d -r1 -b2048 /dev/cdrom cdimage2 mapfile2
mount -t iso9660 -o loop,ro cdimage2 /mnt/cdimage
cp /mnt/cdimage/backup.tar.lz rescued2.tar.lz
umount /mnt/cdimage
lziprecover -m -v -o backup.tar.lz rescued1.tar.lz rescued2.tar.lz
Input files merged successfully.
lziprecover -tv backup.tar.lz
- ok
+ backup.tar.lz: ok
@end example
@sp 1
@noindent
-Example 8: Recover the first volume of those created with the command
+Example 9: Recover the first volume of those created with the command
@w{@samp{lzip -b 32MiB -S 650MB big_db}} from two copies,
@samp{big_db1_00001.lz} and @samp{big_db2_00001.lz}, with member 07
damaged in the first copy, member 18 damaged in the second copy, and
@@ -605,7 +668,7 @@ member 12 damaged in both copies. The correct file produced is saved in
lziprecover -m -v -o big_db_00001.lz big_db1_00001.lz big_db2_00001.lz
Input files merged successfully.
lziprecover -tv big_db_00001.lz
- ok
+ big_db_00001.lz: ok
@end example
diff --git a/file_index.cc b/file_index.cc
index a1a0f30..af55417 100644
--- a/file_index.cc
+++ b/file_index.cc
@@ -86,7 +86,7 @@ File_index::File_index( const int infd )
if( member_size < min_member_size || member_size > pos )
{
if( member_vector.empty() )
- { --pos; continue; } // maybe trailing garbage
+ { --pos; continue; } // maybe trailing data
set_num_error( "Member size in trailer is corrupt at pos ", pos - 8 );
break;
}
@@ -96,10 +96,11 @@ File_index::File_index( const int infd )
if( !header.verify_magic() || !header.verify_version() )
{
if( member_vector.empty() )
- { --pos; continue; } // maybe trailing garbage
+ { --pos; continue; } // maybe trailing data
set_num_error( "Bad header at pos ", pos - member_size );
break;
}
+ const unsigned dictionary_size = header.dictionary_size();
if( member_vector.empty() && isize - pos > File_header::size &&
seek_read( infd, header.data, File_header::size, pos ) == File_header::size &&
header.verify_magic() && header.verify_version() )
@@ -109,7 +110,7 @@ File_index::File_index( const int infd )
}
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
- pos, member_size ) );
+ pos, member_size, dictionary_size ) );
}
if( pos != 0 || member_vector.empty() )
{
@@ -184,7 +185,7 @@ File_index::File_index( const std::vector< int > & infd_vector,
}
if( !done )
{
- if( member_vector.empty() ) // maybe trailing garbage
+ if( member_vector.empty() ) // maybe trailing data
{ --pos; continue; }
set_num_error( "Member size in trailer may be corrupt at pos ", pos - 8 );
break;
@@ -202,7 +203,7 @@ File_index::File_index( const std::vector< int > & infd_vector,
}
pos -= member_size;
member_vector.push_back( Member( 0, trailer.data_size(),
- pos, member_size ) );
+ pos, member_size, 0 ) );
}
error:
if( pos != 0 || member_vector.empty() )
diff --git a/file_index.h b/file_index.h
index eff1157..5084fcb 100644
--- a/file_index.h
+++ b/file_index.h
@@ -20,10 +20,11 @@ class File_index
struct Member
{
Block dblock, mblock; // data block, member block
+ unsigned dictionary_size;
Member( const long long dp, const long long ds,
- const long long mp, const long long ms )
- : dblock( dp, ds ), mblock( mp, ms ) {}
+ const long long mp, const long long ms, const unsigned dict_size )
+ : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {}
bool operator==( const Member & m ) const { return ( mblock == m.mblock ); }
bool operator!=( const Member & m ) const { return ( mblock != m.mblock ); }
@@ -36,10 +37,10 @@ class File_index
void set_errno_error( const char * const msg );
void set_num_error( const char * const msg1, unsigned long long num,
- const char * const msg2 = "." );
+ const char * const msg2 = "" );
public:
- File_index() : error_( "No index." ), isize( 0 ), retval_( 2 ) {}
+ File_index() : error_( "No index" ), isize( 0 ), retval_( 2 ) {}
explicit File_index( const int infd );
File_index( const std::vector< int > & infd_vector, const long long fsize );
@@ -65,7 +66,7 @@ public:
{ if( member_vector.size() ) return member_vector.back().mblock.end();
else return 0; }
- // total size including trailing garbage (if any)
+ // total size including trailing data (if any)
long long file_size() const
{ if( isize >= 0 ) return isize; else return 0; }
@@ -73,4 +74,6 @@ public:
{ return member_vector[i].dblock; }
const Block & mblock( const long i ) const
{ return member_vector[i].mblock; }
+ unsigned dictionary_size( const long i ) const
+ { return member_vector[i].dictionary_size; }
};
diff --git a/lzip.h b/lzip.h
index e46b9b8..3bdc27c 100644
--- a/lzip.h
+++ b/lzip.h
@@ -40,7 +40,7 @@ public:
enum {
min_dictionary_bits = 12,
- min_dictionary_size = 1 << min_dictionary_bits, /* >= modeled_distances */
+ min_dictionary_size = 1 << min_dictionary_bits, // >= modeled_distances
max_dictionary_bits = 29,
max_dictionary_size = 1 << max_dictionary_bits,
min_member_size = 36,
@@ -53,7 +53,7 @@ enum {
dis_slot_bits = 6,
start_dis_model = 4,
end_dis_model = 14,
- modeled_distances = 1 << (end_dis_model / 2), /* 128 */
+ modeled_distances = 1 << (end_dis_model / 2), // 128
dis_align_bits = 4,
dis_align_size = 1 << dis_align_bits,
@@ -65,8 +65,8 @@ enum {
len_high_symbols = 1 << len_high_bits,
max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
- min_match_len = 2, /* must be 2 */
- max_match_len = min_match_len + max_len_symbols - 1, /* 273 */
+ min_match_len = 2, // must be 2
+ max_match_len = min_match_len + max_len_symbols - 1, // 273
min_match_len_limit = 5 };
inline int get_len_state( const int len )
@@ -109,6 +109,7 @@ public:
: stdin_name( "(stdin)" ), longest_name( 0 ), verbosity_( v ),
first_post( false )
{
+ if( verbosity_ <= 0 ) return;
const unsigned stdin_name_len = std::strlen( stdin_name );
for( unsigned i = 0; i < filenames.size(); ++i )
{
@@ -186,9 +187,9 @@ const uint8_t magic_string[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
struct File_header
{
- uint8_t data[6]; /* 0-3 magic bytes */
- /* 4 version */
- /* 5 coded_dict_size */
+ uint8_t data[6]; // 0-3 magic bytes
+ // 4 version
+ // 5 coded_dict_size
enum { size = 6 };
void set_magic() { std::memcpy( data, magic_string, 4 ); data[4] = 1; }
@@ -228,9 +229,9 @@ struct File_header
struct File_trailer
{
- uint8_t data[20]; /* 0-3 CRC32 of the uncompressed data */
- /* 4-11 size of the uncompressed data */
- /* 12-19 member size including header and trailer */
+ uint8_t data[20]; // 0-3 CRC32 of the uncompressed data
+ // 4-11 size of the uncompressed data
+ // 12-19 member size including header and trailer
enum { size = 20 };
@@ -291,7 +292,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
const bool no_ofile, const bool reg_only = false );
bool file_exists( const std::string & filename );
int open_outstream_rw( const std::string & output_filename, const bool force );
-void show_header( const unsigned dictionary_size );
+void show_header( const unsigned dictionary_size, const int vlevel = 3 );
void show_error( const char * const msg, const int errcode = 0,
const bool help = false );
void internal_error( const char * const msg );
@@ -328,6 +329,7 @@ int debug_show_packets( const std::string & input_filename,
const uint8_t bad_value );
// defined in split.cc
+bool verify_header( const File_header & header, const Pretty_print & pp );
int split_file( const std::string & input_filename,
const std::string & default_output_filename,
const int verbosity, const bool force );
diff --git a/main.cc b/main.cc
index 9425858..7845d90 100644
--- a/main.cc
+++ b/main.cc
@@ -24,6 +24,7 @@
#define _FILE_OFFSET_BITS 64
#include <algorithm>
+#include <cctype>
#include <cerrno>
#include <climits>
#include <csignal>
@@ -104,6 +105,7 @@ void show_help()
std::printf( "\nOptions:\n"
" -h, --help display this help and exit\n"
" -V, --version output version information and exit\n"
+ " -a, --trailing-error exit with error status if trailing data\n"
" -c, --stdout send decompressed output to standard output\n"
" -d, --decompress decompress\n"
" -D, --range-decompress=<range> decompress a range of bytes (N-M) to stdout\n"
@@ -146,9 +148,9 @@ void show_version()
} // end namespace
-void show_header( const unsigned dictionary_size )
+void show_header( const unsigned dictionary_size, const int vlevel )
{
- if( verbosity >= 3 )
+ if( verbosity >= vlevel )
{
const char * const prefix[8] =
{ "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
@@ -309,7 +311,7 @@ int open_instream( const char * const name, struct stat * const in_statsp,
std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
program_name, name,
( can_read && !no_ofile ) ?
- " and '--stdout' was not specified" : "" );
+ ",\n and '--stdout' was not specified" : "" );
close( infd );
infd = -1;
}
@@ -373,14 +375,14 @@ void cleanup_and_fail( const int retval )
}
- /* Set permissions, owner and times. */
+ // Set permissions, owner and times.
void close_and_set_permissions( const struct stat * const in_statsp )
{
bool warning = false;
if( in_statsp )
{
const mode_t mode = in_statsp->st_mode;
- /* fchown will in many cases return with EPERM, which can be safely ignored. */
+ // fchown will in many cases return with EPERM, which can be safely ignored.
if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
{ if( fchmod( outfd, mode ) != 0 ) warning = true; }
else
@@ -424,36 +426,42 @@ unsigned char xdigit( const int value )
}
-void show_trailing_garbage( const uint8_t * const data, const int size,
- const Pretty_print & pp, const bool all )
+bool show_trailing_data( const uint8_t * const data, const int size,
+ const Pretty_print & pp, const bool all,
+ const bool ignore_trailing )
{
- std::string garbage_msg;
- if( !all ) garbage_msg = "first bytes of ";
- garbage_msg += "trailing garbage found = ";
- bool text = true;
- for( int i = 0; i < size; ++i )
- if( !std::isprint( data[i] ) ) { text = false; break; }
- if( text )
- {
- garbage_msg += '\'';
- garbage_msg.append( (const char *)data, size );
- garbage_msg += '\'';
- }
- else
+ if( verbosity >= 4 || !ignore_trailing )
{
+ std::string msg;
+ if( !all ) msg = "first bytes of ";
+ msg += "trailing data = ";
+ bool text = true;
for( int i = 0; i < size; ++i )
+ if( !std::isprint( data[i] ) ) { text = false; break; }
+ if( text )
+ {
+ msg += '\'';
+ msg.append( (const char *)data, size );
+ msg += '\'';
+ }
+ else
{
- if( i > 0 ) garbage_msg += ' ';
- garbage_msg += xdigit( data[i] >> 4 );
- garbage_msg += xdigit( data[i] & 0x0F );
+ for( int i = 0; i < size; ++i )
+ {
+ if( i > 0 ) msg += ' ';
+ msg += xdigit( data[i] >> 4 );
+ msg += xdigit( data[i] & 0x0F );
+ }
}
+ pp( msg.c_str() );
+ if( !ignore_trailing ) show_error( "Trailing data not allowed." );
}
- garbage_msg += '.';
- pp( garbage_msg.c_str() );
+ return ignore_trailing;
}
-int decompress( const int infd, const Pretty_print & pp, const bool testing )
+int decompress( const int infd, const Pretty_print & pp,
+ const bool ignore_trailing, const bool testing )
{
int retval = 0;
@@ -469,16 +477,17 @@ int decompress( const int infd, const Pretty_print & pp, const bool testing )
{
if( first_member )
{ pp( "File ends unexpectedly at member header." ); retval = 2; }
- else if( verbosity >= 4 && size > 0 )
- show_trailing_garbage( header.data, size, pp, true );
+ else if( size > 0 && !show_trailing_data( header.data, size, pp,
+ true, ignore_trailing ) )
+ retval = 2;
break;
}
if( !header.verify_magic() )
{
if( first_member )
{ pp( "Bad magic number (file not in lzip format)." ); retval = 2; }
- else if( verbosity >= 4 )
- show_trailing_garbage( header.data, size, pp, false );
+ else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
+ retval = 2;
break;
}
if( !header.verify_version() )
@@ -620,13 +629,15 @@ int main( const int argc, const char * const argv[] )
Mode program_mode = m_none;
uint8_t bad_value = 0;
bool force = false;
- bool ignore = false;
+ bool ignore_errors = false;
+ bool ignore_trailing = true;
bool keep_input_files = false;
bool to_stdout = false;
invocation_name = argv[0];
const Arg_parser::Option options[] =
{
+ { 'a', "trailing-error", Arg_parser::no },
{ 'c', "stdout", Arg_parser::no },
{ 'd', "decompress", Arg_parser::no },
{ 'D', "range-decompress", Arg_parser::yes },
@@ -657,17 +668,18 @@ int main( const int argc, const char * const argv[] )
for( ; argind < parser.arguments(); ++argind )
{
const int code = parser.code( argind );
- if( !code ) break; /* no more options */
+ if( !code ) break; // no more options
const std::string & arg = parser.argument( argind );
switch( code )
{
+ case 'a': ignore_trailing = false; break;
case 'c': to_stdout = true; break;
case 'd': set_mode( program_mode, m_decompress ); break;
case 'D': set_mode( program_mode, m_range_dec );
parse_range( arg.c_str(), range ); break;
case 'f': force = true; break;
case 'h': show_help(); return 0;
- case 'i': ignore = true; break;
+ case 'i': ignore_errors = true; break;
case 'k': keep_input_files = true; break;
case 'l': set_mode( program_mode, m_list ); break;
case 'm': set_mode( program_mode, m_merge ); break;
@@ -688,7 +700,7 @@ int main( const int argc, const char * const argv[] )
parse_pos_value( arg.c_str(), bad_pos, bad_value ); break;
default : internal_error( "uncaught option." );
}
- } /* end process options */
+ } // end process options
#if defined(__MSVCRT__) || defined(__OS2__)
setmode( STDIN_FILENO, O_BINARY );
@@ -731,8 +743,8 @@ int main( const int argc, const char * const argv[] )
return merge_files( filenames, default_output_filename, verbosity, force );
case m_range_dec:
one_file( filenames.size() );
- return range_decompress( filenames[0], default_output_filename,
- range, verbosity, force, ignore, to_stdout );
+ return range_decompress( filenames[0], default_output_filename, range,
+ verbosity, force, ignore_errors, to_stdout );
case m_repair:
one_file( filenames.size() );
if( default_output_filename.empty() )
@@ -823,7 +835,7 @@ int main( const int argc, const char * const argv[] )
delete_output_on_interrupt = true;
const struct stat * const in_statsp = input_filename.size() ? &in_stats : 0;
pp.set_name( input_filename );
- const int tmp = decompress( infd, pp, program_mode == m_test );
+ const int tmp = decompress( infd, pp, ignore_trailing, program_mode == m_test );
if( tmp > retval ) retval = tmp;
if( tmp && program_mode != m_test ) cleanup_and_fail( retval );
diff --git a/mtester.cc b/mtester.cc
index a9ac06e..92de2e0 100644
--- a/mtester.cc
+++ b/mtester.cc
@@ -56,7 +56,7 @@ void LZ_mtester::flush_data()
{
const int size = pos - stream_pos;
crc32.update_buf( crc_, buffer + stream_pos, size );
- if( pos >= buffer_size ) { partial_data_pos += pos; pos = 0; }
+ if( pos >= dictionary_size ) { partial_data_pos += pos; pos = 0; }
stream_pos = pos;
}
}
@@ -89,11 +89,11 @@ void LZ_mtester::print_block( const int len )
void LZ_mtester::duplicate_buffer()
{
- uint8_t * const tmp = new uint8_t[buffer_size];
+ uint8_t * const tmp = new uint8_t[dictionary_size];
if( data_position() > 0 )
std::memcpy( tmp, buffer, std::min( data_position(),
- (unsigned long long)buffer_size ) );
- else tmp[buffer_size-1] = 0; // prev_byte of first byte
+ (unsigned long long)dictionary_size ) );
+ else tmp[dictionary_size-1] = 0; // prev_byte of first byte
buffer = tmp;
}
@@ -232,7 +232,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
format_byte( match_byte ) );
}
}
- else /* match or repeated match */
+ else // match or repeated match
{
int len;
if( rdec.decode_bit( bm_rep[state()] ) != 0 ) // 2nd bit
@@ -271,7 +271,7 @@ int LZ_mtester::debug_decode_member( const long long dpos, const long long mpos,
std::printf( "%6llu %6llu rep%c %6u,%3d (%6llu)",
mp, dp, rep + '0', rep0 + 1, len, dp - rep0 - 1 );
}
- else /* match */
+ else // match
{
const unsigned rep0_saved = rep0;
len = min_match_len + rdec.decode_len( match_len_model, pos_state );
diff --git a/mtester.h b/mtester.h
index 795d8e4..71df64d 100644
--- a/mtester.h
+++ b/mtester.h
@@ -194,14 +194,13 @@ class LZ_mtester
unsigned long long partial_data_pos;
Range_mtester rdec;
const unsigned dictionary_size;
- const int buffer_size;
- uint8_t * buffer; /* output buffer */
- int pos; /* current pos in buffer */
- int stream_pos; /* first byte not yet written to file */
+ uint8_t * buffer; // output buffer
+ unsigned pos; // current pos in buffer
+ unsigned stream_pos; // first byte not yet written to file
uint32_t crc_;
- unsigned rep0; /* rep[0-3] latest four distances */
- unsigned rep1; /* used for efficient coding of */
- unsigned rep2; /* repeated distances */
+ unsigned rep0; // rep[0-3] latest four distances
+ unsigned rep1; // used for efficient coding of
+ unsigned rep2; // repeated distances
unsigned rep3;
State state;
@@ -225,37 +224,42 @@ class LZ_mtester
uint8_t peek_prev() const
{
- const int i = ( ( pos > 0 ) ? pos : buffer_size ) - 1;
+ const unsigned i = ( ( pos > 0 ) ? pos : dictionary_size ) - 1;
return buffer[i];
}
- uint8_t peek( const int distance ) const
+ uint8_t peek( const unsigned distance ) const
{
- int i = pos - distance - 1;
- if( i < 0 ) i += buffer_size;
+ unsigned i = pos - distance - 1;
+ if( pos <= distance ) i += dictionary_size;
return buffer[i];
}
void put_byte( const uint8_t b )
{
buffer[pos] = b;
- if( ++pos >= buffer_size ) flush_data();
+ if( ++pos >= dictionary_size ) flush_data();
}
- void copy_block( const int distance, int len )
+ void copy_block( const unsigned distance, unsigned len )
{
- int i = pos - distance - 1;
- if( i < 0 ) i += buffer_size;
- if( len < buffer_size - std::max( pos, i ) && len <= std::abs( pos - i ) )
+ unsigned i = pos - distance - 1;
+ bool fast;
+ if( pos <= distance )
+ { i += dictionary_size;
+ fast = ( len <= dictionary_size - i && len <= i - pos ); }
+ else
+ fast = ( len < dictionary_size - pos && len <= pos - i );
+ if( fast ) // no wrap, no overlap
{
- std::memcpy( buffer + pos, buffer + i, len ); // no wrap, no overlap
+ std::memcpy( buffer + pos, buffer + i, len );
pos += len;
}
else for( ; len > 0; --len )
{
buffer[pos] = buffer[i];
- if( ++pos >= buffer_size ) flush_data();
- if( ++i >= buffer_size ) i = 0;
+ if( ++pos >= dictionary_size ) flush_data();
+ if( ++i >= dictionary_size ) i = 0;
}
}
@@ -268,8 +272,7 @@ public:
partial_data_pos( 0 ),
rdec( ibuf, ibuf_size ),
dictionary_size( dict_size ),
- buffer_size( std::max( 65536U, dictionary_size ) ),
- buffer( new uint8_t[buffer_size] ),
+ buffer( new uint8_t[dictionary_size] ),
pos( 0 ),
stream_pos( 0 ),
crc_( 0xFFFFFFFFU ),
@@ -277,7 +280,7 @@ public:
rep1( 0 ),
rep2( 0 ),
rep3( 0 )
- { buffer[buffer_size-1] = 0; } // prev_byte of first byte
+ { buffer[dictionary_size-1] = 0; } // prev_byte of first byte
~LZ_mtester() { delete[] buffer; }
diff --git a/range_dec.cc b/range_dec.cc
index d4a2b2c..c6ccb7a 100644
--- a/range_dec.cc
+++ b/range_dec.cc
@@ -48,16 +48,7 @@ int decompress_member( const int infd, const int outfd,
rdec.read_data( header.data, File_header::size );
if( rdec.finished() ) // End Of File
{ pp( "File ends unexpectedly at member header." ); return 2; }
- if( !header.verify_magic() )
- { pp( "Bad magic number (file not in lzip format)." ); return 2; }
- if( !header.verify_version() )
- {
- if( pp.verbosity() >= 0 )
- { pp();
- std::fprintf( stderr, "Version %d member format not supported.\n",
- header.version() ); }
- return 2;
- }
+ if( !verify_header( header, pp ) ) return 2;
const unsigned dictionary_size = header.dictionary_size();
if( dictionary_size < min_dictionary_size ||
dictionary_size > max_dictionary_size )
@@ -101,7 +92,12 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
{
const unsigned long long data_size = file_index.data_end();
const unsigned long long file_size = file_index.file_end();
+ unsigned dictionary_size = 0;
+ for( long i = 0; i < file_index.members(); ++i )
+ if( dictionary_size < file_index.dictionary_size( i ) )
+ dictionary_size = file_index.dictionary_size( i );
pp( 0, stdout );
+ show_header( dictionary_size, 1 );
if( data_size > 0 && file_size > 0 )
std::printf( "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
(double)data_size / file_size,
@@ -123,6 +119,10 @@ int list_file( const char * const input_filename, const Pretty_print & pp )
db.pos(), db.size(), mb.pos(), mb.size() );
}
}
+ const long long trailing_size = file_index.file_size() - file_index.file_end();
+ if( pp.verbosity() >= 1 && trailing_size > 0 )
+ std::printf( " %lld bytes of trailing data at end of file.\n",
+ trailing_size );
}
return 0;
}
diff --git a/repair.cc b/repair.cc
index e83b843..e8588e3 100644
--- a/repair.cc
+++ b/repair.cc
@@ -41,10 +41,9 @@ bool gross_damage( const long long msize, const uint8_t * const mbuffer )
enum { maxlen = 6 }; // max number of consecutive identical bytes
long i = File_header::size;
const long end = msize - File_trailer::size - maxlen;
- uint8_t byte;
while( i < end )
{
- byte = mbuffer[i];
+ const uint8_t byte = mbuffer[i];
int len = 0; // does not count the first byte
while( mbuffer[++i] == byte && ++len < maxlen ) {}
if( len >= maxlen ) return true;
diff --git a/split.cc b/split.cc
index 2ffb359..fc92ea4 100644
--- a/split.cc
+++ b/split.cc
@@ -62,25 +62,6 @@ bool next_filename( std::string & output_filename, const int max_digits )
}
-bool verify_header( const File_header & header, const Pretty_print & pp )
- {
- if( !header.verify_magic() )
- {
- pp( "Bad magic number (file not in lzip format)." );
- return false;
- }
- if( !header.verify_version() )
- {
- if( pp.verbosity() >= 0 )
- { pp();
- std::fprintf( stderr, "Version %d member format not supported.\n",
- header.version() ); }
- return false;
- }
- return true;
- }
-
-
// Search forward from 'pos' for "LZIP" (Boyer-Moore algorithm)
// Returns pos of found string or 'pos+size' if not found.
//
@@ -205,6 +186,25 @@ int do_split_file( const std::string & input_filename, uint8_t * & base_buffer,
} // end namespace
+bool verify_header( const File_header & header, const Pretty_print & pp )
+ {
+ if( !header.verify_magic() )
+ {
+ pp( "Bad magic number (file not in lzip format)." );
+ return false;
+ }
+ if( !header.verify_version() )
+ {
+ if( pp.verbosity() >= 0 )
+ { pp();
+ std::fprintf( stderr, "Version %d member format not supported.\n",
+ header.version() ); }
+ return false;
+ }
+ return true;
+ }
+
+
int split_file( const std::string & input_filename,
const std::string & default_output_filename,
const int verbosity, const bool force )
diff --git a/testsuite/check.sh b/testsuite/check.sh
index 8a9c81d..537f5f0 100755
--- a/testsuite/check.sh
+++ b/testsuite/check.sh
@@ -57,13 +57,10 @@ fail=0
printf "testing lziprecover-%s..." "$2"
-printf " in: Bad magic number (file not in lzip format).\n" > msg
-"${LZIP}" -t in 2> out
-if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
-printf " (stdin): Bad magic number (file not in lzip format).\n" > msg
-"${LZIP}" -t < in 2> out
-if [ $? = 2 ] && cmp out msg ; then printf . ; else printf - ; fail=1 ; fi
-rm -f out msg
+"${LZIP}" -tq in
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -tq < in
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq in
if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIP}" -cdq < in
@@ -110,6 +107,15 @@ cmp in2 copy2 || fail=1
printf .
printf "garbage" >> copy2.lz || framework_failure
+rm -f copy2
+"${LZIP}" -atq copy2.lz
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -atq < copy2.lz
+if [ $? = 2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -adkq copy2.lz
+if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
+"${LZIP}" -adkq -o copy2 < copy2.lz
+if [ $? = 2 ] && [ ! -e copy2 ] ; then printf . ; else printf - ; fail=1 ; fi
printf "to be overwritten" > copy2 || framework_failure
"${LZIP}" -df copy2.lz || fail=1
cmp in2 copy2 || fail=1
@@ -125,6 +131,8 @@ if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; f
"${LZIPRECOVER}" -D0 -iq "${f5b1_lz}" > copy
if [ $? = 2 ] && cmp "${f5b1}" copy ; then printf . ; else printf - ; fail=1 ; fi
+printf "\ntesting --merge ..."
+
rm -f copy.lz
"${LZIPRECOVER}" -m -o copy.lz "${fox5_lz}" "${f5b1_lz}"
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
@@ -144,8 +152,8 @@ for i in "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
+ printf .
done
-printf .
for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${i}" || fail=1
@@ -160,21 +168,25 @@ for i in "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" ; do
cmp "${fox5_lz}" copy.lz || fail=1
"${LZIPRECOVER}" -mf -o copy.lz "${i}" "${f5b2_lz}" "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
+ printf .
done
-printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${f5b1_lz}" "${f5b2_lz}" "${f5b3_lz}" "${f5b4_lz}" "${f5b5_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad1_lz}" "${bad2_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad2_lz}" "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
@@ -185,14 +197,19 @@ cat "${bad2_lz}" "${in_lz}" "${bad2_lz}" "${bad2_lz}" > bad22.lz || framework_fa
cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure
"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad12.lz bad22.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad11.lz bad22.lz bad12.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad11.lz bad22.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad12.lz bad22.lz bad11.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad11.lz bad12.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad22.lz bad12.lz bad11.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
@@ -204,19 +221,24 @@ for i in "${bad1_lz}" "${bad2_lz}" ; do
"${LZIPRECOVER}" -mf -o copy.lz "${j}" "${i}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
done
+ printf .
done
-printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad4_lz}" "${bad5_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad3_lz}" "${bad5_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad3_lz}" "${bad5_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad4_lz}" "${bad5_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o copy.lz "${bad5_lz}" "${bad4_lz}" "${bad3_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
@@ -227,18 +249,25 @@ cat "${bad5_lz}" "${bad3_lz}" "${bad4_lz}" "${in_lz}" > bad534.lz || framework_f
cat "${in_lz}" "${in_lz}" "${in_lz}" "${in_lz}" > copy4.lz || framework_failure
"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad453.lz bad534.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad345.lz bad534.lz bad453.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad345.lz bad534.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad453.lz bad534.lz bad345.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad345.lz bad453.lz || fail=1
cmp out4.lz copy4.lz || fail=1
+printf .
"${LZIPRECOVER}" -mf -o out4.lz bad534.lz bad453.lz bad345.lz || fail=1
cmp out4.lz copy4.lz || fail=1
printf .
+printf "\ntesting --repair ..."
+
rm -f copy.lz
"${LZIPRECOVER}" -R -o copy.lz "${fox5_lz}" || fail=1
if [ $? = 0 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
@@ -250,6 +279,7 @@ if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
if [ $? = 2 ] && [ ! -e copy.lz ] ; then printf . ; else printf - ; fail=1 ; fi
"${LZIPRECOVER}" -Rf -o copy.lz "${f5b1_lz}" || fail=1
cmp "${fox5_lz}" copy.lz || fail=1
+printf .
"${LZIPRECOVER}" -Rf -o copy.lz "${bad1_lz}" || fail=1
cmp "${in_lz}" copy.lz || fail=1
printf .
@@ -264,14 +294,17 @@ mv copy.lz copy.tlz || framework_failure
"${LZIPRECOVER}" -R copy.tlz || fail=1
if [ $? = 0 ] && [ -e copy_fixed.tlz ] ; then printf . ; else printf - ; fail=1 ; fi
+printf "\ntesting --split ..."
+
cat "${in_lz}" "${in_lz}" "${in_lz}" > copy || framework_failure
printf "garbage" >> copy || fail=1
"${LZIPRECOVER}" -s -o copy.lz copy || fail=1
+printf .
for i in 1 2 3 ; do
"${LZIPRECOVER}" -cd rec${i}copy.lz > copy || fail=1
cmp in copy || fail=1
+ printf .
done
-printf .
echo
if [ ${fail} = 0 ] ; then