1 files changed, 95 insertions, 22 deletions
diff --git a/unzcrash.cc b/unzcrash.cc
index 3970638..9a32b82 100644
--- a/unzcrash.cc
+++ b/unzcrash.cc
@@ -1,6 +1,6 @@
 /*  Unzcrash - Tests robustness of decompressors to corrupted data.
     Inspired by unzcrash.c from Julian Seward's bzip2.
-    Copyright (C) 2008-2016 Antonio Diaz Diaz.
+    Copyright (C) 2008-2017 Antonio Diaz Diaz.
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -40,12 +40,16 @@
 #error "Environments where CHAR_BIT != 8 are not supported."
 #endif
 
+#ifndef INT64_MAX	// fallback: define it here if no included header did
+#define INT64_MAX  0x7FFFFFFFFFFFFFFFLL
+#endif
+
 
 namespace {
 
 const char * const Program_name = "Unzcrash";
 const char * const program_name = "unzcrash";
-const char * const program_year = "2016";
+const char * const program_year = "2017";
 const char * invocation_name = 0;
 
 int verbosity = 0;
@@ -55,14 +59,27 @@ void show_help()
   {
   std::printf( "%s - Tests robustness of decompressors to corrupted data.\n", Program_name );
   std::printf( "\nUsage: %s [options] \"lzip -tv\" filename.lz\n", invocation_name );
-  std::printf( "\nThis program reads the specified file and then repeatedly decompresses\n"
-               "it, increasing 256 times each byte of the compressed data, so as to test\n"
-               "all possible one-byte errors. This should not cause any invalid memory\n"
-               "accesses. If it does, please, report it as a bug.\n"
+  std::printf( "\nBy default, unzcrash reads the specified file and then repeatedly\n"
+               "decompresses it, increasing 256 times each byte of the compressed data,\n"
+               "so as to test all possible one-byte errors.\n"
+               "\nIf the '--block' option is given, unzcrash reads the specified file\n"
+               "and then repeatedly decompresses it, setting all bytes in each\n"
+               "successive block to the value given, so as to test all possible full\n"
+               "sector errors.\n"
+               "\nIf the '--truncate' option is given, unzcrash reads the specified\n"
+               "file and then repeatedly decompresses it, truncating the file to\n"
+               "increasing lengths, so as to test all possible truncation points.\n"
+               "\nNone of the three test modes described above should cause any invalid\n"
+               "memory accesses. If any of them does, please, report it as a bug to the\n"
+               "maintainers of the decompressor being tested.\n"
                "\nIf the decompressor returns with zero status, unzcrash compares the\n"
                "output of the decompressor for the original and corrupt files. If the\n"
-               "outputs differ, it means that the decompressor failed to recognize the\n"
-               "corruption and produced garbage output. Please, report it as a bug.\n"
+               "outputs differ, it means that the decompressor returned a false\n"
+               "negative; it failed to recognize the corruption and produced garbage\n"
+               "output. The only exception is when a multimember file is truncated just\n"
+               "after the last byte of a member, producing a shorter but valid\n"
+               "compressed file. Except in this latter case, please, report any false\n"
+               "negative as a bug.\n"
                "\nIn order to compare the outputs, unzcrash needs a zcmp program able to\n"
                "understand the format being tested. For example the one provided by zutils.\n"
                "Use '--zcmp=false' to disable comparisons.\n"
@@ -72,6 +89,7 @@ void show_help()
                "  -b, --bits=<range>            test N-bit errors instead of full byte\n"
                "  -B, --block[=<size>][,<val>]  test blocks of given size [512,0]\n"
                "  -d, --delta=<n>               test one of every n bytes/blocks/truncations\n"
+               "  -e, --set-byte=<pos>,<val>    set byte at position <pos> to value <val>\n"
                "  -p, --position=<bytes>        first byte position to test [default 0]\n"
                "  -q, --quiet                   suppress all messages\n"
                "  -s, --size=<bytes>            number of byte positions to test [all]\n"
@@ -124,12 +142,13 @@ void internal_error( const char * const msg )
   }
 
 
-long getnum( const char * const ptr, const long llimit, const long ulimit,
-             const bool comma = false )
+long long getnum( const char * const ptr, const long long llimit = -LLONG_MAX,
+                  const long long ulimit = LLONG_MAX,
+                  const char ** const tailp = 0 )	// if nonzero, receives the end of the parsed text
   {
   char * tail;
   errno = 0;
-  long result = strtol( ptr, &tail, 0 );
+  long long result = strtoll( ptr, &tail, 0 );	// base 0: decimal, octal or hex
   if( tail == ptr )
     {
     show_error( "Bad or missing numerical argument.", 0, true );
@@ -138,11 +157,14 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
 
   if( !errno && tail[0] )
     {
-    const int factor = ( tail[1] == 'i' ) ? 1024 : 1000;
+    char * const p = tail++;		// p = multiplier char; tail = what follows it
+    int factor;
+    bool bsuf;					// 'B' suffix is present
+    if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000;
+    if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false;
     int exponent = -1;				// -1 = bad multiplier
-    switch( tail[0] )
+    switch( *p )
       {
-      case ',': if( comma ) exponent = 0; break;
       case 'Y': exponent = 8; break;
       case 'Z': exponent = 7; break;
       case 'E': exponent = 6; break;
@@ -152,6 +174,8 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
       case 'M': exponent = 2; break;
       case 'K': if( factor == 1024 ) exponent = 1; break;
       case 'k': if( factor == 1000 ) exponent = 1; break;
+      case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break;
+      default : if( tailp ) { tail = p; exponent = 0; } break;	// not a multiplier: leave it for the caller
       }
     if( exponent < 0 )
       {
@@ -160,7 +184,7 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
       }
     for( int i = 0; i < exponent; ++i )
       {
-      if( LONG_MAX / factor >= std::labs( result ) ) result *= factor;
+      if( result <= LLONG_MAX / factor && result >= -( LLONG_MAX / factor ) ) result *= factor;
       else { errno = ERANGE; break; }
       }
     }
@@ -170,23 +194,64 @@ long getnum( const char * const ptr, const long llimit, const long ulimit,
     show_error( "Numerical argument out of limits." );
     std::exit( 1 );
     }
+  if( tailp ) *tailp = tail;
   return result;
   }
 
 
 void parse_block( const char * const ptr, long & size, uint8_t & value )
   {
-  const char * const ptr2 = std::strchr( ptr, ',' );
+  const char * tail = ptr;		// end of the parsed <size>, if any
+  // Parses "[<size>][,<val>]"; a field that is absent leaves its output unchanged.
+  if( tail[0] != ',' )			// text does not start with ',': <size> comes first
+    size = getnum( ptr, 1, INT_MAX, &tail );
+  if( tail[0] == ',' )			// <val> follows the comma
+    value = getnum( tail + 1, 0, 255 );
+  else if( tail[0] )			// leftover text that is not ",<val>"
+    {
+    show_error( "Bad separator in argument of '--block'", 0, true );
+    std::exit( 1 );
+    }
+  }
+
+
+struct Bad_byte			// how the '--set-byte' option alters one byte of the file
+  {
+  enum Mode { literal, delta, flip };	// replace, add to, or XOR the old byte
+  long long pos;			// position of the byte; stays -1 if never set
+  Mode mode;
+  uint8_t value;
 
-  if( !ptr2 || ptr2 != ptr )
-    size = getnum( ptr, 1, INT_MAX, true );
-  if( ptr2 )
-    value = getnum( ptr2 + 1, 0, 255 );
+  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}
+  uint8_t operator()( const uint8_t old_value ) const	// returns the new byte value
+    {
+    if( mode == delta ) return old_value + value;	// sum wraps modulo 256
+    if( mode == flip ) return old_value ^ value;
+    return value;
+    }
+  };
+
+
+// Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
+// No prefix selects mode 'literal', '+' selects 'delta', 'f' selects 'flip'.
+void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
+  {
+  const char * tail;			// set by getnum to the first unparsed char
+  bad_byte.pos = getnum( ptr, 0, INT64_MAX, &tail );
+  if( tail[0] != ',' )
+    {
+    show_error( "Bad separator between <pos> and <val>.", 0, true );
+    std::exit( 1 );
+    }
+  if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }	// tail now at the prefix
+  else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
+  else bad_byte.mode = Bad_byte::literal;
+  bad_byte.value = getnum( tail + 1, 0, 255 );	// number starts after ',' or the prefix
   }
 
 
 /* Returns the address of a malloc'd buffer containing the file data and
-   its size in '*size'.
+   the file size in '*size'.
    In case of error, returns 0 and does not modify '*size'.
 */
 uint8_t * read_file( const char * const name, long * const size )
@@ -309,6 +374,7 @@ int main( const int argc, const char * const argv[] )
   enum Mode { m_block, m_byte, m_truncate };
   const char * mode_str[3] = { "block", "byte", "size" };
   Bitset8 bits;			// if Bitset8::parse not called test full byte
+  Bad_byte bad_byte;
   const char * zcmp_program = "zcmp";
   long pos = 0;
   long max_size = LONG_MAX;
@@ -324,6 +390,7 @@ int main( const int argc, const char * const argv[] )
     { 'b', "bits",     Arg_parser::yes },
     { 'B', "block",    Arg_parser::maybe },
     { 'd', "delta",    Arg_parser::yes },
+    { 'e', "set-byte", Arg_parser::yes },
     { 'p', "position", Arg_parser::yes },
     { 'q', "quiet",    Arg_parser::no  },
     { 's', "size",     Arg_parser::yes },
@@ -331,7 +398,7 @@ int main( const int argc, const char * const argv[] )
     { 'v', "verbose",  Arg_parser::no  },
     { 'V', "version",  Arg_parser::no  },
     { 'z', "zcmp",     Arg_parser::yes },
-    {  0 ,  0,         Arg_parser::no  } };
+    {  0 , 0,          Arg_parser::no  } };
 
   const Arg_parser parser( argc, argv, options );
   if( parser.error().size() )				// bad option
@@ -350,6 +417,7 @@ int main( const int argc, const char * const argv[] )
       case 'B': if( arg[0] ) parse_block( arg, block_size, block_value );
                 program_mode = m_block; break;
       case 'd': delta = getnum( arg, 1, INT_MAX ); break;
+      case 'e': parse_pos_value( arg, bad_byte ); break;
       case 'p': pos = getnum( arg, -LONG_MAX, LONG_MAX ); break;
       case 'q': verbosity = -1; break;
       case 's': max_size = getnum( arg, -LONG_MAX, LONG_MAX ); break;
@@ -414,6 +482,11 @@ int main( const int argc, const char * const argv[] )
     { show_error( "Nothing to do; domain is empty." ); return 0; }
   if( max_size < 0 ) max_size += file_size - pos;
   const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
+  if( bad_byte.pos >= file_size )
+    { show_error( "Position of '--set-byte' is beyond end of file." );
+      return 1; }
+  if( bad_byte.pos >= 0 )
+    buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
   long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
   if( program_mode == m_truncate )
     for( long i = pos; i < end; i += std::min( delta, end - i ) )