/* Lziprecover - Data recovery tool for the lzip format
Copyright (C) 2023-2024 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#define _FILE_OFFSET_BITS 64
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "lzip.h"
#include "md5.h"
#include "fec.h"
namespace {
void xinit_mutex( pthread_mutex_t * const mutex )
{
const int errcode = pthread_mutex_init( mutex, 0 );
if( errcode )
{ show_error( "pthread_mutex_init", errcode ); cleanup_and_fail( 1 ); }
}
void xinit_cond( pthread_cond_t * const cond )
{
const int errcode = pthread_cond_init( cond, 0 );
if( errcode )
{ show_error( "pthread_cond_init", errcode ); cleanup_and_fail( 1 ); }
}
void xdestroy_mutex( pthread_mutex_t * const mutex )
{
const int errcode = pthread_mutex_destroy( mutex );
if( errcode )
{ show_error( "pthread_mutex_destroy", errcode ); cleanup_and_fail( 1 ); }
}
void xdestroy_cond( pthread_cond_t * const cond )
{
const int errcode = pthread_cond_destroy( cond );
if( errcode )
{ show_error( "pthread_cond_destroy", errcode ); cleanup_and_fail( 1 ); }
}
void xlock( pthread_mutex_t * const mutex )
{
const int errcode = pthread_mutex_lock( mutex );
if( errcode )
{ show_error( "pthread_mutex_lock", errcode ); cleanup_and_fail( 1 ); }
}
void xunlock( pthread_mutex_t * const mutex )
{
const int errcode = pthread_mutex_unlock( mutex );
if( errcode )
{ show_error( "pthread_mutex_unlock", errcode ); cleanup_and_fail( 1 ); }
}
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
{
const int errcode = pthread_cond_wait( cond, mutex );
if( errcode )
{ show_error( "pthread_cond_wait", errcode ); cleanup_and_fail( 1 ); }
}
void xsignal( pthread_cond_t * const cond )
{
const int errcode = pthread_cond_signal( cond );
if( errcode )
{ show_error( "pthread_cond_signal", errcode ); cleanup_and_fail( 1 ); }
}
unsigned long out_size;
unsigned deliver_id; // id of worker writing fec packets to outfd
unsigned check_counter;
unsigned wait_counter;
pthread_mutex_t omutex;
std::vector< pthread_cond_t > may_deliver; // worker[i] may write
pthread_mutex_t cmutex = PTHREAD_MUTEX_INITIALIZER; // cleanup mutex
struct Worker_arg
{
const uint8_t * prodata;
const uint8_t * lastbuf;
unsigned fec_blocks;
unsigned k;
unsigned num_workers;
unsigned worker_id;
Coded_fbs coded_fbs;
bool gf16;
};
// write a fec packet and pass the token to the next thread
extern "C" void * worker( void * arg )
{
const Worker_arg & tmp = *(const Worker_arg *)arg;
const uint8_t * const prodata = tmp.prodata;
const uint8_t * const lastbuf = tmp.lastbuf;
const unsigned fec_blocks = tmp.fec_blocks;
const unsigned k = tmp.k;
const unsigned num_workers = tmp.num_workers;
const unsigned worker_id = tmp.worker_id;
const Coded_fbs coded_fbs = tmp.coded_fbs;
const bool gf16 = tmp.gf16;
for( unsigned fbn = worker_id; fbn < fec_blocks; fbn += num_workers )
{
const Fec_packet fec_packet( prodata, lastbuf, fbn, k, coded_fbs, gf16 );
const long packet_size = fec_packet.packet_size();
xlock( &omutex );
++check_counter;
while( worker_id != deliver_id )
{ ++wait_counter; xwait( &may_deliver[worker_id], &omutex ); }
xlock( &cmutex ); // because of cleanup_and_fail
if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
{ xunlock( &cmutex ); cleanup_and_fail( 1 ); }
xunlock( &cmutex );
out_size += packet_size;
if( ++deliver_id >= num_workers ) deliver_id = 0;
xsignal( &may_deliver[deliver_id] ); // allow next worker to write
xunlock( &omutex );
}
return 0;
}
// start the workers and wait for them to finish.
bool write_fec_mt( const uint8_t * const prodata,
const uint8_t * const lastbuf,
const unsigned fec_blocks, const unsigned k,
const unsigned num_workers, const Coded_fbs coded_fbs,
const char debug_level, const bool gf16 )
{
if( debug_level & 2 ) std::fputs( "write_fec_mt.\n", stderr );
out_size = 0;
deliver_id = 0;
check_counter = 0;
wait_counter = 0;
xinit_mutex( &omutex );
may_deliver.resize( num_workers );
for( unsigned i = 0; i < may_deliver.size(); ++i )
xinit_cond( &may_deliver[i] );
std::vector< Worker_arg > worker_args( num_workers );
std::vector< pthread_t > worker_threads( num_workers );
for( unsigned i = 0; i < num_workers; ++i )
{
worker_args[i].prodata = prodata;
worker_args[i].lastbuf = lastbuf;
worker_args[i].fec_blocks = fec_blocks;
worker_args[i].k = k;
worker_args[i].num_workers = num_workers;
worker_args[i].worker_id = i;
worker_args[i].coded_fbs = coded_fbs;
worker_args[i].gf16 = gf16;
const int errcode =
pthread_create( &worker_threads[i], 0, worker, &worker_args[i] );
if( errcode ) { show_error( "Can't create worker threads", errcode );
cleanup_and_fail( 1 ); }
}
for( unsigned i = 0; i < num_workers; ++i )
{
const int errcode = pthread_join( worker_threads[i], 0 );
if( errcode ) { show_error( "Can't join worker threads", errcode );
cleanup_and_fail( 1 ); }
}
for( unsigned i = 0; i < may_deliver.size(); ++i )
xdestroy_cond( &may_deliver[i] );
xdestroy_mutex( &omutex );
if( debug_level & 1 )
std::fprintf( stderr,
"workers started %8u\n"
"any worker tried to write a packet %8u times\n"
"any worker had to wait %8u times\n",
num_workers, check_counter, wait_counter );
return true;
}
inline void set_le( uint8_t * const buf, const int size, unsigned long n )
{ for( int i = 0; i < size; ++i ) { buf[i] = (uint8_t)n; n >>= 8; } }
unsigned compute_unit_fbs( const unsigned long prodata_size )
{
unsigned bs = min_fbs;
while( bs < 65536 && 4ULL * bs * bs < prodata_size ) bs <<= 1;
return bs;
}
unsigned long divide_fbs( const unsigned long size, const unsigned blocks,
const unsigned unit_fbs )
{
unsigned long long fbs = ceil_divide( size, blocks ); // ULL as max_fbs
if( fbs < min_fbs ) fbs = min_fbs;
else if( fbs > max_fbs ) fbs = max_fbs;
return ceil_divide( fbs, unit_fbs );
}
Coded_fbs compute_fbs( const unsigned long prodata_size,
const unsigned cl_block_size, const char fec_level )
{
const unsigned unit_fbs = isvalid_fbs( cl_block_size ) ? cl_block_size :
compute_unit_fbs( prodata_size );
const unsigned long max_k = (fec_level == 0) ? max_k8 : max_k16;
const unsigned k9 = std::min( ceil_divide( prodata_size, unit_fbs ), max_k );
const unsigned long fbsu9 = divide_fbs( prodata_size, k9, unit_fbs );
const unsigned long fbsu0 = divide_fbs( prodata_size, max_k8, unit_fbs );
const unsigned long a = std::min( (10 - fec_level) * fbsu9, fbsu0 ); // lin
const unsigned long b = fbsu0 >> fec_level; // exp
const unsigned long fbsu = std::max( a, b ); // join linear and exponential
return Coded_fbs( fbsu * unit_fbs, unit_fbs );
}
unsigned compute_fec_blocks( const unsigned long prodata_size,
const unsigned long fb_or_pct, const char fctype,
const char fec_level, const Coded_fbs coded_fbs )
{
const unsigned long fbs = coded_fbs.val();
const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
const unsigned long max_k = (fec_level == 0) ? max_k8 : max_k16;
if( !isvalid_fbs( fbs ) || prodata_blocks > max_k ) return 0;
const unsigned long max_nk = (fec_level == 0) ? max_k8 : max_nk16;
unsigned fec_blocks;
if( fctype == fc_blocks ) fec_blocks = std::min( max_nk, fb_or_pct );
else
{
unsigned long fec_bytes;
if( fctype == fc_percent )
{ const double pct = std::max( 1UL, std::min( 100000UL, fb_or_pct ) );
fec_bytes = (unsigned long)std::ceil( prodata_size * pct / 100000 ); }
else if( fctype == fc_bytes )
fec_bytes = std::min( fb_or_pct, prodata_size );
else return 0; // unknown fctype, must not happen
fec_blocks = std::min( ceil_divide( fec_bytes, fbs ), max_nk );
}
if( fec_blocks > prodata_blocks ) fec_blocks = prodata_blocks;
return fec_blocks;
}
// return random number between 0 and 32767
unsigned my_rand( unsigned long & state )
{
state = state * 1103515245 + 12345;
return ( state / 65536 ) % 32768;
}
void random_fbn_vector( const unsigned fec_blocks, const bool gf16,
std::vector< unsigned > & fbn_vector )
{
struct timespec ts;
clock_gettime( CLOCK_REALTIME, &ts );
unsigned long state = ts.tv_nsec;
while( state != 0 && ( state & 1 ) == 0 ) state >>= 1;
if( state != 0 ) state *= ts.tv_sec; else state = ts.tv_sec;
for( unsigned i = 0; i < fec_blocks; ++i )
{
again: const unsigned fbn =
gf16 ? my_rand( state ) : my_rand( state ) % 128;
for( unsigned j = 0; j < fbn_vector.size(); ++j )
if( fbn == fbn_vector[j] ) goto again;
fbn_vector.push_back( fbn );
}
}
bool write_fec( const char * const input_filename,
const uint8_t * const prodata, const unsigned long prodata_size,
const unsigned long fb_or_pct, const unsigned cl_block_size,
unsigned num_workers, const char debug_level, const char fctype,
const char fec_level, const bool cl_gf16, const bool fec_random )
{
const Coded_fbs coded_fbs =
compute_fbs( prodata_size, cl_block_size, fec_level );
const unsigned fec_blocks =
compute_fec_blocks( prodata_size, fb_or_pct, fctype, fec_level, coded_fbs );
if( fec_blocks == 0 ) { show_file_error( input_filename,
"Input file is too large for fec protection." ); return false; }
if( num_workers > fec_blocks ) num_workers = fec_blocks;
const unsigned long fbs = coded_fbs.val();
const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
md5_type prodata_md5;
compute_md5( prodata, prodata_size, prodata_md5 );
unsigned chksum_packet_size;
const bool gf16 = cl_gf16 || prodata_blocks > max_k8 || fec_blocks > max_k8;
{
const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5,
coded_fbs, gf16, false ); // CRC32 array
const long packet_size = chksum_packet.packet_size();
if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size )
goto fail;
chksum_packet_size = packet_size;
}
{
unsigned long fecdata_size = chksum_packet_size;
const uint8_t * const lastbuf = set_lastbuf( prodata, prodata_size, fbs );
gf16 ? gf16_init() : gf8_init(); // initialize Galois tables
if( fec_random )
{
std::vector< unsigned > fbn_vector;
random_fbn_vector( fec_blocks, gf16, fbn_vector );
for( unsigned i = 0; i < fbn_vector.size(); ++i )
{
const unsigned fbn = fbn_vector[i];
const Fec_packet
fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 );
const long packet_size = fec_packet.packet_size();
if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
{ delete[] lastbuf; goto fail; }
fecdata_size += packet_size;
}
}
else if( num_workers > 1 )
{
if( !write_fec_mt( prodata, lastbuf, fec_blocks, prodata_blocks,
num_workers, coded_fbs, debug_level, gf16 ) )
{ delete[] lastbuf; goto fail; }
fecdata_size += out_size;
}
else for( unsigned fbn = 0; fbn < fec_blocks; ++fbn )
{
const Fec_packet
fec_packet( prodata, lastbuf, fbn, prodata_blocks, coded_fbs, gf16 );
const long packet_size = fec_packet.packet_size();
if( writeblock( outfd, fec_packet.image(), packet_size ) != packet_size )
{ delete[] lastbuf; goto fail; }
fecdata_size += packet_size;
}
delete[] lastbuf;
if( ( fecdata_size + chksum_packet_size ) / 2 <= fec_blocks * fbs &&
fec_blocks > 1 ) // write the second chksum packet
{
const Chksum_packet chksum_packet( prodata, prodata_size, prodata_md5,
coded_fbs, gf16, true ); // CRC32-C array
const long packet_size = chksum_packet.packet_size();
if( writeblock( outfd, chksum_packet.image(), packet_size ) != packet_size )
goto fail;
fecdata_size += packet_size;
}
if( fecdata_size % 4 != 0 ) internal_error( "fecdata_size % 4 != 0" );
if( verbosity >= 1 )
std::fprintf( stderr, " %s: %s bytes, %s fec bytes, %u blocks\n",
printable_name( output_filename, false ),
format_num3( fecdata_size ),
format_num3( fec_blocks * fbs ), fec_blocks );
return true;
}
fail:
show_file_error( printable_name( output_filename, false ), write_error_msg,
errno ); return false;
}
int open_instream2( const std::string & name, struct stat * const in_statsp )
{
if( !has_fec_extension( name ) )
return open_instream( name.c_str(), in_statsp, false, true );
if( verbosity >= 0 )
std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix, ignored.\n",
program_name, name.c_str(), fec_extension );
return -1;
}
} // end namespace
Chksum_packet::Chksum_packet( const uint8_t * const prodata,
const unsigned long prodata_size,
const md5_type & prodata_md5, const Coded_fbs coded_fbs,
const bool gf16_, const bool is_crc_c_ )
{
const unsigned long fbs = coded_fbs.val();
const unsigned prodata_blocks = ceil_divide( prodata_size, fbs );
if( prodata_blocks * fbs < prodata_size )
internal_error( "prodata_blocks * fec_block_size < prodata_size" );
const unsigned paysize = prodata_blocks * sizeof crc_array()[0];
const unsigned packet_size = header_size + paysize + trailer_size;
if( paysize <= prodata_blocks || packet_size <= paysize )
throw std::bad_alloc();
uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr
image_ = ip;
std::memcpy( ip, fec_magic, fec_magic_l );
ip[version_o] = current_version;
ip[flags_o] = ( gf16_ << 1 ) | is_crc_c_;
set_le( ip + prodata_size_o, prodata_size_l, prodata_size );
*(md5_type *)(ip + prodata_md5_o) = prodata_md5;
coded_fbs.copy( ip + fbs_o );
set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) );
le32 * const crc_arr = (le32 *)(ip + crc_array_o); // fill crc array
unsigned i = 0;
if( !is_crc_c_ ) // CRC32
for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i )
crc_arr[i] =
crc32.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) );
else
{ // CRC32-C
const CRC32 crc32c( true );
for( unsigned long pos = 0; pos < prodata_size; pos += fbs, ++i )
crc_arr[i] =
crc32c.compute_crc( prodata + pos, std::min( fbs, prodata_size - pos ) );
}
if( i != prodata_blocks )
internal_error( "wrong fec_block_size or number of prodata_blocks." );
// compute CRC32 of payload (crc array)
set_le( ip + crc_array_o + paysize, crc32_l,
crc32.compute_crc( image_ + crc_array_o, paysize ) );
}
Fec_packet::Fec_packet( const uint8_t * const prodata,
const uint8_t * const lastbuf,
const unsigned fbn, const unsigned k,
const Coded_fbs coded_fbs, const bool gf16 )
{
const unsigned long fbs = coded_fbs.val();
const unsigned long packet_size = header_size + fbs + trailer_size;
if( packet_size <= fbs || !fits_in_size_t( packet_size ) )
throw std::bad_alloc();
uint8_t * const ip = new uint8_t[packet_size]; // writable image ptr
image_ = ip;
std::memcpy( ip, fec_packet_magic, fec_magic_l );
set_le( ip + fbn_o, fbn_l, fbn );
coded_fbs.copy( ip + fbs_o );
set_le( ip + header_crc_o, crc32_l, compute_header_crc( image_ ) );
// fill fec array
gf16 ? rs16_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k ) :
rs8_encode( prodata, lastbuf, ip + fec_block_o, fbs, fbn, k );
// compute CRC32 of payload (fec array)
set_le( ip + fec_block_o + fbs, crc32_l,
crc32.compute_crc( image_ + fec_block_o, fbs ) );
}
void cleanup_mutex_lock() // make cleanup_and_fail thread-safe
{ pthread_mutex_lock( &cmutex ); } // ignore errors to avoid loop
int gf_check( const unsigned k, const bool cl_gf16, const bool fec_random )
{
std::vector< unsigned > fbn_vector;
const bool gf16 = cl_gf16 || k > max_k8;
if( fec_random ) random_fbn_vector( k, gf16, fbn_vector );
return gf16 ? !gf16_check( fbn_vector, k ) : !gf8_check( fbn_vector, k );
}
/* if name contains slash(es), copy name into srcdir up to the last slash,
removing a leading dot followed by slash(es) */
void extract_dirname( const std::string & name, std::string & srcdir )
{
unsigned i = 0;
unsigned j = name.size();
if( j >= 2 && name[0] == '.' && name[1] == '/' ) // remove leading "./"
for( i = 2; i < j && name[i] == '/'; ) ++i;
while( j > i && name[j-1] != '/' ) --j; // remove last component if any
if( j > i ) srcdir.assign( name, i, j - i );
}
// replace prefix srcdir with destdir in name and write result to outname
void replace_dirname( const std::string & name, const std::string & srcdir,
const std::string & destdir, std::string & outname )
{
if( srcdir.size() && name.compare( 0, srcdir.size(), srcdir ) != 0 )
{ if( verbosity >= 0 ) std::fprintf( stderr,
"dirname '%s' != '%s'\n", name.c_str(), srcdir.c_str() );
internal_error( "srcdir mismatch." ); }
outname = destdir;
outname.append( name, srcdir.size(), name.size() - srcdir.size() );
}
bool has_fec_extension( const std::string & name )
{
const unsigned ext_len = std::strlen( fec_extension );
return name.size() > ext_len &&
name.compare( name.size() - ext_len, ext_len, fec_extension ) == 0;
}
int fec_create( const std::vector< std::string > & filenames,
const std::string & default_output_filename,
const unsigned long fb_or_pct, const unsigned cl_block_size,
const unsigned num_workers, const char debug_level,
const char fctype, const char fec_level, const char recursive,
const bool cl_gf16, const bool fec_random, const bool force,
const bool to_stdout )
{
const bool to_dir = !to_stdout && default_output_filename.size() &&
default_output_filename.end()[-1] == '/';
const bool to_file = !to_stdout && !to_dir && default_output_filename.size();
if( ( to_stdout || to_file ) && filenames.size() != 1 )
{ show_error( "You must specify exactly 1 file when redirecting fec data." );
return 1; }
if( ( to_stdout || to_file ) && recursive )
{ show_error( "Can't redirect fec data in recursive mode." ); return 1; }
if( to_stdout ) { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; }
else outfd = -1;
int retval = 0;
const bool one_to_one = !to_stdout && !to_file;
for( unsigned i = 0; i < filenames.size(); ++i )
{
if( filenames[i] == "-" )
{ prot_stdin(); set_retval( retval, 1 ); continue; }
std::string srcdir; // dirname to be replaced by '-o dir/'
if( to_dir ) extract_dirname( filenames[i], srcdir );
std::list< std::string > filelist( 1U, filenames[i] );
std::string input_filename;
while( next_filename( filelist, input_filename, retval, recursive ) )
{
struct stat in_stats;
const int infd = open_instream2( input_filename, &in_stats );
if( infd < 0 ) { set_retval( retval, 1 ); continue; }
const char * const input_filenamep = input_filename.c_str();
const long long file_size = lseek( infd, 0, SEEK_END );
if( file_size <= 0 )
{ show_file_error( input_filenamep, "Input file is empty." );
set_retval( retval, 2 ); close( infd ); continue; }
if( !fits_in_size_t( file_size ) )
{ show_file_error( input_filenamep, large_file_msg );
set_retval( retval, 1 ); close( infd ); continue; }
const unsigned long prodata_size = file_size;
const uint8_t * const prodata =
(const uint8_t *)mmap( 0, prodata_size, PROT_READ, MAP_PRIVATE, infd, 0 );
close( infd );
if( prodata == MAP_FAILED )
{ show_file_error( input_filenamep, mmap_msg, errno );
set_retval( retval, 1 ); continue; }
if( one_to_one )
{
if( to_dir ) replace_dirname( input_filename, srcdir,
default_output_filename, output_filename );
else output_filename = input_filename;
output_filename += fec_extension; set_signal_handler();
if( !open_outstream( force, true, false, true, to_dir ) )
{ munmap( (void *)prodata, prodata_size );
set_retval( retval, 1 ); continue; }
if( !check_tty_out() )
{ set_retval( retval, 1 ); return retval; } // don't delete a tty
}
else if( to_file && outfd < 0 ) // open outfd after checking infd
{
output_filename = default_output_filename; set_signal_handler();
if( !open_outstream( force, false ) || !check_tty_out() )
return 1; // check tty only once and don't try to delete a tty
}
// write fec data to output file
if( !write_fec( input_filenamep, prodata, prodata_size, fb_or_pct,
cl_block_size, num_workers, debug_level, fctype,
fec_level, cl_gf16, fec_random ) )
{ munmap( (void *)prodata, prodata_size ); cleanup_and_fail( 1 ); }
/* To avoid '-Fc | -Ft' running out of address space, munmap before
closing outfd and mmap after reading fec data from stdin */
munmap( (void *)prodata, prodata_size );
if( !close_outstream( &in_stats ) ) cleanup_and_fail( 1 );
}
}
return retval;
}