/* Plzip - Massively parallel implementation of lzip
   Copyright (C) 2009-2024 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: this header uses std::string, std::vector, std::strlen,
// std::memcpy, std::memcmp and uint8_t but includes only <pthread.h>;
// the including file is expected to provide those declarations first.
#include <pthread.h>

enum {
  min_dictionary_bits = 12,
  min_dictionary_size = 1 << min_dictionary_bits,   // >= modeled_distances
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,
  min_member_size = 36
};

// defined in main.cc
extern int verbosity;


// Keeps the name of the file being processed, padded to the width of the
// longest name, for use as a message prefix.
class Pretty_print              // requires global var 'int verbosity'
{
  std::string name_;            // current name ("(stdin)" for "-" or empty)
  std::string padded_name;      // name_ with leading space, ": " and padding
  const char * const stdin_name;
  unsigned longest_name;        // length of the longest name in 'filenames'
  mutable bool first_post;      // set by set_name() and reset()

public:
  // Computes the length of the longest name in 'filenames' ("-" counts as
  // stdin_name). The scan is skipped when verbosity <= 0, in which case
  // longest_name stays 0.
  Pretty_print( const std::vector< std::string > & filenames )
    : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
  {
    if( verbosity <= 0 ) return;
    const unsigned stdin_name_len = std::strlen( stdin_name );
    for( unsigned i = 0; i < filenames.size(); ++i )
    {
      const std::string & s = filenames[i];
      const unsigned len = ( s == "-" ) ? stdin_name_len : s.size();
      if( longest_name < len ) longest_name = len;
    }
    // no names (or only empty ones): fall back to the width of "(stdin)"
    if( longest_name == 0 ) longest_name = stdin_name_len;
  }

  // Selects the current file name and rebuilds the padded prefix.
  // An empty name or "-" selects stdin_name.
  void set_name( const std::string & filename )
  {
    if( filename.size() && filename != "-" ) name_ = filename;
    else name_ = stdin_name;
    padded_name = " ";
    padded_name += name_;
    padded_name += ": ";
    if( longest_name > name_.size() )
      padded_name.append( longest_name - name_.size(), ' ' );
    first_post = true;
  }

  // Re-arms first_post, but only if a name has been set.
  void reset() const { if( name_.size() ) first_post = true; }

  const char * name() const { return name_.c_str(); }

  // Declared here, defined outside this header.
  void operator()( const char * const msg = 0 ) const;
};


// Returns true if dictionary_size is in
// [min_dictionary_size, max_dictionary_size].
inline bool isvalid_ds( const unsigned dictionary_size )
{
  return dictionary_size >= min_dictionary_size &&
         dictionary_size <= max_dictionary_size;
}


// Returns the number of bits needed to represent 'value' (0 for value 0).
inline int real_bits( unsigned value )
{
  int bits = 0;
  while( value > 0 ) { value >>= 1; ++bits; }
  return bits;
}


const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };   // "LZIP"

struct Lzip_header
{
  enum { size = 6 };
  uint8_t data[size];           // 0-3 magic bytes
                                //   4 version
                                //   5 coded dictionary size

  // Writes the magic bytes and version 1.
  void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }

  bool check_magic() const
  { return std::memcmp( data, lzip_magic, 4 ) == 0; }

  // Returns true if the first min( sz, 4 ) bytes match the magic bytes
  // and sz > 0.
  bool check_prefix( const int sz ) const       // detect (truncated) header
  {
    for( int i = 0; i < sz && i < 4; ++i )
      if( data[i] != lzip_magic[i] ) return false;
    return sz > 0;
  }

  // Returns true if 2 or 3 (but not all 4) magic bytes match.
  bool check_corrupt() const                    // detect corrupt header
  {
    int matches = 0;
    for( int i = 0; i < 4; ++i )
      if( data[i] == lzip_magic[i] ) ++matches;
    return matches > 1 && matches < 4;
  }

  uint8_t version() const { return data[4]; }
  bool check_version() const { return data[4] == 1; }

  // Decodes data[5]: bits 4-0 give the base size as a power of two, and
  // bits 7-5 the number of sixteenths of the base size to subtract.
  unsigned dictionary_size() const
  {
    // 1U (not 1): the shift count can be as large as 31, which would
    // overflow a signed int.
    unsigned sz = 1U << ( data[5] & 0x1F );
    if( sz > min_dictionary_size )
      sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
    return sz;
  }

  // Encodes sz into data[5]. Returns false (leaving data[5] untouched) if
  // sz is not a valid dictionary size. The coded size is the smallest
  // representable size >= sz.
  bool dictionary_size( const unsigned sz )
  {
    if( !isvalid_ds( sz ) ) return false;
    data[5] = real_bits( sz - 1 );      // log2 of the power of two >= sz
    if( sz > min_dictionary_size )
    {
      const unsigned base_size = 1U << data[5];
      const unsigned fraction = base_size / 16;
      // subtract as many sixteenths as possible while still covering sz
      for( unsigned i = 7; i >= 1; --i )
        if( base_size - ( i * fraction ) >= sz )
          { data[5] |= i << 5; break; }
    }
    return true;
  }

  // Returns true if magic, version and dictionary size are all valid.
  bool check() const
  { return check_magic() && check_version() && isvalid_ds( dictionary_size() ); }
};


// All multibyte fields are stored least significant byte first.
struct Lzip_trailer
{
  enum { size = 20 };
  uint8_t data[size];   //  0-3  CRC32 of the uncompressed data
                        //  4-11 size of the uncompressed data
                        // 12-19 member size including header and trailer

  unsigned data_crc() const
  {
    unsigned tmp = 0;
    for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
    return tmp;
  }

  void data_crc( unsigned crc )
  { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }

  unsigned long long data_size() const
  {
    unsigned long long tmp = 0;
    for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
    return tmp;
  }

  void data_size( unsigned long long sz )
  { for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }

  unsigned long long member_size() const
  {
    unsigned long long tmp = 0;
    for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
    return tmp;
  }

  void member_size( unsigned long long sz )
  { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }

  // Returns false if the three fields cannot belong together:
  // CRC and data size must be both zero or both nonzero, the member size
  // must be at least min_member_size, and member size and data size must
  // not exceed the limits derived from each other.
  bool check_consistency() const        // check internal consistency
  {
    const unsigned crc = data_crc();
    const unsigned long long dsize = data_size();
    if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
    const unsigned long long msize = member_size();
    if( msize < min_member_size ) return false;
    // NOTE(review): the 'limit > x' guards below presumably skip a test
    // whose limit computation wrapped around -- confirm.
    const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
    if( mlimit > dsize && msize > mlimit ) return false;
    const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
    if( dlimit > msize && dsize > dlimit ) return false;
    return true;
  }
};


struct Cl_options               // command-line options
{
  bool ignore_trailing;
  bool loose_trailing;

  Cl_options() : ignore_trailing( true ), loose_trailing( false ) {}
};


// Raises retval to new_val; never lowers it.
inline void set_retval( int & retval, const int new_val )
{ if( retval < new_val ) retval = new_val; }

const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const trailing_msg = "Trailing data not allowed.";
const char * const mem_msg = "Not enough memory.";

// defined in compress.cc
int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
void xinit_mutex( pthread_mutex_t * const mutex );
void xinit_cond( pthread_cond_t * const cond );
void xdestroy_mutex( pthread_mutex_t * const mutex );
void xdestroy_cond( pthread_cond_t * const cond );
void xlock( pthread_mutex_t * const mutex );
void xunlock( pthread_mutex_t * const mutex );
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
void xsignal( pthread_cond_t * const cond );
void xbroadcast( pthread_cond_t * const cond );
int compress( const unsigned long long cfile_size,
              const int data_size, const int dictionary_size,
              const int match_len_limit, const int num_workers,
              const int infd, const int outfd,
              const Pretty_print & pp, const int debug_level );

// defined in lzip_index.cc
class Lzip_index;               // forward declaration

// defined in dec_stdout.cc
int dec_stdout( const int num_workers, const int infd, const int outfd,
                const Pretty_print & pp, const int debug_level,
                const int out_slots, const Lzip_index & lzip_index );

// defined in dec_stream.cc
int dec_stream( const unsigned long long cfile_size,
                const int num_workers, const int infd, const int outfd,
                const Cl_options & cl_opts, const Pretty_print & pp,
                const int debug_level, const int in_slots,
                const int out_slots );

// defined in decompress.cc
int preadblock( const int fd, uint8_t * const buf, const int size,
                const long long pos );
class Shared_retval;
void decompress_error( struct LZ_Decoder * const decoder,
                       const Pretty_print & pp,
                       Shared_retval & shared_retval, const int worker_id );
void show_results( const unsigned long long in_size,
                   const unsigned long long out_size,
                   const unsigned dictionary_size, const bool testing );
int decompress( const unsigned long long cfile_size, int num_workers,
                const int infd, const int outfd, const Cl_options & cl_opts,
                const Pretty_print & pp, const int debug_level,
                const int in_slots, const int out_slots,
                const bool infd_isreg, const bool one_to_one );

// defined in list.cc
int list_files( const std::vector< std::string > & filenames,
                const Cl_options & cl_opts );

// defined in main.cc
struct stat;
const char * bad_version( const unsigned version );
const char * format_ds( const unsigned dictionary_size );
void show_header( const unsigned dictionary_size );
int open_instream( const char * const name, struct stat * const in_statsp,
                   const bool one_to_one, const bool reg_only = false );
void cleanup_and_fail( const int retval = 1 );  // terminate the program
void show_error( const char * const msg, const int errcode = 0,
                 const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode = 0 );
void internal_error( const char * const msg );
void show_progress( const unsigned long long packet_size,
                    const unsigned long long cfile_size = 0,
                    const Pretty_print * const p = 0 );


// Counter of free slots protected by a mutex; get_slot() blocks on a
// condition variable while no slot is free.
class Slot_tally
{
  const int num_slots;                          // total slots
  int num_free;                                 // remaining free slots
  pthread_mutex_t mutex;
  pthread_cond_t slot_av;                       // slot available

  Slot_tally( const Slot_tally & );             // declared as private
  void operator=( const Slot_tally & );         // declared as private

public:
  explicit Slot_tally( const int slots )
    : num_slots( slots ), num_free( slots )
  { xinit_mutex( &mutex ); xinit_cond( &slot_av ); }

  ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); }

  // NOTE(review): reads num_free without taking the mutex.
  bool all_free() { return num_free == num_slots; }

  void get_slot()                               // wait for a free slot
  {
    xlock( &mutex );
    while( num_free <= 0 ) xwait( &slot_av, &mutex );
    --num_free;
    xunlock( &mutex );
  }

  void leave_slot()                             // return a slot to the tally
  {
    xlock( &mutex );
    if( ++num_free == 1 ) xsignal( &slot_av );  // num_free was 0
    xunlock( &mutex );
  }
};


class Shared_retval             // shared return value protected by a mutex
{
  int retval;
  pthread_mutex_t mutex;

  Shared_retval( const Shared_retval & );       // declared as private
  void operator=( const Shared_retval & );      // declared as private

public:
  Shared_retval() : retval( 0 ) { xinit_mutex( &mutex ); }

  // Releases the mutex initialized by the constructor, mirroring
  // Slot_tally's destructor.
  ~Shared_retval() { xdestroy_mutex( &mutex ); }

  // Stores val only if no value has been stored yet and val > 0.
  // Returns true if this call stored the value.
  bool set_value( const int val )       // only one thread can set retval > 0
  {                                     // (and print an error message)
    xlock( &mutex );
    const bool done = ( retval == 0 && val > 0 );
    if( done ) retval = val;
    xunlock( &mutex );
    return done;
  }

  // NOTE(review): reads retval without taking the mutex.
  int operator()() const { return retval; }
};