/* Tarlz - Archiver with multimember lzip compression
   Copyright (C) 2013-2022 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include <climits>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <stdint.h>

// Largest file size accepted by Extended::file_size.
// (header_size is declared below; the macro is expanded only at its uses.)
#define max_file_size ( LLONG_MAX - header_size )

enum { header_size = 512 };
typedef uint8_t Tar_header[header_size];

// Offset of each field inside a Tar_header.
enum Offsets {
  name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124,
  mtime_o = 136, chksum_o = 148, typeflag_o = 156, linkname_o = 157,
  magic_o = 257, version_o = 263, uname_o = 265, gname_o = 297,
  devmajor_o = 329, devminor_o = 337, prefix_o = 345 };

// Length in bytes of each field inside a Tar_header.
enum Lengths {
  name_l = 100, mode_l = 8, uid_l = 8, gid_l = 8, size_l = 12,
  mtime_l = 12, chksum_l = 8, typeflag_l = 1, linkname_l = 100,
  magic_l = 6, version_l = 2, uname_l = 32, gname_l = 32,
  devmajor_l = 8, devminor_l = 8, prefix_l = 155 };

// Values of the typeflag field.
enum Typeflag {
  tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3',
  tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7',
  tf_global = 'g', tf_extended = 'x' };

const uint8_t ustar_magic[magic_l] =
  { 0x75, 0x73, 0x74, 0x61, 0x72, 0 };			// "ustar\0"

// Returns true if the magic field of 'header' is exactly "ustar\0".
inline bool verify_ustar_magic( const Tar_header header )
  { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; }

inline void init_tar_header( Tar_header header )    // set magic and version
  {
  std::memset( header, 0, header_size );
  // the terminating 0 of the magic is already in place after the memset
  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
  header[version_o] = header[version_o+1] = '0';
  }

// Writes the 'size' least significant octal digits of 'num' into 'buf',
// most significant digit first, padded with '0'. No terminator is written.
inline void print_octal( uint8_t * const buf, int size, unsigned long long num )
  { while( --size >= 0 ) { buf[size] = num % 8 + '0'; num /= 8; } }

// Round "size" to the next multiple of header size (512).
//
inline unsigned long long round_up( const unsigned long long size )
  {
  const int rem = size % header_size;
  const int padding = rem ? header_size - rem : 0;
  return size + padding;
  }

// Returns the number of decimal digits needed to print 'value' (>= 1).
inline int decimal_digits( unsigned long long value )
  {
  int digits = 1;
  while( value >= 10 ) { value /= 10; ++digits; }
  return digits;
  }

// Returns true if a ".." path component starts at filename[i], i.e. the
// two dots are delimited by '/' or by the ends of the string.
// The short-circuit order guarantees that no byte past the terminating 0
// is read as long as i is a valid index into 'filename'.
inline bool dotdot_at_i( const char * const filename, const int i )
  {
  return ( filename[i] == '.' && filename[i+1] == '.' &&
           ( i == 0 || filename[i-1] == '/' ) &&
           ( filename[i+2] == 0 || filename[i+2] == '/' ) );
  }

// Returns true if any component of 'filename' is "..".
inline bool contains_dotdot( const char * const filename )
  {
  for( int i = 0; filename[i]; ++i )
    if( dotdot_at_i( filename, i ) ) return true;
  return false;
  }


/* Growable byte buffer backed by malloc/realloc. It never shrinks.
   If the initial allocation fails, size() is 0 and operator() returns 0.
   NOTE(review): no copy constructor or assignment operator is declared,
   so copying an object would make two owners free the same pointer;
   pass it by reference. */
class Resizable_buffer
  {
  char * p;
  unsigned long size_;			// size_ < LONG_MAX

public:
  // must be >= 87 for format_member_name
  enum { default_initial_size = 2 * header_size };

  explicit Resizable_buffer( const unsigned long initial_size =
                             default_initial_size )
    : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
  ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }

  // Grows the buffer to at least 'new_size' bytes.
  // Returns false (leaving the buffer unchanged) if new_size >= LONG_MAX
  // or if realloc fails.
  bool resize( const unsigned long long new_size )
    {
    if( new_size >= LONG_MAX ) return false;
    if( size_ < new_size )
      {
      char * const tmp = (char *)std::realloc( p, new_size );
      if( !tmp ) return false;
      p = tmp; size_ = new_size;
      }
    return true;
    }
  char * operator()() { return p; }
  const char * operator()() const { return p; }
  uint8_t * u8() { return (uint8_t *)p; }
  const uint8_t * u8() const { return (const uint8_t *)p; }
  unsigned long size() const { return size_; }
  };


inline bool uid_in_ustar_range( const long long uid )	// also for gid
  { return uid >= 0 && uid < 1 << 21; }

inline bool time_in_ustar_range( const long long seconds )
  { return seconds >= 0 && seconds < 1LL << 33; }


/* The sign of the seconds field applies to the whole time value.
   A nanoseconds value out of range means an invalid time. */
class Etime			// time since (or before) the epoch
  {
  long long sec_;
  int nsec_;			// range [0, 999_999_999]

public:
  Etime() : sec_( 0 ), nsec_( -1 ) {}		// starts as an invalid time
  void reset() { sec_ = 0; nsec_ = -1; }
  void set( const long long s ) { sec_ = s; nsec_ = 0; }
  long long sec() const { return sec_; }
  int nsec() const { return nsec_; }
  bool isvalid() const { return nsec_ >= 0 && nsec_ <= 999999999; }
  // an invalid time is never reported as out of range
  bool out_of_ustar_range() const
    { return isvalid() && !time_in_ustar_range( sec_ ); }

  unsigned decimal_size() const;
  unsigned print( char * const buf ) const;
  bool parse( const char * const ptr, const char ** const tailp,
              const long long size );
  };


class Extended			// stores metadata from/for extended records
  {
  static std::vector< std::string > unknown_keywords;	// already diagnosed
  std::string linkpath_;		// these are the real metadata
  std::string path_;
  long long file_size_;			// >= 0 && <= max_file_size
  long long uid_, gid_;			// may not fit in unsigned int
  Etime atime_, mtime_;

  // cached sizes; if full_size_ < 0 they must be recalculated
  mutable long long edsize_;		// extended data size
  mutable long long padded_edsize_;	// edsize rounded up
  mutable long long full_size_;		// header + padded edsize
  mutable long long linkpath_recsize_;
  mutable long long path_recsize_;
  mutable int file_size_recsize_;
  mutable int uid_recsize_;
  mutable int gid_recsize_;
  mutable int atime_recsize_;
  mutable int mtime_recsize_;

  // true if CRC present in parsed or formatted records
  mutable bool crc_present_;

  void calculate_sizes() const;
  void unknown_keyword( const char * const buf,
                        const unsigned long long size ) const;

public:
  static const std::string crc_record;
  std::string removed_prefix;

  Extended()
    : file_size_( 0 ), uid_( -1 ), gid_( -1 ), edsize_( 0 ),
      padded_edsize_( 0 ), full_size_( 0 ), linkpath_recsize_( 0 ),
      path_recsize_( 0 ), file_size_recsize_( 0 ), uid_recsize_( 0 ),
      gid_recsize_( 0 ), atime_recsize_( 0 ), mtime_recsize_( 0 ),
      crc_present_( false ) {}

  // Returns the object to the state left by the default constructor.
  void reset()
    {
    linkpath_.clear(); path_.clear(); file_size_ = 0;
    uid_ = -1; gid_ = -1; atime_.reset(); mtime_.reset();
    edsize_ = 0; padded_edsize_ = 0; full_size_ = 0;
    linkpath_recsize_ = 0; path_recsize_ = 0; file_size_recsize_ = 0;
    uid_recsize_ = 0; gid_recsize_ = 0;
    atime_recsize_ = 0; mtime_recsize_ = 0;
    crc_present_ = false; removed_prefix.clear();
    }

  // True if no metadata is stored. Times count only when they are valid
  // and out of ustar range.
  bool empty() const
    {
    return linkpath_.empty() && path_.empty() && file_size_ == 0 &&
           uid_ < 0 && gid_ < 0 &&
           !atime_.out_of_ustar_range() && !mtime_.out_of_ustar_range();
    }

  const std::string & linkpath() const { return linkpath_; }
  const std::string & path() const { return path_; }
  long long file_size() const { return file_size_; }
  long long get_file_size_and_reset( const Tar_header header );
  long long get_uid() const { return uid_; }
  long long get_gid() const { return gid_; }
  const Etime & atime() const { return atime_; }
  const Etime & mtime() const { return mtime_; }

  // Every setter marks the cached sizes as stale (full_size_ = -1).
  void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; }
  void path( const char * const p ) { path_ = p; full_size_ = -1; }
  // a size outside [0, max_file_size] is stored as 0
  void file_size( const long long fs )
    {
    full_size_ = -1;
    file_size_ = ( fs >= 0 && fs <= max_file_size ) ? fs : 0;
    }
  // negative ids are rejected (return false) and leave the object unchanged
  bool set_uid( const long long id )
    { if( id >= 0 ) { uid_ = id; full_size_ = -1; } return id >= 0; }
  bool set_gid( const long long id )
    { if( id >= 0 ) { gid_ = id; full_size_ = -1; } return id >= 0; }
  void set_atime( const long long s ) { atime_.set( s ); full_size_ = -1; }
  void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -1; }

  // recalculates the cached sizes first if they are stale
  long long full_size() const
    { if( full_size_ < 0 ) calculate_sizes(); return full_size_; }

  bool crc_present() const { return crc_present_; }
  long long format_block( Resizable_buffer & rbuf ) const;
  bool parse( const char * const buf, const unsigned long long edsize,
              const bool permissive );
  void fill_from_ustar( const Tar_header header );
  };


class CRC32
  {
  uint32_t data[256];		// Table of CRCs of all 8-bit messages.

public:
  // Builds the table for the reflected polynomial selected by 'castagnoli'.
  CRC32( const bool castagnoli = false )
    {
    const unsigned cpol = 0x82F63B78U;	// CRC32-C  Castagnoli polynomial.
    const unsigned ipol = 0xEDB88320U;	// IEEE 802.3 Ethernet polynomial.
    const unsigned poly = castagnoli ? cpol : ipol;

    for( unsigned n = 0; n < 256; ++n )
      {
      unsigned c = n;
      for( int k = 0; k < 8; ++k )
        { if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; }
      data[n] = c;
      }
    }

  void update_byte( uint32_t & crc, const uint8_t byte ) const
    { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }

  // about as fast as it is possible without messing with endianness
  void update_buf( uint32_t & crc, const uint8_t * const buffer,
                   const int size ) const
    {
    uint32_t c = crc;
    for( int i = 0; i < size; ++i )
      c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
    crc = c;
    }

  // Returns the finished CRC (initial value and final XOR 0xFFFFFFFF) of
  // the first 'size' bytes of 'buffer'.
  uint32_t compute_crc( const uint8_t * const buffer, const int size ) const
    {
    uint32_t crc = 0xFFFFFFFFU;
    for( int i = 0; i < size; ++i )
      crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 );
    return crc ^ 0xFFFFFFFFU;
    }

  // Calculates the crc of size bytes except a window of 8 bytes at pos
  uint32_t windowed_crc( const uint8_t * const buffer, const int pos,
                         const int size ) const
    {
    uint32_t crc = 0xFFFFFFFFU;
    update_buf( crc, buffer, pos );
    update_buf( crc, buffer + pos + 8, size - pos - 8 );
    return crc ^ 0xFFFFFFFFU;
    }
  };


struct Lzma_options
  {
  int dictionary_size;		// 4 KiB .. 512 MiB
  int match_len_limit;		// 5 .. 273
  };

// Encoder parameters indexed by compression level (0 to 9).
const Lzma_options option_mapping[] =
  {
  {   65535,  16 },		// -0
  { 1 << 20,   5 },		// -1
  { 3 << 19,   6 },		// -2
  { 1 << 21,   8 },		// -3
  { 3 << 20,  12 },		// -4
  { 1 << 22,  20 },		// -5
  { 1 << 23,  36 },		// -6
  { 1 << 24,  68 },		// -7
  { 3 << 23, 132 },		// -8
  { 1 << 25, 273 } };		// -9


enum {
  min_dictionary_bits = 12,
  min_dictionary_size = 1 << min_dictionary_bits,
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,
  min_member_size = 36,
  min_data_size = 2 * min_dictionary_size,
  max_data_size = 2 * max_dictionary_size };


inline bool isvalid_ds( const unsigned dictionary_size )
  { return ( dictionary_size >= min_dictionary_size &&
             dictionary_size <= max_dictionary_size ); }


const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };	// "LZIP"

struct Lzip_header
  {
  enum { size = 6 };
  uint8_t data[size];			// 0-3 magic bytes
					//   4 version
					//   5 coded dictionary size

  bool verify_magic() const
    { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }

  bool verify_prefix( const int sz ) const	// detect (truncated) header
    {
    for( int i = 0; i < sz && i < 4; ++i )
      if( data[i] != lzip_magic[i] ) return false;
    return ( sz > 0 );
    }
  bool verify_corrupt() const			// detect corrupt header
    {
    int matches = 0;
    for( int i = 0; i < 4; ++i )
      if( data[i] == lzip_magic[i] ) ++matches;
    return ( matches > 1 && matches < 4 );
    }

  uint8_t version() const { return data[4]; }
  bool verify_version() const { return ( data[4] == 1 ); }

  /* Decodes byte 5: the low 5 bits give a power of 2 base size; for bases
     larger than min_dictionary_size the high 3 bits give the number of
     sixteenths of the base size to subtract from it.
     The shift is done on an unsigned operand because the exponent may be
     as large as 31, and shifting a signed 1 that far is undefined. */
  unsigned dictionary_size() const
    {
    unsigned sz = ( 1U << ( data[5] & 0x1F ) );
    if( sz > min_dictionary_size )
      sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
    return sz;
    }

  bool verify() const
    { return verify_magic() && verify_version() &&
             isvalid_ds( dictionary_size() ); }
  };


struct Lzip_trailer
  {
  enum { size = 20 };
  uint8_t data[size];	//  0-3  CRC32 of the uncompressed data
			//  4-11 size of the uncompressed data
			// 12-19 member size including header and trailer

  // The three fields are stored least significant byte first.
  unsigned data_crc() const
    {
    unsigned tmp = 0;
    for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
    return tmp;
    }

  unsigned long long data_size() const
    {
    unsigned long long tmp = 0;
    for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
    return tmp;
    }

  unsigned long long member_size() const
    {
    unsigned long long tmp = 0;
    for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
    return tmp;
    }

  /* Rejects trailers where exactly one of CRC and data size is zero,
     where the member size is below min_member_size, or where member size
     and data size are out of proportion. Each proportion limit is applied
     only if its computation did not wrap around (limit > operand). */
  bool verify_consistency() const	// check internal consistency
    {
    const unsigned crc = data_crc();
    const unsigned long long dsize = data_size();
    if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
    const unsigned long long msize = member_size();
    if( msize < min_member_size ) return false;
    const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
    if( mlimit > dsize && msize > mlimit ) return false;
    const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
    if( dlimit > msize && dsize > dlimit ) return false;
    return true;
    }
  };


enum Program_mode { m_none, m_append, m_compress, m_concatenate, m_create,
                    m_delete, m_diff, m_extract, m_list };
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
class Arg_parser;

struct Cl_options		// command line options
  {
  const Arg_parser & parser;
  std::string archive_name;
  std::string output_filename;
  long long mtime;
  long long uid;
  long long gid;
  Program_mode program_mode;
  Solidity solidity;
  int data_size;
  int debug_level;
  int level;		// compression level, < 0 means uncompressed
  int num_files;
  int num_workers;	// start this many worker threads
  int out_slots;
  bool dereference;
  bool filenames_given;
  bool ignore_ids;
  bool ignore_overflow;
  bool keep_damaged;
  bool missing_crc;
  bool mtime_set;
  bool permissive;
  bool preserve_permissions;
  bool warn_newer;

  Cl_options( const Arg_parser & ap )
    : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), program_mode( m_none ),
      solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ),
      num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ),
      filenames_given( false ), ignore_ids( false ), ignore_overflow( false ),
      keep_damaged( false ), missing_crc( false ), mtime_set( false ),
      permissive( false ), preserve_permissions( false ), warn_newer( false ) {}

  bool to_stdout() const { return output_filename == "-"; }
  };

// Raises 'retval' to 'new_val' if 'new_val' is larger; never lowers it.
inline void set_retval( int & retval, const int new_val )
  { if( retval < new_val ) retval = new_val; }

const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const trailing_msg = "Trailing data not allowed.";
const char * const bad_hdr_msg = "Corrupt or invalid tar header.";
const char * const gblrec_msg = "Error in global extended records.";
const char * const extrec_msg = "Error in extended records.";
const char * const miscrc_msg = "Missing CRC in extended records.";
const char * const misrec_msg = "Missing extended records.";
const char * const longrec_msg = "Extended records are too long.";
const char * const end_msg = "Archive ends unexpectedly.";
const char * const mem_msg = "Not enough memory.";
const char * const mem_msg2 = "Not enough memory. Try a lower compression level.";
const char * const fv_msg1 =
  "Format violation: extended header followed by EOA blocks.";
const char * const fv_msg2 =
  "Format violation: extended header followed by global header.";
const char * const fv_msg3 =
  "Format violation: consecutive extended headers found.";
const char * const posix_msg = "This does not look like a POSIX tar archive.";
const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive.";
const char * const eclosa_msg = "Error closing archive";
const char * const eclosf_msg = "Error closing file";
const char * const nfound_msg = "Not found in archive.";
const char * const seek_msg = "Seek error";
const char * const werr_msg = "Write error";
const char * const chdir_msg = "Error changing working directory";

// NOTE: the prototypes below use pthread_mutex_t, pthread_cond_t and mode_t,
// which are not declared by the headers included above; they must be
// declared before this point by whoever includes this file.

// defined in common.cc
void xinit_mutex( pthread_mutex_t * const mutex );
void xinit_cond( pthread_cond_t * const cond );
void xdestroy_mutex( pthread_mutex_t * const mutex );
void xdestroy_cond( pthread_cond_t * const cond );
void xlock( pthread_mutex_t * const mutex );
void xunlock( pthread_mutex_t * const mutex );
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
void xsignal( pthread_cond_t * const cond );
void xbroadcast( pthread_cond_t * const cond );
unsigned long long parse_octal( const uint8_t * const ptr, const int size );
int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );
bool nonempty_arg( const Arg_parser & parser, const int i );

// defined in common_decode.cc
bool block_is_zero( const uint8_t * const buf, const int size );
bool format_member_name( const Extended & extended, const Tar_header header,
                         Resizable_buffer & rbuf, const bool long_format );
bool show_member_name( const Extended & extended, const Tar_header header,
                       const int vlevel, Resizable_buffer & rbuf );
bool check_skip_filename( const Cl_options & cl_opts,
                          std::vector< char > & name_pending,
                          const char * const filename );
mode_t get_umask();
bool make_path( const std::string & name );

// defined in compress.cc
int compress( const Cl_options & cl_opts );

// defined in create.cc
bool copy_file( const int infd, const int outfd,
                const long long max_size = -1 );
bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
                         const int size );
bool write_eoa_records( const int outfd, const bool compressed );
const char * remove_leading_dotslash( const char * const filename,
                                      std::string * const removed_prefixp,
                                      const bool dotdot = false );
bool print_removed_prefix( const std::string & prefix,
                           std::string * const msgp = 0 );
bool fill_headers( const char * const filename, Extended & extended,
                   Tar_header header, long long & file_size, const int flag );
bool block_is_full( const long long extended_size,
                    const unsigned long long file_size,
                    const unsigned long long target_size,
                    unsigned long long & partial_data_size );
void set_error_status( const int retval );
int final_exit_status( int retval, const bool show_msg = true );
unsigned ustar_chksum( const Tar_header header );
bool verify_ustar_chksum( const Tar_header header );
bool has_lz_ext( const std::string & name );
int concatenate( const Cl_options & cl_opts );
int encode( const Cl_options & cl_opts );

// defined in create_lz.cc
int encode_lz( const Cl_options & cl_opts, const char * const archive_namep,
               const int outfd );

// defined in decode.cc
bool compare_file_type( std::string & estr, std::string & ostr,
                        const Cl_options & cl_opts,
                        const Extended & extended, const Tar_header header );
class Archive_reader_base;
bool compare_file_contents( std::string & estr, std::string & ostr,
                            Archive_reader_base & ar,
                            const long long file_size,
                            const char * const filename, const int infd2 );
int decode( const Cl_options & cl_opts );

// defined in decode_lz.cc
struct Archive_descriptor;
int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
               std::vector< char > & name_pending );

// defined in delete.cc
bool safe_seek( const int fd, const long long pos );
int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad,
               std::vector< char > & name_pending,
               const long long istream_pos, const int outfd, int retval );
int delete_members( const Cl_options & cl_opts );

// defined in delete_lz.cc
int delete_members_lz( const Cl_options & cl_opts,
                       const Archive_descriptor & ad,
                       std::vector< char > & name_pending, const int outfd );

// defined in exclude.cc
namespace Exclude {
void add_pattern( const std::string & arg );
void clear();
bool excluded( const char * const filename );
} // end namespace Exclude

// defined in extended.cc
extern const CRC32 crc32c;

// defined in lzip_index.cc
int seek_read( const int fd, uint8_t * const buf, const int size,
               const long long pos );

// defined in main.cc
extern int verbosity;
extern const char * const program_name;
struct stat;
int hstat( const char * const filename, struct stat * const st,
           const bool dereference );
int open_instream( const std::string & name );
int open_outstream( const std::string & name, const bool create = true,
                    Resizable_buffer * const rbufp = 0,
                    const bool force = true );
void exit_fail_mt( const int retval = 1 );	// terminate the program
void show_error( const char * const msg, const int errcode = 0,
                 const bool help = false );
bool format_error( Resizable_buffer & rbuf, const int errcode,
                   const char * const format, ... );
void print_error( const int errcode, const char * const format, ... );
void format_file_error( std::string & estr, const char * const filename,
                        const char * const msg, const int errcode = 0 );
bool format_file_error( Resizable_buffer & rbuf, const char * const filename,
                        const char * const msg, const int errcode = 0 );
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode = 0 );
void internal_error( const char * const msg );