diff options
Diffstat (limited to '')
-rw-r--r-- | tarlz.h | 216 |
1 files changed, 186 insertions, 30 deletions
@@ -42,22 +42,195 @@ inline bool verify_ustar_magic( const uint8_t * const header ) { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; } -class CRC32C // Uses CRC32-C (Castagnoli) polynomial. +// Round "size" to the next multiple of header size (512). +// +inline unsigned long long round_up( const unsigned long long size ) + { + const int rem = size % header_size; + const int padding = rem ? header_size - rem : 0; + return size + padding; + } + + +class Extended // stores metadata from/for extended records + { + std::string linkpath_; + std::string path_; + unsigned long long file_size_; + + mutable long long full_size_; // cached sizes + mutable int recsize_linkpath_; + mutable int recsize_path_; + mutable int recsize_file_size_; + + bool crc_present_; // true if CRC present in parsed records + +public: + static const std::string crc_record; + + Extended() + : file_size_( 0 ), full_size_( -1 ), recsize_linkpath_( -1 ), + recsize_path_( -1 ), recsize_file_size_( -1 ), crc_present_( false ) {} + + void reset() + { linkpath_.clear(); path_.clear(); file_size_ = 0; full_size_ = -1; + recsize_linkpath_ = -1; recsize_path_ = -1; recsize_file_size_ = -1; + crc_present_ = false; } + + bool empty() const + { return linkpath_.empty() && path_.empty() && file_size_ == 0; } + + const std::string & linkpath() const { return linkpath_; } + const std::string & path() const { return path_; } + unsigned long long file_size() const { return file_size_; } + + void linkpath( const char * const lp ) + { linkpath_ = lp; full_size_ = -1; recsize_linkpath_ = -1; } + void path( const char * const p ) + { path_ = p; full_size_ = -1; recsize_path_ = -1; } + void file_size( const unsigned long long fs ) + { file_size_ = fs; full_size_ = -1; recsize_file_size_ = -1; } + + int recsize_linkpath() const; + int recsize_path() const; + int recsize_file_size() const; + unsigned long long edsize() const // extended data size + { return empty() ? 0 : recsize_linkpath() + recsize_path() + + recsize_file_size() + crc_record.size(); } + unsigned long long edsize_pad() const // edsize rounded up + { return round_up( edsize() ); } + unsigned long long full_size() const + { if( full_size_ < 0 ) + full_size_ = ( empty() ? 0 : header_size + edsize_pad() ); + return full_size_; } + + bool crc_present() const { return crc_present_; } + bool parse( const char * const buf, const unsigned long long edsize, + const bool permissive ); + }; + + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + min_member_size = 36, + min_data_size = 2 * min_dictionary_size, + max_data_size = 2 * max_dictionary_size }; + + +inline bool isvalid_ds( const unsigned dictionary_size ) + { return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); } + + +const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" + +struct Lzip_header + { + uint8_t data[6]; // 0-3 magic bytes + // 4 version + // 5 coded_dict_size + enum { size = 6 }; + + bool verify_magic() const + { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } + + bool verify_prefix( const int sz ) const // detect (truncated) header + { + for( int i = 0; i < sz && i < 4; ++i ) + if( data[i] != lzip_magic[i] ) return false; + return ( sz > 0 ); + } + bool verify_corrupt() const // detect corrupt header + { + int matches = 0; + for( int i = 0; i < 4; ++i ) + if( data[i] == lzip_magic[i] ) ++matches; + return ( matches > 1 && matches < 4 ); + } + + uint8_t version() const { return data[4]; } + bool verify_version() const { return ( data[4] == 1 ); } + + unsigned dictionary_size() const + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + }; + + +struct Lzip_trailer + { + uint8_t data[20]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer + enum { size = 20 }; + + unsigned data_crc() const + { + unsigned tmp = 0; + for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + unsigned long long data_size() const + { + unsigned long long tmp = 0; + for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + unsigned long long member_size() const + { + unsigned long long tmp = 0; + for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + bool verify_consistency() const // check internal consistency + { + const unsigned crc = data_crc(); + const unsigned long long dsize = data_size(); + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = member_size(); + if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } + }; + + +class CRC32 { uint32_t data[256]; // Table of CRCs of all 8-bit messages. public: - CRC32C() + CRC32( const bool castagnoli = false ) { + const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial. + const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial. + const unsigned poly = castagnoli ? cpol : ipol; + for( unsigned n = 0; n < 256; ++n ) { unsigned c = n; for( int k = 0; k < 8; ++k ) - { if( c & 1 ) c = 0x82F63B78U ^ ( c >> 1 ); else c >>= 1; } + { if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; } data[n] = c; } } + void update_byte( uint32_t & crc, const uint8_t byte ) const + { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + void update_buf( uint32_t & crc, const uint8_t * const buffer, const int size ) const { @@ -78,32 +251,7 @@ public: } }; -extern const CRC32C crc32c; - - -// Round "size" to the next multiple of header size (512). -// -inline unsigned long long round_up( unsigned long long size ) - { - const int rem = size % header_size; - const int padding = rem ? header_size - rem : 0; - return size + padding; - } - - -struct Extended // stores metadata from/for extended records - { - std::string linkpath; - std::string path; - unsigned long long size; - bool crc_present; - Extended() : size( 0 ), crc_present( false ) {} - void reset() - { linkpath.clear(); path.clear(); size = 0; crc_present = false; } - bool empty() { return linkpath.empty() && path.empty() && size == 0; } - bool parse( const char * const buf, const unsigned long long edsize, - const bool permissive ); - }; +extern const CRC32 crc32c; enum { initial_line_length = 1000 }; // must be >= 77 @@ -132,10 +280,16 @@ public: unsigned size() const { return size_; } }; +const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const corrupt_mm_msg = "Corrupt header in multimember file."; +const char * const trailing_msg = "Trailing data not allowed."; + // defined in create.cc -enum Solidity { no_solid, dsolid, asolid, solid }; +enum Solidity { no_solid, bsolid, dsolid, asolid, solid }; extern int cl_owner; extern int cl_group; +extern int cl_data_size; extern Solidity solidity; unsigned ustar_chksum( const uint8_t * const header ); bool verify_ustar_chksum( const uint8_t * const header ); @@ -152,6 +306,8 @@ void format_member_name( const Extended & extended, const Tar_header header, const char * remove_leading_slash( const char * const filename ); bool compare_prefix_dir( const char * const dir, const char * const name ); bool compare_tslash( const char * const name1, const char * const name2 ); +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); unsigned long long parse_octal( const uint8_t * const ptr, const int size ); int decode( const std::string & archive_name, const Arg_parser & parser, const int filenames, const int num_workers, const int debug_level, |