From 18525b97f1a4b60884962d8fb326e8e85d837686 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 17 Jul 2021 09:43:33 +0200 Subject: Merging upstream version 0.21. Signed-off-by: Daniel Baumann --- compress.cc | 378 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 compress.cc (limited to 'compress.cc') diff --git a/compress.cc b/compress.cc new file mode 100644 index 0000000..e26814b --- /dev/null +++ b/compress.cc @@ -0,0 +1,378 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" + + +namespace { + +/* Variables used in signal handler context. + They are not declared volatile because the handler never returns. */ +std::string output_filename; +int outfd = -1; +bool delete_output_on_interrupt = false; + + +void set_signals( void (*action)(int) ) + { + std::signal( SIGHUP, action ); + std::signal( SIGINT, action ); + std::signal( SIGTERM, action ); + } + + +void cleanup_and_fail( const int retval ) + { + set_signals( SIG_IGN ); // ignore signals + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename.c_str() ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed." ); + } + std::exit( retval ); + } + + +extern "C" void signal_handler( int ) + { + show_error( "Control-C or similar caught, quitting." ); + cleanup_and_fail( 1 ); + } + + +const char * ne_output_filename() // non-empty output file name + { + return output_filename.size() ? output_filename.c_str() : "(stdout)"; + } + + +bool check_tty_out() + { + if( isatty( outfd ) ) + { show_file_error( ne_output_filename(), + "I won't write compressed data to a terminal." ); + return false; } + return true; + } + + +// Set permissions, owner, and times. +void close_and_set_permissions( const struct stat * const in_statsp ) + { + bool warning = false; + if( in_statsp ) + { + const mode_t mode = in_statsp->st_mode; + // fchown will in many cases return with EPERM, which can be safely ignored. + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + } + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno ); + cleanup_and_fail( 1 ); + } + outfd = -1; + delete_output_on_interrupt = false; + if( in_statsp ) + { + struct utimbuf t; + t.actime = in_statsp->st_atime; + t.modtime = in_statsp->st_mtime; + if( utime( output_filename.c_str(), &t ) != 0 ) warning = true; + } + if( warning && verbosity >= 1 ) + show_error( "Can't change output file attributes." ); + } + + +inline void set_retval( int & retval, const int new_val ) + { if( retval < new_val ) retval = new_val; } + + +bool archive_write( const uint8_t * const buf, const long long size, + LZ_Encoder * const encoder ) + { + static bool flushed = true; // avoid flushing empty lzip members + + if( size <= 0 && flushed ) return true; + flushed = ( size <= 0 ); + enum { obuf_size = 65536 }; + uint8_t obuf[obuf_size]; + long long sz = 0; + if( flushed ) LZ_compress_finish( encoder ); // flush encoder + while( sz < size || flushed ) + { + if( sz < size ) + { const int wr = LZ_compress_write( encoder, buf + sz, + std::min( size - sz, (long long)max_dictionary_size ) ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + sz += wr; } + if( sz >= size && !flushed ) break; // minimize dictionary size + const int rd = LZ_compress_read( encoder, obuf, obuf_size ); + if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); + if( rd == 0 && sz >= size ) break; + if( writeblock( outfd, obuf, rd ) != rd ) + { show_file_error( ne_output_filename(), "Write error", errno ); + return false; } + } + if( LZ_compress_finished( encoder ) == 1 && + LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) + internal_error( "library error (LZ_compress_restart_member)." ); + return true; + } + + +bool tail_compress( const Cl_options & cl_opts, + const int infd, Tar_header header, + LZ_Encoder * const encoder ) + { + if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) ) + return false; // flush encoder before EOF blocks + int size = header_size; + bool zero = true; // true until non-zero data found after EOF blocks + while( true ) + { + if( size > 0 && !archive_write( header, size, encoder ) ) + { close( infd ); return false; } + if( size < header_size ) break; // EOF + size = readblock( infd, header, header_size ); + if( errno ) return false; + if( zero && !block_is_zero( header, size ) ) + { zero = false; // flush encoder after EOF blocks + if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) ) + return false; } + } + return true; + } + + +int compress_archive( const Cl_options & cl_opts, + const std::string & input_filename, + LZ_Encoder * const encoder, + const bool to_stdout, const bool to_file ) + { + const bool one_to_one = !to_stdout && !to_file; + const bool from_stdin = input_filename == "-"; + const char * const filename = from_stdin ? "(stdin)" : input_filename.c_str(); + const int infd = from_stdin ? STDIN_FILENO : open_instream( filename ); + if( infd < 0 ) return 1; + if( one_to_one ) + { + if( from_stdin ) { outfd = STDOUT_FILENO; output_filename.clear(); } + else + { + output_filename = input_filename + ".lz"; + outfd = open_outstream( output_filename, true, 0, false ); + if( outfd < 0 ) { close( infd ); return 1; } + delete_output_on_interrupt = true; + } + if( !check_tty_out() ) { close( infd ); return 1; } // don't delete a tty + } + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + + unsigned long long partial_data_size = 0; // size of current block + Extended extended; // metadata from extended records + Resizable_buffer rbuf; // headers and extended records buffer + while( true ) // process one tar member per iteration + { + int total_header_size = header_size; // size of header(s) read + const int rd = readblock( infd, rbuf.u8(), header_size ); + if( rd == 0 && errno == 0 ) break; // missing EOF blocks + if( rd != header_size ) + { show_file_error( filename, "Read error", errno ); + close( infd ); return 1; } + + if( to_file && outfd < 0 ) // open outfd after verifying infd + { + outfd = open_outstream( output_filename, true, 0, false ); + // check tty only once and don't try to delete a tty + if( outfd < 0 || !check_tty_out() ) { close( infd ); return 1; } + delete_output_on_interrupt = true; + } + + if( !verify_ustar_chksum( rbuf.u8() ) ) // maybe EOF + { + if( block_is_zero( rbuf.u8(), header_size ) ) // first EOF block + { tail_compress( cl_opts, infd, rbuf.u8(), encoder ); break; } + show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; + } + + const Typeflag typeflag = (Typeflag)rbuf()[typeflag_o]; + if( typeflag == tf_extended || typeflag == tf_global ) + { + const long long edsize = parse_octal( rbuf.u8() + size_o, size_l ); + const long long bufsize = round_up( edsize ); + // overflow or no extended data + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } + if( !rbuf.resize( total_header_size + bufsize ) ) + { show_file_error( filename, mem_msg ); close( infd ); return 1; } + if( readblock( infd, rbuf.u8() + total_header_size, bufsize ) != bufsize ) + { show_file_error( filename, "Read error", errno ); + close( infd ); return 1; } + total_header_size += bufsize; + if( typeflag == tf_extended ) // do not parse global headers + { + if( !extended.parse( rbuf() + header_size, edsize, false ) ) + { show_file_error( filename, extrec_msg ); close( infd ); return 2; } + // read ustar header + if( !rbuf.resize( total_header_size + header_size ) ) + { show_file_error( filename, mem_msg ); close( infd ); return 1; } + if( readblock( infd, rbuf.u8() + total_header_size, header_size ) != header_size ) + { show_file_error( filename, errno ? "Read error" : end_msg, errno ); + close( infd ); return errno ? 1 : 2; } + if( !verify_ustar_chksum( rbuf.u8() ) ) + { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } + const Typeflag typeflag2 = (Typeflag)(rbuf() + total_header_size)[typeflag_o]; + if( typeflag2 == tf_extended || typeflag2 == tf_global ) + { const char * msg = ( typeflag2 == tf_global ) ? fv_msg2 : fv_msg3; + show_file_error( filename, msg ); close( infd ); return 2; } + total_header_size += header_size; + } + } + + const long long file_size = round_up( extended.get_file_size_and_reset( + rbuf.u8() + total_header_size - header_size ) ); + if( cl_opts.solidity == bsolid && + block_is_full( total_header_size - header_size, file_size, + cl_opts.data_size, partial_data_size ) && + !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; } + if( !archive_write( rbuf.u8(), total_header_size, encoder ) ) + { close( infd ); return 1; } + + if( file_size ) + { + const long long bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = file_size; // file_size already rounded up + while( rest > 0 ) + { + int size = std::min( rest, bufsize ); + const int rd = readblock( infd, buf, size ); + rest -= rd; + if( rd != size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "'%s' ends unexpectedly at pos %llu\n", + filename, file_size - rest ); + close( infd ); return 1; + } + if( !archive_write( buf, size, encoder ) ) { close( infd ); return 1; } + } + } + if( cl_opts.solidity == no_solid && !archive_write( 0, 0, encoder ) ) + { close( infd ); return 1; } // one tar member per lzip member + } + // flush and restart encoder (for next archive) + if( !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; } + const bool need_close = delete_output_on_interrupt && + ( one_to_one || ( to_file && !from_stdin ) ); + struct stat in_stats; + const struct stat * const in_statsp = + ( need_close && fstat( infd, &in_stats ) == 0 ) ? &in_stats : 0; + if( close( infd ) != 0 ) + { show_file_error( filename, "Error closing file", errno ); return 1; } + if( need_close ) close_and_set_permissions( in_statsp ); + return 0; + } + +} // end namespace + + +int compress( Cl_options & cl_opts ) + { + if( !cl_opts.archive_name.empty() ) + { show_file_error( cl_opts.archive_name.c_str(), + "Option '-f' is incompatible with '--compress'." ); return 1; } + if( cl_opts.num_files > 1 && cl_opts.output_filename.size() ) + { show_file_error( cl_opts.output_filename.c_str(), + "Only can compress one archive when using '-o'." ); return 1; } + const bool to_stdout = cl_opts.output_filename == "-"; + if( to_stdout ) // check tty only once + { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; } + else outfd = -1; + const bool to_file = !to_stdout && cl_opts.output_filename.size(); + if( to_file ) output_filename = cl_opts.output_filename; + if( !to_stdout && ( cl_opts.filenames_given || to_file ) ) + set_signals( signal_handler ); + + const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; + if( cl_opts.data_size <= 0 ) + { + if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; + else cl_opts.data_size = 2 * dictionary_size; + } + LZ_Encoder * encoder = LZ_compress_open( dictionary_size, + option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." ); + return 1; + } + + if( !cl_opts.filenames_given ) + return compress_archive( cl_opts, "-", encoder, to_stdout, to_file ); + int retval = 0; + bool stdin_used = false; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) ) // skip opts, empty names + { + if( cl_opts.parser.argument( i ) == "-" ) + { if( stdin_used ) continue; else stdin_used = true; } + const int tmp = compress_archive( cl_opts, cl_opts.parser.argument( i ), + encoder, to_stdout, to_file ); + if( tmp ) + { set_retval( retval, tmp ); + if( delete_output_on_interrupt ) cleanup_and_fail( retval ); } + } + // flush and close encoder if needed + if( outfd >= 0 && archive_write( 0, 0, encoder ) && + LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." ); set_retval( retval, 1 ); } + if( outfd >= 0 && close( outfd ) != 0 ) // to_stdout + { + show_error( "Error closing stdout", errno ); + set_retval( retval, 1 ); + } + return retval; + } -- cgit v1.2.3