diff options
Diffstat (limited to 'zupdate.cc')
-rw-r--r-- | zupdate.cc | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/zupdate.cc b/zupdate.cc new file mode 100644 index 0000000..02dfe29 --- /dev/null +++ b/zupdate.cc @@ -0,0 +1,415 @@ +/* Zupdate - recompress bzip2, gzip, xz files to lzip files + Copyright (C) 2013 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "zutils.h" +#include "rc.h" + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + + +namespace { + +#ifdef O_BINARY +const int o_binary = O_BINARY; +#else +const int o_binary = 0; +#endif + + +void show_zupdate_help() + { + std::printf( "Zupdate recompresses files from bzip2, gzip, and xz formats to lzip format.\n" + "The originals are compared with the new files and then deleted.\n" + "Only regular files with standard file name extensions are recompressed,\n" + "other files are ignored.\n" + "Compressed files are decompressed and then recompressed on the fly; no\n" + "temporary files are created.\n" + "The lzip format is chosen as destination because it is by far the most\n" + "appropriate for long-term data archiving.\n" + "\nIf the lzip compressed version of a file already exists, the file is\n" + "skipped unless the '--force' option is given. In this case, if the\n" + "comparison fails, an error is returned and the original file is not\n" + "deleted. The operation of zupdate is meant to be safe and not produce\n" + "any data loss. Therefore, existing lzip compressed files are never\n" + "overwritten nor deleted.\n" + "\nUsage: zupdate [options] [files]\n" + "\nExit status is 0 if all the compressed files were successfully\n" + "recompressed (if needed), compared and deleted. Non-zero otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -f, --force do not skip a file even if the .lz exists\n" + " -l, --lzip-verbose pass a -v option to the lzip compressor\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. -9 set compression level [default 9]\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" ); + show_help_addr(); + } + + +int cant_execute( const std::string & command, const int status ) + { + if( verbosity >= 0 ) + { + if( WIFEXITED( status ) ) + std::fprintf( stderr, "%s: Error executing '%s'. Exit status = %d.\n", + util_name, command.c_str(), WEXITSTATUS( status ) ); + else + std::fprintf( stderr, "%s: Can't execute '%s'.\n", + util_name, command.c_str() ); + } + return 1; + } + + + // Set permissions, owner and times. +void set_permissions( const char * const rname, const struct stat & in_stats ) + { + bool warning = false; + // fchown will in many cases return with EPERM, which can be safely ignored. + if( ( chown( rname, in_stats.st_uid, in_stats.st_gid ) != 0 && + errno != EPERM ) || + chmod( rname, in_stats.st_mode ) != 0 ) warning = true; + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + if( utime( rname, &t ) != 0 ) warning = true; + if( warning && verbosity >= 2 ) + show_error( "Can't change output file attributes." ); + } + + +struct { const char * from; const char * to; int format_index; } const + known_extensions[] = { + { ".bz2", "", fmt_bz2 }, + { ".tbz", ".tar", fmt_bz2 }, + { ".tbz2", ".tar", fmt_bz2 }, + { ".gz", "", fmt_gz }, + { ".tgz", ".tar", fmt_gz }, + { ".lz", "", fmt_lz }, + { ".tlz", ".tar", fmt_lz }, + { ".xz", "", fmt_xz }, + { ".txz", ".tar", fmt_xz }, + { 0, 0, -1 } }; + + + // Returns 0 for success, -1 for file skipped, 1 for error. +int zupdate_file( const std::string & name, const char * const lzip_name, + const std::vector< std::string > & lzip_args2, + const bool force ) + { + int format_index = -1; + std::string dname; // decompressed_name + + for( int i = 0; known_extensions[i].from; ++i ) // search extension + { + const std::string from( known_extensions[i].from ); + if( name.size() > from.size() && + name.compare( name.size() - from.size(), from.size(), from ) == 0 ) + { + dname.assign( name, 0, name.size() - from.size() ); + dname += known_extensions[i].to; + format_index = known_extensions[i].format_index; + if( format_index == fmt_lz ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + util_name, name.c_str(), known_extensions[i].from ); + return 0; // ignore this file + } + break; + } + } + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name || !compressor_name[0] ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: Unknown extension in file name '%s' -- ignored.\n", + util_name, name.c_str() ); + return 0; // ignore this file + } + + struct stat in_stats; + if( stat( name.c_str(), &in_stats ) != 0 ) // check input file + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't stat input file '%s': %s.\n", + util_name, name.c_str(), std::strerror( errno ) ); + return 1; + } + if( !S_ISREG( in_stats.st_mode ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' is not a regular file.\n", + util_name, name.c_str() ); + return 1; + } + + struct stat st; + std::string rname( dname ); rname += ".lz"; // recompressed_name + const bool lz_exists = ( stat( rname.c_str(), &st ) == 0 ); + if( lz_exists && !force ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", + util_name, rname.c_str() ); + return -1; + } + + if( !lz_exists ) // recompress + { + if( verbosity >= 1 ) + std::fprintf( stderr, "Recompressing file '%s'.\n", name.c_str() ); + int fda[2]; // pipe between decompressor and compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 1; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (decompressor) + { + if( dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = "-cd"; + argv[size+2] = "--"; + argv[size+3] = name.c_str(); + argv[size+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (lzip compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & lzip_args = + get_compressor_args( fmt_lz ); + const int size = lzip_args.size(); + const int size2 = lzip_args2.size(); + const char ** const argv = new const char *[size+size2+5]; + argv[0] = lzip_name; + argv[1] = "-9"; + for( int i = 0; i < size; ++i ) argv[i+2] = lzip_args[i].c_str(); + for( int i = 0; i < size2; ++i ) argv[i+size+2] = lzip_args2[i].c_str(); + argv[size+size2+2] = "-o"; + argv[size+size2+3] = dname.c_str(); + argv[size+size2+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( lzip_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( lzip_name ); return 1; } + + close( fda[0] ); close( fda[1] ); + int retval = wait_for_child( pid, compressor_name ); + int retval2 = wait_for_child( pid2, lzip_name ); + if( retval || retval2 ) { std::remove( rname.c_str() ); return 1; } + set_permissions( rname.c_str(), in_stats ); + } + + { + if( lz_exists && verbosity >= 1 ) + std::fprintf( stderr, "Comparing file '%s'.\n", name.c_str() ); + std::string zcmp_command( invocation_name ); + unsigned i = zcmp_command.size(); + while( i > 0 && zcmp_command[i-1] != '/' ) --i; + zcmp_command.resize( i ); + zcmp_command += "zcmp "; // ${bindir}zcmp + zcmp_command += name; zcmp_command += ' '; zcmp_command += rname; + int status = std::system( zcmp_command.c_str() ); + if( status != 0 ) + { if( !lz_exists ) std::remove( rname.c_str() ); + return cant_execute( zcmp_command, status ); } + } + + if( std::remove( name.c_str() ) != 0 && errno != ENOENT ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't delete input file '%s': %s.\n", + util_name, name.c_str(), std::strerror( errno ) ); + return 1; + } + return 0; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + std::string input_filename; + std::list< std::string > filenames; + std::vector< std::string > lzip_args2; // args to lzip, maybe empty + bool force = false; + bool recursive = false; + invocation_name = argv[0]; + util_name = "zupdate"; + + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'l', "lzip-verbose", Arg_parser::no }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const arg = parser.argument( argind ).c_str(); + switch( code ) // common options + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + lzip_args2.push_back( "-" ); lzip_args2.back() += code; break; + case 'f': force = true; break; + case 'h': show_zupdate_help(); return 0; + case 'l': lzip_args2.push_back( "-v" ); break; + case 'N': continue; + case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break; + case 'r': recursive = true; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version( "Zupdate" ); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); continue; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); continue; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); continue; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); continue; + default : internal_error( "uncaught option" ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const char * const lzip_name = get_compressor_name( fmt_lz ); + if( !lzip_name || !lzip_name[0] ) + { show_error( "Missing name of compressor for lzip format." ); return 1; } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + int retval = 0; + while( !filenames.empty() ) + { + input_filename = filenames.front(); + filenames.pop_front(); + if( !input_filename.size() || input_filename == "-" ) continue; + if( recursive ) + { + struct stat st; + if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { + DIR * const dirp = opendir( input_filename.c_str() ); + if( !dirp ) + { + show_error2( "Can't open directory", input_filename.c_str() ); + if( retval < 1 ) retval = 1; continue; + } + std::list< std::string > tmp_list; + while( true ) + { + const struct dirent * const entryp = readdir( dirp ); + if( !entryp ) { closedir( dirp ); break; } + std::string tmp_name( entryp->d_name ); + if( tmp_name != "." && tmp_name != ".." ) + tmp_list.push_back( input_filename + "/" + tmp_name ); + } + filenames.splice( filenames.begin(), tmp_list ); + continue; + } + } + + int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, force ); + if( tmp < 0 && retval < 1 ) retval = 1; + if( tmp > retval ) retval = tmp; + if( tmp > 0 ) break; + } + return retval; + } |