diff options
Diffstat (limited to 'zupdate.cc')
-rw-r--r-- | zupdate.cc | 506 |
1 files changed, 506 insertions, 0 deletions
diff --git a/zupdate.cc b/zupdate.cc new file mode 100644 index 0000000..64ca0d3 --- /dev/null +++ b/zupdate.cc @@ -0,0 +1,506 @@ +/* Zupdate - recompress bzip2, gzip, xz, zstd files to lzip format + Copyright (C) 2013-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#include <sys/wait.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip\n" + "format. Each original is compared with the new file and then deleted.\n" + "Only regular files with standard file name extensions are recompressed,\n" + "other files are ignored. Compressed files are decompressed and then\n" + "recompressed on the fly; no temporary files are created. The lzip format\n" + "is chosen as destination because it is the most appropriate for\n" + "long-term archiving.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches do nothing.\n" + "\nIf the lzip-compressed version of a file already exists, the file is skipped\n" + "unless the option '--force' is given. In this case, if the comparison with\n" + "the existing lzip version fails, an error is returned and the original file\n" + "is not deleted. The operation of zupdate is meant to be safe and not cause\n" + "any data loss. Therefore, existing lzip-compressed files are never\n" + "overwritten nor deleted.\n" + "\nThe names of the original files must have one of the following extensions:\n" + "\n'.bz2', '.gz', '.xz', '.zst', or '.Z', which are recompressed to '.lz'.\n" + "\n'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to '.tlz'.\n" + "\nUsage: zupdate [options] [files]\n" + "\nExit status is 0 if all the compressed files were successfully recompressed\n" + "(if needed), compared, and deleted (if requested). 1 if a non-fatal error\n" + "occurred (file not found or not regular, or has invalid format, or can't be\n" + "deleted). 2 if a fatal error occurred (invalid command-line options,\n" + "compressor can't be run, or comparison fails).\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -d, --destdir=<dir> write recompressed files into <dir>\n" + " -e, --expand-extensions expand combined extensions; tgz --> tar.lz\n" + " -f, --force don't skip a file even if the .lz exists\n" + " -i, --ignore-errors ignore non-fatal errors\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --lzip-verbose pass one option -v to the lzip compressor\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. -9 set compression level [default 9]\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for option '-M' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n" ); + show_help_addr(); + } + + +void extract_srcdir_name( const std::string & name, std::string & srcdir ) + { + if( name.empty() || name == "." ) return; // leave srcdir empty + if( name[name.size()-1] == '/' ) // remove last slash + { srcdir.assign( name, 0, name.size() - 1 ); return; } + struct stat st; + if( stat( name.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { srcdir = name; return; } + + unsigned size = 0; // size of srcdir without last slash nor basename + for( unsigned i = name.size(); i > 0; --i ) + if( name[i-1] == '/' ) { size = i - 1; break; } + if( size > 0 ) srcdir.assign( name, 0, size ); + } + + +bool make_dirs( const std::string & name ) + { + static std::string cached_dirname; + unsigned i = name.size(); + while( i > 0 && name[i-1] != '/' ) --i; // remove last component + while( i > 0 && name[i-1] == '/' ) --i; // remove slash(es) + if( i == 0 ) return true; // dirname is '/' or empty + const unsigned dirsize = i; // size of dirname without trailing slash(es) + if( cached_dirname.size() == dirsize && + cached_dirname.compare( 0, dirsize, name ) == 0 ) return true; + + for( i = 0; i < dirsize; ) + { + while( i < dirsize && name[i] == '/' ) ++i; + const unsigned first = i; + while( i < dirsize && name[i] != '/' ) ++i; + if( first < i ) + { + const std::string partial( name, 0, i ); + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + struct stat st; + if( stat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } + else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) + return false; // if EEXIST, another process created the dir + } + } + cached_dirname.assign( name, 0, dirsize ); + return true; + } + + +void cant_execute( const std::string & command, const int status ) + { + if( verbosity >= 0 ) + { + if( WIFEXITED( status ) ) + std::fprintf( stderr, "%s: Error executing '%s'. Exit status = %d\n", + program_name, command.c_str(), WEXITSTATUS( status ) ); + else + std::fprintf( stderr, "%s: Can't execute '%s'\n", + program_name, command.c_str() ); + } + } + + +// Set permissions, owner, and times. +void set_permissions( const char * const rname, const struct stat & in_stats ) + { + bool warning = false; + const mode_t mode = in_stats.st_mode; + // chown in many cases returns with EPERM, which can be safely ignored. + if( chown( rname, in_stats.st_uid, in_stats.st_gid ) == 0 ) + { if( chmod( rname, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + chmod( rname, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + if( utime( rname, &t ) != 0 ) warning = true; + if( warning && verbosity >= 2 ) + show_file_error( rname, + "warning: can't change output file attributes", errno ); + } + + +// Return value: 0 = success, -1 = file skipped, 1 = error, 2 = fatal error. +int zupdate_file( const std::string & name, const char * const lzip_name, + const std::vector< std::string > & lzip_args2, + const std::string & srcdir, const std::string & destdir, + const bool expand, const bool force, + const bool keep_input_files, const bool no_rcfile ) + { + // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. + static int disable_xz = -1; // tri-state bool + static int disable_zst = -1; // tri-state bool + int format_index = -1; // undefined + std::string rname; // recompressed name + + const int eindex = extension_index( name ); // search extension + if( eindex >= 0 ) + { + format_index = extension_format( eindex ); + if( format_index == fmt_lz ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix.\n", + program_name, name.c_str(), extension_from( eindex ) ); + return 0; // ignore this file + } + if( destdir.size() ) + { + if( srcdir.size() && name.compare( 0, srcdir.size(), srcdir ) != 0 ) + internal_error( "srcdir mismatch." ); + rname = destdir; + if( rname[rname.size()-1] != '/' && name[srcdir.size()] != '/' ) + rname += '/'; + rname.append( name, srcdir.size(), name.size() - srcdir.size() - + std::strlen( extension_from( eindex ) ) ); + } + else + rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + rname += ( std::strcmp( extension_to( eindex ), ".tar" ) == 0 ) ? + ( expand ? ".tar.lz" : ".tlz" ) : ".lz"; + } + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { + if( verbosity >= 2 ) + show_file_error( name.c_str(), "Unknown extension in file name -- ignored." ); + return 0; // ignore this file + } + + struct stat in_stats; + if( stat( name.c_str(), &in_stats ) != 0 ) // check input file + { show_file_error( name.c_str(), "Can't stat input file", errno ); + return 1; } + if( !S_ISREG( in_stats.st_mode ) ) + { show_file_error( name.c_str(), "Input file is not a regular file." ); + return 1; } + + struct stat st; // not used + const std::string rname2( rname + ".lz" ); // produced by lzip < 1.20 + const bool lz_exists = ( stat( rname.c_str(), &st ) == 0 ); + // don't modify an existing 'rname.lz' + const bool lz_lz_exists = ( stat( rname2.c_str(), &st ) == 0 ); + if( lz_exists && !force ) + { + show_file_error( rname.c_str(), "Output file already exists, skipping." ); + return -1; + } + + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + if( disable_xz && verbosity >= 2 ) + show_file_error( compressor_name, + "Xz decompressor not found. Ignoring xz files." ); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + else if( format_index == fmt_zst ) + { + if( disable_zst < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_zst = ( std::system( command.c_str() ) != 0 ); + if( disable_zst && verbosity >= 2 ) + show_file_error( compressor_name, + "Zstd decompressor not found. Ignoring zstd files." ); + } + if( disable_zst ) return 0; // ignore this file if no zstd installed + } + + if( !lz_exists ) // recompress + { + if( verbosity >= 1 ) + std::fprintf( stderr, "Recompressing file '%s'\n", name.c_str() ); + if( destdir.size() && !make_dirs( rname ) ) + { show_file_error( rname.c_str(), + "Error creating intermediate directory", errno ); return 2; } + int fda[2]; // pipe between decompressor and compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 2; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (decompressor) + { + if( dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = "-cd"; + argv[size+2] = "--"; + argv[size+3] = name.c_str(); + argv[size+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 2; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (lzip compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & lzip_args = + get_compressor_args( fmt_lz ); + const int size = lzip_args.size(); // from .conf or --lz + const int size2 = lzip_args2.size(); // from command line + const char ** const argv = new const char *[size+size2+5]; + argv[0] = lzip_name; + argv[1] = "-9"; + for( int i = 0; i < size; ++i ) argv[i+2] = lzip_args[i].c_str(); + for( int i = 0; i < size2; ++i ) argv[i+size+2] = lzip_args2[i].c_str(); + argv[size+size2+2] = "-o"; + argv[size+size2+3] = rname.c_str(); + argv[size+size2+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( lzip_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( lzip_name ); return 2; } + + close( fda[0] ); close( fda[1] ); + const int retval = wait_for_child( pid, compressor_name ); + const int retval2 = wait_for_child( pid2, lzip_name ); + if( retval || retval2 ) + { if( !lz_lz_exists ) std::remove( rname2.c_str() ); // lzip < 1.20 + std::remove( rname.c_str() ); return retval2 ? 2 : 1; } + if( stat( rname.c_str(), &st ) != 0 && + ( lz_lz_exists || stat( rname2.c_str(), &st ) != 0 || + std::rename( rname2.c_str(), rname.c_str() ) != 0 ) ) + { show_file_error( rname.c_str(), "Error renaming output file", errno ); + return 2; } // lzip < 1.11 + set_permissions( rname.c_str(), in_stats ); + } + + { + if( lz_exists && verbosity >= 1 ) + std::fprintf( stderr, "Comparing file '%s'\n", name.c_str() ); + // Quote names in zcmp_command to allow file/dir names with spaces. + std::string zcmp_command( invocation_name ); + unsigned i = zcmp_command.size(); + while( i > 0 && zcmp_command[i-1] != '/' ) --i; // strip "zupdate" + zcmp_command.resize( i ); zcmp_command.insert( zcmp_command.begin(), '\'' ); + zcmp_command += "zcmp' "; // '[dir/]zcmp' + if( no_rcfile ) zcmp_command += "-N "; + if( verbosity < 0 ) zcmp_command += "-q -s "; + zcmp_command += '\''; zcmp_command += name; + zcmp_command += "' '"; zcmp_command += rname; zcmp_command += '\''; + int status = std::system( zcmp_command.c_str() ); + if( status != 0 ) + { if( !lz_exists ) std::remove( rname.c_str() ); + cant_execute( zcmp_command, status ); return 2; } + } + + if( !keep_input_files && std::remove( name.c_str() ) != 0 && errno != ENOENT ) + { show_file_error( name.c_str(), "Can't delete input file", errno ); + return 1; } + return 0; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::string destdir; // write recompressed files here + std::vector< std::string > lzip_args2; // args to lzip, maybe empty + bool expand = false; + bool force = false; + bool ignore_errors = false; + bool keep_input_files = false; + bool no_rcfile = false; + program_name = "zupdate"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'd', "destdir", Arg_parser::yes }, + { 'e', "expand-extensions", Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-errors", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "lzip-verbose", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + lzip_args2.push_back( "-" ); lzip_args2.back() += code; break; + case 'd': destdir = arg; break; + case 'e': expand = true; break; + case 'f': force = true; break; + case 'h': show_help(); return 0; + case 'i': ignore_errors = true; break; + case 'k': keep_input_files = true; break; + case 'l': lzip_args2.push_back( "-v" ); break; + case 'M': parse_format_list( arg, pn ); break; + case 'N': no_rcfile = true; break; + case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; + default: internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const char * const lzip_name = get_compressor_name( fmt_lz ); + if( !lzip_name ) + { show_error( "Missing name of compressor for lzip format." ); return 2; } + + std::list< std::string > filenames; + if( argind < parser.arguments() ) + filenames.push_back( parser.argument( argind++ ) ); // first argument + else if( recursive ) filenames.push_back( "." ); + else return 0; // nothing to do + + std::string input_filename; + int retval = 0; + bool error = false; + while( true ) + { + std::string srcdir; // dirname to be replaced by destdir + if( destdir.size() ) extract_srcdir_name( filenames.front(), srcdir ); + while( next_filename( filenames, input_filename, error, recursive, true ) ) + { + int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, srcdir, + destdir, expand, force, keep_input_files, no_rcfile ); + if( tmp < 0 ) error = true; // file skipped + if( tmp > retval ) retval = tmp; + if( tmp >= 2 || ( tmp == 1 && !ignore_errors ) ) goto out; + } + if( argind >= parser.arguments() ) break; + filenames.push_back( parser.argument( argind++ ) ); + } +out: + if( error && retval == 0 ) retval = 1; + return retval; + } |