summary refs log tree commit diff stats
path: root/zupdate.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- zupdate.cc 412
1 files changed, 412 insertions, 0 deletions
diff --git a/zupdate.cc b/zupdate.cc
new file mode 100644
index 0000000..a605f35
--- /dev/null
+++ b/zupdate.cc
@@ -0,0 +1,412 @@
+/* Zupdate - recompress bzip2, gzip, xz files to lzip format
+ Copyright (C) 2013-2021 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <climits>
+#include <csignal>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <list>
+#include <string>
+#include <vector>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <utime.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#if defined(__MSVCRT__) || defined(__OS2__)
+#include <io.h>
+#endif
+
+#include "arg_parser.h"
+#include "rc.h"
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+
+namespace {
+
+#include "recursive.cc"
+
+// Write the program description, usage line and option summary to standard
+// output, then let show_help_addr() append its trailer.
+void show_help()
+  {
+  // The text contains no '%', so it is written verbatim instead of being
+  // interpreted as a format string.
+  static const char * const help_text =
+    "zupdate recompresses files from bzip2, gzip, and xz formats to lzip\n"
+    "format. Each original is compared with the new file and then deleted.\n"
+    "Only regular files with standard file name extensions are recompressed,\n"
+    "other files are ignored. Compressed files are decompressed and then\n"
+    "recompressed on the fly; no temporary files are created. The lzip format\n"
+    "is chosen as destination because it is the most appropriate for\n"
+    "long-term data archiving.\n"
+    "\nIf no files are specified, recursive searches examine the current\n"
+    "working directory, and nonrecursive searches do nothing.\n"
+    "\nIf the lzip compressed version of a file already exists, the file is\n"
+    "skipped unless the option '--force' is given. In this case, if the\n"
+    "comparison with the existing lzip version fails, an error is returned\n"
+    "and the original file is not deleted. The operation of zupdate is meant\n"
+    "to be safe and not cause any data loss. Therefore, existing lzip\n"
+    "compressed files are never overwritten nor deleted.\n"
+    "\nThe names of the original files must have one of the following extensions:\n"
+    "'.bz2', '.gz', or '.xz', which are recompressed to '.lz';\n"
+    "'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'.\n"
+    "\nUsage: zupdate [options] [files]\n"
+    "\nExit status is 0 if all the compressed files were successfully recompressed\n"
+    "(if needed), compared, and deleted (if requested). Non-zero otherwise.\n"
+    "\nOptions:\n"
+    " -h, --help display this help and exit\n"
+    " -V, --version output version information and exit\n"
+    " -f, --force don't skip a file even if the .lz exists\n"
+    " -k, --keep keep (don't delete) input files\n"
+    " -l, --lzip-verbose pass one option -v to the lzip compressor\n"
+    " -M, --format=<list> process only the formats in <list>\n"
+    " -N, --no-rcfile don't read runtime configuration file\n"
+    " -q, --quiet suppress all messages\n"
+    " -r, --recursive operate recursively on directories\n"
+    " -R, --dereference-recursive recursively follow symbolic links\n"
+    " -v, --verbose be verbose (a 2nd -v gives more)\n"
+    " -0 .. -9 set compression level [default 9]\n"
+    " --bz2=<command> set compressor and options for bzip2 format\n"
+    " --gz=<command> set compressor and options for gzip format\n"
+    " --lz=<command> set compressor and options for lzip format\n"
+    " --xz=<command> set compressor and options for xz format\n";
+  std::fputs( help_text, stdout );
+  show_help_addr();
+  }
+
+
+// Report on stderr that running 'command' failed and return 1.
+// 'status' is the wait-style status of the command: if it describes a
+// normal exit, the exit status is included in the message; otherwise only
+// a generic "Can't execute" message is printed. Nothing is printed when
+// verbosity is negative.
+int cant_execute( const std::string & command, const int status )
+  {
+  if( verbosity < 0 ) return 1;			// quiet: no diagnostics
+  const char * const cmd = command.c_str();
+  if( !WIFEXITED( status ) )
+    std::fprintf( stderr, "%s: Can't execute '%s'\n", program_name, cmd );
+  else
+    std::fprintf( stderr, "%s: Error executing '%s'. Exit status = %d\n",
+                  program_name, cmd, WEXITSTATUS( status ) );
+  return 1;
+  }
+
+
+// Copy owner, group, mode, and access/modification times from 'in_stats'
+// to the file 'rname'. Failures are not fatal; they only produce a warning
+// when verbosity is 2 or more.
+void set_permissions( const char * const rname, const struct stat & in_stats )
+  {
+  const mode_t mode = in_stats.st_mode;
+  bool ok;
+  if( chown( rname, in_stats.st_uid, in_stats.st_gid ) == 0 )
+    ok = ( chmod( rname, mode ) == 0 );
+  else
+    // chown often fails with EPERM, which can be safely ignored. In that
+    // case the set-user-ID, set-group-ID and sticky bits are dropped from
+    // the mode because the owner could not be changed.
+    ok = ( errno == EPERM &&
+           chmod( rname, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) == 0 );
+  struct utimbuf times;
+  times.actime = in_stats.st_atime;
+  times.modtime = in_stats.st_mtime;
+  if( utime( rname, &times ) != 0 ) ok = false;
+  if( !ok && verbosity >= 2 )
+    show_error( "Can't change output file attributes." );
+  }
+
+
+// Append 'arg' to 'command' as one shell word enclosed in single quotes.
+// A single quote inside 'arg' is written as '\'' (close the quotes, emit an
+// escaped quote, reopen the quotes) so that a file name can't terminate the
+// quoting and inject shell syntax into a command run through std::system.
+void append_shell_quoted( std::string & command, const std::string & arg )
+  {
+  command += '\'';
+  for( std::string::size_type i = 0; i < arg.size(); ++i )
+    { if( arg[i] == '\'' ) command += "'\\''"; else command += arg[i]; }
+  command += '\'';
+  }
+
+
+// Recompress the file 'name' to lzip format, compare the original with the
+// lzip file using zcmp, and delete the original.
+//   name              file to recompress; its extension selects the format
+//   lzip_name         name of the lzip compressor to execute
+//   lzip_args2        extra arguments passed to lzip after the configured ones
+//   force             if the lzip file already exists, compare against it
+//                     instead of skipping the input file
+//   keep_input_files  don't delete 'name' after a successful comparison
+//   no_rcfile         pass option '-N' to zcmp
+// Returns 0 for success (or file ignored), -1 for file skipped, 1 for error.
+int zupdate_file( const std::string & name, const char * const lzip_name,
+                  const std::vector< std::string > & lzip_args2,
+                  const bool force, const bool keep_input_files,
+                  const bool no_rcfile )
+  {
+  static int disable_xz = -1;			// tri-state bool
+  int format_index = -1;
+  std::string rname;				// recompressed name
+
+  const int eindex = extension_index( name );	// search extension
+  if( eindex >= 0 )
+    {
+    format_index = extension_format( eindex );
+    if( format_index == fmt_lz )
+      {
+      if( verbosity >= 2 )
+        std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
+                      program_name, name.c_str(), extension_from( eindex ) );
+      return 0;					// ignore this file
+      }
+    rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) );
+    rname += ( std::strcmp( extension_to( eindex ), ".tar" ) == 0 ) ?
+             ".tlz" : ".lz";			// keep combined extension
+    }
+  const char * const compressor_name = get_compressor_name( format_index );
+  if( !compressor_name )
+    {
+    if( verbosity >= 2 )
+      std::fprintf( stderr, "%s: Unknown extension in file name '%s' -- ignored.\n",
+                    program_name, name.c_str() );
+    return 0;					// ignore this file
+    }
+
+  struct stat in_stats;
+  if( stat( name.c_str(), &in_stats ) != 0 )	// check input file
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Can't stat input file '%s': %s\n",
+                    program_name, name.c_str(), std::strerror( errno ) );
+    return 1;
+    }
+  if( !S_ISREG( in_stats.st_mode ) )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Input file '%s' is not a regular file.\n",
+                    program_name, name.c_str() );
+    return 1;
+    }
+
+  struct stat st;				// not used
+  const std::string rname2( rname + ".lz" );	// produced by lzip < 1.20
+  const bool lz_exists = ( stat( rname.c_str(), &st ) == 0 );
+  // don't modify an existing 'rname.lz'
+  const bool lz_lz_exists = ( stat( rname2.c_str(), &st ) == 0 );
+  if( lz_exists && !force )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
+                    program_name, rname.c_str() );
+    return -1;
+    }
+
+  if( format_index == fmt_xz )
+    {
+    if( disable_xz < 0 )	// probe the xz compressor only once
+      {
+      std::string command( compressor_name ); command += " -V > /dev/null 2>&1";
+      disable_xz = ( std::system( command.c_str() ) != 0 );
+      }
+    if( disable_xz ) return 0;	// ignore this file if no xz installed
+    }
+
+  if( !lz_exists )				// recompress
+    {
+    if( verbosity >= 1 )
+      std::fprintf( stderr, "Recompressing file '%s'\n", name.c_str() );
+    int fda[2];			// pipe between decompressor and compressor
+    if( pipe( fda ) < 0 )
+      { show_error( "Can't create pipe", errno ); return 1; }
+
+    const pid_t pid = fork();
+    if( pid == 0 )				// child1 (decompressor)
+      {
+      if( dup2( fda[1], STDOUT_FILENO ) >= 0 &&
+          close( fda[0] ) == 0 && close( fda[1] ) == 0 )
+        {
+        const std::vector< std::string > & compressor_args =
+          get_compressor_args( format_index );
+        const int size = compressor_args.size();
+        const char ** const argv = new const char *[size+5];
+        argv[0] = compressor_name;
+        for( int i = 0; i < size; ++i ) argv[i+1] = compressor_args[i].c_str();
+        argv[size+1] = "-cd";
+        argv[size+2] = "--";
+        argv[size+3] = name.c_str();
+        argv[size+4] = 0;
+        execvp( argv[0], const_cast< char ** >( argv ) );
+        }
+      show_exec_error( compressor_name );
+      _exit( 1 );
+      }
+    if( pid < 0 )				// parent
+      {
+      show_fork_error( compressor_name );	// report before errno changes
+      close( fda[0] ); close( fda[1] );		// don't leak the pipe
+      return 1;
+      }
+
+    const pid_t pid2 = fork();
+    if( pid2 == 0 )				// child2 (lzip compressor)
+      {
+      if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
+          close( fda[0] ) == 0 && close( fda[1] ) == 0 )
+        {
+        const std::vector< std::string > & lzip_args =
+          get_compressor_args( fmt_lz );
+        const int size = lzip_args.size();
+        const int size2 = lzip_args2.size();
+        const char ** const argv = new const char *[size+size2+5];
+        argv[0] = lzip_name;
+        argv[1] = "-9";		// default level; later arguments may override
+        for( int i = 0; i < size; ++i ) argv[i+2] = lzip_args[i].c_str();
+        for( int i = 0; i < size2; ++i ) argv[i+size+2] = lzip_args2[i].c_str();
+        argv[size+size2+2] = "-o";
+        argv[size+size2+3] = rname.c_str();
+        argv[size+size2+4] = 0;
+        execvp( argv[0], const_cast< char ** >( argv ) );
+        }
+      show_exec_error( lzip_name );
+      _exit( 1 );
+      }
+    if( pid2 < 0 )				// parent
+      {
+      show_fork_error( lzip_name );		// report before errno changes
+      close( fda[0] ); close( fda[1] );		// don't leak the pipe
+      // The decompressor is already running with nobody to read its
+      // output; terminate it and reap it so that no zombie is left.
+      kill( pid, SIGTERM );
+      while( waitpid( pid, 0, 0 ) < 0 && errno == EINTR ) {}
+      return 1;
+      }
+
+    close( fda[0] ); close( fda[1] );
+    int retval = wait_for_child( pid, compressor_name );
+    int retval2 = wait_for_child( pid2, lzip_name );
+    if( retval || retval2 )
+      { if( !lz_lz_exists ) std::remove( rname2.c_str() );	// lzip < 1.20
+        std::remove( rname.c_str() ); return 1; }
+    // If 'rname' is missing, lzip may have written 'rname.lz' instead;
+    // rename it unless that file existed before and is not ours.
+    if( stat( rname.c_str(), &st ) != 0 &&
+        ( lz_lz_exists || stat( rname2.c_str(), &st ) != 0 ||
+          std::rename( rname2.c_str(), rname.c_str() ) != 0 ) )
+      { show_file_error( rname.c_str(), "Error renaming output file", errno );
+        return 1; }				// lzip < 1.11
+    set_permissions( rname.c_str(), in_stats );
+    }
+
+    {
+    if( lz_exists && verbosity >= 1 )
+      std::fprintf( stderr, "Comparing file '%s'\n", name.c_str() );
+    // Run the zcmp located in the same directory as this program.
+    std::string zcmp_path( invocation_name );
+    std::string::size_type i = zcmp_path.size();
+    while( i > 0 && zcmp_path[i-1] != '/' ) --i;
+    zcmp_path.resize( i ); zcmp_path += "zcmp";		// [dir/]zcmp
+    std::string zcmp_command;
+    append_shell_quoted( zcmp_command, zcmp_path );
+    zcmp_command += ' ';
+    if( no_rcfile ) zcmp_command += "-N ";
+    if( verbosity < 0 ) zcmp_command += "-q ";
+    // File names are quoted with escaping because they pass through the
+    // shell and may contain single quotes.
+    append_shell_quoted( zcmp_command, name );
+    zcmp_command += ' ';
+    append_shell_quoted( zcmp_command, rname );
+    int status = std::system( zcmp_command.c_str() );
+    if( status != 0 )
+      { if( !lz_exists ) std::remove( rname.c_str() );	// remove only our file
+        return cant_execute( zcmp_command, status ); }
+    }
+
+  if( !keep_input_files && std::remove( name.c_str() ) != 0 && errno != ENOENT )
+    {
+    if( verbosity >= 0 )
+      std::fprintf( stderr, "%s: Can't delete input file '%s': %s\n",
+                    program_name, name.c_str(), std::strerror( errno ) );
+    return 1;
+    }
+  return 0;
+  }
+
+} // end namespace
+
+
+// Program entry point. Parses the command line options, then calls
+// zupdate_file for every file name delivered by next_filename, stopping at
+// the first file that returns an error. Returns 0 if no file failed or was
+// skipped, non-zero otherwise.
+int main( const int argc, const char * const argv[] )
+  {
+  enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt };
+  program_name = "zupdate";
+  invocation_name = ( argc > 0 ) ? argv[0] : program_name;
+
+  const Arg_parser::Option options[] =
+    {
+    { '0', 0,                       Arg_parser::no  },
+    { '1', 0,                       Arg_parser::no  },
+    { '2', 0,                       Arg_parser::no  },
+    { '3', 0,                       Arg_parser::no  },
+    { '4', 0,                       Arg_parser::no  },
+    { '5', 0,                       Arg_parser::no  },
+    { '6', 0,                       Arg_parser::no  },
+    { '7', 0,                       Arg_parser::no  },
+    { '8', 0,                       Arg_parser::no  },
+    { '9', 0,                       Arg_parser::no  },
+    { 'f', "force",                 Arg_parser::no  },
+    { 'h', "help",                  Arg_parser::no  },
+    { 'k', "keep",                  Arg_parser::no  },
+    { 'l', "lzip-verbose",          Arg_parser::no  },
+    { 'M', "format",                Arg_parser::yes },
+    { 'N', "no-rcfile",             Arg_parser::no  },
+    { 'q', "quiet",                 Arg_parser::no  },
+    { 'r', "recursive",             Arg_parser::no  },
+    { 'R', "dereference-recursive", Arg_parser::no  },
+    { 'v', "verbose",               Arg_parser::no  },
+    { 'V', "version",               Arg_parser::no  },
+    { bz2_opt, "bz2",               Arg_parser::yes },
+    { gz_opt,  "gz",                Arg_parser::yes },
+    { lz_opt,  "lz",                Arg_parser::yes },
+    { xz_opt,  "xz",                Arg_parser::yes },
+    { 0 , 0,                        Arg_parser::no  } };
+
+  const Arg_parser parser( argc, argv, options );
+  if( parser.error().size() )				// bad option
+    { show_error( parser.error().c_str(), 0, true ); return 1; }
+
+  maybe_process_config_file( parser );
+
+  int recursive = 0;				// 1 = '-r', 2 = '-R'
+  bool force = false;
+  bool keep_input_files = false;
+  bool no_rcfile = false;
+  std::vector< std::string > lzip_args2;	// args to lzip, maybe empty
+
+  int argind = 0;
+  while( argind < parser.arguments() )
+    {
+    const int code = parser.code( argind );
+    if( code == 0 ) break;				// no more options
+    const std::string & arg = parser.argument( argind );
+    if( code >= '0' && code <= '9' )		// compression level for lzip
+      lzip_args2.push_back( std::string( "-" ) + static_cast< char >( code ) );
+    else switch( code )
+      {
+      case 'f': force = true; break;
+      case 'h': show_help(); return 0;
+      case 'k': keep_input_files = true; break;
+      case 'l': lzip_args2.push_back( "-v" ); break;
+      case 'M': parse_format_list( arg ); break;
+      case 'N': no_rcfile = true; break;
+      case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break;
+      case 'r': recursive = 1; break;
+      case 'R': recursive = 2; break;
+      case 'v': if( verbosity < 4 ) ++verbosity; break;
+      case 'V': show_version(); return 0;
+      case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break;
+      case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break;
+      case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break;
+      case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break;
+      default : internal_error( "uncaught option." );
+      }
+    ++argind;
+    } // end process options
+
+#if defined(__MSVCRT__) || defined(__OS2__)
+  setmode( STDIN_FILENO, O_BINARY );
+  setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+  const char * const lzip_name = get_compressor_name( fmt_lz );
+  if( !lzip_name )
+    { show_error( "Missing name of compressor for lzip format." ); return 1; }
+
+  // The remaining arguments are the file names to process.
+  std::list< std::string > filenames;
+  while( argind < parser.arguments() )
+    { filenames.push_back( parser.argument( argind ) ); ++argind; }
+
+  if( recursive && filenames.empty() ) filenames.push_back( "." );
+
+  std::string input_filename;
+  int retval = 0;
+  bool error = false;
+  while( next_filename( filenames, input_filename, error, recursive, true ) )
+    {
+    const int status = zupdate_file( input_filename, lzip_name, lzip_args2,
+                                     force, keep_input_files, no_rcfile );
+    if( status < 0 ) error = true;		// file skipped
+    else if( status > 0 ) { retval = status; break; }	// stop at first error
+    }
+  return ( error && retval == 0 ) ? 1 : retval;
+  }