diff options
Diffstat (limited to '')
-rw-r--r-- | ChangeLog | 37 | ||||
-rw-r--r-- | INSTALL | 18 | ||||
-rw-r--r-- | Makefile.in | 21 | ||||
-rw-r--r-- | NEWS | 57 | ||||
-rw-r--r-- | README | 15 | ||||
-rw-r--r-- | archive_reader.cc | 14 | ||||
-rw-r--r-- | archive_reader.h | 2 | ||||
-rw-r--r-- | arg_parser.cc | 4 | ||||
-rw-r--r-- | arg_parser.h | 4 | ||||
-rw-r--r-- | common.cc | 81 | ||||
-rw-r--r-- | common_decode.cc | 90 | ||||
-rw-r--r-- | common_mutex.cc | 160 | ||||
-rw-r--r-- | common_mutex.h | 30 | ||||
-rw-r--r-- | compress.cc | 69 | ||||
-rwxr-xr-x | configure | 29 | ||||
-rw-r--r-- | create.cc | 182 | ||||
-rw-r--r-- | create.h | 3 | ||||
-rw-r--r-- | create_lz.cc | 50 | ||||
-rw-r--r-- | decode.cc | 119 | ||||
-rw-r--r-- | decode.h | 7 | ||||
-rw-r--r-- | decode_lz.cc | 24 | ||||
-rw-r--r-- | delete.cc | 11 | ||||
-rw-r--r-- | delete_lz.cc | 9 | ||||
-rw-r--r-- | doc/tarlz.1 | 27 | ||||
-rw-r--r-- | doc/tarlz.info | 179 | ||||
-rw-r--r-- | doc/tarlz.texi | 212 | ||||
-rw-r--r-- | exclude.cc | 3 | ||||
-rw-r--r-- | extended.cc | 141 | ||||
-rw-r--r-- | lzip_index.cc | 83 | ||||
-rw-r--r-- | lzip_index.h | 20 | ||||
-rw-r--r-- | main.cc | 107 | ||||
-rw-r--r-- | tarlz.h | 121 | ||||
-rwxr-xr-x | testsuite/check.sh | 230 |
33 files changed, 1185 insertions, 974 deletions
@@ -1,3 +1,25 @@ +2024-01-03 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.25 released. + * New option '--ignore-metadata'. + * create.cc, decode.cc, decode_lz.cc: + '#include <sys/types.h>' for major, minor, makedev on BSD systems. + * compress.cc: Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'. + (compress_archive): Create missing intermediate directories. + * configure, Makefile.in: New variable 'MAKEINFO'. + +2023-09-20 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.24 released. + * decode.cc (decode), common_decode.cc (check_skip_filename): + Make option '-C' position-dependent also for diff and extract. + (Reported by Devon Sean McCullough). + * create.cc (encode): Deduct '--uncompressed' from archive name ext. + * compress.cc (show_atpos_error): New function showing errno msg. + (compress_archive): Exit with error status 2 if archive is empty. + * Limit the size of a header set (extended+ustar) to INT_MAX. + * check.sh: Fix '--diff' test on OS/2 again. (Reported by Elbert Pol). + 2022-09-23 Antonio Diaz Diaz <antonio@gnu.org> * Version 0.23 released. @@ -30,7 +52,7 @@ * main.cc (getnum): Show option name and valid range if error. (check_lib): Check that LZ_API_VERSION and LZ_version_string match. (main): Report an error if -o is used with any operation except -z. - * Set variable LIBS from configure. + * configure: Set variable LIBS. 2021-06-14 Antonio Diaz Diaz <antonio@gnu.org> @@ -40,15 +62,14 @@ decode_lz.cc (dworker): Likewise. (Reported by Florian Schmaus). * New options '-z, --compress' and '-o, --output'. * New option '--warn-newer'. - * tarlz.texi (Portable character set): Link to moe section on Unicode. - (Invoking tarlz): Document concatenation to standard output. - * check.sh: Fix the '--diff' test on OS/2. + * tarlz.texi (Invoking tarlz): Document concatenation to stdout. + * check.sh: Fix the '--diff' test on OS/2. (Reported by Elbert Pol). 2021-01-08 Antonio Diaz Diaz <antonio@gnu.org> * Version 0.19 released.
* extended.cc: Print a diagnostic for each unknown keyword found. - * tarlz.h: Add a missing '#include <sys/types.h>'. + * tarlz.h: Add a missing '#include <sys/types.h>' for 'mode_t'. 2020-11-21 Antonio Diaz Diaz <antonio@gnu.org> @@ -154,7 +175,7 @@ * New option '--keep-damaged'. * New option '--no-solid'. * create.cc (archive_write): Minimize dictionary size. - * create.cc: Detect and skip archive in '-A', '-c', and '-r'. + Detect and skip archive in '-A', '-c', and '-r'. * main.cc (show_version): Show the version of lzlib being used. 2018-10-19 Antonio Diaz Diaz <antonio@gnu.org> @@ -162,7 +183,7 @@ * Version 0.6 released. * New option '-A, --concatenate'. * Option '--ignore-crc' replaced with '--missing-crc'. - * create.cc (add_member): Verify that uid, gid, mtime, devmajor, + * create.cc (add_member): Check that uid, gid, mtime, devmajor, and devminor are in ustar range. * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. * Makefile.in: Use tarlz in target 'dist'. @@ -208,7 +229,7 @@ * Version 0.1 released. -Copyright (C) 2013-2022 Antonio Diaz Diaz. +Copyright (C) 2013-2024 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and @@ -4,12 +4,11 @@ You will need a C++98 compiler with support for 'long long', and the compression library lzlib installed. (gcc 3.3.6 or newer is recommended). I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. - -Lzlib must be version 1.12 or newer. - Gcc is available at http://gcc.gnu.org. Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. +Lzlib must be version 1.12 or newer. + The operating system must allow signal handlers read access to objects with static storage duration so that the cleanup handler for Control-C can delete the partial output file in '-z, --compress' mode. 
@@ -23,8 +22,8 @@ Procedure or lzip -cd tarlz[version].tar.lz | tar -xf - -This creates the directory ./tarlz[version] containing the source from -the main archive. +This creates the directory ./tarlz[version] containing the source code +extracted from the archive. 2. Change to tarlz directory and run configure. (Try 'configure --help' for usage instructions). @@ -46,7 +45,8 @@ the main archive. 4. Optionally, type 'make check' to run the tests that come with tarlz. 5. Type 'make install' to install the program and any data files and - documentation. + documentation. You need root privileges to install into a prefix owned + by root. Or type 'make install-compress', which additionally compresses the info manual and the man page after installation. @@ -66,15 +66,15 @@ object files and executables to go and run the 'configure' script. 'configure' automatically checks for the source code in '.', in '..', and in the directory that 'configure' is in. -'configure' recognizes the option '--srcdir=DIR' to control where to -look for the sources. Usually 'configure' can determine that directory +'configure' recognizes the option '--srcdir=DIR' to control where to look +for the source code. Usually 'configure' can determine that directory automatically. After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2013-2022 Antonio Diaz Diaz. +Copyright (C) 2013-2024 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in index 0fa5761..76c1fc8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,8 +8,8 @@ SHELL = /bin/sh CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \ - compress.o create.o create_lz.o decode.o decode_lz.o delete.o \ - delete_lz.o exclude.o extended.o main.o + common_mutex.o compress.o create.o create_lz.o decode.o decode_lz.o \ + delete.o delete_lz.o exclude.o extended.o main.o .PHONY : all install install-bin install-info install-man \ @@ -29,16 +29,22 @@ main.o : main.cc %.o : %.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< +# prevent 'make' from trying to remake source files +$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ; +%.h %.cc : ; + $(objs) : Makefile arg_parser.o : arg_parser.h archive_reader.o : tarlz.h lzip_index.h archive_reader.h -common.o : tarlz.h arg_parser.h -common_decode.o : tarlz.h arg_parser.h +common.o : tarlz.h +common_decode.o : tarlz.h arg_parser.h decode.h +common_mutex.o : common_mutex.h compress.o : tarlz.h arg_parser.h create.o : tarlz.h arg_parser.h create.h -create_lz.o : tarlz.h arg_parser.h create.h +create_lz.o : tarlz.h arg_parser.h common_mutex.h create.h decode.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h -decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h \ + common_mutex.h decode.h delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h exclude.o : tarlz.h @@ -46,13 +52,12 @@ extended.o : tarlz.h lzip_index.o : tarlz.h lzip_index.h main.o : tarlz.h arg_parser.h - doc : info man info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi - cd $(VPATH)/doc && makeinfo $(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi man : 
$(VPATH)/doc/$(progname).1 @@ -1,53 +1,14 @@ -Changes in version 0.23: +Changes in version 0.25: -Tarlz now can create and decode the extended records 'atime' and 'mtime', -allowing times beyond the ustar range (before 1970-01-01 00:00:00 UTC or -after 2242-03-16 12:56:31 UTC). +The new option '--ignore-metadata', which makes '-d, --diff' ignore +differences in file permissions, owner and group IDs, and modification time, +has been added. -Tarlz now can create and decode the extended records 'uid' and 'gid', -allowing user and group IDs beyond the ustar limit of 2_097_151. +'#include <sys/types.h>' for major, minor, makedev on BSD systems. -The new option '--ignore-overflow', which makes '-d, --diff' ignore -differences in mtime caused by overflow on 32-bit systems, has been added. +File diagnostics of '-z' have been reformatted as 'PROGRAM: FILE: MESSAGE'. -Tarlz now refuses to read archive data from a terminal or write archive data -to a terminal. (Reported by DustDFG). +The option '-o, --output' now creates missing intermediate directories when +compressing to a file. -In the date format of option '--mtime' the time of day 'HH:MM:SS' is now -optional and defaults to '00:00:00'. Both space and 'T' are now accepted as -separator between date and time. - -Diagnostics caused by invalid arguments to command line options now show the -argument and the name of the option. - -Tarlz now diagnoses separately the failure to create an intermediate -directory during extraction. - -Failure to extract a member due to environmental problems is no longer fatal -in serial extraction. (It was already non-fatal in parallel extraction). - -The diagnostics emitted by the parallel decoder should now be identical to -the corresponding diagnostics of the serial decoder. - -Column alignment has been improved in listings by printing "user/group size" -in a field of minimum width 19 with at least 8 characters for size. 
- -The diagnostic shown when the filesystem reports a wrong st_size for a -symbolic link has been improved. (Reported by Jason Lenz). - -The diagnostic "File is the archive" has been changed to "Archive can't -contain itself" following a similar change made by Paul Eggert to GNU tar. - -The warning "Removing leading '/' from member names." is now not shown when -compressing nor if the member causing it is excluded. - -The texinfo category of the manual has been changed from 'Data Compression' -to 'Archiving' to match that of GNU tar. - -'end-of-archive' (EOA) is now used consistently to refer to the blocks of -binary zeros used to mark the end of the archive. - -Operations are now listed before options in the --help output and in the -manual. - -Many small improvements have been made to the code and documentation. +The variable MAKEINFO has been added to configure and Makefile.in. @@ -6,7 +6,7 @@ lzlib. Tarlz creates tar archives using a simplified and safer variant of the POSIX pax format compressed in lzip format, keeping the alignment between tar -members and lzip members. The resulting multimember tar.lz archive is fully +members and lzip members. The resulting multimember tar.lz archive is backward compatible with standard tar tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can append files to the end of such compressed archives. @@ -61,14 +61,14 @@ large, making undetected corruption and archiver misbehavior more probable. Headers and metadata must be protected separately from data because the integrity checking of lzip may not be able to detect the corruption before -the metadata has been used, for example, to create a new file in the wrong +the metadata have been used, for example, to create a new file in the wrong place. Because of the above, tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in a way compatible with standard tar tools. -Tarlz does not understand other tar formats like gnu, oldgnu, star or v7. 
-The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to verify +Tarlz does not understand other tar formats like gnu, oldgnu, star, or v7. +The command 'tarlz -t -f archive.tar.lz > /dev/null' can be used to check that the format of the archive is compatible with tarlz. The diagram below shows the correspondence between each tar member (formed @@ -87,11 +87,10 @@ tar.lz +===============+=================================================+========+ -Copyright (C) 2013-2022 Antonio Diaz Diaz. +Copyright (C) 2013-2024 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. -The file Makefile.in is a data file used by configure to produce the -Makefile. It has the same copyright owner and permissions that configure -itself. +The file Makefile.in is a data file used by configure to produce the Makefile. +It has the same copyright owner and permissions that configure itself. diff --git a/archive_reader.cc b/archive_reader.cc index 8ad315d..c4438ae 100644 --- a/archive_reader.cc +++ b/archive_reader.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -76,7 +76,7 @@ void xLZ_decompress_write( LZ_Decoder * const decoder, Archive_descriptor::Archive_descriptor( const std::string & archive_name ) : name( archive_name ), namep( name.empty() ? 
"(stdin)" : name.c_str() ), infd( non_tty_infd( archive_name, namep ) ), - lzip_index( infd, true, false ), + lzip_index( infd ), seekable( lseek( infd, 0, SEEK_SET ) == 0 ), indexed( seekable && lzip_index.retval() == 0 ) {} @@ -90,7 +90,7 @@ int Archive_reader_base::parse_records( Extended & extended, const long long edsize = parse_octal( header + size_o, size_l ); const long long bufsize = round_up( edsize ); if( edsize <= 0 ) return err( 2, misrec_msg ); // no extended records - if( edsize >= 1LL << 33 || bufsize >= INT_MAX ) + if( edsize >= 1LL << 33 || bufsize > max_edata_size ) return err( -2, longrec_msg ); // records too long if( !rbuf.resize( bufsize ) ) return err( -1, mem_msg ); e_msg_ = ""; e_code_ = 0; @@ -116,10 +116,10 @@ int Archive_reader::read( uint8_t * const buf, const int size ) const int rd = readblock( ad.infd, buf, size ); if( rd != size && errno ) return err( -1, rdaerr_msg, errno, rd ); const Lzip_header & header = (*(const Lzip_header *)buf); - const bool islz = ( rd >= min_member_size && header.verify_magic() && - header.verify_version() && + const bool islz = ( rd >= min_member_size && header.check_magic() && + header.check_version() && isvalid_ds( header.dictionary_size() ) ); - const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); + const bool istar = ( rd == size && check_ustar_chksum( buf ) ); const bool iseoa = ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); bool maybe_lz = islz; // maybe corrupt tar.lz @@ -139,7 +139,7 @@ int Archive_reader::read( uint8_t * const buf, const int size ) { LZ_decompress_close( decoder ); decoder = 0; return err( -1, mem_msg ); } xLZ_decompress_write( decoder, buf, rd ); const int ret = read( buf, size ); if( ret != 0 ) return ret; - if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; + if( check_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; return err( 2, islz ? 
posix_lz_msg : "" ); } diff --git a/archive_reader.h b/archive_reader.h index 47fa844..e8963e0 100644 --- a/archive_reader.h +++ b/archive_reader.h @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/arg_parser.cc b/arg_parser.cc index 59998ac..0c04d8e 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2022 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version) + Copyright (C) 2006-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided diff --git a/arg_parser.h b/arg_parser.h index e854838..1eeec9a 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ -/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2022 Antonio Diaz Diaz. +/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version) + Copyright (C) 2006-2024 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,82 +19,9 @@ #include <cctype> #include <cerrno> -#include <cstdlib> -#include <pthread.h> #include <unistd.h> #include "tarlz.h" -#include "arg_parser.h" - - -void xinit_mutex( pthread_mutex_t * const mutex ) - { - const int errcode = pthread_mutex_init( mutex, 0 ); - if( errcode ) - { show_error( "pthread_mutex_init", errcode ); exit_fail_mt(); } - } - -void xinit_cond( pthread_cond_t * const cond ) - { - const int errcode = pthread_cond_init( cond, 0 ); - if( errcode ) - { show_error( "pthread_cond_init", errcode ); exit_fail_mt(); } - } - - -void xdestroy_mutex( pthread_mutex_t * const mutex ) - { - const int errcode = pthread_mutex_destroy( mutex ); - if( errcode ) - { show_error( "pthread_mutex_destroy", errcode ); exit_fail_mt(); } - } - -void xdestroy_cond( pthread_cond_t * const cond ) - { - const int errcode = pthread_cond_destroy( cond ); - if( errcode ) - { show_error( "pthread_cond_destroy", errcode ); exit_fail_mt(); } - } - - -void xlock( pthread_mutex_t * const mutex ) - { - const int errcode = pthread_mutex_lock( mutex ); - if( errcode ) - { show_error( "pthread_mutex_lock", errcode ); exit_fail_mt(); } - } - - -void xunlock( pthread_mutex_t * const mutex ) - { - const int errcode = pthread_mutex_unlock( mutex ); - if( errcode ) - { show_error( "pthread_mutex_unlock", errcode ); exit_fail_mt(); } - } - - -void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ) - { - const int errcode = pthread_cond_wait( cond, mutex ); - if( errcode ) - { show_error( "pthread_cond_wait", errcode ); exit_fail_mt(); } - } - - -void xsignal( pthread_cond_t * const cond ) - { - const int errcode = pthread_cond_signal( cond ); - if( errcode ) - { show_error( "pthread_cond_signal", errcode ); exit_fail_mt(); } - } - - -void xbroadcast( pthread_cond_t * const cond ) - { - const int errcode = 
pthread_cond_broadcast( cond ); - if( errcode ) - { show_error( "pthread_cond_broadcast", errcode ); exit_fail_mt(); } - } unsigned long long parse_octal( const uint8_t * const ptr, const int size ) @@ -143,9 +70,3 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) } return sz; } - - -bool nonempty_arg( const Arg_parser & parser, const int i ) - { - return ( parser.code( i ) == 0 && !parser.argument( i ).empty() ); - } diff --git a/common_decode.cc b/common_decode.cc index 835687f..a0ff89d 100644 --- a/common_decode.cc +++ b/common_decode.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,12 +19,13 @@ #include <cerrno> #include <cstdio> -#include <cstdlib> #include <ctime> +#include <unistd.h> #include <sys/stat.h> #include "tarlz.h" #include "arg_parser.h" +#include "decode.h" namespace { @@ -125,7 +126,7 @@ bool format_member_name( const Extended & extended, const Tar_header header, const time_t mtime = extended.mtime().sec(); struct tm t; if( !localtime_r( &mtime, &t ) ) // if local time fails - { time_t z = 0; if( !gmtime_r( &z, &t ) ) // use the UTC epoch + { time_t z = 0; if( !gmtime_r( &z, &t ) ) // use UTC, the epoch { t.tm_year = 70; t.tm_mon = t.tm_hour = t.tm_min = 0; t.tm_mday = 1; } } const Typeflag typeflag = (Typeflag)header[typeflag_o]; const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); @@ -184,54 +185,65 @@ bool show_member_name( const Extended & extended, const Tar_header header, bool check_skip_filename( const Cl_options & cl_opts, std::vector< char > & name_pending, - const char * const filename ) + const char * const filename, const int chdir_fd ) { + static int c_idx = -1; // parser index of last -C executed if( Exclude::excluded( filename ) ) 
return true; // skip excluded files - bool skip = cl_opts.num_files > 0; // if no files specified, skip nothing - if( skip ) // else skip all but the files (or trees) specified - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - if( nonempty_arg( cl_opts.parser, i ) ) + if( cl_opts.num_files <= 0 ) return false; // no files specified, no skip + bool skip = true; // else skip all but the files (or trees) specified + bool chdir_pending = false; + + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + { + if( cl_opts.parser.code( i ) == 'C' ) { chdir_pending = true; continue; } + if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names + std::string removed_prefix; + const char * const name = remove_leading_dotslash( + cl_opts.parser.argument( i ).c_str(), &removed_prefix ); + if( compare_prefix_dir( name, filename ) || + compare_tslash( name, filename ) ) + { + print_removed_prefix( removed_prefix ); + skip = false; name_pending[i] = false; + if( chdir_pending && chdir_fd >= 0 ) { - std::string removed_prefix; - const char * const name = remove_leading_dotslash( - cl_opts.parser.argument( i ).c_str(), &removed_prefix ); - if( compare_prefix_dir( name, filename ) || - compare_tslash( name, filename ) ) - { print_removed_prefix( removed_prefix ); - skip = false; name_pending[i] = false; break; } + if( c_idx > i ) + { if( fchdir( chdir_fd ) != 0 ) + { show_error( "Error changing to initial working directory", errno ); + throw Chdir_error(); } c_idx = -1; } + for( int j = c_idx + 1; j < i; ++j ) + { + if( cl_opts.parser.code( j ) != 'C' ) continue; + const char * const dir = cl_opts.parser.argument( j ).c_str(); + if( chdir( dir ) != 0 ) + { show_file_error( dir, chdir_msg, errno ); throw Chdir_error(); } + c_idx = j; + } } + break; + } + } return skip; } -mode_t get_umask() +bool make_dirs( const std::string & name ) { - static mode_t mask = 0; // read once, cache the result - static bool first_call = true; - if( first_call ) { first_call = 
false; mask = umask( 0 ); umask( mask ); - mask &= S_IRWXU | S_IRWXG | S_IRWXO; } - return mask; - } - - -bool make_path( const std::string & name ) - { - const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; - unsigned end = name.size(); // first slash before last component - - while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes - while( end > 0 && name[end-1] != '/' ) --end; // remove last component - while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes + int i = name.size(); + while( i > 0 && name[i-1] == '/' ) --i; // remove trailing slashes + while( i > 0 && name[i-1] != '/' ) --i; // remove last component + while( i > 0 && name[i-1] == '/' ) --i; // remove more slashes + const int dirsize = i; // first slash before last component - unsigned index = 0; - while( index < end ) + for( i = 0; i < dirsize; ) // if dirsize == 0, dirname is '/' or empty { - while( index < end && name[index] == '/' ) ++index; - unsigned first = index; - while( index < end && name[index] != '/' ) ++index; - if( first < index ) + while( i < dirsize && name[i] == '/' ) ++i; + const int first = i; + while( i < dirsize && name[i] != '/' ) ++i; + if( first < i ) { - const std::string partial( name, 0, index ); + const std::string partial( name, 0, i ); + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; struct stat st; if( lstat( partial.c_str(), &st ) == 0 ) { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } diff --git a/common_mutex.cc b/common_mutex.cc new file mode 100644 index 0000000..fb253ed --- /dev/null +++ b/common_mutex.cc @@ -0,0 +1,160 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <pthread.h> + +#include "tarlz.h" +#include "common_mutex.h" + + +namespace { + +int error_status = 0; + +} // end namespace + + +void xinit_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_init( mutex, 0 ); + if( errcode ) + { show_error( "pthread_mutex_init", errcode ); exit_fail_mt(); } + } + +void xinit_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_init( cond, 0 ); + if( errcode ) + { show_error( "pthread_cond_init", errcode ); exit_fail_mt(); } + } + + +void xdestroy_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_destroy( mutex ); + if( errcode ) + { show_error( "pthread_mutex_destroy", errcode ); exit_fail_mt(); } + } + +void xdestroy_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_destroy( cond ); + if( errcode ) + { show_error( "pthread_cond_destroy", errcode ); exit_fail_mt(); } + } + + +void xlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_lock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_lock", errcode ); exit_fail_mt(); } + } + + +void xunlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_unlock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_unlock", errcode ); exit_fail_mt(); } + } + + +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ) + { + const int errcode = pthread_cond_wait( cond, mutex ); + if( errcode ) + { show_error( "pthread_cond_wait", errcode ); exit_fail_mt(); } + } + + +void xsignal( pthread_cond_t 
* const cond ) + { + const int errcode = pthread_cond_signal( cond ); + if( errcode ) + { show_error( "pthread_cond_signal", errcode ); exit_fail_mt(); } + } + + +void xbroadcast( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_broadcast( cond ); + if( errcode ) + { show_error( "pthread_cond_broadcast", errcode ); exit_fail_mt(); } + } + + +/* This can be called from any thread, main thread or sub-threads alike, + since they all call common helper functions that call exit_fail_mt() + in case of an error. +*/ +void exit_fail_mt( const int retval ) + { + // calling 'exit' more than once results in undefined behavior + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + std::exit( retval ); + } + + +/* If msgp is null, print the message, else return the message in *msgp. + If prefix is already in the list, print nothing or return empty *msgp. + Return true if a message is printed or returned in *msgp. 
*/ +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp ) + { + // prevent two threads from modifying the list of prefixes at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + static std::vector< std::string > prefixes; // list of prefixes + + if( verbosity < 0 || prefix.empty() ) + { if( msgp ) msgp->clear(); return false; } + xlock( &mutex ); + for( unsigned i = 0; i < prefixes.size(); ++i ) + if( prefixes[i] == prefix ) + { xunlock( &mutex ); if( msgp ) msgp->clear(); return false; } + prefixes.push_back( prefix ); + std::string msg( "Removing leading '" ); msg += prefix; + msg += "' from member names."; + if( msgp ) *msgp = msg; else show_error( msg.c_str() ); + xunlock( &mutex ); // put here to prevent mixing calls to show_error + return true; + } + + +void set_error_status( const int retval ) + { + // prevent two threads from modifying the error_status at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + xlock( &mutex ); + if( error_status < retval ) error_status = retval; + xunlock( &mutex ); + } + + +int final_exit_status( int retval, const bool show_msg ) + { + if( retval == 0 && error_status ) + { if( show_msg ) + show_error( "Exiting with failure status due to previous errors." ); + retval = error_status; } + return retval; + } diff --git a/common_mutex.h b/common_mutex.h new file mode 100644 index 0000000..ed3999c --- /dev/null +++ b/common_mutex.h @@ -0,0 +1,30 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +void xinit_mutex( pthread_mutex_t * const mutex ); +void xinit_cond( pthread_cond_t * const cond ); +void xdestroy_mutex( pthread_mutex_t * const mutex ); +void xdestroy_cond( pthread_cond_t * const cond ); +void xlock( pthread_mutex_t * const mutex ); +void xunlock( pthread_mutex_t * const mutex ); +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ); +void xsignal( pthread_cond_t * const cond ); +void xbroadcast( pthread_cond_t * const cond ); + +// non-pthread_* declarations are in tarlz.h + +const char * const conofin_msg = "courier not finished."; diff --git a/compress.cc b/compress.cc index 4e74efa..3091889 100644 --- a/compress.cc +++ b/compress.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include <cerrno> #include <csignal> #include <cstdio> -#include <cstdlib> #include <stdint.h> // for lzlib.h #include <unistd.h> #include <utime.h> @@ -54,12 +53,11 @@ void cleanup_and_fail( const int retval ) if( delete_output_on_interrupt ) { delete_output_on_interrupt = false; - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", - program_name, output_filename.c_str() ); + show_file_error( output_filename.c_str(), + "Deleting output file, if it exists." 
); if( outfd >= 0 ) { close( outfd ); outfd = -1; } if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) - show_error( "WARNING: deletion of output file (apparently) failed." ); + show_error( "warning: deletion of output file failed", errno ); } std::exit( retval ); } @@ -104,7 +102,7 @@ void close_and_set_permissions( const struct stat * const in_statsp ) if( in_statsp ) { const mode_t mode = in_statsp->st_mode; - // fchown will in many cases return with EPERM, which can be safely ignored. + // fchown in many cases returns with EPERM, which can be safely ignored. if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) { if( fchmod( outfd, mode ) != 0 ) warning = true; } else @@ -113,10 +111,8 @@ void close_and_set_permissions( const struct stat * const in_statsp ) warning = true; } if( close( outfd ) != 0 ) - { - show_error( "Error closing output file", errno ); - cleanup_and_fail( 1 ); - } + { show_file_error( output_filename.c_str(), "Error closing output file", + errno ); cleanup_and_fail( 1 ); } outfd = -1; delete_output_on_interrupt = false; if( in_statsp ) @@ -127,11 +123,12 @@ void close_and_set_permissions( const struct stat * const in_statsp ) if( utime( output_filename.c_str(), &t ) != 0 ) warning = true; } if( warning && verbosity >= 1 ) - show_error( "Can't change output file attributes." 
); + show_file_error( output_filename.c_str(), + "warning: can't change output file attributes", errno ); } -bool archive_write( const uint8_t * const buf, const long long size, +bool archive_write( const uint8_t * const buf, const int size, LZ_Encoder * const encoder ) { static bool flushed = true; // avoid flushing empty lzip members @@ -140,13 +137,12 @@ bool archive_write( const uint8_t * const buf, const long long size, flushed = ( size <= 0 ); enum { obuf_size = 65536 }; uint8_t obuf[obuf_size]; - long long sz = 0; + int sz = 0; if( flushed ) LZ_compress_finish( encoder ); // flush encoder while( sz < size || flushed ) { if( sz < size ) - { const int wr = LZ_compress_write( encoder, buf + sz, - std::min( size - sz, (long long)max_dictionary_size ) ); + { const int wr = LZ_compress_write( encoder, buf + sz, size - sz ); if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); sz += wr; } if( sz >= size && !flushed ) break; // minimize dictionary size @@ -216,26 +212,37 @@ int compress_archive( const Cl_options & cl_opts, Resizable_buffer rbuf; // headers and extended records buffer if( !rbuf.size() ) { show_error( mem_msg ); return 1; } const char * const rderr_msg = "Read error"; + bool first_header = true; while( true ) // process one tar member per iteration { - int total_header_size = header_size; // size of header(s) read + int total_header_size = header_size; // e_header + edata + u_header const int rd = readblock( infd, rbuf.u8(), header_size ); - if( rd == 0 && errno == 0 ) break; // missing EOA blocks + if( rd == 0 && errno == 0 ) // missing EOA blocks + { if( !first_header ) break; + show_file_error( filename, "Archive is empty." 
); + close( infd ); return 2; } if( rd != header_size ) { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; } + first_header = false; - if( to_file && outfd < 0 ) // open outfd after verifying infd + const bool is_header = check_ustar_chksum( rbuf.u8() ); + const bool is_zero = !is_header && block_is_zero( rbuf.u8(), header_size ); + if( to_file && outfd < 0 && ( is_header || is_zero ) ) { + // open outfd after checking infd + if( !make_dirs( output_filename ) ) + { show_file_error( output_filename.c_str(), intdir_msg, errno ); + return 1; } outfd = open_outstream( output_filename, true, 0, false ); // check tty only once and don't try to delete a tty if( outfd < 0 || !check_tty_out() ) { close( infd ); return 1; } delete_output_on_interrupt = true; } - if( !verify_ustar_chksum( rbuf.u8() ) ) // maybe EOA block + if( !is_header ) // maybe EOA block { - if( block_is_zero( rbuf.u8(), header_size ) ) // first EOA block + if( is_zero ) // first EOA block { tail_compress( cl_opts, infd, rbuf.u8(), encoder ); break; } show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } @@ -246,7 +253,7 @@ int compress_archive( const Cl_options & cl_opts, const long long edsize = parse_octal( rbuf.u8() + size_o, size_l ); const long long bufsize = round_up( edsize ); // overflow or no extended data - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize > max_edata_size ) { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } if( !rbuf.resize( total_header_size + bufsize ) ) { show_file_error( filename, mem_msg ); close( infd ); return 1; } @@ -263,7 +270,7 @@ int compress_archive( const Cl_options & cl_opts, if( readblock( infd, rbuf.u8() + total_header_size, header_size ) != header_size ) { show_file_error( filename, errno ? rderr_msg : end_msg, errno ); close( infd ); return errno ? 
1 : 2; } - if( !verify_ustar_chksum( rbuf.u8() ) ) + if( !check_ustar_chksum( rbuf.u8() ) ) { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } const Typeflag typeflag2 = (Typeflag)(rbuf() + total_header_size)[typeflag_o]; if( typeflag2 == tf_extended || typeflag2 == tf_global ) @@ -294,9 +301,7 @@ int compress_archive( const Cl_options & cl_opts, rest -= rd; if( rd != size ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "'%s' ends unexpectedly at pos %llu\n", - filename, file_size - rest ); + show_atpos_error( filename, file_size - rest, true ); close( infd ); return 1; } if( !archive_write( buf, size, encoder ) ) { close( infd ); return 1; } @@ -321,6 +326,18 @@ int compress_archive( const Cl_options & cl_opts, } // end namespace +void show_atpos_error( const char * const filename, const long long pos, + const bool isarchive ) + { + if( verbosity < 0 ) return; + std::fprintf( stderr, "%s: %s: %s %s at pos %llu%s%s\n", program_name, + filename, isarchive ? "Archive" : "File", + ( errno > 0 ) ? "read error" : "ends unexpectedly", pos, + ( errno > 0 ) ? ": " : "", + ( errno > 0 ) ? std::strerror( errno ) : "" ); + } + + int compress( const Cl_options & cl_opts ) { if( cl_opts.num_files > 1 && cl_opts.output_filename.size() ) @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Tarlz - Archiver with multimember lzip compression -# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# Copyright (C) 2013-2024 Antonio Diaz Diaz. # # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=tarlz -pkgversion=0.23 +pkgversion=0.25 progname=tarlz srctrigger=doc/${pkgname}.texi @@ -25,6 +25,7 @@ CPPFLAGS= CXXFLAGS='-Wall -W -O2' LDFLAGS= LIBS='-llz -lpthread' +MAKEINFO=makeinfo # checking whether we are using GNU C++. 
/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } @@ -58,7 +59,7 @@ while [ $# != 0 ] ; do echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" - echo " --srcdir=DIR find the sources in DIR [. or ..]" + echo " --srcdir=DIR find the source code in DIR [. or ..]" echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" @@ -66,11 +67,12 @@ while [ $# != 0 ] ; do echo " --infodir=DIR info files directory [${infodir}]" echo " --mandir=DIR man pages directory [${mandir}]" echo " CXX=COMPILER C++ compiler to use [${CXX}]" - echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" - echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]" echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" - echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]" echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo exit 0 ;; --version | -V) @@ -99,6 +101,7 @@ while [ $# != 0 ] ; do CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; LIBS=*) LIBS="${optarg} ${LIBS}" ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; --*) echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; @@ -118,7 +121,7 @@ while [ $# != 0 ] ; do fi done -# Find the source files, if location was not specified. +# Find the source code, if location was not specified. srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." 
; srcdir=. @@ -130,7 +133,7 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2 echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -150,7 +153,7 @@ if [ -z "${no_create}" ] ; then # This script is free software: you have unlimited permission # to copy, distribute, and modify it. -exec /bin/sh $0 ${args} --no-create +exec /bin/sh "$0" ${args} --no-create EOF chmod +x config.status fi @@ -168,10 +171,11 @@ echo "CPPFLAGS = ${CPPFLAGS}" echo "CXXFLAGS = ${CXXFLAGS}" echo "LDFLAGS = ${LDFLAGS}" echo "LIBS = ${LIBS}" +echo "MAKEINFO = ${MAKEINFO}" rm -f Makefile cat > Makefile << EOF # Makefile for Tarlz - Archiver with multimember lzip compression -# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# Copyright (C) 2013-2024 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -192,9 +196,10 @@ CPPFLAGS = ${CPPFLAGS} CXXFLAGS = ${CXXFLAGS} LDFLAGS = ${LDFLAGS} LIBS = ${LIBS} +MAKEINFO = ${MAKEINFO} EOF cat "${srcdir}/Makefile.in" >> Makefile echo "OK. Now you can run make." -echo "If make fails, verify that the compression library lzlib is correctly" -echo "installed (see INSTALL)." +echo "If make fails, check that the compression library lzlib is correctly installed" +echo "(see INSTALL)." @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,14 +20,14 @@ #include <algorithm> #include <cerrno> #include <cstdio> -#include <cstdlib> -#include <pthread.h> #include <stdint.h> // for lzlib.h #include <unistd.h> #include <sys/stat.h> #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ #include <sys/sysmacros.h> // for major, minor +#else +#include <sys/types.h> // for major, minor #endif #include <ftw.h> #include <grp.h> @@ -50,7 +50,6 @@ const char * archive_namep = 0; unsigned long long partial_data_size = 0; // size of current block Resizable_buffer grbuf; // extended header + data int goutfd = -1; -int error_status = 0; bool option_C_after_relative_filename( const Arg_parser & parser ) @@ -78,7 +77,7 @@ long long check_compressed_appendable( const int fd, const bool remove_eoa ) if( rd == 0 && errno == 0 ) return 0; // append to empty archive if( rd < min_member_size || ( rd != bufsize && errno ) ) return -1; const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc - if( !p->verify_magic() || !p->verify_version() ) return -1; + if( !p->check_magic() || !p->check_version() ) return -1; LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok || LZ_decompress_write( decoder, buf, rd ) != rd || @@ -86,7 +85,7 @@ long long check_compressed_appendable( const int fd, const bool remove_eoa ) { LZ_decompress_close( decoder ); return -1; } LZ_decompress_close( decoder ); const bool maybe_eoa = block_is_zero( buf, header_size ); - if( !verify_ustar_chksum( buf ) && !maybe_eoa ) return -1; + if( !check_ustar_chksum( buf ) && !maybe_eoa ) return -1; const long long end = lseek( fd, 0, SEEK_END ); if( end < min_member_size ) return -1; @@ -100,7 +99,7 @@ long long check_compressed_appendable( const int 
fd, const bool remove_eoa ) Lzip_header header; // read last header if( seek_read( fd, header.data, Lzip_header::size, end - member_size ) != Lzip_header::size ) return -1; - if( !header.verify_magic() || !header.verify_version() || + if( !header.check_magic() || !header.check_version() || !isvalid_ds( header.dictionary_size() ) ) return -1; // EOA marker in last member must contain between 512 and 32256 zeros alone @@ -142,7 +141,7 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eoa ) const int rd = readblock( fd, header, header_size ); if( rd == 0 && errno == 0 ) break; // missing EOA blocks if( rd != header_size ) return -1; - if( !verify_ustar_chksum( header ) ) // maybe EOA block + if( !check_ustar_chksum( header ) ) // maybe EOA block { if( block_is_zero( header, header_size ) ) break; else return -1; } const Typeflag typeflag = (Typeflag)header[typeflag_o]; if( typeflag == tf_extended || typeflag == tf_global ) @@ -150,7 +149,7 @@ long long check_uncompressed_appendable( const int fd, const bool remove_eoa ) if( prev_extended ) return -1; const long long edsize = parse_octal( header + size_o, size_l ); const long long bufsize = round_up( edsize ); - if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize > max_edata_size ) return -1; // overflow or no extended data if( !rbuf.resize( bufsize ) ) return -2; if( readblock( fd, rbuf.u8(), bufsize ) != bufsize ) @@ -204,21 +203,6 @@ bool archive_write( const uint8_t * const buf, const int size ) } -bool write_extended( const Extended & extended ) - { - const long long ebsize = extended.format_block( grbuf ); // may be 0 - if( ebsize < 0 ) - { show_error( ( ebsize == -2 ) ? 
mem_msg2 : eferec_msg ); return false; } - for( long long pos = 0; pos < ebsize; ) // write extended block to archive - { - int size = std::min( ebsize - pos, 1LL << 20 ); - if( !archive_write( grbuf.u8() + pos, size ) ) return false; - pos += size; - } - return true; - } - - // Return true if it stores filename in the ustar header. bool store_name( const char * const filename, Extended & extended, Tar_header header, const bool force_extended_name ) @@ -260,12 +244,15 @@ int add_member( const char * const filename, const struct stat *, const int infd = file_size ? open_instream( filename ) : -1; if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; } + const int ebsize = extended.format_block( grbuf ); // may be 0 + if( ebsize < 0 ) { show_error( extended.full_size_error() ); return 1; } if( encoder && gcl_opts->solidity == bsolid && - block_is_full( extended.full_size(), file_size, gcl_opts->data_size, + block_is_full( ebsize, file_size, gcl_opts->data_size, partial_data_size ) && !archive_write( 0, 0 ) ) return 1; + // write extended block to archive + if( ebsize > 0 && !archive_write( grbuf.u8(), ebsize ) ) return 1; + if( !archive_write( header, header_size ) ) return 1; - if( !write_extended( extended ) || !archive_write( header, header_size ) ) - return 1; if( file_size ) { const long long bufsize = 32 * header_size; @@ -278,9 +265,7 @@ int add_member( const char * const filename, const struct stat *, rest -= rd; if( rd != size ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", - filename, file_size - rest ); + show_atpos_error( filename, file_size - rest, false ); close( infd ); return 1; } if( rest == 0 ) // last read @@ -400,31 +385,6 @@ const char * remove_leading_dotslash( const char * const filename, } -/* If msgp is null, print the message, else return the message in *msgp. - If prefix is already in the list, print nothing or return empty *msgp. 
- Return true if a message is printed or returned in *msgp. */ -bool print_removed_prefix( const std::string & prefix, - std::string * const msgp ) - { - // prevent two threads from modifying the list of prefixes at the same time - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - static std::vector< std::string > prefixes; // list of prefixes - - if( verbosity < 0 || prefix.empty() ) - { if( msgp ) msgp->clear(); return false; } - xlock( &mutex ); - for( unsigned i = 0; i < prefixes.size(); ++i ) - if( prefixes[i] == prefix ) - { xunlock( &mutex ); if( msgp ) msgp->clear(); return false; } - prefixes.push_back( prefix ); - std::string msg( "Removing leading '" ); msg += prefix; - msg += "' from member names."; - if( msgp ) *msgp = msg; else show_error( msg.c_str() ); - xunlock( &mutex ); // put here to prevent mixing calls to show_error - return true; - } - - // set file_size != 0 only for regular files bool fill_headers( const char * const filename, Extended & extended, Tar_header header, long long & file_size, const int flag ) @@ -534,13 +494,13 @@ bool fill_headers( const char * const filename, Extended & extended, } -bool block_is_full( const long long extended_size, +bool block_is_full( const int extended_size, const unsigned long long file_size, const unsigned long long target_size, unsigned long long & partial_data_size ) { const unsigned long long member_size = // may overflow 'long long' - header_size + extended_size + round_up( file_size ); + extended_size + header_size + round_up( file_size ); if( partial_data_size >= target_size || ( partial_data_size >= min_data_size && partial_data_size + member_size / 2 > target_size ) ) @@ -549,25 +509,6 @@ bool block_is_full( const long long extended_size, } -void set_error_status( const int retval ) - { - // prevent two threads from modifying the error_status at the same time - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - xlock( &mutex ); - if( error_status < retval ) error_status = 
retval; - xunlock( &mutex ); - } - -int final_exit_status( int retval, const bool show_msg ) - { - if( retval == 0 && error_status ) - { if( show_msg ) - show_error( "Exiting with failure status due to previous errors." ); - retval = error_status; } - return retval; - } - unsigned ustar_chksum( const Tar_header header ) { unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces @@ -577,8 +518,8 @@ unsigned ustar_chksum( const Tar_header header ) } -bool verify_ustar_chksum( const Tar_header header ) - { return ( verify_ustar_magic( header ) && +bool check_ustar_chksum( const Tar_header header ) + { return ( check_ustar_magic( header ) && ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); } @@ -591,10 +532,25 @@ bool has_lz_ext( const std::string & name ) } +int Cl_options::compressed() const // tri-state bool with error (-2) + { + const int lz_ext = archive_name.empty() ? -1 : has_lz_ext( archive_name ); + if( !level_set ) return lz_ext; // no level set in command line + const bool cl_compressed = !uncompressed(); + if( lz_ext < 0 || lz_ext == cl_compressed ) return cl_compressed; + show_file_error( archive_name.c_str(), lz_ext ? + "Uncompressed archive can't have .lz or .tlz extension." : + "Compressed archive requires .lz or .tlz extension." ); + return -2; + } + + int concatenate( const Cl_options & cl_opts ) { if( cl_opts.num_files <= 0 ) { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; } + int compressed = cl_opts.compressed(); // tri-state bool + if( compressed == -2 ) return 1; const bool to_stdout = cl_opts.archive_name.empty(); archive_namep = to_stdout ? 
"(stdout)" : cl_opts.archive_name.c_str(); const int outfd = @@ -604,24 +560,17 @@ int concatenate( const Cl_options & cl_opts ) { close( outfd ); return 1; } if( !to_stdout && !archive_attrs.init( outfd ) ) { show_file_error( archive_namep, "Can't stat", errno ); return 1; } - int compressed; // tri-state bool - if( to_stdout ) compressed = -1; // unknown - else + if( !to_stdout && compressed >= 0 ) // level or ext are set in cl { - compressed = has_lz_ext( cl_opts.archive_name ); // default value - long long pos = check_compressed_appendable( outfd, true ); - if( pos > 0 ) compressed = true; - else if( pos < 0 ) - { - pos = check_uncompressed_appendable( outfd, true ); - if( pos > 0 ) compressed = false; - else if( pos == -2 ) { show_error( mem_msg ); close( outfd ); return 1; } - else if( pos < 0 ) - { show_file_error( archive_namep, compressed ? - "This does not look like an appendable tar.lz archive." : - "This does not look like an appendable tar archive." ); - close( outfd ); return 2; } - } + const long long pos = compressed ? + check_compressed_appendable( outfd, true ) : + check_uncompressed_appendable( outfd, true ); + if( pos == -2 ) { show_error( mem_msg ); close( outfd ); return 1; } + if( pos < 0 ) + { show_file_error( archive_namep, compressed ? + "This does not look like an appendable tar.lz archive." : + "This does not look like an appendable tar archive." ); + close( outfd ); return 2; } } int retval = 0; @@ -634,17 +583,18 @@ int concatenate( const Cl_options & cl_opts ) const int infd = open_instream( filename ); if( infd < 0 ) { retval = 1; break; } struct stat st; - if( !to_stdout && fstat( infd, &st ) == 0 && archive_attrs.is_the_archive( st ) ) - { show_file_error( filename, "Archive can't contain itself; not concatenated." ); - close( infd ); continue; } + if( !to_stdout && fstat( infd, &st ) == 0 && + archive_attrs.is_the_archive( st ) ) + { show_file_error( filename, "Archive can't contain itself; " + "not concatenated." 
); close( infd ); continue; } long long size; - if( compressed < 0 ) // not initialized yet + if( compressed < 0 ) // not initialized yet { if( ( size = check_compressed_appendable( infd, false ) ) > 0 ) compressed = true; else if( ( size = check_uncompressed_appendable( infd, false ) ) > 0 ) compressed = false; - else if( size != -2 ) { size = -1 ; compressed = has_lz_ext( filename ); } + else if( size != -2 ) { size = -1; compressed = has_lz_ext( filename ); } } else size = compressed ? check_compressed_appendable( infd, false ) : check_uncompressed_appendable( infd, false ); @@ -673,15 +623,12 @@ int concatenate( const Cl_options & cl_opts ) int encode( const Cl_options & cl_opts ) { if( !grbuf.size() ) { show_error( mem_msg ); return 1; } - const bool compressed = ( cl_opts.level >= 0 && cl_opts.level <= 9 ); + int compressed = cl_opts.compressed(); // tri-state bool + if( compressed == -2 ) return 1; const bool to_stdout = cl_opts.archive_name.empty(); archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str(); gcl_opts = &cl_opts; - if( !to_stdout && !compressed && has_lz_ext( cl_opts.archive_name ) ) - { show_file_error( archive_namep, - "Uncompressed mode incompatible with .lz extension." ); return 2; } - const bool append = cl_opts.program_mode == m_append; if( cl_opts.num_files <= 0 ) { @@ -701,18 +648,23 @@ int encode( const Cl_options & cl_opts ) { close( goutfd ); return 1; } if( append && !to_stdout ) { - if( compressed && check_compressed_appendable( goutfd, true ) < 0 ) - { show_file_error( archive_namep, - "This does not look like an appendable tar.lz archive." ); - close( goutfd ); return 2; } - if( !compressed ) + long long pos; + if( compressed < 0 ) // not initialized yet { - const long long pos = check_uncompressed_appendable( goutfd, true ); - if( pos == -2 ) { show_error( mem_msg ); close( goutfd ); return 1; } - if( pos < 0 ) { show_file_error( archive_namep, - "This does not look like an appendable tar archive." 
); - close( goutfd ); return 2; } + if( ( pos = check_compressed_appendable( goutfd, true ) ) > 0 ) + compressed = true; + else if( ( pos = check_uncompressed_appendable( goutfd, true ) ) > 0 ) + compressed = false; + else if( pos != -2 ) { pos = -1; compressed = false; } // unknown } + else pos = compressed ? check_compressed_appendable( goutfd, true ) : + check_uncompressed_appendable( goutfd, true ); + if( pos == -2 ) { show_error( mem_msg ); close( goutfd ); return 1; } + if( pos < 0 ) + { show_file_error( archive_namep, compressed ? + "This does not look like an appendable tar.lz archive." : + "This does not look like an appendable tar archive." ); + close( goutfd ); return 2; } } if( !archive_attrs.init( goutfd ) ) @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,4 +45,3 @@ public: extern Archive_attrs archive_attrs; const char * const cant_stat = "Can't stat input file"; -const char * const eferec_msg = "Error formatting extended records."; diff --git a/create_lz.cc b/create_lz.cc index 1acaf23..5436bf5 100644 --- a/create_lz.cc +++ b/create_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include <algorithm> #include <cerrno> #include <cstdio> -#include <cstdlib> #include <queue> #include <pthread.h> #include <stdint.h> // for lzlib.h @@ -31,6 +30,7 @@ #include "tarlz.h" #include "arg_parser.h" +#include "common_mutex.h" #include "create.h" @@ -87,9 +87,9 @@ struct Ipacket // filename, file size and headers const uint8_t * const header; Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {} - Ipacket( const char * const name, const long long s, + Ipacket( const char * const name, const long long fs, const Extended * const ext, const uint8_t * const head ) - : file_size( s ), filename( name ), extended( ext ), header( head ) {} + : file_size( fs ), filename( name ), extended( ext ), header( head ) {} }; struct Opacket // compressed data to be written to the archive @@ -269,11 +269,14 @@ int add_member_lz( const char * const filename, const struct stat *, { delete[] header; delete extended; return 0; } print_removed_prefix( extended->removed_prefix ); - if( gcl_opts->solidity == bsolid && - block_is_full( extended->full_size(), file_size, gcl_opts->data_size, - partial_data_size ) ) - courierp->receive_packet( new Ipacket ); // end of group - + if( gcl_opts->solidity == bsolid ) + { + const int ebsize = extended->full_size(); + if( ebsize < 0 ) { show_error( extended->full_size_error() ); return 1; } + if( block_is_full( ebsize, file_size, gcl_opts->data_size, + partial_data_size ) ) + courierp->receive_packet( new Ipacket ); // end of group + } courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) ); if( gcl_opts->solidity == no_solid ) // one tar member per group @@ -331,9 +334,9 @@ extern "C" void * grouper( void * arg ) } -/* Writes ibuf to encoder. To minimize dictionary size, it does not read - from encoder until encoder's input buffer is full or finish is true. 
- Sends opacket to courier and allocates new obuf each time obuf is full. +/* Write ibuf to encoder. To minimize dictionary size, do not read from + encoder until encoder's input buffer is full or finish is true. + Send opacket to courier and allocate new obuf each time obuf is full. */ void loop_encode( const uint8_t * const ibuf, const int isize, uint8_t * & obuf, int & opos, Packet_courier & courier, @@ -423,8 +426,7 @@ extern "C" void * cworker( void * arg ) } const char * const filename = ipacket->filename.c_str(); - const int infd = - ipacket->file_size ? open_instream( filename ) : -1; + const int infd = ipacket->file_size ? open_instream( filename ) : -1; if( ipacket->file_size && infd < 0 ) // can't read file data { delete[] ipacket->header; delete ipacket->extended; delete ipacket; set_error_status( 1 ); continue; } // skip file @@ -444,17 +446,11 @@ extern "C" void * cworker( void * arg ) } } - if( !ipacket->extended->empty() ) // compress extended block - { - const long long ebsize = ipacket->extended->format_block( rbuf ); - if( ebsize < 0 ) - { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); exit_fail_mt(); } - /* Limit the size of the extended block to INT_MAX - 1 so that it can - be fed to lzlib as one buffer. */ - if( ebsize >= INT_MAX ) - { show_error( "Extended records size >= INT_MAX." 
); exit_fail_mt(); } + const int ebsize = ipacket->extended->format_block( rbuf ); // may be 0 + if( ebsize < 0 ) + { show_error( ipacket->extended->full_size_error() ); exit_fail_mt(); } + if( ebsize > 0 ) // compress extended block loop_encode( rbuf.u8(), ebsize, data, opos, courier, encoder, worker_id ); - } // compress ustar header loop_encode( ipacket->header, header_size, data, opos, courier, encoder, worker_id ); @@ -472,9 +468,7 @@ extern "C" void * cworker( void * arg ) rest -= rd; if( rd != size ) { - if( verbosity >= 0 ) - std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", - filename, ipacket->file_size - rest ); + show_atpos_error( filename, ipacket->file_size - rest, false ); close( infd ); exit_fail_mt(); } if( rest == 0 ) // last read @@ -595,6 +589,6 @@ int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, courier.ocheck_counter, courier.owait_counter ); - if( !courier.finished() ) internal_error( "courier not finished." ); + if( !courier.finished() ) internal_error( conofin_msg ); return final_exit_status( retval ); } @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,7 +21,7 @@ #include <cctype> #include <cerrno> #include <cstdio> -#include <cstdlib> +#include <fcntl.h> #include <stdint.h> // for lzlib.h #include <unistd.h> #include <utime.h> @@ -29,6 +29,8 @@ #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ #include <sys/sysmacros.h> // for major, minor, makedev +#else +#include <sys/types.h> // for major, minor, makedev #endif #include <lzlib.h> @@ -38,6 +40,9 @@ #include "archive_reader.h" #include "decode.h" +#ifndef O_DIRECTORY +#define O_DIRECTORY 0 +#endif namespace { @@ -124,7 +129,7 @@ int extract_member( const Cl_options & cl_opts, Archive_reader & ar, if( !show_member_name( extended, header, 1, grbuf ) ) return 1; // remove file (or empty dir) before extraction to prevent following links std::remove( filename ); - if( !make_path( filename ) ) + if( !make_dirs( filename ) ) { show_file_error( filename, intdir_msg, errno ); set_error_status( 1 ); @@ -192,7 +197,7 @@ int extract_member( const Cl_options & cl_opts, Archive_reader & ar, chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) { if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); - // chown will in many cases return with EPERM, which can be safely ignored. + // chown in many cases returns with EPERM, which can be safely ignored. 
if( errno != EPERM && errno != EINVAL ) { show_file_error( filename, chown_msg, errno ); set_error_status( 1 ); } } @@ -246,9 +251,37 @@ void format_file_diff( std::string & ostr, const char * const filename, { if( verbosity >= 0 ) { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } } + +bool option_C_present( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 'C' ) return true; + return false; + } + + +bool option_C_after_filename( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) ) + while( ++i < parser.arguments() ) + if( parser.code( i ) == 'C' ) return true; + return false; + } + } // end namespace +mode_t get_umask() + { + static mode_t mask = 0; // read once, cache the result + static bool first_call = true; + if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); + mask &= S_IRWXU | S_IRWXG | S_IRWXO; } + return mask; + } + + bool compare_file_type( std::string & estr, std::string & ostr, const Cl_options & cl_opts, const Extended & extended, const Tar_header header ) @@ -258,7 +291,7 @@ bool compare_file_type( std::string & estr, std::string & ostr, struct stat st; bool diff = false, size_differs = false, type_differs = true; if( hstat( filename, &st, cl_opts.dereference ) != 0 ) - format_file_error( estr, filename, "warning: Can't stat", errno ); + format_file_error( estr, filename, "warning: can't stat", errno ); else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && !S_ISREG( st.st_mode ) ) format_file_diff( ostr, filename, "Is not a regular file" ); @@ -275,14 +308,14 @@ bool compare_file_type( std::string & estr, std::string & ostr, else { type_differs = false; - if( typeflag != tf_symlink ) + if( typeflag != tf_symlink && !cl_opts.ignore_metadata ) { const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX | S_IRWXU | S_IRWXG | 
S_IRWXO ) ) ) { format_file_diff( ostr, filename, "Mode differs" ); diff = true; } } - if( !cl_opts.ignore_ids ) + if( !cl_opts.ignore_ids && !cl_opts.ignore_metadata ) { if( extended.get_uid() != (long long)st.st_uid ) { format_file_diff( ostr, filename, "Uid differs" ); diff = true; } @@ -291,7 +324,7 @@ bool compare_file_type( std::string & estr, std::string & ostr, } if( typeflag != tf_symlink ) { - if( typeflag != tf_directory && + if( typeflag != tf_directory && !cl_opts.ignore_metadata && extended.mtime().sec() != (long long)st.st_mtime ) { if( (time_t)extended.mtime().sec() == st.st_mtime ) @@ -380,32 +413,37 @@ int decode( const Cl_options & cl_opts ) const Archive_descriptor ad( cl_opts.archive_name ); if( ad.infd < 0 ) return 1; - // Execute -C options and mark filenames to be compared, extracted or listed. - // name_pending is of type char instead of bool to allow concurrent update. - std::vector< char > name_pending( cl_opts.parser.arguments(), false ); - for( int i = 0; i < cl_opts.parser.arguments(); ++i ) - { - const int code = cl_opts.parser.code( i ); - if( code == 'C' && cl_opts.program_mode != m_list ) + const bool c_present = option_C_present( cl_opts.parser ) && + cl_opts.program_mode != m_list; + const bool c_after_name = c_present && + option_C_after_filename( cl_opts.parser ); + // save current working directory for sequential decoding + const int chdir_fd = c_after_name ? 
open( ".", O_RDONLY | O_DIRECTORY ) : -1; + if( c_after_name && chdir_fd < 0 ) + { show_error( "Can't save current working directory", errno ); return 1; } + if( c_present && !c_after_name ) // execute all -C options + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) { + if( cl_opts.parser.code( i ) != 'C' ) continue; const char * const dir = cl_opts.parser.argument( i ).c_str(); if( chdir( dir ) != 0 ) { show_file_error( dir, chdir_msg, errno ); return 1; } } - if( !code && cl_opts.parser.argument( i ).size() && + /* Mark filenames to be compared, extracted or listed. + name_pending is of type char instead of bool to allow concurrent update. */ + std::vector< char > name_pending( cl_opts.parser.arguments(), false ); + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && // skip opts, empty names !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) name_pending[i] = true; - } - // multi-threaded --list is faster even with 1 thread and 1 file in archive - // but multi-threaded --diff and --extract probably need at least 2 of each - if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list || - cl_opts.program_mode == m_extract ) && cl_opts.num_workers > 0 && - ad.indexed && ad.lzip_index.members() >= 2 ) // one file + EOA - { - // show_file_error( ad.namep, "Is compressed seekable" ); + /* multi-threaded --list is faster even with 1 thread and 1 file in archive + but multi-threaded --diff and --extract probably need at least 2 of each. + CWD is not per-thread; multi-threaded decode can't be used if a + -C option appears after a file name in the command line. 
*/ + if( cl_opts.num_workers > 0 && !c_after_name && ad.indexed && + ad.lzip_index.members() >= 2 ) // 2 lzip members may be 1 file + EOA return decode_lz( cl_opts, ad, name_pending ); - } Archive_reader ar( ad ); // serial reader Extended extended; // metadata from extended records @@ -416,7 +454,7 @@ int decode( const Cl_options & cl_opts ) Tar_header header; const int ret = ar.read( header, header_size ); if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) { retval = ret; break; } } - if( ret != 0 || !verify_ustar_chksum( header ) ) // error or EOA + if( ret != 0 || !check_ustar_chksum( header ) ) // error or EOA { if( ret == 0 && block_is_zero( header, header_size ) ) // EOA { @@ -461,20 +499,23 @@ int decode( const Cl_options & cl_opts ) extended.fill_from_ustar( header ); // copy metadata from header - // members without name are skipped except when listing - if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) - retval = skip_member( ar, extended, typeflag ); - else - { - print_removed_prefix( extended.removed_prefix ); - if( cl_opts.program_mode == m_list ) - retval = list_member( ar, extended, header ); - else if( extended.path().empty() ) - retval = skip_member( ar, extended, typeflag ); - else if( cl_opts.program_mode == m_diff ) - retval = compare_member( cl_opts, ar, extended, header ); - else retval = extract_member( cl_opts, ar, extended, header ); + try { + // members without name are skipped except when listing + if( check_skip_filename( cl_opts, name_pending, extended.path().c_str(), + chdir_fd ) ) retval = skip_member( ar, extended, typeflag ); + else + { + print_removed_prefix( extended.removed_prefix ); + if( cl_opts.program_mode == m_list ) + retval = list_member( ar, extended, header ); + else if( extended.path().empty() ) + retval = skip_member( ar, extended, typeflag ); + else if( cl_opts.program_mode == m_diff ) + retval = compare_member( cl_opts, ar, extended, header ); + else retval = extract_member( cl_opts, ar, 
extended, header ); + } } + catch( Chdir_error & ) { retval = 1; } extended.reset(); if( retval ) { show_error( "Error is not recoverable: exiting now." ); break; } @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,10 +23,13 @@ inline bool uid_gid_in_range( const long long uid, const long long gid ) gid == (long long)( (gid_t)gid ); } const char * const dotdot_msg = "Contains a '..' component, skipping."; -const char * const intdir_msg = "Failed to create intermediate directory"; const char * const cantln_msg = "Can't %slink '%s' to '%s'"; const char * const mkdir_msg = "Can't create directory"; const char * const mknod_msg = "Can't create device node"; const char * const mkfifo_msg = "Can't create FIFO file"; const char * const uftype_msg = "%s: Unknown file type '%c', skipping."; const char * const chown_msg = "Can't change file owner"; + +mode_t get_umask(); + +struct Chdir_error {}; diff --git a/decode_lz.cc b/decode_lz.cc index 8780eab..867ffa5 100644 --- a/decode_lz.cc +++ b/decode_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include <algorithm> #include <cerrno> #include <cstdio> -#include <cstdlib> #include <queue> #include <pthread.h> #include <stdint.h> // for lzlib.h @@ -30,6 +29,8 @@ #if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ #include <sys/sysmacros.h> // for major, minor, makedev +#else +#include <sys/types.h> // for major, minor, makedev #endif #include <lzlib.h> @@ -37,6 +38,7 @@ #include "arg_parser.h" #include "lzip_index.h" #include "archive_reader.h" +#include "common_mutex.h" #include "decode.h" /* When a problem is detected by any worker: @@ -218,7 +220,7 @@ public: bool reserve_name( const unsigned worker_id, const std::string & filename ) { - // compare the CRCs of the names, verify collisions comparing the names + // compare the CRCs of the names; compare the names if the CRCs collide const unsigned crc = crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() ); xlock( &mutex ); @@ -258,10 +260,10 @@ Trival skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, Trival compare_member_lz( const Cl_options & cl_opts, - Archive_reader_i & ar, Packet_courier & courier, - const Extended & extended, const Tar_header header, - Resizable_buffer & rbuf, const long member_id, - const int worker_id ) + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id ) { if( verbosity < 1 ) rbuf()[0] = 0; else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) @@ -357,7 +359,7 @@ Trival extract_member_lz( const Cl_options & cl_opts, /* Remove file before extraction to prevent following links. Don't remove an empty dir because other thread may need it. 
*/ if( typeflag != tf_directory ) std::remove( filename ); - if( !make_path( filename ) ) + if( !make_dirs( filename ) ) { if( format_file_error( rbuf, filename, intdir_msg, errno ) && !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) @@ -451,7 +453,7 @@ Trival extract_member_lz( const Cl_options & cl_opts, chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) { if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); - // chown will in many cases return with EPERM, which can be safely ignored. + // chown in many cases returns with EPERM, which can be safely ignored. if( errno != EPERM && errno != EINVAL ) { if( format_file_error( rbuf, filename, chown_msg, errno ) && @@ -576,7 +578,7 @@ extern "C" void * dworker( void * arg ) courier.collect_packet( i, worker_id, ar.e_msg(), ( ret == 1 ) ? Packet::error1 : Packet::error2, ar.e_code() ); goto done; } - if( !verify_ustar_chksum( header ) ) // error or EOA + if( !check_ustar_chksum( header ) ) // error or EOA { if( !courier.request_mastership( i, worker_id ) ) goto done; if( block_is_zero( header, header_size ) ) // EOA @@ -758,6 +760,6 @@ int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, courier.ocheck_counter, courier.owait_counter ); - if( !courier.finished() ) internal_error( "courier not finished." ); + if( !courier.finished() ) internal_error( conofin_msg ); return final_exit_status( retval, cl_opts.program_mode != m_diff ); } @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include <cctype> #include <cerrno> #include <cstdio> -#include <cstdlib> #include <stdint.h> // for lzlib.h #include <unistd.h> #include <lzlib.h> @@ -101,8 +100,8 @@ int delete_members( const Cl_options & cl_opts ) if( !ad.seekable ) { show_file_error( ad.namep, "Archive is not seekable." ); return 1; } if( ad.lzip_index.file_size() < 3 * header_size ) - { show_file_error( ad.namep, has_lz_ext( ad.name ) ? posix_lz_msg : posix_msg ); - return 2; } + { show_file_error( ad.namep, has_lz_ext( ad.name ) ? + posix_lz_msg : posix_msg ); return 2; } // archive is uncompressed seekable, unless compressed corrupt Archive_reader ar( ad ); // serial reader @@ -121,7 +120,7 @@ int delete_members( const Cl_options & cl_opts ) Tar_header header; if( ( retval = ar.read( header, header_size ) ) != 0 ) { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } - if( !verify_ustar_chksum( header ) ) // error or EOA + if( !check_ustar_chksum( header ) ) // error or EOA { if( block_is_zero( header, header_size ) ) // EOA { @@ -129,7 +128,7 @@ int delete_members( const Cl_options & cl_opts ) { show_file_error( ad.namep, fv_msg1 ); retval = 2; } break; } - // posix format already verified by archive reader + // posix format already checked by archive reader show_file_error( ad.namep, bad_hdr_msg ); retval = 2; break; } diff --git a/delete_lz.cc b/delete_lz.cc index 2e536e3..b67efa0 100644 --- a/delete_lz.cc +++ b/delete_lz.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include <cctype> #include <cerrno> #include <cstdio> -#include <cstdlib> #include <stdint.h> // for lzlib.h #include <unistd.h> #include <lzlib.h> @@ -65,7 +64,7 @@ int delete_members_lz( const Cl_options & cl_opts, Tar_header header; if( ( retval = ar.read( header, header_size ) ) != 0 ) { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } - if( !verify_ustar_chksum( header ) ) // error or EOA + if( !check_ustar_chksum( header ) ) // error or EOA { if( block_is_zero( header, header_size ) ) // EOA { @@ -73,7 +72,7 @@ int delete_members_lz( const Cl_options & cl_opts, { show_file_error( ad.namep, fv_msg1 ); retval = 2; } goto done; } - // indexed archive reader does not verify posix format + // indexed archive reader does not check posix format show_file_error( ad.namep, ( ar.data_pos() > header_size ) ? bad_hdr_msg : posix_lz_msg ); retval = 2; @@ -112,7 +111,7 @@ int delete_members_lz( const Cl_options & cl_opts, if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) { print_removed_prefix( extended.removed_prefix ); - // verify that members match + // check that members match if( member_begin != ad.lzip_index.dblock( i ).pos() || !ar.at_member_end() ) { show_file_error( extended.path().c_str(), "Can't delete: not compressed individually." ); diff --git a/doc/tarlz.1 b/doc/tarlz.1 index d23b164..9d63da5 100644 --- a/doc/tarlz.1 +++ b/doc/tarlz.1 @@ -1,5 +1,5 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH TARLZ "1" "September 2022" "tarlz 0.23" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. 
+.TH TARLZ "1" "January 2024" "tarlz 0.25" "User Commands" .SH NAME tarlz \- creates tar archives with multimember lzip compression .SH SYNOPSIS @@ -10,12 +10,12 @@ Tarlz is a massively parallel (multi\-threaded) combined implementation of the tar archiver and the lzip compressor. Tarlz uses the compression library lzlib. .PP -Tarlz creates, lists, and extracts archives in a simplified and safer -variant of the POSIX pax format compressed in lzip format, keeping the -alignment between tar members and lzip members. The resulting multimember -tar.lz archive is fully backward compatible with standard tar tools like GNU -tar, which treat it like any other tar.lz archive. Tarlz can append files to -the end of such compressed archives. +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. .PP Keeping the alignment between tar members and lzip members has two advantages. It adds an indexed lzip layer on top of the tar archive, making @@ -80,7 +80,7 @@ follow symlinks; archive the files they point to set number of (de)compression threads [2] .TP \fB\-o\fR, \fB\-\-output=\fR<file> -compress to <file> +compress to <file> ('\-' for stdout) .TP \fB\-p\fR, \fB\-\-preserve\-permissions\fR don't subtract the umask on extraction @@ -127,6 +127,9 @@ exclude files matching a shell pattern \fB\-\-ignore\-ids\fR ignore differences in owner and group IDs .TP +\fB\-\-ignore\-metadata\fR +compare only file size and file content +.TP \fB\-\-ignore\-overflow\fR ignore mtime overflow differences on 32\-bit .TP @@ -149,7 +152,7 @@ If no archive is specified, tarlz tries to read it from standard input or write it to standard output. 
.PP Exit status: 0 for a normal exit, 1 for environmental problems -(file not found, files differ, invalid command line options, I/O errors, +(file not found, files differ, invalid command\-line options, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (e.g., bug) which caused tarlz to panic. .SH "REPORTING BUGS" @@ -157,8 +160,8 @@ Report bugs to lzip\-bug@nongnu.org .br Tarlz home page: http://www.nongnu.org/lzip/tarlz.html .SH COPYRIGHT -Copyright \(co 2022 Antonio Diaz Diaz. -Using lzlib 1.13 +Copyright \(co 2024 Antonio Diaz Diaz. +Using lzlib 1.14\-rc1 License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. diff --git a/doc/tarlz.info b/doc/tarlz.info index d71c0a3..25ba882 100644 --- a/doc/tarlz.info +++ b/doc/tarlz.info @@ -11,12 +11,12 @@ File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) Tarlz Manual ************ -This manual is for Tarlz (version 0.23, 23 September 2022). +This manual is for Tarlz (version 0.25, 3 January 2024). * Menu: * Introduction:: Purpose and features of tarlz -* Invoking tarlz:: Command line interface +* Invoking tarlz:: Command-line interface * Portable character set:: POSIX portable filename character set * File format:: Detailed format of the compressed archive * Amendments to pax format:: The reasons for the differences with pax @@ -28,7 +28,7 @@ This manual is for Tarlz (version 0.23, 23 September 2022). * Concept index:: Index of concepts - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -46,8 +46,8 @@ library lzlib. Tarlz creates tar archives using a simplified and safer variant of the POSIX pax format compressed in lzip format, keeping the alignment between tar members and lzip members. 
The resulting multimember tar.lz archive is -fully backward compatible with standard tar tools like GNU tar, which treat -it like any other tar.lz archive. Tarlz can append files to the end of such +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such compressed archives. Keeping the alignment between tar members and lzip members has two @@ -58,9 +58,9 @@ plzip may even double the amount of files lost for each lzip member damaged because it does not keep the members aligned. Tarlz can create tar archives with five levels of compression -granularity: per file (--no-solid), per block (--bsolid, default), per -directory (--dsolid), appendable solid (--asolid), and solid (--solid). It -can also create uncompressed tar archives. +granularity: per file ('--no-solid'), per block ('--bsolid', default), per +directory ('--dsolid'), appendable solid ('--asolid'), and solid +('--solid'). It can also create uncompressed tar archives. Of course, compressing each file (or each directory) individually can't achieve a compression ratio as high as compressing solidly the whole tar @@ -87,9 +87,9 @@ archive, but it has the following advantages: Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in a way compatible with standard tar tools. *Note crc32::. - Tarlz does not understand other tar formats like 'gnu', 'oldgnu', 'star' -or 'v7'. The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to -verify that the format of the archive is compatible with tarlz. + Tarlz does not understand other tar formats like 'gnu', 'oldgnu', +'star', or 'v7'. The command 'tarlz -t -f archive.tar.lz > /dev/null' can +be used to check that the format of the archive is compatible with tarlz. File: tarlz.info, Node: Invoking tarlz, Next: Portable character set, Prev: Introduction, Up: Top @@ -140,7 +140,7 @@ to '-1 --solid'. 
'-A' '--concatenate' Append one or more archives to the end of an archive. If no archive is - specified with the option '-f', the input archives are concatenated to + specified with the option '-f', concatenate the input archives to standard output. All the archives involved must be regular (seekable) files, and must be either all compressed or all uncompressed. Compressed and uncompressed archives can't be mixed. Compressed @@ -163,7 +163,7 @@ to '-1 --solid'. '-d' '--diff' Compare and report differences between archive and file system. For - each tar member in the archive, verify that the corresponding file in + each tar member in the archive, check that the corresponding file in the file system exists and is of the same type (regular file, directory, etc). Report on standard output the differences found in type, mode (permissions), owner and group IDs, modification time, file @@ -224,22 +224,25 @@ to '-1 --solid'. directory without extracting the files under it, use 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz removes files and empty directories unconditionally before extracting over them. Other than - that, it will not make any special effort to extract a file over an + that, it does not make any special effort to extract a file over an incompatible type of file. For example, extracting a file over a - non-empty directory will usually fail. + non-empty directory usually fails. '-z' '--compress' Compress existing POSIX tar archives aligning the lzip members to the - tar members with choice of granularity (--bsolid by default, --dsolid - works like --asolid). The input archives are kept unchanged. Existing - compressed archives are not overwritten. A hyphen '-' used as the name - of an input archive reads from standard input and writes to standard - output (unless the option '--output' is used). Tarlz can be used as - compressor for GNU tar using a command like - 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. 
Note that tarlz only - works reliably on archives without global headers, or with global - headers whose content can be ignored. + tar members with choice of granularity ('--bsolid' by default, + '--dsolid' works like '--asolid'). Exit with error status 2 if any + input archive is an empty file. The input archives are kept unchanged. + Existing compressed archives are not overwritten. A hyphen '-' used as + the name of an input archive reads from standard input and writes to + standard output (unless the option '--output' is used). Tarlz can be + used as compressor for GNU tar by using a command like + 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. Tarlz can be used as + compressor for zupdate (zutils) by using a command like + 'zupdate --lz="tarlz -z" foo.tar.gz'. Note that tarlz only works + reliably on archives without global headers, or with global headers + whose content can be ignored. The compression is reversible, including any garbage present after the end-of-archive blocks. Tarlz stops parsing after the first @@ -277,18 +280,18 @@ to '-1 --solid'. '-C DIR' '--directory=DIR' - Change to directory DIR. When creating or appending, the position of - each '-C' option in the command line is significant; it will change the - current working directory for the following FILES until a new '-C' - option appears in the command line. When extracting or comparing, all - the '-C' options are executed in sequence before reading the archive. - Listing ignores any '-C' options specified. DIR is relative to the - then current working directory, perhaps changed by a previous '-C' + Change to directory DIR. When creating, appending, comparing, or + extracting, the position of each '-C' option in the command line is + significant; it changes the current working directory for the following + FILES until a new '-C' option appears in the command line. '--list' + and '--delete' ignore any '-C' options specified. 
DIR is relative to + the then current working directory, perhaps changed by a previous '-C' option. Note that a process can only have one current working directory (CWD). - Therefore multi-threading can't be used to create an archive if a '-C' - option appears after a relative file name in the command line. + Therefore multi-threading can't be used to create or decode an archive + if a '-C' option appears after a (relative) file name in the command + line. (All file names are made relative when decoding). '-f ARCHIVE' '--file=ARCHIVE' @@ -308,8 +311,7 @@ to '-1 --solid'. support". A value of 0 disables threads entirely. If this option is not used, tarlz tries to detect the number of processors in the system and use it as default value. 'tarlz --help' shows the system's default - value. See the note about multi-threaded archive creation in the - option '-C' above. + value. See the note about multi-threading in the option '-C' above. Note that the number of usable threads is limited during compression to ceil( uncompressed_size / data_size ) (*note Minimum archive sizes::), @@ -360,7 +362,9 @@ to '-1 --solid'. With '--create', don't compress the tar archive created. Create an uncompressed tar archive instead. With '--append', don't compress the new members appended to the tar archive. Compressed members can't be - appended to an uncompressed archive, nor vice versa. + appended to an uncompressed archive, nor vice versa. '--uncompressed' + can be omitted if it can be deduced from the archive name. (An + uncompressed archive name lacks a '.lz' or '.tlz' extension). '--asolid' When creating or appending to a compressed archive, use appendable @@ -429,6 +433,12 @@ to '-1 --solid'. Make '--diff' ignore differences in owner and group IDs. This option is useful when comparing an '--anonymous' archive. +'--ignore-metadata' + Make '--diff' ignore any differences in metadata (file permissions, + owner and group IDs, modification time). 
Compare only file type, file + size, and file content. This option is useful when file permissions + have not been fully restored because uid/gid changed on extraction. + '--ignore-overflow' Make '--diff' ignore differences in mtime caused by overflow on 32-bit systems with a 32-bit time_t. @@ -438,13 +448,13 @@ to '-1 --solid'. happens while extracting a file, keep the partial data extracted. Use this option to recover as much data as possible from each damaged member. It is recommended to run tarlz in single-threaded mode - (--threads=0) when using this option. + ('--threads=0') when using this option. '--missing-crc' Exit with error status 2 if the CRC of the extended records is missing. When this option is used, tarlz detects any corruption in the extended records (only limited by CRC collisions). But note that a - corrupt 'GNU.crc32' keyword, for example 'GNU.crc33', is reported as a + corrupt 'GNU.crc32' keyword, for example 'GNU.crc30', is reported as a missing CRC instead of as a corrupt record. This misleading 'Missing CRC' message is the consequence of a flaw in the POSIX pax format; i.e., the lack of a mandatory check sequence of the extended @@ -481,7 +491,7 @@ to '-1 --solid'. Exit status: 0 for a normal exit, 1 for environmental problems (file not -found, files differ, invalid command line options, I/O errors, etc), 2 to +found, files differ, invalid command-line options, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (e.g., bug) which caused tarlz to panic. @@ -525,7 +535,7 @@ In the diagram below, a box like this: bytes (for example 512). - A tar.lz file consists of a series of lzip members (compressed data + A tar.lz file consists of one or more lzip members (compressed data sets). The members simply appear one after another in the file, with no additional information before, between, or after them. @@ -560,7 +570,7 @@ binary zeros, interpreted as an end-of-archive indicator. 
These EOA blocks are either compressed in a separate lzip member or compressed along with the tar members contained in the last lzip member. For a compressed archive to be recognized by tarlz as appendable, the last lzip member must contain -between 512 and 32256 zeros alone. +between 512 and 32256 zeros alone (without any non-zero bytes). The diagram below shows the correspondence between each tar member (formed by one or two headers plus optional data) in the tar archive and @@ -588,6 +598,10 @@ header block are zeroed on archive creation to prevent trouble if the archive is read by an ustar tool, and are ignored by tarlz on archive extraction. *Note flawed-compat::. + Tarlz limits the size of the pax extended header data so that the whole +header set (extended header + extended data + ustar header) can be read and +decoded in a buffer of size INT_MAX. + The pax extended header data consists of one or more records, each of them constructed as follows: '"%d %s=%s\n", <length>, <keyword>, <value>' @@ -610,7 +624,7 @@ space, equal-sign, and newline. 'gid' The unsigned decimal representation of the group ID of the group that owns the following file. The gid record is created only for files with - a group ID greater than 2_097_151 (octal 7777777). *Note + a group ID greater than 2_097_151 (octal 7_777_777). *Note ustar-uid-gid::. 'linkpath' @@ -618,11 +632,11 @@ space, equal-sign, and newline. previously archived. This record overrides the field 'linkname' in the following ustar header block. The following ustar header block determines the type of link created. If typeflag of the following - header block is 1, it will be a hard link. If typeflag is 2, it will - be a symbolic link and the linkpath value will be used as the contents - of the symbolic link. The linkpath record is created only for links - with a link name that does not fit in the space provided by the ustar - header. + header block is 1, a hard link is created. 
If typeflag is 2, a + symbolic link is created and the linkpath value is used as the + contents of the symbolic link. The linkpath record is created only for + links with a link name that does not fit in the space provided by the + ustar header. 'mtime' The signed decimal representation of the modification time of the @@ -645,19 +659,20 @@ space, equal-sign, and newline. digits from the ISO/IEC 646:1991 (ASCII) standard. This record overrides the field 'size' in the following ustar header block. The size record is created only for files with a size value greater than - 8_589_934_591 (octal 77777777777); that is, 8 GiB (2^33 bytes) or + 8_589_934_591 (octal 77_777_777_777); that is, 8 GiB (2^33 bytes) or larger. 'uid' The unsigned decimal representation of the user ID of the file owner of the following file. The uid record is created only for files with a - user ID greater than 2_097_151 (octal 7777777). *Note ustar-uid-gid::. + user ID greater than 2_097_151 (octal 7_777_777). *Note + ustar-uid-gid::. 'GNU.crc32' CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes representing the CRC <value> itself. The <value> is represented as 8 hexadecimal digits in big endian order, '22 GNU.crc32=00000000\n'. The - keyword of the CRC record is protected by the CRC to guarante that + keyword of the CRC record is protected by the CRC to guarantee that corruption is always detected when using '--missing-crc' (except in case of CRC collision). A CRC was chosen because a checksum is too weak for a potentially large list of variable sized records. A @@ -729,7 +744,7 @@ S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001 The fields 'uid' and 'gid' are the user and group IDs of the owner and group of the file, respectively. If the file uid or gid are greater than -2_097_151 (octal 7777777), an extended record is used to store the uid or +2_097_151 (octal 7_777_777), an extended record is used to store the uid or gid. 
The field 'size' contains the octal representation of the size of the @@ -739,13 +754,13 @@ records following the header is (size / 512) rounded to the next integer. For all other values of typeflag, tarlz either sets the size field to 0 or ignores it, and does not store or expect any logical records following the header. If the file size is larger than 8_589_934_591 bytes -(octal 77777777777), an extended record is used to store the file size. +(octal 77_777_777_777), an extended record is used to store the file size. The field 'mtime' contains the octal representation of the modification time of the file at the time it was archived, obtained from the function 'stat'. If the modification time is negative or larger than 8_589_934_591 -(octal 77777777777) seconds since the epoch, an extended record is used to -store the modification time. The ustar range of mtime goes from +(octal 77_777_777_777) seconds since the epoch, an extended record is used +to store the modification time. The ustar range of mtime goes from '1970-01-01 00:00:00 UTC' to '2242-03-16 12:56:31 UTC'. The field 'chksum' contains the octal representation of the value of the @@ -827,7 +842,7 @@ more probable. Headers and metadata must be protected separately from data because the integrity checking of lzip may not be able to detect the corruption before -the metadata has been used, for example, to create a new file in the wrong +the metadata have been used, for example, to create a new file in the wrong place. Because of the above, tarlz protects the extended records with a Cyclic @@ -843,11 +858,11 @@ to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended header field values that allow such tar to create a regular file containing the extended header records as data. This approach is broken because if the extended header is needed because of a long file name, the fields 'name' -and 'prefix' will be unable to contain the full file name. 
(Some tar +and 'prefix' are unable to contain the full file name. (Some tar implementations store the truncated name in the field 'name' alone, truncating the name to only 100 bytes instead of 256). Therefore the files corresponding to both the extended header and the overridden ustar header -will be extracted using truncated file names, perhaps overwriting existing +are extracted using truncated file names, perhaps overwriting existing files or directories. It may be a security risk to extract a file with a truncated file name. @@ -915,7 +930,7 @@ There is no portable way to tell what charset a text string is coded into. Therefore, tarlz stores all fields representing text strings unmodified, without conversion to UTF-8 nor any other transformation. This prevents accidental double UTF-8 conversions. If the need arises this behavior will -be adjusted with a command line option in the future. +be adjusted with a command-line option in the future. File: tarlz.info, Node: Program design, Next: Multi-threaded decoding, Prev: Amendments to pax format, Up: Top @@ -1098,11 +1113,11 @@ multimember compressed archive. For this to work as expected (and roughly multiply the compression speed by the number of available processors), the uncompressed archive must be at least as large as the number of worker threads times the block size (*note ---data-size::). Else some processors will not get any data to compress, and -compression will be proportionally slower. The maximum speed increase -achievable on a given archive is limited by the ratio -(uncompressed_size / data_size). For example, a tarball the size of gcc or -linux will scale up to 10 or 14 processors at level -9. +--data-size::). Else some processors do not get any data to compress, and +compression is proportionally slower. The maximum speed increase achievable +on a given archive is limited by the ratio (uncompressed_size / data_size). 
+For example, a tarball the size of gcc or linux scales up to 10 or 14 +processors at level -9. The following table shows the minimum uncompressed archive size needed for full use of N processors at a given compression level, using the default @@ -1244,25 +1259,25 @@ Concept index Tag Table: Node: Top216 -Node: Introduction1210 -Node: Invoking tarlz4029 -Ref: --data-size12880 -Ref: --bsolid17192 -Node: Portable character set22788 -Node: File format23431 -Ref: key_crc3230188 -Ref: ustar-uid-gid33452 -Ref: ustar-mtime34254 -Node: Amendments to pax format36254 -Ref: crc3236963 -Ref: flawed-compat38274 -Node: Program design42364 -Node: Multi-threaded decoding46289 -Ref: mt-extraction49570 -Node: Minimum archive sizes50876 -Node: Examples53014 -Node: Problems55381 -Node: Concept index55936 +Node: Introduction1207 +Node: Invoking tarlz4032 +Ref: --data-size13076 +Ref: --bsolid17512 +Node: Portable character set23425 +Node: File format24068 +Ref: key_crc3231050 +Ref: ustar-uid-gid34315 +Ref: ustar-mtime35122 +Node: Amendments to pax format37125 +Ref: crc3237834 +Ref: flawed-compat39146 +Node: Program design43228 +Node: Multi-threaded decoding47153 +Ref: mt-extraction50434 +Node: Minimum archive sizes51740 +Node: Examples53867 +Node: Problems56234 +Node: Concept index56789 End Tag Table diff --git a/doc/tarlz.texi b/doc/tarlz.texi index 5bdd2af..f37164f 100644 --- a/doc/tarlz.texi +++ b/doc/tarlz.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 23 September 2022 -@set VERSION 0.23 +@set UPDATED 3 January 2024 +@set VERSION 0.25 @dircategory Archiving @direntry @@ -37,7 +37,7 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}). 
@menu * Introduction:: Purpose and features of tarlz -* Invoking tarlz:: Command line interface +* Invoking tarlz:: Command-line interface * Portable character set:: POSIX portable filename character set * File format:: Detailed format of the compressed archive * Amendments to pax format:: The reasons for the differences with pax @@ -50,7 +50,7 @@ This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2013-2022 Antonio Diaz Diaz. +Copyright @copyright{} 2013-2024 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -68,7 +68,7 @@ compression library @uref{http://www.nongnu.org/lzip/lzlib.html,,lzlib}. Tarlz creates tar archives using a simplified and safer variant of the POSIX pax format compressed in lzip format, keeping the alignment between tar -members and lzip members. The resulting multimember tar.lz archive is fully +members and lzip members. The resulting multimember tar.lz archive is backward compatible with standard tar tools like GNU tar, which treat it like any other tar.lz archive. Tarlz can append files to the end of such compressed archives. @@ -81,9 +81,9 @@ plzip may even double the amount of files lost for each lzip member damaged because it does not keep the members aligned. Tarlz can create tar archives with five levels of compression granularity: -per file (---no-solid), per block (---bsolid, default), per directory -(---dsolid), appendable solid (---asolid), and solid (---solid). It can also -create uncompressed tar archives. +per file (@option{--no-solid}), per block (@option{--bsolid}, default), per +directory (@option{--dsolid}), appendable solid (@option{--asolid}), and +solid (@option{--solid}). It can also create uncompressed tar archives. @noindent Of course, compressing each file (or each directory) individually can't @@ -104,7 +104,7 @@ archive. Just like an uncompressed tar archive. 
It is a safe POSIX-style backup format. In case of corruption, tarlz can extract all the undamaged members from the tar.lz archive, skipping over the damaged members, just like the standard -(uncompressed) tar. Moreover, the option @samp{--keep-damaged} can be used +(uncompressed) tar. Moreover, the option @option{--keep-damaged} can be used to recover as much data as possible from each damaged member, and lziprecover can be used to recover some of the damaged members. @@ -118,8 +118,8 @@ Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in a way compatible with standard tar tools. @xref{crc32}. Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu}, -@samp{star} or @samp{v7}. The command -@w{@samp{tarlz -tf archive.tar.lz > /dev/null}} can be used to verify that +@samp{star}, or @samp{v7}. The command +@w{@samp{tarlz -t -f archive.tar.lz > /dev/null}} can be used to check that the format of the archive is compatible with tarlz. @@ -137,9 +137,9 @@ tarlz @var{operation} [@var{options}] [@var{files}] @end example @noindent -All operations except @samp{--concatenate} and @samp{--compress} operate on -whole trees if any @var{file} is a directory. All operations except -@samp{--compress} overwrite output files without warning. If no archive is +All operations except @option{--concatenate} and @option{--compress} operate +on whole trees if any @var{file} is a directory. All operations except +@option{--compress} overwrite output files without warning. If no archive is specified, tarlz tries to read it from standard input or write it to standard output. Tarlz refuses to read archive data from a terminal or write archive data to a terminal. Tarlz detects when the archive being created or @@ -147,7 +147,7 @@ enlarged is among the files to be archived, appended, or concatenated, and skips it. Tarlz does not use absolute file names nor file names above the current -working directory (perhaps changed by option @samp{-C}). 
On archive creation +working directory (perhaps changed by option @option{-C}). On archive creation or appending tarlz archives the files specified, but removes from member names any leading and trailing slashes and any file name prefixes containing a @samp{..} component. On extraction, leading and trailing slashes are also @@ -161,9 +161,9 @@ member names in the archive or given in the command line, so that @w{@samp{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and @samp{./baz} from archive @samp{foo}. -If several compression levels or @samp{--*solid} options are given, the last -setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is -equivalent to @w{@samp{-1 --solid}}. +If several compression levels or @option{--*solid} options are given, the last +setting is used. For example @w{@option{-9 --solid --uncompressed -1}} is +equivalent to @w{@option{-1 --solid}}. tarlz supports the following operations: @@ -179,7 +179,7 @@ This version number should be included in all bug reports. @item -A @itemx --concatenate Append one or more archives to the end of an archive. If no archive is -specified with the option @samp{-f}, the input archives are concatenated to +specified with the option @option{-f}, concatenate the input archives to standard output. All the archives involved must be regular (seekable) files, and must be either all compressed or all uncompressed. Compressed and uncompressed archives can't be mixed. Compressed archives must be @@ -202,23 +202,23 @@ Create a new archive from @var{files}. @item -d @itemx --diff Compare and report differences between archive and file system. For each tar -member in the archive, verify that the corresponding file in the file system +member in the archive, check that the corresponding file in the file system exists and is of the same type (regular file, directory, etc). 
Report on standard output the differences found in type, mode (permissions), owner and group IDs, modification time, file size, file contents (of regular files), target (of symlinks) and device number (of block/character special files). -As tarlz removes leading slashes from member names, the option @samp{-C} may -be used in combination with @samp{--diff} when absolute file names were used +As tarlz removes leading slashes from member names, the option @option{-C} may +be used in combination with @option{--diff} when absolute file names were used on archive creation: @w{@samp{tarlz -C / -d}}. Alternatively, tarlz may be run from the root directory to perform the comparison. @item --delete Delete files and directories from an archive in place. It currently can delete only from uncompressed archives and from archives with files -compressed individually (@samp{--no-solid} archives). Note that files of -about @samp{--data-size} or larger are compressed individually even if -@samp{--bsolid} is used, and can therefore be deleted. Tarlz takes care to +compressed individually (@option{--no-solid} archives). Note that files of +about @option{--data-size} or larger are compressed individually even if +@option{--bsolid} is used, and can therefore be deleted. Tarlz takes care to not delete a tar member unless it is possible to do so. For example it won't try to delete a tar member that is not compressed individually. Even in the case of finding a corrupt member after having deleted some member(s), tarlz @@ -261,32 +261,36 @@ Extract files from an archive. If @var{files} are given, extract only the directory without extracting the files under it, use @w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz removes files and empty directories unconditionally before extracting over them. Other than -that, it will not make any special effort to extract a file over an +that, it does not make any special effort to extract a file over an incompatible type of file. 
For example, extracting a file over a non-empty -directory will usually fail. +directory usually fails. @item -z @itemx --compress Compress existing POSIX tar archives aligning the lzip members to the tar -members with choice of granularity (---bsolid by default, ---dsolid works -like ---asolid). The input archives are kept unchanged. Existing compressed -archives are not overwritten. A hyphen @samp{-} used as the name of an input -archive reads from standard input and writes to standard output (unless the -option @samp{--output} is used). Tarlz can be used as compressor for GNU tar -using a command like @w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. -Note that tarlz only works reliably on archives without global headers, or -with global headers whose content can be ignored. +members with choice of granularity (@option{--bsolid} by default, +@option{--dsolid} works like @option{--asolid}). Exit with error status 2 if +any input archive is an empty file. The input archives are kept unchanged. +Existing compressed archives are not overwritten. A hyphen @samp{-} used as +the name of an input archive reads from standard input and writes to +standard output (unless the option @option{--output} is used). Tarlz can be +used as compressor for GNU tar by using a command like +@w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. Tarlz can be used as +compressor for zupdate (zutils) by using a command like +@w{@samp{zupdate --lz="tarlz -z" foo.tar.gz}}. Note that tarlz only works +reliably on archives without global headers, or with global headers whose +content can be ignored. The compression is reversible, including any garbage present after the end-of-archive blocks. Tarlz stops parsing after the first end-of-archive block is found, and then compresses the rest of the archive. 
Unless solid compression is requested, the end-of-archive blocks are compressed in a lzip member separated from the preceding members and from any non-zero garbage -following the end-of-archive blocks. @samp{--compress} implies plzip +following the end-of-archive blocks. @option{--compress} implies plzip argument style, not tar style. Each input archive is compressed to a file -with the extension @samp{.lz} added unless the option @samp{--output} is -used. When @samp{--output} is used, only one input archive can be specified. -@samp{-f} can't be used with @samp{--compress}. +with the extension @samp{.lz} added unless the option @option{--output} is +used. When @option{--output} is used, only one input archive can be specified. +@option{-f} can't be used with @option{--compress}. @item --check-lib Compare the @@ -314,25 +318,25 @@ tarlz supports the following @anchor{--data-size} @item -B @var{bytes} @itemx --data-size=@var{bytes} -Set target size of input data blocks for the option @samp{--bsolid}. +Set target size of input data blocks for the option @option{--bsolid}. @xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default -value is two times the dictionary size, except for option @samp{-0} where it +value is two times the dictionary size, except for option @option{-0} where it defaults to @w{1 MiB}. @xref{Minimum archive sizes}. @item -C @var{dir} @itemx --directory=@var{dir} -Change to directory @var{dir}. When creating or appending, the position of -each @samp{-C} option in the command line is significant; it will change the -current working directory for the following @var{files} until a new -@samp{-C} option appears in the command line. When extracting or comparing, -all the @samp{-C} options are executed in sequence before reading the -archive. Listing ignores any @samp{-C} options specified. @var{dir} is -relative to the then current working directory, perhaps changed by a -previous @samp{-C} option. +Change to directory @var{dir}. 
When creating, appending, comparing, or +extracting, the position of each @option{-C} option in the command line is +significant; it changes the current working directory for the following +@var{files} until a new @option{-C} option appears in the command line. +@option{--list} and @option{--delete} ignore any @option{-C} options +specified. @var{dir} is relative to the then current working directory, +perhaps changed by a previous @option{-C} option. Note that a process can only have one current working directory (CWD). -Therefore multi-threading can't be used to create an archive if a @samp{-C} -option appears after a relative file name in the command line. +Therefore multi-threading can't be used to create or decode an archive if a +@option{-C} option appears after a (relative) file name in the command line. +(All file names are made relative when decoding). @item -f @var{archive} @itemx --file=@var{archive} @@ -351,7 +355,7 @@ Valid values range from 0 to "as many as your system can support". A value of 0 disables threads entirely. If this option is not used, tarlz tries to detect the number of processors in the system and use it as default value. @w{@samp{tarlz --help}} shows the system's default value. See the note about -multi-threaded archive creation in the option @samp{-C} above. +multi-threading in the option @option{-C} above. Note that the number of usable threads is limited during compression to @w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}), @@ -360,9 +364,9 @@ archive, which you can find by running @w{@samp{lzip -lv archive.tar.lz}}. @item -o @var{file} @itemx --output=@var{file} -Write the compressed output to @var{file}. @w{@samp{-o -}} writes the -compressed output to standard output. Currently @samp{--output} only works -with @samp{--compress}. +Write the compressed output to @var{file}. @w{@option{-o -}} writes the +compressed output to standard output. Currently @option{--output} only works +with @option{--compress}. 
@item -p @itemx --preserve-permissions @@ -381,8 +385,8 @@ Verbosely list files processed. Further -v's (up to 4) increase the verbosity level. @item -0 .. -9 -Set the compression level for @samp{--create}, @samp{--append}, and -@samp{--compress}. The default compression level is @samp{-6}. Like lzip, +Set the compression level for @option{--create}, @option{--append}, and +@option{--compress}. The default compression level is @option{-6}. Like lzip, tarlz also minimizes the dictionary size of the lzip members it creates, reducing the amount of memory required for decompression. @@ -401,10 +405,12 @@ reducing the amount of memory required for decompression. @end multitable @item --uncompressed -With @samp{--create}, don't compress the tar archive created. Create an -uncompressed tar archive instead. With @samp{--append}, don't compress the +With @option{--create}, don't compress the tar archive created. Create an +uncompressed tar archive instead. With @option{--append}, don't compress the new members appended to the tar archive. Compressed members can't be -appended to an uncompressed archive, nor vice versa. +appended to an uncompressed archive, nor vice versa. @option{--uncompressed} +can be omitted if it can be deduced from the archive name. (An uncompressed +archive name lacks a @samp{.lz} or @samp{.tlz} extension). @item --asolid When creating or appending to a compressed archive, use appendable solid @@ -447,7 +453,7 @@ appendable. No more files can be later appended to the archive. Solid archives can't be created nor decoded in parallel. @item --anonymous -Equivalent to @w{@samp{--owner=root --group=root}}. +Equivalent to @w{@option{--owner=root --group=root}}. @item --owner=@var{owner} When creating or appending, use @var{owner} for files added to the archive. @@ -465,27 +471,34 @@ to match if any component of the file name matches. For example, @samp{*.o} matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}. 
If @var{pattern} contains a @samp{/}, it matches a corresponding @samp{/} in the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}. -Multiple @samp{--exclude} options can be specified. +Multiple @option{--exclude} options can be specified. @item --ignore-ids -Make @samp{--diff} ignore differences in owner and group IDs. This option is -useful when comparing an @samp{--anonymous} archive. +Make @option{--diff} ignore differences in owner and group IDs. This option is +useful when comparing an @option{--anonymous} archive. + +@item --ignore-metadata +Make @option{--diff} ignore any differences in metadata (file permissions, +owner and group IDs, modification time). Compare only file type, file size, +and file content. This option is useful when file permissions have not been +fully restored because uid/gid changed on extraction. @item --ignore-overflow -Make @samp{--diff} ignore differences in mtime caused by overflow on 32-bit +Make @option{--diff} ignore differences in mtime caused by overflow on 32-bit systems with a 32-bit time_t. @item --keep-damaged Don't delete partially extracted files. If a decompression error happens while extracting a file, keep the partial data extracted. Use this option to recover as much data as possible from each damaged member. It is recommended -to run tarlz in single-threaded mode (---threads=0) when using this option. +to run tarlz in single-threaded mode (@option{--threads=0}) when using this +option. @item --missing-crc Exit with error status 2 if the CRC of the extended records is missing. When this option is used, tarlz detects any corruption in the extended records (only limited by CRC collisions). But note that a corrupt @samp{GNU.crc32} -keyword, for example @samp{GNU.crc33}, is reported as a missing CRC instead +keyword, for example @samp{GNU.crc30}, is reported as a missing CRC instead of as a corrupt record. 
This misleading @w{@samp{Missing CRC}} message is the consequence of a flaw in the POSIX pax format; i.e., the lack of a mandatory check sequence of the extended records. @xref{crc32}. @@ -527,7 +540,7 @@ keyword appearing in the same block of extended records. @end table Exit status: 0 for a normal exit, 1 for environmental problems -(file not found, files differ, invalid command line options, I/O errors, +(file not found, files differ, invalid command-line options, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (e.g., bug) which caused tarlz to panic. @@ -575,7 +588,7 @@ represents a variable number of bytes or a fixed but large number of bytes (for example 512). @sp 1 -A tar.lz file consists of a series of lzip members (compressed data sets). +A tar.lz file consists of one or more lzip members (compressed data sets). The members simply appear one after another in the file, with no additional information before, between, or after them. @@ -606,7 +619,7 @@ Zero or more blocks that contain the contents of the file. @end itemize Each tar member must be contiguously stored in a lzip member for the -parallel decoding operations like @samp{--list} to work. If any tar member +parallel decoding operations like @option{--list} to work. If any tar member is split over two or more lzip members, the archive must be decoded sequentially. @xref{Multi-threaded decoding}. @@ -615,7 +628,7 @@ binary zeros, interpreted as an end-of-archive indicator. These EOA blocks are either compressed in a separate lzip member or compressed along with the tar members contained in the last lzip member. For a compressed archive to be recognized by tarlz as appendable, the last lzip member must contain -between 512 and 32256 zeros alone. +between 512 and 32256 zeros alone (without any non-zero bytes). 
The diagram below shows the correspondence between each tar member (formed by one or two headers plus optional data) in the tar archive and each @@ -639,7 +652,7 @@ tar.lz @end verbatim @ignore -When @samp{--permissive} is used, the following violations of the +When @option{--permissive} is used, the following violations of the archive format are allowed:@* If several extended headers precede an ustar header, only the last extended header takes effect. The other extended headers are ignored. @@ -660,6 +673,10 @@ fields in the pax header block are zeroed on archive creation to prevent trouble if the archive is read by an ustar tool, and are ignored by tarlz on archive extraction. @xref{flawed-compat}. +Tarlz limits the size of the pax extended header data so that the whole +header set (extended header + extended data + ustar header) can be read and +decoded in a buffer of size INT_MAX. + The pax extended header data consists of one or more records, each of them constructed as follows:@* @w{@samp{"%d %s=%s\n", <length>, <keyword>, <value>}} @@ -683,17 +700,17 @@ time outside of the ustar range. @xref{ustar-mtime}. @item gid The unsigned decimal representation of the group ID of the group that owns the following file. The gid record is created only for files with a group ID -greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. +greater than 2_097_151 @w{(octal 7_777_777)}. @xref{ustar-uid-gid}. @item linkpath The file name of a link being created to another file, of any type, previously archived. This record overrides the field @samp{linkname} in the following ustar header block. The following ustar header block determines -the type of link created. If typeflag of the following header block is 1, it -will be a hard link. If typeflag is 2, it will be a symbolic link and the -linkpath value will be used as the contents of the symbolic link. 
The -linkpath record is created only for links with a link name that does not fit -in the space provided by the ustar header. +the type of link created. If typeflag of the following header block is 1, a +hard link is created. If typeflag is 2, a symbolic link is created and the +linkpath value is used as the contents of the symbolic link. The linkpath +record is created only for links with a link name that does not fit in the +space provided by the ustar header. @item mtime The signed decimal representation of the modification time of the following @@ -715,12 +732,12 @@ The size of the file in bytes, expressed as a decimal number using digits from the ISO/IEC 646:1991 (ASCII) standard. This record overrides the field @samp{size} in the following ustar header block. The size record is created only for files with a size value greater than 8_589_934_591 -@w{(octal 77777777777)}; that is, @w{8 GiB} (2^33 bytes) or larger. +@w{(octal 77_777_777_777)}; that is, @w{8 GiB} (2^33 bytes) or larger. @item uid The unsigned decimal representation of the user ID of the file owner of the following file. The uid record is created only for files with a user ID -greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. +greater than 2_097_151 @w{(octal 7_777_777)}. @xref{ustar-uid-gid}. @anchor{key_crc32} @item GNU.crc32 @@ -728,8 +745,8 @@ CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes representing the CRC <value> itself. The <value> is represented as 8 hexadecimal digits in big endian order, @w{@samp{22 GNU.crc32=00000000\n}}. The keyword of the CRC record is -protected by the CRC to guarante that corruption is always detected when -using @samp{--missing-crc} (except in case of CRC collision). A CRC was +protected by the CRC to guarantee that corruption is always detected when +using @option{--missing-crc} (except in case of CRC collision). A CRC was chosen because a checksum is too weak for a potentially large list of variable sized records. 
A checksum can't detect simple errors like the swapping of two bytes. @@ -804,7 +821,8 @@ table shows the symbolic name of each bit and its octal value: @anchor{ustar-uid-gid} The fields @samp{uid} and @samp{gid} are the user and group IDs of the owner and group of the file, respectively. If the file uid or gid are greater than -2_097_151 (octal 7777777), an extended record is used to store the uid or gid. +2_097_151 @w{(octal 7_777_777)}, an extended record is used to store the uid +or gid. The field @samp{size} contains the octal representation of the size of the file in bytes. If the field @samp{typeflag} specifies a file of type '0' @@ -813,13 +831,13 @@ records following the header is @w{(size / 512)} rounded to the next integer. For all other values of typeflag, tarlz either sets the size field to 0 or ignores it, and does not store or expect any logical records following the header. If the file size is larger than 8_589_934_591 bytes -@w{(octal 77777777777)}, an extended record is used to store the file size. +@w{(octal 77_777_777_777)}, an extended record is used to store the file size. @anchor{ustar-mtime} The field @samp{mtime} contains the octal representation of the modification time of the file at the time it was archived, obtained from the function @samp{stat}. If the modification time is negative or larger than -8_589_934_591 @w{(octal 77777777777)} seconds since the epoch, an extended +8_589_934_591 @w{(octal 77_777_777_777)} seconds since the epoch, an extended record is used to store the modification time. The ustar range of mtime goes from @w{@samp{1970-01-01 00:00:00 UTC}} to @w{@samp{2242-03-16 12:56:31 UTC}}. @@ -878,7 +896,7 @@ character. Tarlz creates safe archives that allow the reliable detection of invalid or corrupt metadata during decoding even when the integrity checking of lzip can't be used because the lzip members are only decompressed partially, as -it happens in parallel @samp{--diff}, @samp{--list}, and @samp{--extract}. 
+it happens in parallel @option{--diff}, @option{--list}, and @option{--extract}. In order to achieve this goal and avoid some other flaws in the pax format, tarlz makes some changes to the variant of the pax format that it uses. This chapter describes these changes and the concrete reasons to implement them. @@ -903,7 +921,7 @@ large, making undetected corruption and archiver misbehavior more probable. Headers and metadata must be protected separately from data because the integrity checking of lzip may not be able to detect the corruption before -the metadata has been used, for example, to create a new file in the wrong +the metadata have been used, for example, to create a new file in the wrong place. Because of the above, tarlz protects the extended records with a Cyclic @@ -919,11 +937,11 @@ to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended header field values that allow such tar to create a regular file containing the extended header records as data. This approach is broken because if the extended header is needed because of a long file name, the fields -@samp{name} and @samp{prefix} will be unable to contain the full file name. +@samp{name} and @samp{prefix} are unable to contain the full file name. (Some tar implementations store the truncated name in the field @samp{name} alone, truncating the name to only 100 bytes instead of 256). Therefore the files corresponding to both the extended header and the overridden ustar -header will be extracted using truncated file names, perhaps overwriting +header are extracted using truncated file names, perhaps overwriting existing files or directories. It may be a security risk to extract a file with a truncated file name. @@ -988,7 +1006,7 @@ There is no portable way to tell what charset a text string is coded into. Therefore, tarlz stores all fields representing text strings unmodified, without conversion to UTF-8 nor any other transformation. This prevents accidental double UTF-8 conversions. 
If the need arises this behavior will -be adjusted with a command line option in the future. +be adjusted with a command-line option in the future. @node Program design @@ -1117,9 +1135,9 @@ tar.lz archives, keeping backwards compatibility. If tarlz finds a member misalignment during multi-threaded decoding, it switches to single-threaded mode and continues decoding the archive. -If the files in the archive are large, multi-threaded @samp{--list} on a +If the files in the archive are large, multi-threaded @option{--list} on a regular (seekable) tar.lz archive can be hundreds of times faster than -sequential @samp{--list} because, in addition to using several processors, +sequential @option{--list} because, in addition to using several processors, it only needs to decompress part of each lzip member. See the following example listing the Silesia corpus on a dual core machine: @@ -1130,7 +1148,7 @@ time plzip -cd silesia.tar.lz | tar -tf - (3.256s) time tarlz -tf silesia.tar.lz (0.020s) @end example -On the other hand, multi-threaded @samp{--list} won't detect corruption in +On the other hand, multi-threaded @option{--list} won't detect corruption in the tar member data because it only decodes the part of each lzip member corresponding to the tar member header. This is another reason why the tar headers must provide their own integrity checking. @@ -1176,11 +1194,11 @@ multimember compressed archive. For this to work as expected (and roughly multiply the compression speed by the number of available processors), the uncompressed archive must be at least as large as the number of worker threads times the block size -(@pxref{--data-size}). Else some processors will not get any data to -compress, and compression will be proportionally slower. The maximum speed -increase achievable on a given archive is limited by the ratio +(@pxref{--data-size}). Else some processors do not get any data to compress, +and compression is proportionally slower. 
The maximum speed increase +achievable on a given archive is limited by the ratio @w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc -or linux will scale up to 10 or 14 processors at level -9. +or linux scales up to 10 or 14 processors at level -9. The following table shows the minimum uncompressed archive size needed for full use of N processors at a given compression level, using the default @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,7 +17,6 @@ #define _FILE_OFFSET_BITS 64 -#include <cstdlib> #include <fnmatch.h> #include "tarlz.h" diff --git a/extended.cc b/extended.cc index f05d15f..0dfba9b 100644 --- a/extended.cc +++ b/extended.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,7 +20,6 @@ #include <cctype> #include <cerrno> #include <cstdio> -#include <cstdlib> #include "tarlz.h" @@ -30,34 +29,31 @@ const CRC32 crc32c( true ); namespace { -unsigned long long record_size( const unsigned keyword_size, - const unsigned long value_size ) +unsigned record_size( const unsigned keyword_size, const unsigned value_size ) { /* length + ' ' + keyword + '=' + value + '\n' minimize length; prefer "99<97_bytes>" to "100<97_bytes>" */ - unsigned long long size = 1 + keyword_size + 1 + value_size + 1; + unsigned size = 1 + keyword_size + 1 + value_size + 1; size += decimal_digits( decimal_digits( size ) + size ); return size; } -unsigned long long parse_decimal( const char * const ptr, - const char ** const tailp, - const unsigned long long size ) +long long parse_decimal( const char * const ptr, const char ** const tailp, + const int size, const unsigned long long limit = LLONG_MAX ) { unsigned long long result = 0; - unsigned long long i = 0; + int i = 0; while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i; - if( !std::isdigit( (unsigned char)ptr[i] ) ) - { if( tailp ) *tailp = ptr; return 0; } + if( !std::isdigit( (unsigned char)ptr[i] ) ) { *tailp = ptr; return -1; } for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i ) { const unsigned long long prev = result; result *= 10; result += ptr[i] - '0'; - if( result < prev || result > LLONG_MAX ) // overflow - { if( tailp ) *tailp = ptr; return 0; } + if( result < prev || result > limit || result > LLONG_MAX ) // overflow + { *tailp = ptr; return -1; } } - if( tailp ) *tailp = ptr + i; + *tailp = ptr + i; return result; } @@ -77,7 +73,7 @@ uint32_t parse_record_crc( const char * const ptr ) } -unsigned char xdigit( const unsigned value ) +unsigned char xdigit( const unsigned value ) // hex digit for 'value' { if( value <= 9 ) return '0' + 
value; if( value <= 15 ) return 'A' + value - 10; @@ -90,27 +86,26 @@ void print_hex( char * const buf, int size, unsigned long long num ) void print_decimal( char * const buf, int size, unsigned long long num ) { while( --size >= 0 ) { buf[size] = num % 10 + '0'; num /= 10; } } -unsigned long long print_size_keyword( char * const buf, - const unsigned long long size, const char * keyword ) +int print_size_keyword( char * const buf, const int size, const char * keyword ) { // "size keyword=value\n" - unsigned long long pos = decimal_digits( size ); + int pos = decimal_digits( size ); print_decimal( buf, pos, size ); buf[pos++] = ' '; while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '='; return pos; } -bool print_record( char * const buf, const unsigned long long size, - const char * keyword, const std::string & value ) +bool print_record( char * const buf, const int size, const char * keyword, + const std::string & value ) { - unsigned long long pos = print_size_keyword( buf, size, keyword ); + int pos = print_size_keyword( buf, size, keyword ); std::memcpy( buf + pos, value.c_str(), value.size() ); pos += value.size(); buf[pos++] = '\n'; return pos == size; } -bool print_record( char * const buf, const int size, - const char * keyword, const unsigned long long value ) +bool print_record( char * const buf, const int size, const char * keyword, + const unsigned long long value ) { int pos = print_size_keyword( buf, size, keyword ); const int vd = decimal_digits( value ); @@ -118,8 +113,8 @@ bool print_record( char * const buf, const int size, return pos == size; } -bool print_record( char * const buf, const int size, - const char * keyword, const Etime & value ) +bool print_record( char * const buf, const int size, const char * keyword, + const Etime & value ) { int pos = print_size_keyword( buf, size, keyword ); pos += value.print( buf + pos ); buf[pos++] = '\n'; @@ -154,12 +149,12 @@ unsigned Etime::print( char * const buf ) const } bool 
Etime::parse( const char * const ptr, const char ** const tailp, - const long long size ) + const int size ) { char * tail; errno = 0; long long s = strtoll( ptr, &tail, 10 ); - if( tail == ptr || errno || + if( tail == ptr || tail - ptr > size || errno || ( *tail != 0 && *tail != '\n' && *tail != '.' ) ) return false; int ns = 0; if( *tail == '.' ) // parse nanoseconds and any extra digits @@ -182,6 +177,8 @@ const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); void Extended::calculate_sizes() const { + if( linkpath_.size() > max_edata_size || path_.size() > max_edata_size ) + { full_size_ = -3; return; } linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0; path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; file_size_recsize_ = @@ -192,19 +189,21 @@ void Extended::calculate_sizes() const atime_.out_of_ustar_range() ? record_size( 5, atime_.decimal_size() ) : 0; mtime_recsize_ = mtime_.out_of_ustar_range() ? record_size( 5, mtime_.decimal_size() ) : 0; - edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + - uid_recsize_ + gid_recsize_ + atime_recsize_ + mtime_recsize_ + - crc_record.size(); + const long long tmp = linkpath_recsize_ + path_recsize_ + + file_size_recsize_ + uid_recsize_ + gid_recsize_ + + atime_recsize_ + mtime_recsize_ + crc_record.size(); + if( tmp > max_edata_size ) { full_size_ = -3; return; } + edsize_ = tmp; padded_edsize_ = round_up( edsize_ ); + if( padded_edsize_ > max_edata_size ) { full_size_ = -3; return; } full_size_ = header_size + padded_edsize_; } // print a diagnostic for each unknown keyword once per keyword -void Extended::unknown_keyword( const char * const buf, - const unsigned long long size ) const +void Extended::unknown_keyword( const char * const buf, const int size ) const { - unsigned long long eq_pos = 0; // position of '=' in buf + int eq_pos = 0; // position of '=' in buf while( eq_pos < size && buf[eq_pos] != '=' ) ++eq_pos; const std::string 
keyword( buf, eq_pos ); for( unsigned i = 0; i < unknown_keywords.size(); ++i ) @@ -215,13 +214,12 @@ void Extended::unknown_keyword( const char * const buf, } -// Return the size of the extended block, -1 if error, -2 if out of memory. -long long Extended::format_block( Resizable_buffer & rbuf ) const +/* Return the size of the extended block, or 0 if empty. + Return -1 if error, -2 if out of memory, -3 if block too long. */ +int Extended::format_block( Resizable_buffer & rbuf ) const { - if( empty() ) return 0; // no extended data - const unsigned long long bufsize = full_size(); // recalculate sizes - if( edsize_ <= 0 ) return 0; // no extended data - if( edsize_ >= 1LL << 33 ) return -1; // too much extended data + const int bufsize = full_size(); // recalculate sizes if needed + if( bufsize <= 0 ) return bufsize; // error or no extended data if( !rbuf.resize( bufsize ) ) return -2; // extended block buffer uint8_t * const header = rbuf.u8(); // extended header char * const buf = rbuf() + header_size; // extended records @@ -232,7 +230,7 @@ long long Extended::format_block( Resizable_buffer & rbuf ) const if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) ) return -1; - long long pos = path_recsize_; + int pos = path_recsize_; if( linkpath_recsize_ && !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) ) return -1; @@ -268,24 +266,37 @@ long long Extended::format_block( Resizable_buffer & rbuf ) const } -bool Extended::parse( const char * const buf, const unsigned long long edsize, +const char * Extended::full_size_error() const + { + const char * const eferec_msg = "Error formatting extended records."; + switch( full_size_ ) + { + case -1: return eferec_msg; + case -2: return mem_msg2; + case -3: return longrec_msg; + default: internal_error( "invalid call to full_size_error." 
); + return 0; // keep compiler quiet + } + } + + +bool Extended::parse( const char * const buf, const int edsize, const bool permissive ) { - reset(); full_size_ = -1; // invalidate cached sizes - for( unsigned long long pos = 0; pos < edsize; ) // parse records + reset(); full_size_ = -4; // invalidate cached sizes + for( int pos = 0; pos < edsize; ) // parse records { const char * tail; - const unsigned long long rsize = - parse_decimal( buf + pos, &tail, edsize - pos ); - if( rsize == 0 || rsize > edsize - pos || - tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false; + const int rsize = + parse_decimal( buf + pos, &tail, edsize - pos, edsize - pos ); + if( rsize <= 0 || tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false; ++tail; // point to keyword // rest = length of (keyword + '=' + value) without the final newline - const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail; + const int rest = ( buf + ( pos + rsize - 1 ) ) - tail; if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 ) { if( path_.size() && !permissive ) return false; - unsigned long long len = rest - 5; + int len = rest - 5; while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/' path_.assign( tail + 5, len ); // this also truncates path_ at the first embedded null character @@ -294,30 +305,30 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) { if( linkpath_.size() && !permissive ) return false; - unsigned long long len = rest - 9; + int len = rest - 9; while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/' linkpath_.assign( tail + 9, len ); } else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) { if( file_size_ != 0 && !permissive ) return false; - file_size_ = parse_decimal( tail + 5, &tail, rest - 5 ); - // parse error or size fits in ustar header - if( file_size_ < 1LL << 33 || file_size_ > max_file_size || - tail != buf + ( pos + rsize - 1 ) ) 
return false; + file_size_ = parse_decimal( tail + 5, &tail, rest - 5, max_file_size ); + // overflow, parse error, or size fits in ustar header + if( file_size_ < 1LL << 33 || tail != buf + ( pos + rsize - 1 ) ) + return false; } else if( rest > 4 && std::memcmp( tail, "uid=", 4 ) == 0 ) { if( uid_ >= 0 && !permissive ) return false; uid_ = parse_decimal( tail + 4, &tail, rest - 4 ); - // parse error or uid fits in ustar header + // overflow, parse error, or uid fits in ustar header if( uid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 4 && std::memcmp( tail, "gid=", 4 ) == 0 ) { if( gid_ >= 0 && !permissive ) return false; gid_ = parse_decimal( tail + 4, &tail, rest - 4 ); - // parse error or gid fits in ustar header + // overflow, parse error, or gid fits in ustar header if( gid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; } else if( rest > 6 && std::memcmp( tail, "atime=", 6 ) == 0 ) @@ -335,7 +346,7 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) { if( crc_present_ && !permissive ) return false; - if( rsize != crc_record.size() ) return false; + if( rsize != (int)crc_record.size() ) return false; crc_present_ = true; const uint32_t stored_crc = parse_record_crc( tail + 10 ); const uint32_t computed_crc = @@ -343,7 +354,7 @@ bool Extended::parse( const char * const buf, const unsigned long long edsize, if( stored_crc != computed_crc ) { if( verbosity >= 2 ) - std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc ); + std::fprintf( stderr, "CRC32-C = %08X\n", (unsigned)computed_crc ); return false; } } @@ -367,7 +378,7 @@ void Extended::fill_from_ustar( const Tar_header header ) if( len > 0 ) { linkpath_.assign( (const char *)header + linkname_o, len ); - full_size_ = -1; + full_size_ = -4; } } @@ -399,17 +410,13 @@ void Extended::fill_from_ustar( const Tar_header header ) /* Return file size from 
record or from ustar header, and reset file_size_. - Used for fast parsing of headers in uncompressed archives. -*/ + Used for fast parsing of headers in uncompressed archives. */ long long Extended::get_file_size_and_reset( const Tar_header header ) { const long long tmp = file_size_; file_size( 0 ); // reset full_size_ const Typeflag typeflag = (Typeflag)header[typeflag_o]; - if( typeflag == tf_regular || typeflag == tf_hiperf ) - { - if( tmp == 0 ) return parse_octal( header + size_o, size_l ); - else return tmp; - } - return 0; + if( typeflag != tf_regular && typeflag != tf_hiperf ) return 0; + if( tmp > 0 ) return tmp; + return parse_octal( header + size_o, size_l ); } diff --git a/lzip_index.cc b/lzip_index.cc index b886d2b..bcdc54f 100644 --- a/lzip_index.cc +++ b/lzip_index.cc @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,17 +47,16 @@ const char * bad_version( const unsigned version ) } // end namespace -bool Lzip_index::check_header_error( const Lzip_header & header, - const bool first ) +bool Lzip_index::check_header( const Lzip_header & header, const bool first ) { - if( !header.verify_magic() ) + if( !header.check_magic() ) { error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true; - return true; } - if( !header.verify_version() ) - { error_ = bad_version( header.version() ); retval_ = 2; return true; } + return false; } + if( !header.check_version() ) + { error_ = bad_version( header.version() ); retval_ = 2; return false; } if( !isvalid_ds( header.dictionary_size() ) ) - { error_ = bad_dict_msg; retval_ = 2; return true; } - return false; + { error_ = bad_dict_msg; retval_ = 2; return false; } + return true; } void Lzip_index::set_errno_error( const char * const msg ) @@ -78,16 +77,14 
@@ void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) bool Lzip_index::read_header( const int fd, Lzip_header & header, const long long pos ) { - if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size ) + if( seek_read( fd, header.data, header.size, pos ) != header.size ) { set_errno_error( "Error reading member header: " ); return false; } return true; } // If successful, push last member and set pos to member header. -bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, - const bool ignore_trailing, - const bool loose_trailing ) +bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos ) { if( pos < min_member_size ) return false; enum { block_size = 16384, @@ -108,34 +105,31 @@ bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, if( buffer[i-1] <= max_msb ) // most significant byte of member_size { const Lzip_trailer & trailer = - *(const Lzip_trailer *)( buffer + i - Lzip_trailer::size ); + *(const Lzip_trailer *)( buffer + i - trailer.size ); const unsigned long long member_size = trailer.member_size(); if( member_size == 0 ) // skip trailing zeros - { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; } - if( member_size > ipos + i || !trailer.verify_consistency() ) - continue; + { while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !trailer.check_consistency() ) continue; Lzip_header header; if( !read_header( fd, header, ipos + i - member_size ) ) return false; - if( !header.verify() ) continue; + if( !header.check() ) continue; const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); - const bool full_h2 = bsize - i >= Lzip_header::size; - if( header2.verify_prefix( bsize - i ) ) // last member + const bool full_h2 = bsize - i >= header.size; + if( header2.check_prefix( bsize - i ) ) // last member { if( !full_h2 ) error_ = "Last member in input file is truncated."; - 
else if( !check_header_error( header2, false ) ) + else if( check_header( header2, false ) ) error_ = "Last member in input file is truncated or corrupt."; retval_ = 2; return false; } - if( !loose_trailing && full_h2 && header2.verify_corrupt() ) + if( full_h2 && header2.check_corrupt() ) { error_ = corrupt_mm_msg; retval_ = 2; return false; } - if( !ignore_trailing ) - { error_ = trailing_msg; retval_ = 2; return false; } - pos = ipos + i - member_size; + pos = ipos + i - member_size; // good member const unsigned dictionary_size = header.dictionary_size(); - member_vector.push_back( Member( 0, trailer.data_size(), pos, - member_size, dictionary_size ) ); if( dictionary_size_ < dictionary_size ) dictionary_size_ = dictionary_size; + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); return true; } if( ipos == 0 ) @@ -150,8 +144,7 @@ bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, } -Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, - const bool loose_trailing ) +Lzip_index::Lzip_index( const int infd ) : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ), bad_magic_( false ) { @@ -164,42 +157,38 @@ Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, retval_ = 2; return; } Lzip_header header; - if( !read_header( infd, header, 0 ) ) return; - if( check_header_error( header, true ) ) return; + if( !read_header( infd, header, 0 ) || + !check_header( header, true ) ) return; unsigned long long pos = insize; // always points to a header or to EOF while( pos >= min_member_size ) { Lzip_trailer trailer; - if( seek_read( infd, trailer.data, Lzip_trailer::size, - pos - Lzip_trailer::size ) != Lzip_trailer::size ) + if( seek_read( infd, trailer.data, trailer.size, pos - trailer.size ) != + trailer.size ) { set_errno_error( "Error reading member trailer: " ); break; } const unsigned long long member_size = trailer.member_size(); - if( 
member_size > pos || !trailer.verify_consistency() ) // bad trailer + if( member_size > pos || !trailer.check_consistency() ) // bad trailer { if( member_vector.empty() ) - { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) - continue; else return; } - set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); - break; + { if( skip_trailing_data( infd, pos ) ) continue; return; } + set_num_error( "Bad trailer at pos ", pos - trailer.size ); break; } if( !read_header( infd, header, pos - member_size ) ) break; - if( !header.verify() ) // bad header + if( !header.check() ) // bad header { if( member_vector.empty() ) - { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) - continue; else return; } - set_num_error( "Bad header at pos ", pos - member_size ); - break; + { if( skip_trailing_data( infd, pos ) ) continue; return; } + set_num_error( "Bad header at pos ", pos - member_size ); break; } - pos -= member_size; + pos -= member_size; // good member const unsigned dictionary_size = header.dictionary_size(); - member_vector.push_back( Member( 0, trailer.data_size(), pos, - member_size, dictionary_size ) ); if( dictionary_size_ < dictionary_size ) dictionary_size_ = dictionary_size; + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); } - if( pos != 0 || member_vector.empty() ) + if( pos != 0 || member_vector.empty() || retval_ != 0 ) { member_vector.clear(); if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } diff --git a/lzip_index.h b/lzip_index.h index af8aaa4..822f537 100644 --- a/lzip_index.h +++ b/lzip_index.h @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ class Block { - long long pos_, size_; // pos + size <= INT64_MAX + long long pos_, size_; // pos >= 0, size >= 0, pos + size <= INT64_MAX public: Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} @@ -43,9 +43,11 @@ class Lzip_index Block dblock, mblock; // data block, member block unsigned dictionary_size; - Member( const long long dp, const long long ds, - const long long mp, const long long ms, const unsigned dict_size ) - : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {} + Member( const long long dpos, const long long dsize, + const long long mpos, const long long msize, + const unsigned dict_size ) + : dblock( dpos, dsize ), mblock( mpos, msize ), + dictionary_size( dict_size ) {} }; std::vector< Member > member_vector; @@ -55,16 +57,14 @@ class Lzip_index unsigned dictionary_size_; // largest dictionary size in the file bool bad_magic_; // bad magic in first header - bool check_header_error( const Lzip_header & header, const bool first ); + bool check_header( const Lzip_header & header, const bool first ); void set_errno_error( const char * const msg ); void set_num_error( const char * const msg, unsigned long long num ); bool read_header( const int fd, Lzip_header & header, const long long pos ); - bool skip_trailing_data( const int fd, unsigned long long & pos, - const bool ignore_trailing, const bool loose_trailing ); + bool skip_trailing_data( const int fd, unsigned long long & pos ); public: - Lzip_index( const int infd, const bool ignore_trailing, - const bool loose_trailing ); + Lzip_index( const int infd ); long members() const { return member_vector.size(); } const std::string & error() const { return error_; } @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. 
+ Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,7 +16,7 @@ */ /* Exit status: 0 for a normal exit, 1 for environmental problems - (file not found, files differ, invalid command line options, I/O errors, + (file not found, files differ, invalid command-line options, I/O errors, etc), 2 to indicate a corrupt or invalid input file, 3 for an internal consistency error (e.g., bug) which caused tarlz to panic. */ @@ -27,10 +27,9 @@ #include <cerrno> #include <cstdarg> #include <cstdio> -#include <cstdlib> #include <ctime> #include <fcntl.h> -#include <pthread.h> +#include <pthread.h> // for pthread_t #include <stdint.h> // for lzlib.h #include <unistd.h> #include <sys/stat.h> @@ -57,7 +56,7 @@ const char * const program_name = "tarlz"; namespace { -const char * const program_year = "2022"; +const char * const program_year = "2024"; const char * invocation_name = program_name; // default value @@ -66,12 +65,12 @@ void show_help( const long num_online ) std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n" "the tar archiver and the lzip compressor. Tarlz uses the compression library\n" "lzlib.\n" - "\nTarlz creates, lists, and extracts archives in a simplified and safer\n" - "variant of the POSIX pax format compressed in lzip format, keeping the\n" - "alignment between tar members and lzip members. The resulting multimember\n" - "tar.lz archive is fully backward compatible with standard tar tools like GNU\n" - "tar, which treat it like any other tar.lz archive. Tarlz can append files to\n" - "the end of such compressed archives.\n" + "\nTarlz creates tar archives using a simplified and safer variant of the POSIX\n" + "pax format compressed in lzip format, keeping the alignment between tar\n" + "members and lzip members. 
The resulting multimember tar.lz archive is\n" + "backward compatible with standard tar tools like GNU tar, which treat it\n" + "like any other tar.lz archive. Tarlz can append files to the end of such\n" + "compressed archives.\n" "\nKeeping the alignment between tar members and lzip members has two\n" "advantages. It adds an indexed lzip layer on top of the tar archive, making\n" "it possible to decode the archive safely in parallel. It also minimizes the\n" @@ -101,7 +100,7 @@ void show_help( const long num_online ) " -f, --file=<archive> use archive file <archive>\n" " -h, --dereference follow symlinks; archive the files they point to\n" " -n, --threads=<n> set number of (de)compression threads [%ld]\n" - " -o, --output=<file> compress to <file>\n" + " -o, --output=<file> compress to <file> ('-' for stdout)\n" " -p, --preserve-permissions don't subtract the umask on extraction\n" " -q, --quiet suppress all messages\n" " -v, --verbose verbosely list files processed\n" @@ -117,6 +116,7 @@ void show_help( const long num_online ) " --group=<group> use <group> name/ID for files added to archive\n" " --exclude=<pattern> exclude files matching a shell pattern\n" " --ignore-ids ignore differences in owner and group IDs\n" + " --ignore-metadata compare only file size and file content\n" " --ignore-overflow ignore mtime overflow differences on 32-bit\n" " --keep-damaged don't delete partially extracted files\n" " --missing-crc exit with error status if missing extended CRC\n" @@ -132,7 +132,7 @@ void show_help( const long num_online ) std::printf( "\nIf no archive is specified, tarlz tries to read it from standard input or\n" "write it to standard output.\n" "\nExit status: 0 for a normal exit, 1 for environmental problems\n" - "(file not found, files differ, invalid command line options, I/O errors,\n" + "(file not found, files differ, invalid command-line options, I/O errors,\n" "etc), 2 to indicate a corrupt or invalid input file, 3 for an internal\n" "consistency 
error (e.g., bug) which caused tarlz to panic.\n" "\nReport bugs to lzip-bug@nongnu.org\n" @@ -209,12 +209,12 @@ int check_lib() } -// separate numbers of 6 or more digits in groups of 3 digits using '_' +// separate numbers of 5 or more digits in groups of 3 digits using '_' const char * format_num3( long long num ) { - const char * const si_prefix = "kMGTPEZY"; - const char * const binary_prefix = "KMGTPEZY"; - enum { buffers = 8, bufsize = 4 * sizeof num }; + enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; + const char * const si_prefix = "kMGTPEZYRQ"; + const char * const binary_prefix = "KMGTPEZYRQ"; static char buffer[buffers][bufsize]; // circle of static buffers for printf static int current = 0; @@ -222,20 +222,23 @@ const char * format_num3( long long num ) char * p = buf + bufsize - 1; // fill the buffer backwards *p = 0; // terminator const bool negative = num < 0; - char prefix = 0; // try binary first, then si - for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i ) - { num /= 1024; prefix = binary_prefix[i]; } - if( prefix ) *(--p) = 'i'; - else - for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i ) - { num /= 1000; prefix = si_prefix[i]; } - if( prefix ) *(--p) = prefix; - const bool split = num >= 100000 || num <= -100000; + if( num > 1024 || num < -1024 ) + { + char prefix = 0; // try binary first, then si + for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < n && num != 0 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 10000 || num <= -10000; for( int i = 0; ; ) { - long long onum = num; num /= 10; - *(--p) = llabs( onum - 10 * num ) + '0'; if( num == 0 ) break; + const long long onum = num; num /= 10; + *(--p) = llabs( onum - ( 10 * num ) ) + '0'; if( num == 0 ) break; if( split && ++i >= 3 ) { i = 0; *(--p) 
= '_'; } } if( negative ) *(--p) = '-'; @@ -252,6 +255,7 @@ void show_option_error( const char * const arg, const char * const msg, } +// Recognized formats: <num>k, <num>Ki, <num>[MGTPEZYRQ][i] long long getnum( const char * const arg, const char * const option_name, const long long llimit = LLONG_MIN, const long long ulimit = LLONG_MAX ) @@ -269,6 +273,8 @@ long long getnum( const char * const arg, const char * const option_name, int exponent = 0; // 0 = bad multiplier switch( tail[0] ) { + case 'Q': exponent = 10; break; + case 'R': exponent = 9; break; case 'Y': exponent = 8; break; case 'Z': exponent = 7; break; case 'E': exponent = 6; break; @@ -327,7 +333,7 @@ void set_mode( Program_mode & program_mode, const Program_mode new_mode ) // parse time as 'long long' even if time_t is 32-bit long long parse_mtime( const char * arg, const char * const pn ) { - if( *arg == '@' ) return getnum( arg + 1, pn ); + if( *arg == '@' ) return getnum( arg + 1, pn ); // seconds since the epoch else if( *arg == '.' || *arg == '/' ) { struct stat st; @@ -389,6 +395,10 @@ int hstat( const char * const filename, struct stat * const st, { return dereference ? stat( filename, st ) : lstat( filename, st ); } +bool nonempty_arg( const Arg_parser & parser, const int i ) + { return ( parser.code( i ) == 0 && !parser.argument( i ).empty() ); } + + int open_instream( const std::string & name ) { const int infd = open( name.c_str(), O_RDONLY | O_BINARY ); @@ -422,20 +432,6 @@ int open_outstream( const std::string & name, const bool create, } -/* This can be called from any thread, main thread or sub-threads alike, - since they all call common helper functions that call exit_fail_mt() - in case of an error. 
-*/ -void exit_fail_mt( const int retval ) - { - // calling 'exit' more than once results in undefined behavior - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - - pthread_mutex_lock( &mutex ); // ignore errors to avoid loop - std::exit( retval ); - } - - void show_error( const char * const msg, const int errcode, const bool help ) { if( verbosity < 0 ) return; @@ -531,8 +527,8 @@ int main( const int argc, const char * const argv[] ) if( argc > 0 ) invocation_name = argv[0]; enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del, - opt_dso, opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, - opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; + opt_dso, opt_exc, opt_grp, opt_hlp, opt_iid, opt_imd, opt_kd, opt_mti, + opt_nso, opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; const Arg_parser::Option options[] = { { '0', 0, Arg_parser::no }, @@ -573,7 +569,8 @@ int main( const int argc, const char * const argv[] ) { opt_exc, "exclude", Arg_parser::yes }, { opt_grp, "group", Arg_parser::yes }, { opt_hlp, "help", Arg_parser::no }, - { opt_id, "ignore-ids", Arg_parser::no }, + { opt_iid, "ignore-ids", Arg_parser::no }, + { opt_imd, "ignore-metadata", Arg_parser::no }, { opt_kd, "keep-damaged", Arg_parser::no }, { opt_crc, "missing-crc", Arg_parser::no }, { opt_mti, "mtime", Arg_parser::yes }, @@ -617,7 +614,7 @@ int main( const int argc, const char * const argv[] ) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - cl_opts.level = code - '0'; break; + cl_opts.set_level( code - '0' ); break; case 'A': set_mode( cl_opts.program_mode, m_concatenate ); break; case 'B': cl_opts.data_size = getnum( arg, pn, min_data_size, max_data_size ); break; @@ -649,7 +646,8 @@ int main( const int argc, const char * const argv[] ) case opt_exc: Exclude::add_pattern( sarg ); break; case opt_grp: cl_opts.gid = parse_group( arg, pn ); break; case opt_hlp: show_help( 
num_online ); return 0; - case opt_id: cl_opts.ignore_ids = true; break; + case opt_iid: cl_opts.ignore_ids = true; break; + case opt_imd: cl_opts.ignore_metadata = true; break; case opt_kd: cl_opts.keep_damaged = true; break; case opt_mti: cl_opts.mtime = parse_mtime( arg, pn ); cl_opts.mtime_set = true; break; @@ -659,9 +657,9 @@ int main( const int argc, const char * const argv[] ) case opt_own: cl_opts.uid = parse_owner( arg, pn ); break; case opt_per: cl_opts.permissive = true; break; case opt_sol: cl_opts.solidity = solid; break; - case opt_un: cl_opts.level = -1; break; + case opt_un: cl_opts.set_level( -1 ); break; case opt_wn: cl_opts.warn_newer = true; break; - default : internal_error( "uncaught option" ); + default: internal_error( "uncaught option." ); } } // end process options @@ -679,8 +677,7 @@ int main( const int argc, const char * const argv[] ) program_name, f_pn, z_pn ); return 1; } - if( cl_opts.program_mode == m_compress && - ( cl_opts.level < 0 || cl_opts.level > 9 ) ) + if( cl_opts.program_mode == m_compress && cl_opts.uncompressed() ) { if( verbosity >= 0 ) std::fprintf( stderr, "%s: Option '--uncompressed' can't be used with '%s'.\n", @@ -700,7 +697,7 @@ int main( const int argc, const char * const argv[] ) setmode( STDOUT_FILENO, O_BINARY ); #endif - if( cl_opts.data_size <= 0 && cl_opts.level >= 0 && cl_opts.level <= 9 ) + if( cl_opts.data_size <= 0 && !cl_opts.uncompressed() ) { if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; else cl_opts.data_size = 2 * option_mapping[cl_opts.level].dictionary_size; @@ -715,9 +712,9 @@ int main( const int argc, const char * const argv[] ) case m_create: return encode( cl_opts ); case m_compress: return compress( cl_opts ); case m_concatenate: return concatenate( cl_opts ); - case m_delete: return delete_members( cl_opts ); + case m_delete: tzset(); return delete_members( cl_opts ); case m_diff: case m_extract: - case m_list: return decode( cl_opts ); + case m_list: tzset(); return decode( 
cl_opts ); } } @@ -1,5 +1,5 @@ /* Tarlz - Archiver with multimember lzip compression - Copyright (C) 2013-2022 Antonio Diaz Diaz. + Copyright (C) 2013-2024 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,14 +16,15 @@ */ #include <climits> +#include <cstdlib> #include <cstring> #include <string> #include <vector> #include <stdint.h> -#include <sys/types.h> #define max_file_size ( LLONG_MAX - header_size ) -enum { header_size = 512 }; +enum { header_size = 512, + max_edata_size = ( INT_MAX / header_size - 2 ) * header_size }; typedef uint8_t Tar_header[header_size]; enum Offsets { @@ -46,7 +47,7 @@ enum Typeflag { const uint8_t ustar_magic[magic_l] = { 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0" -inline bool verify_ustar_magic( const Tar_header header ) +inline bool check_ustar_magic( const Tar_header header ) { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; } inline void init_tar_header( Tar_header header ) // set magic and version @@ -154,7 +155,7 @@ public: unsigned decimal_size() const; unsigned print( char * const buf ) const; bool parse( const char * const ptr, const char ** const tailp, - const long long size ); + const int size ); }; @@ -167,12 +168,12 @@ class Extended // stores metadata from/for extended records long long uid_, gid_; // may not fit in unsigned int Etime atime_, mtime_; - // cached sizes; if full_size_ < 0 they must be recalculated - mutable long long edsize_; // extended data size - mutable long long padded_edsize_; // edsize rounded up - mutable long long full_size_; // header + padded edsize - mutable long long linkpath_recsize_; - mutable long long path_recsize_; + // cached sizes; if full_size_ <= -4 they must be recalculated + mutable int edsize_; // extended data size + mutable int padded_edsize_; // edsize rounded up + mutable int full_size_; // header + padded edsize + mutable int 
linkpath_recsize_; + mutable int path_recsize_; mutable int file_size_recsize_; mutable int uid_recsize_; mutable int gid_recsize_; @@ -183,8 +184,7 @@ class Extended // stores metadata from/for extended records mutable bool crc_present_; void calculate_sizes() const; - void unknown_keyword( const char * const buf, - const unsigned long long size ) const; + void unknown_keyword( const char * const buf, const int size ) const; public: static const std::string crc_record; @@ -205,11 +205,6 @@ public: atime_recsize_ = 0; mtime_recsize_ = 0; crc_present_ = false; removed_prefix.clear(); } - bool empty() const - { return linkpath_.empty() && path_.empty() && file_size_ == 0 && - uid_ < 0 && gid_ < 0 && - !atime_.out_of_ustar_range() && !mtime_.out_of_ustar_range(); } - const std::string & linkpath() const { return linkpath_; } const std::string & path() const { return path_; } long long file_size() const { return file_size_; } @@ -219,23 +214,26 @@ public: const Etime & atime() const { return atime_; } const Etime & mtime() const { return mtime_; } - void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; } - void path( const char * const p ) { path_ = p; full_size_ = -1; } - void file_size( const long long fs ) { full_size_ = -1; + void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -4; } + void path( const char * const p ) { path_ = p; full_size_ = -4; } + void file_size( const long long fs ) { full_size_ = -4; file_size_ = ( fs >= 0 && fs <= max_file_size ) ? 
fs : 0; } bool set_uid( const long long id ) - { if( id >= 0 ) { uid_ = id; full_size_ = -1; } return id >= 0; } + { if( id >= 0 ) { uid_ = id; full_size_ = -4; } return id >= 0; } bool set_gid( const long long id ) - { if( id >= 0 ) { gid_ = id; full_size_ = -1; } return id >= 0; } - void set_atime( const long long s ) { atime_.set( s ); full_size_ = -1; } - void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -1; } + { if( id >= 0 ) { gid_ = id; full_size_ = -4; } return id >= 0; } + void set_atime( const long long s ) { atime_.set( s ); full_size_ = -4; } + void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -4; } - long long full_size() const - { if( full_size_ < 0 ) calculate_sizes(); return full_size_; } + /* Return the size of the extended block, or 0 if empty. + Return -1 if error, -2 if out of memory, -3 if block too long. */ + int full_size() const + { if( full_size_ <= -4 ) calculate_sizes(); return full_size_; } + int format_block( Resizable_buffer & rbuf ) const; + const char * full_size_error() const; bool crc_present() const { return crc_present_; } - long long format_block( Resizable_buffer & rbuf ) const; - bool parse( const char * const buf, const unsigned long long edsize, + bool parse( const char * const buf, const int edsize, const bool permissive ); void fill_from_ustar( const Tar_header header ); }; @@ -337,17 +335,17 @@ struct Lzip_header // 4 version // 5 coded dictionary size - bool verify_magic() const + bool check_magic() const { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } - bool verify_prefix( const int sz ) const // detect (truncated) header + bool check_prefix( const int sz ) const // detect (truncated) header { for( int i = 0; i < sz && i < 4; ++i ) if( data[i] != lzip_magic[i] ) return false; return ( sz > 0 ); } - bool verify_corrupt() const // detect corrupt header + bool check_corrupt() const // detect corrupt header { int matches = 0; for( int i = 0; i < 4; ++i ) @@ -356,7 +354,7 @@ 
struct Lzip_header } uint8_t version() const { return data[4]; } - bool verify_version() const { return ( data[4] == 1 ); } + bool check_version() const { return ( data[4] == 1 ); } unsigned dictionary_size() const { @@ -366,8 +364,8 @@ struct Lzip_header return sz; } - bool verify() const - { return verify_magic() && verify_version() && + bool check() const + { return check_magic() && check_version() && isvalid_ds( dictionary_size() ); } }; @@ -400,7 +398,7 @@ struct Lzip_trailer return tmp; } - bool verify_consistency() const // check internal consistency + bool check_consistency() const // check internal consistency { const unsigned crc = data_crc(); const unsigned long long dsize = data_size(); @@ -421,7 +419,7 @@ enum Program_mode { m_none, m_append, m_compress, m_concatenate, m_create, enum Solidity { no_solid, bsolid, dsolid, asolid, solid }; class Arg_parser; -struct Cl_options // command line options +struct Cl_options // command-line options { const Arg_parser & parser; std::string archive_name; @@ -440,8 +438,10 @@ struct Cl_options // command line options bool dereference; bool filenames_given; bool ignore_ids; + bool ignore_metadata; bool ignore_overflow; bool keep_damaged; + bool level_set; // compression level set in command line bool missing_crc; bool mtime_set; bool permissive; @@ -452,10 +452,15 @@ struct Cl_options // command line options : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), program_mode( m_none ), solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ), num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ), - filenames_given( false ), ignore_ids( false ), ignore_overflow( false ), - keep_damaged( false ), missing_crc( false ), mtime_set( false ), - permissive( false ), preserve_permissions( false ), warn_newer( false ) {} + filenames_given( false ), ignore_ids( false ), ignore_metadata( false ), + ignore_overflow( false ), keep_damaged( false ), level_set( false ), + missing_crc( false ), mtime_set( false 
), permissive( false ), + preserve_permissions( false ), warn_newer( false ) {} + + void set_level( const int l ) { level = l; level_set = true; } + int compressed() const; // tri-state bool with error (-2) + bool uncompressed() const { return level < 0 || level > 9; } bool to_stdout() const { return output_filename == "-"; } }; @@ -465,7 +470,6 @@ inline void set_retval( int & retval, const int new_val ) const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; const char * const bad_dict_msg = "Invalid dictionary size in member header."; const char * const corrupt_mm_msg = "Corrupt header in multimember file."; -const char * const trailing_msg = "Trailing data not allowed."; const char * const bad_hdr_msg = "Corrupt or invalid tar header."; const char * const gblrec_msg = "Error in global extended records."; const char * const extrec_msg = "Error in extended records."; @@ -486,21 +490,12 @@ const char * const nfound_msg = "Not found in archive."; const char * const seek_msg = "Seek error"; const char * const werr_msg = "Write error"; const char * const chdir_msg = "Error changing working directory"; +const char * const intdir_msg = "Failed to create intermediate directory"; // defined in common.cc -void xinit_mutex( pthread_mutex_t * const mutex ); -void xinit_cond( pthread_cond_t * const cond ); -void xdestroy_mutex( pthread_mutex_t * const mutex ); -void xdestroy_cond( pthread_cond_t * const cond ); -void xlock( pthread_mutex_t * const mutex ); -void xunlock( pthread_mutex_t * const mutex ); -void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ); -void xsignal( pthread_cond_t * const cond ); -void xbroadcast( pthread_cond_t * const cond ); unsigned long long parse_octal( const uint8_t * const ptr, const int size ); int readblock( const int fd, uint8_t * const buf, const int size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); -bool nonempty_arg( const Arg_parser & parser, const int i ); // 
defined in common_decode.cc bool block_is_zero( const uint8_t * const buf, const int size ); @@ -510,11 +505,19 @@ bool show_member_name( const Extended & extended, const Tar_header header, const int vlevel, Resizable_buffer & rbuf ); bool check_skip_filename( const Cl_options & cl_opts, std::vector< char > & name_pending, - const char * const filename ); -mode_t get_umask(); -bool make_path( const std::string & name ); + const char * const filename, const int chdir_fd = -1 ); +bool make_dirs( const std::string & name ); + +// defined in common_mutex.cc +void exit_fail_mt( const int retval = 1 ); // terminate the program +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp = 0 ); +void set_error_status( const int retval ); +int final_exit_status( int retval, const bool show_msg = true ); // defined in compress.cc +void show_atpos_error( const char * const filename, const long long pos, + const bool isarchive ); int compress( const Cl_options & cl_opts ); // defined in create.cc @@ -524,18 +527,14 @@ bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, bool write_eoa_records( const int outfd, const bool compressed ); const char * remove_leading_dotslash( const char * const filename, std::string * const removed_prefixp, const bool dotdot = false ); -bool print_removed_prefix( const std::string & prefix, - std::string * const msgp = 0 ); bool fill_headers( const char * const filename, Extended & extended, Tar_header header, long long & file_size, const int flag ); -bool block_is_full( const long long extended_size, +bool block_is_full( const int extended_size, const unsigned long long file_size, const unsigned long long target_size, unsigned long long & partial_data_size ); -void set_error_status( const int retval ); -int final_exit_status( int retval, const bool show_msg = true ); unsigned ustar_chksum( const Tar_header header ); -bool verify_ustar_chksum( const Tar_header header ); +bool check_ustar_chksum( const 
Tar_header header ); bool has_lz_ext( const std::string & name ); int concatenate( const Cl_options & cl_opts ); int encode( const Cl_options & cl_opts ); @@ -591,10 +590,10 @@ extern const char * const program_name; struct stat; int hstat( const char * const filename, struct stat * const st, const bool dereference ); +bool nonempty_arg( const Arg_parser & parser, const int i ); int open_instream( const std::string & name ); int open_outstream( const std::string & name, const bool create = true, Resizable_buffer * const rbufp = 0, const bool force = true ); -void exit_fail_mt( const int retval = 1 ); // terminate the program void show_error( const char * const msg, const int errcode = 0, const bool help = false ); bool format_error( Resizable_buffer & rbuf, const int errcode, diff --git a/testsuite/check.sh b/testsuite/check.sh index 348e447..9027bd5 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Tarlz - Archiver with multimember lzip compression -# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# Copyright (C) 2013-2024 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. @@ -59,6 +59,8 @@ eoa_lz="${testdir}"/eoa_blocks.tar.lz fail=0 lwarnc=0 test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } +is_compressed() { [ "`dd if="$1" bs=4 count=1 2> /dev/null`" = LZIP ] ; } +is_uncompressed() { [ "`dd if="$1" bs=4 count=1 2> /dev/null`" != LZIP ] ; } cyg_symlink() { [ ${lwarnc} = 0 ] && printf "\nwarning: your OS follows symbolic links to directories even when tarlz asks it not to\n$1" lwarnc=1 ; } @@ -116,6 +118,7 @@ cyg_symlink() { [ ${lwarnc} = 0 ] && "${TARLZ}" --check-lib # just print warning [ $? != 2 ] || test_failed $LINENO # unless bad lzlib.h + printf "testing tarlz-%s..." "$2" "${TARLZ}" -q -tf "${in}" @@ -131,34 +134,37 @@ printf "testing tarlz-%s..." "$2" "${TARLZ}" -q -cf out.tar.lz [ $? 
= 1 ] || test_failed $LINENO [ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -q -cf out.tar +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.tar ] || test_failed $LINENO "${TARLZ}" -rf out.tar.lz || test_failed $LINENO [ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -rf out.tar || test_failed $LINENO +[ ! -e out.tar ] || test_failed $LINENO "${TARLZ}" -r || test_failed $LINENO -"${TARLZ}" --uncompressed -q -rf out.tar "${in}" +"${TARLZ}" -q -rf out.tar.lz "${in}" +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -q -rf out.tar "${in}" [ $? = 1 ] || test_failed $LINENO [ ! -e out.tar ] || test_failed $LINENO -cat "${test3_lz}" > test.tar.lz || framework_failure -"${TARLZ}" --uncompressed -q -rf test.tar.lz "${in}" -[ $? = 2 ] || test_failed $LINENO -cmp "${test3_lz}" test.tar.lz || test_failed $LINENO -rm -f test.tar.lz || framework_failure -cat "${test3}" > test.tar || framework_failure -"${TARLZ}" -q -rf test.tar "${in}" -[ $? = 2 ] || test_failed $LINENO -cmp "${test3}" test.tar || test_failed $LINENO -rm -f test.tar || framework_failure "${TARLZ}" -q -c "${in}" nx_file > /dev/null [ $? = 1 ] || test_failed $LINENO "${TARLZ}" -q -c -C nx_dir "${in}" [ $? = 1 ] || test_failed $LINENO "${TARLZ}" -q -x -C nx_dir "${test3_lz}" [ $? = 1 ] || test_failed $LINENO -touch empty.tar.lz empty.tlz # list an empty lz file +touch empty.tar.lz empty.tlz || framework_failure # list an empty lz file "${TARLZ}" -q -tf empty.tar.lz [ $? = 2 ] || test_failed $LINENO "${TARLZ}" -q -tf empty.tlz [ $? = 2 ] || test_failed $LINENO rm -f empty.tar.lz empty.tlz || framework_failure +touch empty.tar || framework_failure # compress an empty archive +"${TARLZ}" -q -z empty.tar +[ $? = 2 ] || test_failed $LINENO +[ ! -e empty.tar.lz ] || test_failed $LINENO +rm -f empty.tar empty.tar.lz || framework_failure "${TARLZ}" -q -cd # test mixed operations [ $? 
= 1 ] || test_failed $LINENO "${TARLZ}" -q -cr @@ -239,6 +245,10 @@ rm -f foo bar baz || framework_failure cmp cfoo foo || test_failed $LINENO cmp cbar bar || test_failed $LINENO cmp cbaz baz || test_failed $LINENO +# time and mode comparison always fails on OS/2 +if "${TARLZ}" -df "${test3}" --ignore-ids ; then d_works=yes +else printf "warning: some '--diff' tests will be skipped.\n" +fi rm -f foo bar baz || framework_failure for i in 0 2 6 ; do "${TARLZ}" -n$i -xf "${test3_lz}" --missing-crc || test_failed $LINENO $i @@ -247,7 +257,7 @@ for i in 0 2 6 ; do cmp cbaz baz || test_failed $LINENO $i rm -f foo bar baz || framework_failure "${TARLZ}" -n$i -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null || - test_failed $LINENO $i + test_failed $LINENO $i diff -u vlist3 out || test_failed $LINENO $i rm -f out || framework_failure "${TARLZ}" -q -n$i -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO $i @@ -273,6 +283,38 @@ for i in 0 2 6 ; do rm -f foo bar baz || framework_failure done +# test -C in --diff and --extract +for i in "${test3}" "${test3_lz}" ; do + mkdir dir1 dir2 dir3 || framework_failure + "${TARLZ}" -q -xf "$i" -C dir1 foo -C ../dir2 bar -C ../dir3 baz || + test_failed $LINENO "$i" + cmp cfoo dir1/foo || test_failed $LINENO "$i" + cmp cbar dir2/bar || test_failed $LINENO "$i" + cmp cbaz dir3/baz || test_failed $LINENO "$i" + if [ "${d_works}" = yes ] ; then + "${TARLZ}" -df "$i" -C dir1 foo -C ../dir2 --ignore-ids bar \ + -C ../dir3 baz || test_failed $LINENO "$i" + "${TARLZ}" -df "$i" -C dir3 baz -C ../dir2 bar -C ../dir1 foo \ + --ignore-ids || test_failed $LINENO "$i" + fi + rm -rf dir1 dir2 dir3 || framework_failure +done +for i in "${test3dir}" "${test3dir_lz}" ; do + mkdir dir1 dir2 dir3 || framework_failure + "${TARLZ}" -q -xf "$i" -C dir2 dir/bar -C ../dir1 dir/foo \ + -C ../dir3 dir/baz || test_failed $LINENO "$i" + cmp cfoo dir1/dir/foo || test_failed $LINENO "$i" + cmp cbar dir2/dir/bar || test_failed $LINENO "$i" + cmp cbaz 
dir3/dir/baz || test_failed $LINENO "$i" + if [ "${d_works}" = yes ] ; then + "${TARLZ}" -q -df "$i" --ignore-ids -C dir1 dir/foo -C ../dir2 dir/bar \ + -C ../dir3 dir/baz || test_failed $LINENO "$i" + "${TARLZ}" -q -df "${test3}" -C dir1/dir foo -C ../../dir2/dir bar \ + --ignore-ids -C ../../dir3/dir baz || test_failed $LINENO "$i" + fi + rm -rf dir1 dir2 dir3 || framework_failure +done + for i in "${test3dir}" "${test3dir_lz}" ; do "${TARLZ}" -q -tf "$i" --missing-crc || test_failed $LINENO "$i" "${TARLZ}" -q -xf "$i" --missing-crc || test_failed $LINENO "$i" @@ -409,9 +451,9 @@ for i in 0 2 6 ; do rm -f foo bar baz || framework_failure done "${TARLZ}" -n0 -xf "${testdir}"/test3_eoa3.tar.lz || test_failed $LINENO -cmp cfoo foo || test_failed $LINENO $i -[ ! -e bar ] || test_failed $LINENO $i -[ ! -e baz ] || test_failed $LINENO $i +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO rm -f foo bar baz || framework_failure # test --list and --extract tar in tar.lz @@ -480,6 +522,10 @@ cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz "${TARLZ}" -Aqf out.tar.lz "${test3_lz}" [ $? = 2 ] || test_failed $LINENO cat "${in_tar_lz}" > out.tar.lz || framework_failure +"${TARLZ}" -q --un -Af out.tar.lz "${test3_lz}" # contradictory ext +[ $? = 1 ] || test_failed $LINENO +cmp "${in_tar_lz}" out.tar.lz || test_failed $LINENO +cat "${in_tar_lz}" > out.tar.lz || framework_failure "${TARLZ}" -Af out.tar.lz "${test3_lz}" || test_failed $LINENO "${TARLZ}" -xf out.tar.lz || test_failed $LINENO cmp "${in}" test.txt || test_failed $LINENO @@ -524,6 +570,10 @@ cat "${in}" > out.tar || framework_failure # invalid tar "${TARLZ}" -Aqf out.tar "${test3}" [ $? = 2 ] || test_failed $LINENO cat "${in_tar}" > out.tar || framework_failure +"${TARLZ}" -q -0 -Af out.tar "${test3}" # contradictory ext +[ $? 
= 1 ] || test_failed $LINENO +cmp "${in_tar}" out.tar || test_failed $LINENO +cat "${in_tar}" > out.tar || framework_failure "${TARLZ}" -Af out.tar "${test3}" || test_failed $LINENO "${TARLZ}" -xf out.tar || test_failed $LINENO cmp "${in}" test.txt || test_failed $LINENO @@ -568,11 +618,13 @@ printf "\ntesting --create..." # test --create cat "${in}" > test.txt || framework_failure "${TARLZ}" --warn-newer -0 -cf out.tar.lz test.txt || test_failed $LINENO +is_compressed out.tar.lz || test_failed $LINENO rm -f test.txt || framework_failure "${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO cmp "${in}" test.txt || test_failed $LINENO cat "${in}" > test.txt || framework_failure -"${TARLZ}" --warn-newer --uncompressed -cf out.tar test.txt || test_failed $LINENO +"${TARLZ}" --warn-newer --un -cf out.tar test.txt || test_failed $LINENO +is_uncompressed out.tar || test_failed $LINENO rm -f test.txt || framework_failure "${TARLZ}" -xf out.tar --missing-crc || test_failed $LINENO cmp "${in}" test.txt || test_failed $LINENO @@ -639,7 +691,7 @@ cmp cfoo dir1/foo || test_failed $LINENO cmp cbar dir1/bar || test_failed $LINENO cmp cbaz dir1/baz || test_failed $LINENO rm -f dir1/foo dir1/bar dir1/baz || framework_failure -"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -x foo bar baz -C dir1 || +"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -x -C dir1 foo bar baz || test_failed $LINENO cmp cfoo dir1/foo || test_failed $LINENO cmp cbar dir1/bar || test_failed $LINENO @@ -670,7 +722,8 @@ rm -f out.tar.lz foo bar baz || framework_failure cat cfoo > foo || framework_failure cat cbar > bar || framework_failure cat cbaz > baz || framework_failure -"${TARLZ}" --un -cf out.tar foo bar baz --exclude 'ba*' || test_failed $LINENO +"${TARLZ}" -cf out.tar foo bar baz --exclude 'ba*' || test_failed $LINENO +is_uncompressed out.tar || test_failed $LINENO rm -f foo bar baz || framework_failure "${TARLZ}" -xf out.tar || test_failed $LINENO cmp cfoo foo || test_failed $LINENO @@ -697,10 +750,13 
@@ touch -d 2022-01-05T12:22:13 bar || framework_failure for i in ${dates} @-8Ei '2017-10-01 09:00:00' '2017-10-1 9:0:0' \ '2017-10-01 09:00' '2017-10-01 09' 2017-10-01 ./bar ; do touch foo || framework_failure - "${TARLZ}" --un -cf out.tar --mtime="$i" foo || test_failed $LINENO "$i" + "${TARLZ}" -cf out.tar --mtime="$i" foo || test_failed $LINENO "$i" + is_uncompressed out.tar || test_failed $LINENO "$i" "${TARLZ}" -q -df out.tar && test_failed $LINENO "$i" "${TARLZ}" -xf out.tar || test_failed $LINENO "$i" - "${TARLZ}" -df out.tar --ignore-overflow || test_failed $LINENO "$i" + if [ "${d_works}" = yes ] ; then + "${TARLZ}" -df out.tar --ignore-overflow || test_failed $LINENO "$i" + fi done rm -f out.tar foo bar || framework_failure @@ -708,18 +764,20 @@ mkdir dir || framework_failure for i in ${dates} ; do # Skip a time stamp $i if it's out of range for this platform, # of if it uses a notation that this platform does not recognize. - touch -d $i dir/f$i >/dev/null 2>&1 || continue + touch -d "$i" "dir/f$i" >/dev/null 2>&1 || continue done -"${TARLZ}" --uncompressed -cf out.tar dir || test_failed $LINENO +"${TARLZ}" -cf out.tar dir || test_failed $LINENO +is_uncompressed out.tar || test_failed $LINENO "${TARLZ}" -df out.tar || test_failed $LINENO rm -rf out.tar dir || framework_failure printf "\ntesting --diff..." 
-# test --diff "${TARLZ}" -xf "${test3_lz}" || test_failed $LINENO -"${TARLZ}" --uncompressed -cf out.tar foo || test_failed $LINENO -"${TARLZ}" --uncompressed -cf aout.tar foo --anonymous || test_failed $LINENO +"${TARLZ}" -cf out.tar foo || test_failed $LINENO +"${TARLZ}" -cf aout.tar foo --anonymous || test_failed $LINENO +is_uncompressed out.tar || test_failed $LINENO +is_uncompressed aout.tar || test_failed $LINENO if cmp out.tar aout.tar > /dev/null ; then printf "\nwarning: '--diff' test can't be run as root.\n" else @@ -731,7 +789,9 @@ else "${TARLZ}" -n$i -df "${test3_lz}" --exclude '*' || test_failed $LINENO $i "${TARLZ}" -n$i -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO $i rm -f bar || framework_failure - "${TARLZ}" -n$i -df "${test3_lz}" foo baz --ignore-ids || + "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids foo baz || + test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --ignore-metadata foo baz || test_failed $LINENO $i "${TARLZ}" -n$i -df "${test3_lz}" --exclude bar --ignore-ids || test_failed $LINENO $i @@ -879,12 +939,14 @@ if [ "${ln_works}" = yes ] ; then cat cbar > dir/bar || framework_failure cat cbaz > dir/baz || framework_failure ln -s dir dir_link || framework_failure - "${TARLZ}" -0 -cf out1 dir_link || test_failed $LINENO - "${TARLZ}" --un -cf out2 dir_link || test_failed $LINENO - "${TARLZ}" -0 -n0 -cf out3 dir_link || test_failed $LINENO - "${TARLZ}" -0 -h -cf hout1 dir_link || test_failed $LINENO - "${TARLZ}" --un -h -cf hout2 dir_link || test_failed $LINENO - "${TARLZ}" -0 -n0 -h -cf hout3 dir_link || test_failed $LINENO + "${TARLZ}" -0 -c dir_link > out1 || test_failed $LINENO + is_compressed out1 || test_failed $LINENO + "${TARLZ}" --un -c dir_link > out2 || test_failed $LINENO + is_uncompressed out2 || test_failed $LINENO + "${TARLZ}" -0 -n0 -c dir_link > out3 || test_failed $LINENO + "${TARLZ}" -0 -h -c dir_link > hout1 || test_failed $LINENO + "${TARLZ}" --un -h -c dir_link > hout2 || test_failed $LINENO + 
"${TARLZ}" -0 -n0 -h -c dir_link > hout3 || test_failed $LINENO rm -rf dir dir_link || framework_failure for i in 1 2 3 ; do "${TARLZ}" -xf out$i --exclude='dir_link/*' dir_link || @@ -915,10 +977,6 @@ cat cbaz > baz || framework_failure "${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO cmp nout.tar.lz aout.tar.lz || test_failed $LINENO rm -f nout.tar.lz aout.tar.lz || framework_failure -touch aout.tar || framework_failure # wrong extension empty file -"${TARLZ}" -0 -rf aout.tar foo bar baz || test_failed $LINENO -cmp out.tar.lz aout.tar || test_failed $LINENO -rm -f aout.tar || framework_failure touch aout.tar.lz || framework_failure # append to empty file "${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO @@ -933,50 +991,45 @@ cmp out.tar.lz aout.tar.lz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO "${TARLZ}" -0 -r foo bar baz > aout.tar.lz || test_failed $LINENO # to stdout cmp out.tar.lz aout.tar.lz || test_failed $LINENO -"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension archive -[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # contradictory ext +[ $? = 1 ] || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO cat "${eoa_lz}" > aout.tar.lz || framework_failure # append to empty archive "${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO cmp out.tar.lz aout.tar.lz || test_failed $LINENO -"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension empty archive -[ $? 
= 2 ] || test_failed $LINENO -cmp out.tar.lz aout.tar.lz || test_failed $LINENO rm -f out.tar.lz aout.tar.lz || framework_failure # test --append --uncompressed -"${TARLZ}" --un -cf out.tar foo bar baz || test_failed $LINENO -"${TARLZ}" --un -cf aout.tar foo || test_failed $LINENO -"${TARLZ}" --un -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO +"${TARLZ}" -cf out.tar foo bar baz || test_failed $LINENO +"${TARLZ}" -cf aout.tar foo || test_failed $LINENO +"${TARLZ}" -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO +is_uncompressed out.tar || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO rm -f aout.tar || framework_failure -touch aout.tar.lz empty || framework_failure # wrong extension empty file -"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz -[ $? = 2 ] || test_failed $LINENO -cmp aout.tar.lz empty || test_failed $LINENO -rm -f aout.tar.lz empty || framework_failure +touch aout.tar empty || framework_failure # contradictory ext empty file +"${TARLZ}" -0 -q -rf aout.tar foo bar baz +[ $? = 1 ] || test_failed $LINENO +cmp aout.tar empty || test_failed $LINENO +rm -f aout.tar empty || framework_failure touch aout.tar || framework_failure # append to empty file -"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO +"${TARLZ}" -rf aout.tar foo bar baz || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO -"${TARLZ}" --un -rf aout.tar || test_failed $LINENO # append nothing +"${TARLZ}" -rf aout.tar || test_failed $LINENO # append nothing cmp out.tar aout.tar || test_failed $LINENO -"${TARLZ}" --un -rf aout.tar -C nx_dir || test_failed $LINENO +"${TARLZ}" -rf aout.tar -C nx_dir || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO -"${TARLZ}" --un -q -rf aout.tar nx_file +"${TARLZ}" -q -rf aout.tar nx_file [ $? 
= 1 ] || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO -"${TARLZ}" --un -q -rf aout.tar aout.tar || test_failed $LINENO +"${TARLZ}" -q -rf aout.tar aout.tar || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO "${TARLZ}" --un -r foo bar baz > aout.tar || test_failed $LINENO # to stdout cmp out.tar aout.tar || test_failed $LINENO -"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension archive -[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar foo bar baz # contradictory ext +[ $? = 1 ] || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO cat "${eoa}" > aout.tar || framework_failure # append to empty archive -"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO -cmp out.tar aout.tar || test_failed $LINENO -"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension empty archive -[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -rf aout.tar foo bar baz || test_failed $LINENO cmp out.tar aout.tar || test_failed $LINENO rm -f out.tar aout.tar || framework_failure @@ -1015,7 +1068,8 @@ rmdir dir1 || framework_failure rmdir dir1 rm -f out.tar.lz || framework_failure mkdir dir1 || framework_failure -"${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO +"${TARLZ}" -cf out.tar dir1 || test_failed $LINENO +is_uncompressed out.tar || test_failed $LINENO rmdir dir1 || framework_failure "${TARLZ}" -xf out.tar || test_failed $LINENO [ -d dir1 ] || test_failed $LINENO @@ -1110,8 +1164,8 @@ cat cfoo > foo || framework_failure cat cbar > bar || framework_failure cat cbaz > baz || framework_failure cat "${in}" > test.txt || framework_failure -"${TARLZ}" --un -cf out.tar test.txt foo bar baz test.txt || test_failed $LINENO -"${TARLZ}" --un -cf out3.tar foo bar baz || test_failed $LINENO +"${TARLZ}" -cf out.tar test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" -cf out3.tar foo bar baz || test_failed $LINENO cat out.tar > outz.tar || framework_failure cat out3.tar > out3z.tar || 
framework_failure # @@ -1151,6 +1205,9 @@ cmp out.tar.lz out || test_failed $LINENO "${TARLZ}" -0 -B8KiB -z --bsolid outz.tar || test_failed $LINENO cmp out.tar.lz outz.tar.lz || test_failed $LINENO rm -f out outz.tar.lz || framework_failure +"${TARLZ}" -0 -B8KiB -z -o a/b/c/out --bsolid out.tar || test_failed $LINENO +cmp out.tar.lz a/b/c/out || test_failed $LINENO +rm -rf a || framework_failure # "${TARLZ}" -0 -n0 --asolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO "${TARLZ}" -0 -n0 --asolid -cf out3.tar.lz foo bar baz || test_failed $LINENO @@ -1164,12 +1221,15 @@ for i in --asolid --bsolid --dsolid ; do rm -f out outz.tar.lz out3z.tar.lz || framework_failure done # concatenate and compress -"${TARLZ}" --un -cf foo.tar foo || test_failed $LINENO -"${TARLZ}" --un -cf bar.tar bar || test_failed $LINENO -"${TARLZ}" --un -cf baz.tar baz || test_failed $LINENO +"${TARLZ}" -cf foo.tar foo || test_failed $LINENO +"${TARLZ}" -cf bar.tar bar || test_failed $LINENO +"${TARLZ}" -cf baz.tar baz || test_failed $LINENO "${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z -o foobarbaz.tar.lz || test_failed $LINENO cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO +"${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z > foobarbaz.tar.lz || + test_failed $LINENO +cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO # compress and concatenate "${TARLZ}" -0 -z foo.tar bar.tar baz.tar || test_failed $LINENO "${TARLZ}" -A foo.tar.lz bar.tar.lz baz.tar.lz > foobarbaz.tar.lz || @@ -1177,8 +1237,8 @@ cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO "${TARLZ}" -0 -n0 --no-solid -c foo bar baz | cmp foobarbaz.tar.lz - || test_failed $LINENO rm -f foo bar baz test.txt out.tar.lz out.tar outz.tar foobarbaz.tar.lz \ - out3.tar out3.tar.lz out3z.tar foo.tar bar.tar baz.tar \ - foo.tar.lz bar.tar.lz baz.tar.lz || framework_failure + out3.tar out3.tar.lz out3z.tar foo.tar bar.tar baz.tar \ + foo.tar.lz bar.tar.lz baz.tar.lz || framework_failure 
printf "\ntesting bad input..." @@ -1260,19 +1320,23 @@ done if [ "${ln_works}" = yes ] ; then rm -rf dir1 || framework_failure ; fi for i in "${testdir}"/test3_nn.tar "${testdir}"/test3_nn.tar.lz ; do - "${TARLZ}" -q -n0 -tf "$i" || test_failed $LINENO $i - "${TARLZ}" -q -n4 -tf "$i" || test_failed $LINENO $i - "${TARLZ}" -q -n0 -xf "$i" || test_failed $LINENO $i - "${TARLZ}" -n0 -df "$i" --ignore-ids || test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - [ ! -e bar ] || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i + "${TARLZ}" -q -n0 -tf "$i" || test_failed $LINENO "$i" + "${TARLZ}" -q -n4 -tf "$i" || test_failed $LINENO "$i" + "${TARLZ}" -q -n0 -xf "$i" || test_failed $LINENO "$i" + if [ "${d_works}" = yes ] ; then + "${TARLZ}" -n0 -df "$i" --ignore-ids || test_failed $LINENO "$i" + fi + cmp cfoo foo || test_failed $LINENO "$i" + [ ! -e bar ] || test_failed $LINENO "$i" + cmp cbaz baz || test_failed $LINENO "$i" rm -f foo bar baz || framework_failure - "${TARLZ}" -q -n4 -xf "$i" || test_failed $LINENO $i - "${TARLZ}" -n4 -df "$i" --ignore-ids || test_failed $LINENO $i - cmp cfoo foo || test_failed $LINENO $i - [ ! -e bar ] || test_failed $LINENO $i - cmp cbaz baz || test_failed $LINENO $i + "${TARLZ}" -q -n4 -xf "$i" || test_failed $LINENO "$i" + if [ "${d_works}" = yes ] ; then + "${TARLZ}" -n4 -df "$i" --ignore-ids || test_failed $LINENO "$i" + fi + cmp cfoo foo || test_failed $LINENO "$i" + [ ! -e bar ] || test_failed $LINENO "$i" + cmp cbaz baz || test_failed $LINENO "$i" rm -f foo bar baz || framework_failure done @@ -1288,7 +1352,7 @@ for i in "${inbad1}" "${inbad2}" ; do "${TARLZ}" -q -n0 -xf "${i}.tar.lz" --keep-damaged [ $? = 2 ] || test_failed $LINENO "$i" [ -e test.txt ] || test_failed $LINENO "$i" - cmp "$i" test.txt 2> /dev/null || test_failed $LINENO $i + cmp "$i" test.txt 2> /dev/null || test_failed $LINENO "$i" rm -f test.txt || framework_failure done # |