diff options
Diffstat (limited to 'src/xz/args.c')
-rw-r--r-- | src/xz/args.c | 763 |
1 files changed, 763 insertions, 0 deletions
diff --git a/src/xz/args.c b/src/xz/args.c new file mode 100644 index 0000000..17e778c --- /dev/null +++ b/src/xz/args.c @@ -0,0 +1,763 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include <ctype.h> + + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; +bool opt_robot = false; +bool opt_ignore_check = false; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers. +const char stdin_filename[] = "(stdin)"; + + +/// Parse and set the memory usage limit for compression, decompression, +/// and/or multithreaded decompression. +static void +parse_memlimit(const char *name, const char *name_percentage, const char *str, + bool set_compress, bool set_decompress, bool set_mtdec) +{ + bool is_percentage = false; + uint64_t value; + + const size_t len = strlen(str); + if (len > 0 && str[len - 1] == '%') { + // Make a copy so that we can get rid of %. + // + // In the past str wasn't const and we modified it directly + // but that modified argv[] and thus affected what was visible + // in "ps auxf" or similar tools which was confusing. For + // example, --memlimit=50% would show up as --memlimit=50 + // since the percent sign was overwritten here. + char *s = xstrdup(str); + s[len - 1] = '\0'; + is_percentage = true; + value = str_to_uint64(name_percentage, s, 1, 100); + free(s); + } else { + // On 32-bit systems, SIZE_MAX would make more sense than + // UINT64_MAX. But use UINT64_MAX still so that scripts + // that assume > 4 GiB values don't break. + value = str_to_uint64(name, str, 0, UINT64_MAX); + } + + hardware_memlimit_set(value, set_compress, set_decompress, set_mtdec, + is_percentage); + return; +} + + +static void +parse_block_list(const char *str_const) +{ + // We need a modifiable string in the for-loop. + char *str_start = xstrdup(str_const); + char *str = str_start; + + // It must be non-empty and not begin with a comma. + if (str[0] == '\0' || str[0] == ',') + message_fatal(_("%s: Invalid argument to --block-list"), str); + + // Count the number of comma-separated strings. + size_t count = 1; + for (size_t i = 0; str[i] != '\0'; ++i) + if (str[i] == ',') + ++count; + + // Prevent an unlikely integer overflow. + if (count > SIZE_MAX / sizeof(uint64_t) - 1) + message_fatal(_("%s: Too many arguments to --block-list"), + str); + + // Allocate memory to hold all the sizes specified. + // If --block-list was specified already, its value is forgotten. + free(opt_block_list); + opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); + + for (size_t i = 0; i < count; ++i) { + // Locate the next comma and replace it with \0. + char *p = strchr(str, ','); + if (p != NULL) + *p = '\0'; + + if (str[0] == '\0') { + // There is no string, that is, a comma follows + // another comma. Use the previous value. + // + // NOTE: We checked earlier that the first char + // of the whole list cannot be a comma. + assert(i > 0); + opt_block_list[i] = opt_block_list[i - 1]; + } else { + opt_block_list[i] = str_to_uint64("block-list", str, + 0, UINT64_MAX); + + // Zero indicates no more new Blocks. + if (opt_block_list[i] == 0) { + if (i + 1 != count) + message_fatal(_("0 can only be used " + "as the last element " + "in --block-list")); + + opt_block_list[i] = UINT64_MAX; + } + } + + str = p + 1; + } + + // Terminate the array. + opt_block_list[count] = 0; + + free(str_start); + return; +} + + +static void +parse_real(args_info *args, int argc, char **argv) +{ + enum { + OPT_X86 = INT_MIN, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_ARM64, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_SINGLE_STREAM, + OPT_NO_SPARSE, + OPT_FILES, + OPT_FILES0, + OPT_BLOCK_SIZE, + OPT_BLOCK_LIST, + OPT_MEM_COMPRESS, + OPT_MEM_DECOMPRESS, + OPT_MEM_MT_DECOMPRESS, + OPT_NO_ADJUST, + OPT_INFO_MEMORY, + OPT_ROBOT, + OPT_FLUSH_TIMEOUT, + OPT_IGNORE_CHECK, + }; + + static const char short_opts[] + = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, + { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, + { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, + { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, + { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, + { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, + { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS }, + { "memlimit", required_argument, NULL, 'M' }, + { "memory", required_argument, NULL, 'M' }, // Old alias + { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, + { "threads", required_argument, NULL, 'T' }, + { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, + + { "extreme", no_argument, NULL, 'e' }, + { "fast", no_argument, NULL, '0' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", optional_argument, NULL, OPT_X86 }, + { "powerpc", optional_argument, NULL, OPT_POWERPC }, + { "ia64", optional_argument, NULL, OPT_IA64 }, + { "arm", optional_argument, NULL, OPT_ARM }, + { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, + { "arm64", optional_argument, NULL, OPT_ARM64 }, + { "sparc", optional_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "robot", no_argument, NULL, OPT_ROBOT }, + { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // Compression preset (also for decompression if --format=raw) + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + coder_set_preset((uint32_t)(c - '0')); + break; + + // --memlimit-compress + case OPT_MEM_COMPRESS: + parse_memlimit("memlimit-compress", + "memlimit-compress%", optarg, + true, false, false); + break; + + // --memlimit-decompress + case OPT_MEM_DECOMPRESS: + parse_memlimit("memlimit-decompress", + "memlimit-decompress%", optarg, + false, true, false); + break; + + // --memlimit-mt-decompress + case OPT_MEM_MT_DECOMPRESS: + parse_memlimit("memlimit-mt-decompress", + "memlimit-mt-decompress%", optarg, + false, false, true); + break; + + // --memlimit + case 'M': + parse_memlimit("memlimit", "memlimit%", optarg, + true, true, true); + break; + + // --suffix + case 'S': + suffix_set(optarg); + break; + + case 'T': { + // Since xz 5.4.0: Ignore leading '+' first. + const char *s = optarg; + if (optarg[0] == '+') + ++s; + + // The max is from src/liblzma/common/common.h. + uint32_t t = str_to_uint64("threads", s, 0, 16384); + + // If leading '+' was used then use multi-threaded + // mode even if exactly one thread was specified. + if (t == 1 && optarg[0] == '+') + t = UINT32_MAX; + + hardware_threads_set(t); + break; + } + + // --version + case 'V': + // This doesn't return. + message_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --extreme + case 'e': + coder_set_extreme(); + break; + + // --force + case 'f': + opt_force = true; + break; + + // --info-memory + case OPT_INFO_MEMORY: + // This doesn't return. + hardware_memlimit_show(); + + // --help + case 'h': + // This doesn't return. + message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + // --quiet + case 'q': + message_verbosity_decrease(); + break; + + case 'Q': + set_exit_no_warn(); + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + message_verbosity_increase(); + break; + + // --robot + case OPT_ROBOT: + opt_robot = true; + + // This is to make sure that floating point numbers + // always have a dot as decimal separator. + setlocale(LC_NUMERIC, "C"); + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // Filter setup + + case OPT_X86: + coder_add_filter(LZMA_FILTER_X86, + options_bcj(optarg)); + break; + + case OPT_POWERPC: + coder_add_filter(LZMA_FILTER_POWERPC, + options_bcj(optarg)); + break; + + case OPT_IA64: + coder_add_filter(LZMA_FILTER_IA64, + options_bcj(optarg)); + break; + + case OPT_ARM: + coder_add_filter(LZMA_FILTER_ARM, + options_bcj(optarg)); + break; + + case OPT_ARMTHUMB: + coder_add_filter(LZMA_FILTER_ARMTHUMB, + options_bcj(optarg)); + break; + + case OPT_ARM64: + coder_add_filter(LZMA_FILTER_ARM64, + options_bcj(optarg)); + break; + + case OPT_SPARC: + coder_add_filter(LZMA_FILTER_SPARC, + options_bcj(optarg)); + break; + + case OPT_DELTA: + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); + break; + + case OPT_LZMA1: + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); + break; + + case OPT_LZMA2: + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); + break; + + // Other + + // --format + case 'F': { + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. + static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, +#ifdef HAVE_LZIP_DECODER + { "lzip", FORMAT_LZIP }, +#endif + { "raw", FORMAT_RAW }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " + "format type"), + optarg); + + opt_format = types[i].format; + break; + } + + // --check + case 'C': { + static const struct { + char str[8]; + lzma_check check; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unsupported " + "integrity " + "check type"), optarg); + } + + // Use a separate check in case we are using different + // liblzma than what was used to compile us. + if (!lzma_check_is_supported(types[i].check)) + message_fatal(_("%s: Unsupported integrity " + "check type"), optarg); + + coder_set_check(types[i].check); + break; + } + + case OPT_IGNORE_CHECK: + opt_ignore_check = true; + break; + + case OPT_BLOCK_SIZE: + opt_block_size = str_to_uint64("block-size", optarg, + 0, LZMA_VLI_MAX); + break; + + case OPT_BLOCK_LIST: { + parse_block_list(optarg); + break; + } + + case OPT_SINGLE_STREAM: + opt_single_stream = true; + break; + + case OPT_NO_SPARSE: + io_no_sparse(); + break; + + case OPT_FILES: + args->files_delim = '\n'; + + // Fall through + + case OPT_FILES0: + if (args->files_name != NULL) + message_fatal(_("Only one file can be " + "specified with `--files' " + "or `--files0'.")); + + if (optarg == NULL) { + args->files_name = stdin_filename; + args->files_file = stdin; + } else { + args->files_name = optarg; + args->files_file = fopen(optarg, + c == OPT_FILES ? "r" : "rb"); + if (args->files_file == NULL) + // TRANSLATORS: This is a translatable + // string because French needs a space + // before the colon ("%s : %s"). + message_fatal(_("%s: %s"), optarg, + strerror(errno)); + } + + break; + + case OPT_NO_ADJUST: + opt_auto_adjust = false; + break; + + case OPT_FLUSH_TIMEOUT: + opt_flush_timeout = str_to_uint64("flush-timeout", + optarg, 0, UINT64_MAX); + break; + + default: + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + + return; +} + + +static void +parse_environment(args_info *args, char *argv0, const char *varname) +{ + char *env = getenv(varname); + if (env == NULL) + return; + + // We modify the string, so make a copy of it. + env = xstrdup(env); + + // Calculate the number of arguments in env. argc stats at one + // to include space for the program name. + int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + // NOTE: Cast to unsigned char is needed so that correct + // value gets passed to isspace(), which expects + // unsigned char cast to int. Casting to int is done + // automatically due to integer promotion, but we need to + // force char to unsigned char manually. Otherwise 8-bit + // characters would get promoted to wrong value if + // char is signed. + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + + // Keep argc small enough to fit into a signed int + // and to keep it usable for memory allocation. + if (++argc == my_min( + INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "%s contains too many " + "arguments"), varname); + } + } + + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); + argv[0] = argv0; + argv[argc] = NULL; + + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. + argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + env[i] = '\0'; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); + free(env); + + return; +} + + +extern void +args_parse(args_info *args, int argc, char **argv) +{ + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; + + // Check how we were called. + { + // Remove the leading path name, if any. + const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // Look for full command names instead of substrings like + // "un", "cat", and "lz" to reduce possibility of false + // positives when the programs have been renamed. + if (strstr(name, "xzcat") != NULL) { + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unxz") != NULL) { + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzcat") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unlzma") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzma") != NULL) { + opt_format = FORMAT_LZMA; + } + } + + // First the flags from the environment + parse_environment(args, argv[0], "XZ_DEFAULTS"); + parse_environment(args, argv[0], "XZ_OPT"); + + // Then from the command line + parse_real(args, argc, argv); + + // If encoder or decoder support was omitted at build time, + // show an error now so that the rest of the code can rely on + // that whatever is in opt_mode is also supported. +#ifndef HAVE_ENCODERS + if (opt_mode == MODE_COMPRESS) + message_fatal(_("Compression support was disabled " + "at build time")); +#endif +#ifndef HAVE_DECODERS + // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS + // is the only valid choice. + if (opt_mode != MODE_COMPRESS) + message_fatal(_("Decompression support was disabled " + "at build time")); +#endif + +#ifdef HAVE_LZIP_DECODER + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP) + message_fatal(_("Compression of lzip files (.lz) " + "is not supported")); +#endif + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + // When compressing, if no --format flag was used, or it + // was --format=auto, we compress to the .xz format. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = FORMAT_XZ; + + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. + if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW + && opt_mode != MODE_LIST)) + coder_set_compression_settings(); + + // If raw format is used and a custom suffix is not provided, + // then only stdout mode can be used when compressing or decompressing. + if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout + && (opt_mode == MODE_COMPRESS + || opt_mode == MODE_DECOMPRESS)) + message_fatal(_("With --format=raw, --suffix=.SUF is " + "required unless writing to stdout")); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. + args->arg_names = argv + optind; + args->arg_count = (unsigned int)(argc - optind); + } + + return; +} + + +#ifndef NDEBUG +extern void +args_free(void) +{ + free(opt_block_list); + return; +} +#endif |