From e1908ae95dd4c9d19ee4dfabfc8bf8a7f85943fe Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 18:58:41 +0200 Subject: Adding upstream version 9.4. Signed-off-by: Daniel Baumann --- src/basename.c | 190 ++ src/basenc.c | 1245 ++++++++++ src/blake2/b2sum.c | 397 ++++ src/blake2/b2sum.h | 20 + src/blake2/blake2-impl.h | 164 ++ src/blake2/blake2.h | 198 ++ src/blake2/blake2b-ref.c | 383 ++++ src/cat.c | 803 +++++++ src/chcon.c | 588 +++++ src/chgrp.c | 315 +++ src/chmod.c | 571 +++++ src/chown-core.c | 584 +++++ src/chown-core.h | 92 + src/chown.c | 330 +++ src/chroot.c | 431 ++++ src/cksum.c | 270 +++ src/cksum.h | 19 + src/cksum_pclmul.c | 189 ++ src/comm.c | 512 +++++ src/copy.c | 3492 ++++++++++++++++++++++++++++ src/copy.h | 332 +++ src/coreutils-arch.c | 33 + src/coreutils-dir.c | 33 + src/coreutils-vdir.c | 33 + src/coreutils.c | 206 ++ src/cp-hash.c | 155 ++ src/cp-hash.h | 5 + src/cp.c | 1290 +++++++++++ src/crctab.c | 437 ++++ src/csplit.c | 1486 ++++++++++++ src/cu-progs.mk | 114 + src/cut.c | 601 +++++ src/date.c | 679 ++++++ src/dcgen | 55 + src/dd.c | 2567 +++++++++++++++++++++ src/df.c | 1853 +++++++++++++++ src/digest.c | 1628 +++++++++++++ src/dircolors.c | 543 +++++ src/dircolors.h | 239 ++ src/dircolors.hin | 251 +++ src/dirname.c | 135 ++ src/du.c | 1139 ++++++++++ src/echo.c | 277 +++ src/env.c | 902 ++++++++ src/expand-common.c | 395 ++++ src/expand-common.h | 68 + src/expand.c | 235 ++ src/expr.c | 1014 +++++++++ src/extract-magic | 162 ++ src/factor.c | 2662 ++++++++++++++++++++++ src/false.c | 2 + src/find-mount-point.c | 111 + src/find-mount-point.h | 20 + src/fmt.c | 1045 +++++++++ src/fold.c | 307 +++ src/force-link.c | 184 ++ src/force-link.h | 4 + src/fs-is-local.h | 143 ++ src/fs.h | 140 ++ src/getlimits.c | 172 ++ src/group-list.c | 129 ++ src/group-list.h | 19 + src/groups.c | 144 ++ src/head.c | 1097 +++++++++ src/hostid.c | 85 + src/hostname.c | 112 + src/id.c | 460 ++++ src/install.c | 1050 +++++++++ src/ioblksize.h | 107 + src/iopoll.c | 239 ++ src/iopoll.h | 9 + src/join.c | 1186 ++++++++++ src/kill.c | 315 +++ src/lbracket.c | 2 + src/libstdbuf.c | 150 ++ src/link.c | 91 + src/ln.c | 681 ++++++ src/local.mk | 704 ++++++ src/logname.c | 80 + src/longlong.h | 2275 +++++++++++++++++++ src/ls-dir.c | 2 + src/ls-ls.c | 2 + src/ls-vdir.c | 2 + src/ls.c | 5647 ++++++++++++++++++++++++++++++++++++++++++++++ src/ls.h | 10 + src/make-prime-list.c | 240 ++ src/mkdir.c | 308 +++ src/mkfifo.c | 185 ++ src/mknod.c | 278 +++ src/mktemp.c | 342 +++ src/mv.c | 556 +++++ src/nice.c | 220 ++ src/nl.c | 619 +++++ src/nohup.c | 230 ++ src/nproc.c | 128 ++ src/numfmt.c | 1655 ++++++++++++++ src/od.c | 1980 ++++++++++++++++ src/operand2sig.c | 92 + src/operand2sig.h | 19 + src/paste.c | 517 +++++ src/pathchk.c | 419 ++++ src/pinky.c | 604 +++++ src/pr.c | 2867 +++++++++++++++++++++++ src/primes.h | 4014 ++++++++++++++++++++++++++++++++ src/printenv.c | 154 ++ src/printf.c | 725 ++++++ src/prog-fprintf.c | 37 + src/prog-fprintf.h | 25 + src/ptx.c | 2049 +++++++++++++++++ src/pwd.c | 392 ++++ src/readlink.c | 177 ++ src/realpath.c | 276 +++ src/relpath.c | 133 ++ src/relpath.h | 25 + src/remove.c | 648 ++++++ src/remove.h | 103 + src/rm.c | 369 +++ src/rmdir.c | 298 +++ src/runcon.c | 263 +++ src/selinux.c | 327 +++ src/selinux.h | 55 + src/seq.c | 724 ++++++ src/set-fields.c | 308 +++ src/set-fields.h | 44 + src/shred.c | 1273 +++++++++++ src/shuf.c | 603 +++++ src/single-binary.mk | 602 +++++ src/sleep.c | 144 ++ src/sort.c | 4846 +++++++++++++++++++++++++++++++++++++++ src/split.c | 1699 ++++++++++++++ src/stat.c | 1977 ++++++++++++++++ src/statx.h | 52 + src/stdbuf.c | 391 ++++ src/stty.c | 2366 +++++++++++++++++++ src/sum.c | 237 ++ src/sum.h | 18 + src/sync.c | 234 ++ src/system.h | 823 +++++++ src/tac-pipe.c | 260 +++ src/tac.c | 586 +++++ src/tail.c | 2478 ++++++++++++++++++++ src/tee.c | 344 +++ src/temp-stream.c | 165 ++ src/temp-stream.h | 6 + src/test.c | 862 +++++++ src/timeout.c | 614 +++++ src/touch.c | 438 ++++ src/tr.c | 1901 ++++++++++++++++ src/true.c | 80 + src/truncate.c | 384 ++++ src/tsort.c | 552 +++++ src/tty.c | 132 ++ src/uname-arch.c | 2 + src/uname-uname.c | 2 + src/uname.c | 379 ++++ src/uname.h | 7 + src/unexpand.c | 323 +++ src/uniq.c | 665 ++++++ src/unlink.c | 86 + src/uptime.c | 206 ++ src/users.c | 148 ++ src/wc.c | 1033 +++++++++ src/wc_avx2.c | 121 + src/who.c | 834 +++++++ src/whoami.c | 88 + src/yes.c | 129 ++ 166 files changed, 97543 insertions(+) create mode 100644 src/basename.c create mode 100644 src/basenc.c create mode 100644 src/blake2/b2sum.c create mode 100644 src/blake2/b2sum.h create mode 100644 src/blake2/blake2-impl.h create mode 100644 src/blake2/blake2.h create mode 100644 src/blake2/blake2b-ref.c create mode 100644 src/cat.c create mode 100644 src/chcon.c create mode 100644 src/chgrp.c create mode 100644 src/chmod.c create mode 100644 src/chown-core.c create mode 100644 src/chown-core.h create mode 100644 src/chown.c create mode 100644 src/chroot.c create mode 100644 src/cksum.c create mode 100644 src/cksum.h create mode 100644 src/cksum_pclmul.c create mode 100644 src/comm.c create mode 100644 src/copy.c create mode 100644 src/copy.h create mode 100644 src/coreutils-arch.c create mode 100644 src/coreutils-dir.c create mode 100644 src/coreutils-vdir.c create mode 100644 src/coreutils.c create mode 100644 src/cp-hash.c create mode 100644 src/cp-hash.h create mode 100644 src/cp.c create mode 100644 src/crctab.c create mode 100644 src/csplit.c create mode 100644 src/cu-progs.mk create mode 100644 src/cut.c create mode 100644 src/date.c create mode 100755 src/dcgen create mode 100644 src/dd.c create mode 100644 src/df.c create mode 100644 src/digest.c create mode 100644 src/dircolors.c create mode 100644 src/dircolors.h create mode 100644 src/dircolors.hin create mode 100644 src/dirname.c create mode 100644 src/du.c create mode 100644 src/echo.c create mode 100644 src/env.c create mode 100644 src/expand-common.c create mode 100644 src/expand-common.h create mode 100644 src/expand.c create mode 100644 src/expr.c create mode 100644 src/extract-magic create mode 100644 src/factor.c create mode 100644 src/false.c create mode 100644 src/find-mount-point.c create mode 100644 src/find-mount-point.h create mode 100644 src/fmt.c create mode 100644 src/fold.c create mode 100644 src/force-link.c create mode 100644 src/force-link.h create mode 100644 src/fs-is-local.h create mode 100644 src/fs.h create mode 100644 src/getlimits.c create mode 100644 src/group-list.c create mode 100644 src/group-list.h create mode 100644 src/groups.c create mode 100644 src/head.c create mode 100644 src/hostid.c create mode 100644 src/hostname.c create mode 100644 src/id.c create mode 100644 src/install.c create mode 100644 src/ioblksize.h create mode 100644 src/iopoll.c create mode 100644 src/iopoll.h create mode 100644 src/join.c create mode 100644 src/kill.c create mode 100644 src/lbracket.c create mode 100644 src/libstdbuf.c create mode 100644 src/link.c create mode 100644 src/ln.c create mode 100644 src/local.mk create mode 100644 src/logname.c create mode 100644 src/longlong.h create mode 100644 src/ls-dir.c create mode 100644 src/ls-ls.c create mode 100644 src/ls-vdir.c create mode 100644 src/ls.c create mode 100644 src/ls.h create mode 100644 src/make-prime-list.c create mode 100644 src/mkdir.c create mode 100644 src/mkfifo.c create mode 100644 src/mknod.c create mode 100644 src/mktemp.c create mode 100644 src/mv.c create mode 100644 src/nice.c create mode 100644 src/nl.c create mode 100644 src/nohup.c create mode 100644 src/nproc.c create mode 100644 src/numfmt.c create mode 100644 src/od.c create mode 100644 src/operand2sig.c create mode 100644 src/operand2sig.h create mode 100644 src/paste.c create mode 100644 src/pathchk.c create mode 100644 src/pinky.c create mode 100644 src/pr.c create mode 100644 src/primes.h create mode 100644 src/printenv.c create mode 100644 src/printf.c create mode 100644 src/prog-fprintf.c create mode 100644 src/prog-fprintf.h create mode 100644 src/ptx.c create mode 100644 src/pwd.c create mode 100644 src/readlink.c create mode 100644 src/realpath.c create mode 100644 src/relpath.c create mode 100644 src/relpath.h create mode 100644 src/remove.c create mode 100644 src/remove.h create mode 100644 src/rm.c create mode 100644 src/rmdir.c create mode 100644 src/runcon.c create mode 100644 src/selinux.c create mode 100644 src/selinux.h create mode 100644 src/seq.c create mode 100644 src/set-fields.c create mode 100644 src/set-fields.h create mode 100644 src/shred.c create mode 100644 src/shuf.c create mode 100644 src/single-binary.mk create mode 100644 src/sleep.c create mode 100644 src/sort.c create mode 100644 src/split.c create mode 100644 src/stat.c create mode 100644 src/statx.h create mode 100644 src/stdbuf.c create mode 100644 src/stty.c create mode 100644 src/sum.c create mode 100644 src/sum.h create mode 100644 src/sync.c create mode 100644 src/system.h create mode 100644 src/tac-pipe.c create mode 100644 src/tac.c create mode 100644 src/tail.c create mode 100644 src/tee.c create mode 100644 src/temp-stream.c create mode 100644 src/temp-stream.h create mode 100644 src/test.c create mode 100644 src/timeout.c create mode 100644 src/touch.c create mode 100644 src/tr.c create mode 100644 src/true.c create mode 100644 src/truncate.c create mode 100644 src/tsort.c create mode 100644 src/tty.c create mode 100644 src/uname-arch.c create mode 100644 src/uname-uname.c create mode 100644 src/uname.c create mode 100644 src/uname.h create mode 100644 src/unexpand.c create mode 100644 src/uniq.c create mode 100644 src/unlink.c create mode 100644 src/uptime.c create mode 100644 src/users.c create mode 100644 src/wc.c create mode 100644 src/wc_avx2.c create mode 100644 src/who.c create mode 100644 src/whoami.c create mode 100644 src/yes.c (limited to 'src') diff --git a/src/basename.c b/src/basename.c new file mode 100644 index 0000000..479815b --- /dev/null +++ b/src/basename.c @@ -0,0 +1,190 @@ +/* basename -- strip directory and suffix from file names + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include + +#include "system.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "basename" + +#define AUTHORS proper_name ("David MacKenzie") + +static struct option const longopts[] = +{ + {"multiple", no_argument, nullptr, 'a'}, + {"suffix", required_argument, nullptr, 's'}, + {"zero", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s NAME [SUFFIX]\n\ + or: %s OPTION... NAME...\n\ +"), + program_name, program_name); + fputs (_("\ +Print NAME with any leading directory components removed.\n\ +If specified, also remove a trailing SUFFIX.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -a, --multiple support multiple arguments and treat each as a NAME\n\ + -s, --suffix=SUFFIX remove a trailing SUFFIX; implies -a\n\ + -z, --zero end each output line with NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\ +\n\ +Examples:\n\ + %s /usr/bin/sort -> \"sort\"\n\ + %s include/stdio.h .h -> \"stdio\"\n\ + %s -s .h include/stdio.h -> \"stdio\"\n\ + %s -a any/str1 any/str2 -> \"str1\" followed by \"str2\"\n\ +"), + program_name, program_name, program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Remove SUFFIX from the end of NAME if it is there, unless NAME + consists entirely of SUFFIX. */ + +static void +remove_suffix (char *name, char const *suffix) +{ + char *np; + char const *sp; + + np = name + strlen (name); + sp = suffix + strlen (suffix); + + while (np > name && sp > suffix) + if (*--np != *--sp) + return; + if (np > name) + *np = '\0'; +} + +/* Perform the basename operation on STRING. If SUFFIX is non-null, remove + the trailing SUFFIX. Finally, output the result string. */ + +static void +perform_basename (char const *string, char const *suffix, bool use_nuls) +{ + char *name = base_name (string); + strip_trailing_slashes (name); + + /* Per POSIX, 'basename // /' must return '//' on platforms with + distinct //. On platforms with drive letters, this generalizes + to making 'basename c: :' return 'c:'. This rule is captured by + skipping suffix stripping if base_name returned an absolute path + or a drive letter (only possible if name is a file-system + root). */ + if (suffix && IS_RELATIVE_FILE_NAME (name) && ! FILE_SYSTEM_PREFIX_LEN (name)) + remove_suffix (name, suffix); + + fputs (name, stdout); + putchar (use_nuls ? '\0' : '\n'); + free (name); +} + +int +main (int argc, char **argv) +{ + bool multiple_names = false; + bool use_nuls = false; + char const *suffix = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while (true) + { + int c = getopt_long (argc, argv, "+as:z", longopts, nullptr); + + if (c == -1) + break; + + switch (c) + { + case 's': + suffix = optarg; + /* -s implies -a, so... */ + FALLTHROUGH; + + case 'a': + multiple_names = true; + break; + + case 'z': + use_nuls = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (argc < optind + 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (!multiple_names && optind + 2 < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 2])); + usage (EXIT_FAILURE); + } + + if (multiple_names) + { + for (; optind < argc; optind++) + perform_basename (argv[optind], suffix, use_nuls); + } + else + perform_basename (argv[optind], + optind + 2 == argc ? argv[optind + 1] : nullptr, + use_nuls); + + return EXIT_SUCCESS; +} diff --git a/src/basenc.c b/src/basenc.c new file mode 100644 index 0000000..ce259c4 --- /dev/null +++ b/src/basenc.c @@ -0,0 +1,1245 @@ +/* Base64, base32, and similar encoding/decoding strings or files. + Copyright (C) 2004-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Simon Josefsson . */ + +#include + +#include +#include +#include + +#include "system.h" +#include "c-ctype.h" +#include "fadvise.h" +#include "quote.h" +#include "xstrtol.h" +#include "xdectoint.h" +#include "xbinary-io.h" + +#if BASE_TYPE == 42 +# define AUTHORS \ + proper_name ("Simon Josefsson"), \ + proper_name ("Assaf Gordon") +#else +# define AUTHORS proper_name ("Simon Josefsson") +#endif + +#if BASE_TYPE == 32 +# include "base32.h" +# define PROGRAM_NAME "base32" +#elif BASE_TYPE == 64 +# include "base64.h" +# define PROGRAM_NAME "base64" +#elif BASE_TYPE == 42 +# include "base32.h" +# include "base64.h" +# include "assure.h" +# define PROGRAM_NAME "basenc" +#else +# error missing/invalid BASE_TYPE definition +#endif + + + +#if BASE_TYPE == 42 +enum +{ + BASE64_OPTION = CHAR_MAX + 1, + BASE64URL_OPTION, + BASE32_OPTION, + BASE32HEX_OPTION, + BASE16_OPTION, + BASE2MSBF_OPTION, + BASE2LSBF_OPTION, + Z85_OPTION +}; +#endif + +static struct option const long_options[] = +{ + {"decode", no_argument, 0, 'd'}, + {"wrap", required_argument, 0, 'w'}, + {"ignore-garbage", no_argument, 0, 'i'}, +#if BASE_TYPE == 42 + {"base64", no_argument, 0, BASE64_OPTION}, + {"base64url", no_argument, 0, BASE64URL_OPTION}, + {"base32", no_argument, 0, BASE32_OPTION}, + {"base32hex", no_argument, 0, BASE32HEX_OPTION}, + {"base16", no_argument, 0, BASE16_OPTION}, + {"base2msbf", no_argument, 0, BASE2MSBF_OPTION}, + {"base2lsbf", no_argument, 0, BASE2LSBF_OPTION}, + {"z85", no_argument, 0, Z85_OPTION}, +#endif + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]\n\ +"), program_name); + +#if BASE_TYPE == 42 + fputs (_("\ +basenc encode or decode FILE, or standard input, to standard output.\n\ +"), stdout); +#else + printf (_("\ +Base%d encode or decode FILE, or standard input, to standard output.\n\ +"), BASE_TYPE); +#endif + + emit_stdin_note (); + emit_mandatory_arg_note (); +#if BASE_TYPE == 42 + fputs (_("\ + --base64 same as 'base64' program (RFC4648 section 4)\n\ +"), stdout); + fputs (_("\ + --base64url file- and url-safe base64 (RFC4648 section 5)\n\ +"), stdout); + fputs (_("\ + --base32 same as 'base32' program (RFC4648 section 6)\n\ +"), stdout); + fputs (_("\ + --base32hex extended hex alphabet base32 (RFC4648 section 7)\n\ +"), stdout); + fputs (_("\ + --base16 hex encoding (RFC4648 section 8)\n\ +"), stdout); + fputs (_("\ + --base2msbf bit string with most significant bit (msb) first\n\ +"), stdout); + fputs (_("\ + --base2lsbf bit string with least significant bit (lsb) first\n\ +"), stdout); +#endif + fputs (_("\ + -d, --decode decode data\n\ + -i, --ignore-garbage when decoding, ignore non-alphabet characters\n\ + -w, --wrap=COLS wrap encoded lines after COLS character (default 76).\n\ + Use 0 to disable line wrapping\n\ +"), stdout); +#if BASE_TYPE == 42 + fputs (_("\ + --z85 ascii85-like encoding (ZeroMQ spec:32/Z85);\n\ + when encoding, input length must be a multiple of 4;\n\ + when decoding, input length must be a multiple of 5\n\ +"), stdout); +#endif + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); +#if BASE_TYPE == 42 + fputs (_("\ +\n\ +When decoding, the input may contain newlines in addition to the bytes of\n\ +the formal alphabet. Use --ignore-garbage to attempt to recover\n\ +from any other non-alphabet bytes in the encoded stream.\n\ +"), stdout); +#else + printf (_("\ +\n\ +The data are encoded as described for the %s alphabet in RFC 4648.\n\ +When decoding, the input may contain newlines in addition to the bytes of\n\ +the formal %s alphabet. Use --ignore-garbage to attempt to recover\n\ +from any other non-alphabet bytes in the encoded stream.\n"), + PROGRAM_NAME, PROGRAM_NAME); +#endif + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +#define ENC_BLOCKSIZE (1024 * 3 * 10) + +#if BASE_TYPE == 32 +# define BASE_LENGTH BASE32_LENGTH +/* Note that increasing this may decrease performance if --ignore-garbage + is used, because of the memmove operation below. */ +# define DEC_BLOCKSIZE (1024 * 5) + +/* Ensure that BLOCKSIZE is a multiple of 5 and 8. */ +static_assert (ENC_BLOCKSIZE % 40 == 0); /* Padding chars only on last block. */ +static_assert (DEC_BLOCKSIZE % 40 == 0); /* Complete encoded blocks are used. */ + +# define base_encode base32_encode +# define base_decode_context base32_decode_context +# define base_decode_ctx_init base32_decode_ctx_init +# define base_decode_ctx base32_decode_ctx +# define isbase isbase32 +#elif BASE_TYPE == 64 +# define BASE_LENGTH BASE64_LENGTH +/* Note that increasing this may decrease performance if --ignore-garbage + is used, because of the memmove operation below. */ +# define DEC_BLOCKSIZE (1024 * 3) + +/* Ensure that BLOCKSIZE is a multiple of 3 and 4. */ +static_assert (ENC_BLOCKSIZE % 12 == 0); /* Padding chars only on last block. */ +static_assert (DEC_BLOCKSIZE % 12 == 0); /* Complete encoded blocks are used. */ + +# define base_encode base64_encode +# define base_decode_context base64_decode_context +# define base_decode_ctx_init base64_decode_ctx_init +# define base_decode_ctx base64_decode_ctx +# define isbase isbase64 +#elif BASE_TYPE == 42 + + +# define BASE_LENGTH base_length + +/* Note that increasing this may decrease performance if --ignore-garbage + is used, because of the memmove operation below. */ +# define DEC_BLOCKSIZE (4200) +static_assert (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32*/ +static_assert (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64*/ + +static int (*base_length) (int i); +static bool (*isbase) (char ch); +static void (*base_encode) (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen); + +struct base16_decode_context +{ + char nibble; + bool have_nibble; +}; + +struct z85_decode_context +{ + int i; + unsigned char octets[5]; +}; + +struct base2_decode_context +{ + unsigned char octet; +}; + +struct base_decode_context +{ + int i; /* will be updated manually */ + union { + struct base64_decode_context base64; + struct base32_decode_context base32; + struct base16_decode_context base16; + struct base2_decode_context base2; + struct z85_decode_context z85; + } ctx; + char *inbuf; + idx_t bufsize; +}; +static void (*base_decode_ctx_init) (struct base_decode_context *ctx); +static bool (*base_decode_ctx) (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen); +#endif + + + + +#if BASE_TYPE == 42 + +static int +base64_length_wrapper (int len) +{ + return BASE64_LENGTH (len); +} + +static void +base64_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base64_decode_ctx_init (&ctx->ctx.base64); +} + +static bool +base64_decode_ctx_wrapper (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + bool b = base64_decode_ctx (&ctx->ctx.base64, in, inlen, out, outlen); + ctx->i = ctx->ctx.base64.i; + return b; +} + +static void +init_inbuf (struct base_decode_context *ctx) +{ + ctx->bufsize = DEC_BLOCKSIZE; + ctx->inbuf = xcharalloc (ctx->bufsize); +} + +static void +prepare_inbuf (struct base_decode_context *ctx, idx_t inlen) +{ + if (ctx->bufsize < inlen) + { + ctx->bufsize = inlen * 2; + ctx->inbuf = xnrealloc (ctx->inbuf, ctx->bufsize, sizeof (char)); + } +} + + +static void +base64url_encode (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen) +{ + base64_encode (in, inlen, out, outlen); + /* translate 62nd and 63rd characters */ + char *p = out; + while (outlen--) + { + if (*p == '+') + *p = '-'; + else if (*p == '/') + *p = '_'; + ++p; + } +} + +static bool +isbase64url (char ch) +{ + return (ch == '-' || ch == '_' + || (ch != '+' && ch != '/' && isbase64 (ch))); +} + +static void +base64url_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base64_decode_ctx_init (&ctx->ctx.base64); + init_inbuf (ctx); +} + + +static bool +base64url_decode_ctx_wrapper (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + prepare_inbuf (ctx, inlen); + memcpy (ctx->inbuf, in, inlen); + + /* translate 62nd and 63rd characters */ + idx_t i = inlen; + char *p = ctx->inbuf; + while (i--) + { + if (*p == '+' || *p == '/') + { + *outlen = 0; + return false; /* reject base64 input */ + } + else if (*p == '-') + *p = '+'; + else if (*p == '_') + *p = '/'; + ++p; + } + + bool b = base64_decode_ctx (&ctx->ctx.base64, ctx->inbuf, inlen, + out, outlen); + ctx->i = ctx->ctx.base64.i; + + return b; +} + + + +static int +base32_length_wrapper (int len) +{ + return BASE32_LENGTH (len); +} + +static void +base32_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base32_decode_ctx_init (&ctx->ctx.base32); +} + +static bool +base32_decode_ctx_wrapper (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + bool b = base32_decode_ctx (&ctx->ctx.base32, in, inlen, out, outlen); + ctx->i = ctx->ctx.base32.i; + return b; +} + +/* ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 + to + 0123456789ABCDEFGHIJKLMNOPQRSTUV */ +static const char base32_norm_to_hex[32 + 9] = { +/*0x32, 0x33, 0x34, 0x35, 0x36, 0x37, */ + 'Q', 'R', 'S', 'T', 'U', 'V', + + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + +/*0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, */ + '0', '1', '2', '3', '4', '5', '6', '7', + +/*0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, */ + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + +/*0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, */ + 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', + +/*0x59, 0x5a, */ + 'O', 'P', +}; + +/* 0123456789ABCDEFGHIJKLMNOPQRSTUV + to + ABCDEFGHIJKLMNOPQRSTUVWXYZ234567 */ +static const char base32_hex_to_norm[32 + 9] = { + /* from: 0x30 .. 0x39 ('0' to '9') */ + /* to:*/ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + + 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + + /* from: 0x41 .. 0x4A ('A' to 'J') */ + /* to:*/ 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + + /* from: 0x4B .. 0x54 ('K' to 'T') */ + /* to:*/ 'U', 'V', 'W', 'X', 'Y', 'Z', '2', '3', '4', '5', + + /* from: 0x55 .. 0x56 ('U' to 'V') */ + /* to:*/ '6', '7' +}; + + +inline static bool +isbase32hex (char ch) +{ + return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'V'); +} + + +static void +base32hex_encode (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen) +{ + base32_encode (in, inlen, out, outlen); + + for (char *p = out; outlen--; p++) + { + affirm (0x32 <= *p && *p <= 0x5a); /* LCOV_EXCL_LINE */ + *p = base32_norm_to_hex[*p - 0x32]; + } +} + + +static void +base32hex_decode_ctx_init_wrapper (struct base_decode_context *ctx) +{ + base32_decode_ctx_init (&ctx->ctx.base32); + init_inbuf (ctx); +} + + +static bool +base32hex_decode_ctx_wrapper (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + prepare_inbuf (ctx, inlen); + + idx_t i = inlen; + char *p = ctx->inbuf; + while (i--) + { + if (isbase32hex (*in)) + *p = base32_hex_to_norm[ (int)*in - 0x30]; + else + *p = *in; + ++p; + ++in; + } + + bool b = base32_decode_ctx (&ctx->ctx.base32, ctx->inbuf, inlen, + out, outlen); + ctx->i = ctx->ctx.base32.i; + + return b; +} + + +static bool +isbase16 (char ch) +{ + return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'F'); +} + +static int +base16_length (int len) +{ + return len * 2; +} + +static const char base16[16] = "0123456789ABCDEF"; + +static void +base16_encode (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen) +{ + while (inlen--) + { + unsigned char c = *in; + *out++ = base16[c >> 4]; + *out++ = base16[c & 0x0F]; + ++in; + } +} + + +static void +base16_decode_ctx_init (struct base_decode_context *ctx) +{ + init_inbuf (ctx); + ctx->ctx.base16.have_nibble = false; + ctx->i = 1; +} + + +static bool +base16_decode_ctx (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there is a dangling high nibble - we are missing the low nibble, + so return false - indicating an invalid input. */ + if (inlen == 0) + return !ctx->ctx.base16.have_nibble; + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + int nib = *in++; + if ('0' <= nib && nib <= '9') + nib -= '0'; + else if ('A' <= nib && nib <= 'F') + nib -= 'A' - 10; + else + return false; /* garbage - return false */ + + if (ctx->ctx.base16.have_nibble) + { + /* have both nibbles, write octet */ + *out++ = (ctx->ctx.base16.nibble << 4) + nib; + ++(*outlen); + } + else + { + /* Store higher nibble until next one arrives */ + ctx->ctx.base16.nibble = nib; + } + ctx->ctx.base16.have_nibble = !ctx->ctx.base16.have_nibble; + } + return true; +} + + + + +static int +z85_length (int len) +{ + /* Z85 does not allow padding, so no need to round to highest integer. */ + int outlen = (len * 5) / 4; + return outlen; +} + +static bool +isz85 (char ch) +{ + return c_isalnum (ch) || strchr (".-:+=^!/*?&<>()[]{}@%$#", ch) != nullptr; +} + +static char const z85_encoding[85] = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + ".-:+=^!/*?&<>()[]{}@%$#"; + +static void +z85_encode (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen) +{ + int i = 0; + unsigned char quad[4]; + idx_t outidx = 0; + + while (true) + { + if (inlen == 0) + { + /* no more input, exactly on 4 octet boundary. */ + if (i == 0) + return; + + /* currently, there's no way to return an error in encoding. */ + error (EXIT_FAILURE, 0, + _("invalid input (length must be multiple of 4 characters)")); + } + else + { + quad[i++] = *in++; + --inlen; + } + + /* Got a quad, encode it */ + if (i == 4) + { + int_fast64_t val = quad[0]; + val = (val << 24) + (quad[1] << 16) + (quad[2] << 8) + quad[3]; + + for (int j = 4; j >= 0; --j) + { + int c = val % 85; + val /= 85; + + /* NOTE: if there is padding (which is trimmed by z85 + before outputting the result), the output buffer 'out' + might not include enough allocated bytes for the padding, + so don't store them. */ + if (outidx + j < outlen) + out[j] = z85_encoding[c]; + } + out += 5; + outidx += 5; + i = 0; + } + } +} + +static void +z85_decode_ctx_init (struct base_decode_context *ctx) +{ + init_inbuf (ctx); + ctx->ctx.z85.i = 0; + ctx->i = 1; +} + + +# define Z85_LO_CTX_TO_32BIT_VAL(ctx) \ + (((ctx)->ctx.z85.octets[1] * 85 * 85 * 85) + \ + ((ctx)->ctx.z85.octets[2] * 85 * 85) + \ + ((ctx)->ctx.z85.octets[3] * 85) + \ + ((ctx)->ctx.z85.octets[4])) + + +# define Z85_HI_CTX_TO_32BIT_VAL(ctx) \ + ((int_fast64_t) (ctx)->ctx.z85.octets[0] * 85 * 85 * 85 * 85 ) + +/* + 0 - 9: 0 1 2 3 4 5 6 7 8 9 + 10 - 19: a b c d e f g h i j + 20 - 29: k l m n o p q r s t + 30 - 39: u v w x y z A B C D + 40 - 49: E F G H I J K L M N + 50 - 59: O P Q R S T U V W X + 60 - 69: Y Z . - : + = ^ ! / #dummy comment to workaround syntax-check + 70 - 79: * ? & < > ( ) [ ] { + 80 - 84: } @ % $ # +*/ +static signed char const z85_decoding[93] = { + 68, -1, 84, 83, 82, 72, -1, /* ! " # $ % & ' */ + 75, 76, 70, 65, -1, 63, 62, 69, /* ( ) * + , - . / */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, /* '0' to '9' */ + 64, -1, 73, 66, 74, 71, 81, /* : ; < = > ? @ */ + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, /* 'A' to 'J' */ + 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, /* 'K' to 'T' */ + 56, 57, 58, 59, 60, 61, /* 'U' to 'Z' */ + 77, -1, 78, 67, -1, -1, /* [ \ ] ^ _ ` */ + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* 'a' to 'j' */ + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, /* 'k' to 't' */ + 30, 31, 32, 33, 34, 35, /* 'u' to 'z' */ + 79, -1, 80 /* { | } */ +}; + +static bool +z85_decode_ctx (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there are dangling values - we are missing entries, + so return false - indicating an invalid input. */ + if (inlen == 0) + { + if (ctx->ctx.z85.i > 0) + { + /* Z85 variant does not allow padding - input must + be a multiple of 5 - so return error. */ + return false; + } + return true; + } + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + /* z85 decoding */ + unsigned char c = *in; + + if (c >= 33 && c <= 125) + { + signed char ch = z85_decoding[c - 33]; + if (ch < 0) + return false; /* garbage - return false */ + c = ch; + } + else + return false; /* garbage - return false */ + + ++in; + + ctx->ctx.z85.octets[ctx->ctx.z85.i++] = c; + if (ctx->ctx.z85.i == 5) + { + /* decode the lowest 4 octets, then check for overflows. */ + int_fast64_t val = Z85_LO_CTX_TO_32BIT_VAL (ctx); + + /* The Z85 spec and the reference implementation say nothing + about overflows. To be on the safe side, reject them. */ + + val += Z85_HI_CTX_TO_32BIT_VAL (ctx); + if ((val >> 24) & ~0xFF) + return false; + + *out++ = val >> 24; + *out++ = (val >> 16) & 0xFF; + *out++ = (val >> 8) & 0xFF; + *out++ = val & 0xFF; + + *outlen += 4; + + ctx->ctx.z85.i = 0; + } + } + ctx->i = ctx->ctx.z85.i; + return true; +} + + +inline static bool +isbase2 (char ch) +{ + return ch == '0' || ch == '1'; +} + +static int +base2_length (int len) +{ + return len * 8; +} + + +inline static void +base2msbf_encode (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen) +{ + while (inlen--) + { + unsigned char c = *in; + for (int i = 0; i < 8; i++) + { + *out++ = c & 0x80 ? '1' : '0'; + c <<= 1; + } + outlen -= 8; + ++in; + } +} + +inline static void +base2lsbf_encode (char const *restrict in, idx_t inlen, + char *restrict out, idx_t outlen) +{ + while (inlen--) + { + unsigned char c = *in; + for (int i = 0; i < 8; i++) + { + *out++ = c & 0x01 ? '1' : '0'; + c >>= 1; + } + outlen -= 8; + ++in; + } +} + + +static void +base2_decode_ctx_init (struct base_decode_context *ctx) +{ + init_inbuf (ctx); + ctx->ctx.base2.octet = 0; + ctx->i = 0; +} + + +static bool +base2lsbf_decode_ctx (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there is a dangling bit - we are missing some bits, + so return false - indicating an invalid input. */ + if (inlen == 0) + return ctx->i == 0; + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + if (!isbase2 (*in)) + return false; + + bool bit = (*in == '1'); + ctx->ctx.base2.octet |= bit << ctx->i; + ++ctx->i; + + if (ctx->i == 8) + { + *out++ = ctx->ctx.base2.octet; + ctx->ctx.base2.octet = 0; + ++*outlen; + ctx->i = 0; + } + + ++in; + } + + return true; +} + +static bool +base2msbf_decode_ctx (struct base_decode_context *ctx, + char const *restrict in, idx_t inlen, + char *restrict out, idx_t *outlen) +{ + bool ignore_lines = true; /* for now, always ignore them */ + + *outlen = 0; + + /* inlen==0 is request to flush output. + if there is a dangling bit - we are missing some bits, + so return false - indicating an invalid input. */ + if (inlen == 0) + return ctx->i == 0; + + while (inlen--) + { + if (ignore_lines && *in == '\n') + { + ++in; + continue; + } + + if (!isbase2 (*in)) + return false; + + bool bit = (*in == '1'); + if (ctx->i == 0) + ctx->i = 8; + --ctx->i; + ctx->ctx.base2.octet |= bit << ctx->i; + + if (ctx->i == 0) + { + *out++ = ctx->ctx.base2.octet; + ctx->ctx.base2.octet = 0; + ++*outlen; + ctx->i = 0; + } + + ++in; + } + + return true; +} + +#endif /* BASE_TYPE == 42, i.e., "basenc"*/ + + + +static void +wrap_write (char const *buffer, idx_t len, + idx_t wrap_column, idx_t *current_column, FILE *out) +{ + if (wrap_column == 0) + { + /* Simple write. */ + if (fwrite (buffer, 1, len, stdout) < len) + write_error (); + } + else + for (idx_t written = 0; written < len; ) + { + idx_t to_write = MIN (wrap_column - *current_column, len - written); + + if (to_write == 0) + { + if (fputc ('\n', out) == EOF) + write_error (); + *current_column = 0; + } + else + { + if (fwrite (buffer + written, 1, to_write, stdout) < to_write) + write_error (); + *current_column += to_write; + written += to_write; + } + } +} + +static _Noreturn void +finish_and_exit (FILE *in, char const *infile) +{ + if (fclose (in) != 0) + { + if (STREQ (infile, "-")) + error (EXIT_FAILURE, errno, _("closing standard input")); + else + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + } + + exit (EXIT_SUCCESS); +} + +static _Noreturn void +do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column) +{ + idx_t current_column = 0; + char *inbuf, *outbuf; + idx_t sum; + + inbuf = xmalloc (ENC_BLOCKSIZE); + outbuf = xmalloc (BASE_LENGTH (ENC_BLOCKSIZE)); + + do + { + idx_t n; + + sum = 0; + do + { + n = fread (inbuf + sum, 1, ENC_BLOCKSIZE - sum, in); + sum += n; + } + while (!feof (in) && !ferror (in) && sum < ENC_BLOCKSIZE); + + if (sum > 0) + { + /* Process input one block at a time. Note that ENC_BLOCKSIZE + is sized so that no pad chars will appear in output. */ + base_encode (inbuf, sum, outbuf, BASE_LENGTH (sum)); + + wrap_write (outbuf, BASE_LENGTH (sum), wrap_column, + ¤t_column, out); + } + } + while (!feof (in) && !ferror (in) && sum == ENC_BLOCKSIZE); + + /* When wrapping, terminate last line. */ + if (wrap_column && current_column > 0 && fputc ('\n', out) == EOF) + write_error (); + + if (ferror (in)) + error (EXIT_FAILURE, errno, _("read error")); + + finish_and_exit (in, infile); +} + +static _Noreturn void +do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage) +{ + char *inbuf, *outbuf; + idx_t sum; + struct base_decode_context ctx; + + inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE)); + outbuf = xmalloc (DEC_BLOCKSIZE); + +#if BASE_TYPE == 42 + ctx.inbuf = nullptr; +#endif + base_decode_ctx_init (&ctx); + + do + { + bool ok; + + sum = 0; + do + { + idx_t n = fread (inbuf + sum, + 1, BASE_LENGTH (DEC_BLOCKSIZE) - sum, in); + + if (ignore_garbage) + { + for (idx_t i = 0; n > 0 && i < n;) + { + if (isbase (inbuf[sum + i]) || inbuf[sum + i] == '=') + i++; + else + memmove (inbuf + sum + i, inbuf + sum + i + 1, --n - i); + } + } + + sum += n; + + if (ferror (in)) + error (EXIT_FAILURE, errno, _("read error")); + } + while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in)); + + /* The following "loop" is usually iterated just once. + However, when it processes the final input buffer, we want + to iterate it one additional time, but with an indicator + telling it to flush what is in CTX. */ + for (int k = 0; k < 1 + !!feof (in); k++) + { + if (k == 1 && ctx.i == 0) + break; + idx_t n = DEC_BLOCKSIZE; + ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n); + + if (fwrite (outbuf, 1, n, out) < n) + write_error (); + + if (!ok) + error (EXIT_FAILURE, 0, _("invalid input")); + } + } + while (!feof (in)); + + finish_and_exit (in, infile); +} + +int +main (int argc, char **argv) +{ + int opt; + FILE *input_fh; + char const *infile; + + /* True if --decode has been given and we should decode data. */ + bool decode = false; + /* True if we should ignore non-base-alphabetic characters. */ + bool ignore_garbage = false; + /* Wrap encoded data around the 76th column, by default. */ + idx_t wrap_column = 76; + +#if BASE_TYPE == 42 + int base_type = 0; +#endif + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((opt = getopt_long (argc, argv, "diw:", long_options, nullptr)) != -1) + switch (opt) + { + case 'd': + decode = true; + break; + + case 'w': + { + intmax_t w; + strtol_error s_err = xstrtoimax (optarg, nullptr, 10, &w, ""); + if (LONGINT_OVERFLOW < s_err || w < 0) + error (EXIT_FAILURE, 0, "%s: %s", + _("invalid wrap size"), quote (optarg)); + wrap_column = s_err == LONGINT_OVERFLOW || IDX_MAX < w ? 0 : w; + } + break; + + case 'i': + ignore_garbage = true; + break; + +#if BASE_TYPE == 42 + case BASE64_OPTION: + case BASE64URL_OPTION: + case BASE32_OPTION: + case BASE32HEX_OPTION: + case BASE16_OPTION: + case BASE2MSBF_OPTION: + case BASE2LSBF_OPTION: + case Z85_OPTION: + base_type = opt; + break; +#endif + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + break; + } + +#if BASE_TYPE == 42 + switch (base_type) + { + case BASE64_OPTION: + base_length = base64_length_wrapper; + isbase = isbase64; + base_encode = base64_encode; + base_decode_ctx_init = base64_decode_ctx_init_wrapper; + base_decode_ctx = base64_decode_ctx_wrapper; + break; + + case BASE64URL_OPTION: + base_length = base64_length_wrapper; + isbase = isbase64url; + base_encode = base64url_encode; + base_decode_ctx_init = base64url_decode_ctx_init_wrapper; + base_decode_ctx = base64url_decode_ctx_wrapper; + break; + + case BASE32_OPTION: + base_length = base32_length_wrapper; + isbase = isbase32; + base_encode = base32_encode; + base_decode_ctx_init = base32_decode_ctx_init_wrapper; + base_decode_ctx = base32_decode_ctx_wrapper; + break; + + case BASE32HEX_OPTION: + base_length = base32_length_wrapper; + isbase = isbase32hex; + base_encode = base32hex_encode; + base_decode_ctx_init = base32hex_decode_ctx_init_wrapper; + base_decode_ctx = base32hex_decode_ctx_wrapper; + break; + + case BASE16_OPTION: + base_length = base16_length; + isbase = isbase16; + base_encode = base16_encode; + base_decode_ctx_init = base16_decode_ctx_init; + base_decode_ctx = base16_decode_ctx; + break; + + case BASE2MSBF_OPTION: + base_length = base2_length; + isbase = isbase2; + base_encode = base2msbf_encode; + base_decode_ctx_init = base2_decode_ctx_init; + base_decode_ctx = base2msbf_decode_ctx; + break; + + case BASE2LSBF_OPTION: + base_length = base2_length; + isbase = isbase2; + base_encode = base2lsbf_encode; + base_decode_ctx_init = base2_decode_ctx_init; + base_decode_ctx = base2lsbf_decode_ctx; + break; + + case Z85_OPTION: + base_length = z85_length; + isbase = isz85; + base_encode = z85_encode; + base_decode_ctx_init = z85_decode_ctx_init; + base_decode_ctx = z85_decode_ctx; + break; + + default: + error (0, 0, _("missing encoding type")); + usage (EXIT_FAILURE); + } +#endif + + if (argc - optind > 1) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + if (optind < argc) + infile = argv[optind]; + else + infile = "-"; + + if (STREQ (infile, "-")) + { + xset_binary_mode (STDIN_FILENO, O_BINARY); + input_fh = stdin; + } + else + { + input_fh = fopen (infile, "rb"); + if (input_fh == nullptr) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + } + + fadvise (input_fh, FADVISE_SEQUENTIAL); + + if (decode) + do_decode (input_fh, infile, stdout, ignore_garbage); + else + do_encode (input_fh, infile, stdout, wrap_column); +} diff --git a/src/blake2/b2sum.c b/src/blake2/b2sum.c new file mode 100644 index 0000000..1a7e99f --- /dev/null +++ b/src/blake2/b2sum.c @@ -0,0 +1,397 @@ +/* + BLAKE2 reference source code package - b2sum tool + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include + +#include +#include +#include + +#include "blake2.h" + +#if 0 +/* This will help compatibility with coreutils */ +int blake2s_stream( FILE *stream, void *resstream, size_t outbytes ) +{ + int ret = -1; + size_t sum, n; + blake2s_state S[1]; + static const size_t buffer_length = 32768; + uint8_t *buffer = ( uint8_t * )malloc( buffer_length ); + + if( !buffer ) return -1; + + blake2s_init( S, outbytes ); + + while( 1 ) + { + sum = 0; + + while( 1 ) + { + n = fread( buffer + sum, 1, buffer_length - sum, stream ); + sum += n; + + if( buffer_length == sum ) + break; + + if( 0 == n ) + { + if( ferror( stream ) ) + goto cleanup_buffer; + + goto final_process; + } + + if( feof( stream ) ) + goto final_process; + } + + blake2s_update( S, buffer, buffer_length ); + } + +final_process:; + + if( sum > 0 ) blake2s_update( S, buffer, sum ); + + blake2s_final( S, resstream, outbytes ); + ret = 0; +cleanup_buffer: + free( buffer ); + return ret; +} +#endif + +int blake2b_stream( FILE *stream, void *resstream, size_t outbytes ) +{ + int ret = -1; + size_t sum, n; + blake2b_state S[1]; + static const size_t buffer_length = 32768; + uint8_t *buffer = ( uint8_t * )malloc( buffer_length ); + + if( !buffer ) return -1; + + blake2b_init( S, outbytes ); + + while( 1 ) + { + sum = 0; + + while( 1 ) + { + n = fread( buffer + sum, 1, buffer_length - sum, stream ); + sum += n; + + if( buffer_length == sum ) + break; + + if( 0 == n ) + { + if( ferror( stream ) ) + goto cleanup_buffer; + + goto final_process; + } + + if( feof( stream ) ) + goto final_process; + } + + blake2b_update( S, buffer, buffer_length ); + } + +final_process:; + + if( sum > 0 ) blake2b_update( S, buffer, sum ); + + blake2b_final( S, resstream, outbytes ); + ret = 0; +cleanup_buffer: + free( buffer ); + return ret; +} + +#if 0 + +int blake2sp_stream( FILE *stream, void *resstream, size_t outbytes ) +{ + int ret = -1; + size_t sum, n; + blake2sp_state S[1]; + static const size_t buffer_length = 16 * ( 1UL << 20 ); + uint8_t *buffer = ( uint8_t * )malloc( buffer_length ); + + if( !buffer ) return -1; + + blake2sp_init( S, outbytes ); + + while( 1 ) + { + sum = 0; + + while( 1 ) + { + n = fread( buffer + sum, 1, buffer_length - sum, stream ); + sum += n; + + if( buffer_length == sum ) + break; + + if( 0 == n ) + { + if( ferror( stream ) ) + goto cleanup_buffer; + + goto final_process; + } + + if( feof( stream ) ) + goto final_process; + } + + blake2sp_update( S, buffer, buffer_length ); + } + +final_process:; + + if( sum > 0 ) blake2sp_update( S, buffer, sum ); + + blake2sp_final( S, resstream, outbytes ); + ret = 0; +cleanup_buffer: + free( buffer ); + return ret; +} + + +int blake2bp_stream( FILE *stream, void *resstream, size_t outbytes ) +{ + int ret = -1; + size_t sum, n; + blake2bp_state S[1]; + static const size_t buffer_length = 16 * ( 1UL << 20 ); + uint8_t *buffer = ( uint8_t * )malloc( buffer_length ); + + if( !buffer ) return -1; + + blake2bp_init( S, outbytes ); + + while( 1 ) + { + sum = 0; + + while( 1 ) + { + n = fread( buffer + sum, 1, buffer_length - sum, stream ); + sum += n; + + if( buffer_length == sum ) + break; + + if( 0 == n ) + { + if( ferror( stream ) ) + goto cleanup_buffer; + + goto final_process; + } + + if( feof( stream ) ) + goto final_process; + } + + blake2bp_update( S, buffer, buffer_length ); + } + +final_process:; + + if( sum > 0 ) blake2bp_update( S, buffer, sum ); + + blake2bp_final( S, resstream, outbytes ); + ret = 0; +cleanup_buffer: + free( buffer ); + return ret; +} + +typedef int ( *blake2fn )( FILE *, void *, size_t ); + + +static void usage( char **argv, int errcode ) +{ + FILE *out = errcode ? stderr : stdout; + fprintf( out, "Usage: %s [OPTION]... [FILE]...\n", argv[0] ); + fprintf( out, "\n" ); + fprintf( out, "With no FILE, or when FILE is -, read standard input.\n" ); + fprintf( out, "\n" ); + fprintf( out, " -a hash algorithm (blake2b is default): \n" + " [blake2b|blake2s|blake2bp|blake2sp]\n" ); + fprintf( out, " -l digest length in bits, must not exceed the maximum for\n" + " the selected algorithm and must be a multiple of 8\n" ); + fprintf( out, " --tag create a BSD-style checksum\n" ); + fprintf( out, " --help display this help and exit\n" ); + exit( errcode ); +} + + +int main( int argc, char **argv ) +{ + blake2fn blake2_stream = blake2b_stream; + unsigned long maxbytes = BLAKE2B_OUTBYTES; + const char *algorithm = "BLAKE2b"; + unsigned long outbytes = 0; + unsigned char hash[BLAKE2B_OUTBYTES] = {0}; + bool bsdstyle = false; + int c, i; + opterr = 1; + + while( 1 ) + { + int option_index = 0; + char *end = nullptr; + unsigned long outbits; + static struct option long_options[] = { + { "help", no_argument, 0, 0 }, + { "tag", no_argument, 0, 0 }, + { nullptr, 0, nullptr, 0 } + }; + + c = getopt_long( argc, argv, "a:l:", long_options, &option_index ); + if( c == -1 ) break; + switch( c ) + { + case 'a': + if( 0 == strcmp( optarg, "blake2b" ) ) + { + blake2_stream = blake2b_stream; + maxbytes = BLAKE2B_OUTBYTES; + algorithm = "BLAKE2b"; + } + else if ( 0 == strcmp( optarg, "blake2s" ) ) + { + blake2_stream = blake2s_stream; + maxbytes = BLAKE2S_OUTBYTES; + algorithm = "BLAKE2s"; + } + else if ( 0 == strcmp( optarg, "blake2bp" ) ) + { + blake2_stream = blake2bp_stream; + maxbytes = BLAKE2B_OUTBYTES; + algorithm = "BLAKE2bp"; + } + else if ( 0 == strcmp( optarg, "blake2sp" ) ) + { + blake2_stream = blake2sp_stream; + maxbytes = BLAKE2S_OUTBYTES; + algorithm = "BLAKE2sp"; + } + else + { + printf( "Invalid function name: `%s'\n", optarg ); + usage( argv, 111 ); + } + + break; + + case 'l': + outbits = strtoul(optarg, &end, 10); + if( !end || *end != '\0' || outbits % 8 != 0) + { + printf( "Invalid length argument: `%s'\n", optarg); + usage( argv, 111 ); + } + outbytes = outbits / 8; + break; + + case 0: + if( 0 == strcmp( "help", long_options[option_index].name ) ) + usage( argv, 0 ); + else if( 0 == strcmp( "tag", long_options[option_index].name ) ) + bsdstyle = true; + break; + + case '?': + usage( argv, 1 ); + break; + } + } + + if(outbytes > maxbytes) + { + printf( "Invalid length argument: %lu\n", outbytes * 8 ); + printf( "Maximum digest length for %s is %lu\n", algorithm, maxbytes * 8 ); + usage( argv, 111 ); + } + else if( outbytes == 0 ) + outbytes = maxbytes; + + if( optind == argc ) + argv[argc++] = (char *) "-"; + + for( i = optind; i < argc; ++i ) + { + FILE *f = nullptr; + if( argv[i][0] == '-' && argv[i][1] == '\0' ) + f = stdin; + else + f = fopen( argv[i], "rb" ); + + if( !f ) + { + fprintf( stderr, "Could not open `%s': %s\n", argv[i], strerror( errno ) ); + continue; + } + + if( blake2_stream( f, hash, outbytes ) < 0 ) + { + fprintf( stderr, "Failed to hash `%s'\n", argv[i] ); + } + else + { + size_t j; + if( bsdstyle ) + { + if( outbytes < maxbytes ) + printf( "%s-%lu (%s) = ", algorithm, outbytes * 8, argv[i] ); + else + printf( "%s (%s) = ", algorithm, argv[i] ); + } + + for( j = 0; j < outbytes; ++j ) + printf( "%02x", hash[j] ); + + if( bsdstyle ) + printf( "\n" ); + else + printf( " %s\n", argv[i] ); + } + + if( f == stdin ) + clearerr( f ); + else if( fclose( f ) != 0 ) + fprintf( stderr, "Could not close `%s': %s\n", argv[i], strerror( errno ) ); + } + + return 0; +} +#endif diff --git a/src/blake2/b2sum.h b/src/blake2/b2sum.h new file mode 100644 index 0000000..6517b25 --- /dev/null +++ b/src/blake2/b2sum.h @@ -0,0 +1,20 @@ +/* + BLAKE2 reference source code package - b2sum tool + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +int blake2b_stream (FILE *stream, void *resstream, size_t outbytes) + _GL_ATTRIBUTE_NONNULL ((1)); +typedef int ( *blake2fn )( FILE *, void *, size_t ); +#define BLAKE2S_OUTBYTES 32 +#define BLAKE2B_OUTBYTES 64 diff --git a/src/blake2/blake2-impl.h b/src/blake2/blake2-impl.h new file mode 100644 index 0000000..02a1b8f --- /dev/null +++ b/src/blake2/blake2-impl.h @@ -0,0 +1,164 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#ifndef BLAKE2_IMPL_H +#define BLAKE2_IMPL_H + +#ifndef WORDS_BIGENDIAN +# define NATIVE_LITTLE_ENDIAN 1 +#endif + +#include +#include + +#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) + #if defined(_MSC_VER) + #define BLAKE2_INLINE __inline + #elif defined(__GNUC__) + #define BLAKE2_INLINE __inline__ + #else + #define BLAKE2_INLINE + #endif +#else + #define BLAKE2_INLINE inline +#endif + +static BLAKE2_INLINE uint32_t load32( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint32_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8) | + (( uint32_t )( p[2] ) << 16) | + (( uint32_t )( p[3] ) << 24) ; +#endif +} + +static BLAKE2_INLINE uint64_t load64( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint64_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) | + (( uint64_t )( p[6] ) << 48) | + (( uint64_t )( p[7] ) << 56) ; +#endif +} + +static BLAKE2_INLINE uint16_t load16( const void *src ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + uint16_t w; + memcpy(&w, src, sizeof w); + return w; +#else + const uint8_t *p = ( const uint8_t * )src; + return ( uint16_t )((( uint32_t )( p[0] ) << 0) | + (( uint32_t )( p[1] ) << 8)); +#endif +} + +static BLAKE2_INLINE void store16( void *dst, uint16_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + *p++ = ( uint8_t )w; w >>= 8; + *p++ = ( uint8_t )w; +#endif +} + +static BLAKE2_INLINE void store32( void *dst, uint32_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +#endif +} + +static BLAKE2_INLINE void store64( void *dst, uint64_t w ) +{ +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(dst, &w, sizeof w); +#else + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); + p[6] = (uint8_t)(w >> 48); + p[7] = (uint8_t)(w >> 56); +#endif +} + +static BLAKE2_INLINE uint64_t load48( const void *src ) +{ + const uint8_t *p = ( const uint8_t * )src; + return (( uint64_t )( p[0] ) << 0) | + (( uint64_t )( p[1] ) << 8) | + (( uint64_t )( p[2] ) << 16) | + (( uint64_t )( p[3] ) << 24) | + (( uint64_t )( p[4] ) << 32) | + (( uint64_t )( p[5] ) << 40) ; +} + +static BLAKE2_INLINE void store48( void *dst, uint64_t w ) +{ + uint8_t *p = ( uint8_t * )dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); + p[4] = (uint8_t)(w >> 32); + p[5] = (uint8_t)(w >> 40); +} + +static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 32 - c ) ); +} + +static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) +{ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/* prevents compiler optimizing out memset() */ +static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n) +{ + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(v, 0, n); +} + +#endif diff --git a/src/blake2/blake2.h b/src/blake2/blake2.h new file mode 100644 index 0000000..be8b176 --- /dev/null +++ b/src/blake2/blake2.h @@ -0,0 +1,198 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ +#ifndef BLAKE2_H +#define BLAKE2_H + +#include +#include + +/* Pack a structure if possible. This might save space, and is not + needed for correctness. */ +#ifdef _MSC_VER +# define BLAKE2_PACKED(x) __pragma (pack (push, 1)) x __pragma (pack (pop)) +#else +# define BLAKE2_PACKED(x) x _GL_ATTRIBUTE_PACKED +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + + enum blake2s_constant + { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32, + BLAKE2S_SALTBYTES = 8, + BLAKE2S_PERSONALBYTES = 8 + }; + + enum blake2b_constant + { + BLAKE2B_BLOCKBYTES = 128, + BLAKE2B_OUTBYTES = 64, + BLAKE2B_KEYBYTES = 64, + BLAKE2B_SALTBYTES = 16, + BLAKE2B_PERSONALBYTES = 16 + }; + + typedef struct blake2s_state__ + { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; + uint8_t last_node; + } blake2s_state; + + typedef struct blake2b_state__ + { + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; + uint8_t last_node; + } blake2b_state; + + typedef struct blake2sp_state__ + { + blake2s_state S[8][1]; + blake2s_state R[1]; + uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; + } blake2sp_state; + + typedef struct blake2bp_state__ + { + blake2b_state S[4][1]; + blake2b_state R[1]; + uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; + } blake2bp_state; + + + BLAKE2_PACKED(struct blake2s_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint16_t xof_length; /* 14 */ + uint8_t node_depth; /* 15 */ + uint8_t inner_length; /* 16 */ + /* uint8_t reserved[0]; */ + uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */ + uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */ + }); + + typedef struct blake2s_param__ blake2s_param; + + BLAKE2_PACKED(struct blake2b_param__ + { + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t leaf_length; /* 8 */ + uint32_t node_offset; /* 12 */ + uint32_t xof_length; /* 16 */ + uint8_t node_depth; /* 17 */ + uint8_t inner_length; /* 18 */ + uint8_t reserved[14]; /* 32 */ + uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ + uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + }); + + typedef struct blake2b_param__ blake2b_param; + + typedef struct blake2xs_state__ + { + blake2s_state S[1]; + blake2s_param P[1]; + } blake2xs_state; + + typedef struct blake2xb_state__ + { + blake2b_state S[1]; + blake2b_param P[1]; + } blake2xb_state; + + /* Padded structs result in a compile-time error */ + enum { + BLAKE2_DUMMY_1 = 1 / (sizeof (blake2s_param) == BLAKE2S_OUTBYTES), + BLAKE2_DUMMY_2 = 1 / (sizeof (blake2b_param) == BLAKE2B_OUTBYTES) + }; + + /* Streaming API */ + int blake2s_init( blake2s_state *S, size_t outlen ); + int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2s_init_param( blake2s_state *S, const blake2s_param *P ); + int blake2s_update( blake2s_state *S, const void *in, size_t inlen ); + int blake2s_final( blake2s_state *S, void *out, size_t outlen ); + + int blake2b_init( blake2b_state *S, size_t outlen ); + int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2b_init_param (blake2b_state *S, const blake2b_param *P) + _GL_ATTRIBUTE_NONNULL (); + int blake2b_update( blake2b_state *S, const void *in, size_t inlen ); + int blake2b_final( blake2b_state *S, void *out, size_t outlen ); + + int blake2sp_init( blake2sp_state *S, size_t outlen ); + int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen ); + int blake2sp_final( blake2sp_state *S, void *out, size_t outlen ); + + int blake2bp_init( blake2bp_state *S, size_t outlen ); + int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen ); + int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen ); + int blake2bp_final( blake2bp_state *S, void *out, size_t outlen ); + + /* Variable output length API */ + int blake2xs_init( blake2xs_state *S, const size_t outlen ); + int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen ); + int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ); + int blake2xs_final(blake2xs_state *S, void *out, size_t outlen); + + int blake2xb_init( blake2xb_state *S, const size_t outlen ); + int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen ); + int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ); + int blake2xb_final(blake2xb_state *S, void *out, size_t outlen); + + /* Simple API */ + int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + + /* This is simply an alias for blake2b */ + int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ); + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/src/blake2/blake2b-ref.c b/src/blake2/blake2b-ref.c new file mode 100644 index 0000000..436edca --- /dev/null +++ b/src/blake2/blake2b-ref.c @@ -0,0 +1,383 @@ +/* + BLAKE2 reference source code package - reference C implementations + + Copyright 2012, Samuel Neves . You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + + - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 + + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include + +#include "blake2.h" +#include "blake2-impl.h" + +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +static const uint8_t blake2b_sigma[12][16] = +{ + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + + +static void blake2b_set_lastnode( blake2b_state *S ) +{ + S->f[1] = (uint64_t)-1; +} + +/* Some helper functions, not necessarily useful */ +static int blake2b_is_lastblock( const blake2b_state *S ) +{ + return S->f[0] != 0; +} + +static void blake2b_set_lastblock( blake2b_state *S ) +{ + if( S->last_node ) blake2b_set_lastnode( S ); + + S->f[0] = (uint64_t)-1; +} + +static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) +{ + S->t[0] += inc; + S->t[1] += ( S->t[0] < inc ); +} + +static void blake2b_init0( blake2b_state *S ) +{ + size_t i; + memset( S, 0, sizeof( blake2b_state ) ); + + for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; +} + +/* init xors IV with input parameter block */ +int blake2b_init_param( blake2b_state *S, const blake2b_param *P ) +{ + const uint8_t *p = ( const uint8_t * )( P ); + size_t i; + + blake2b_init0( S ); + + /* IV XOR ParamBlock */ + for( i = 0; i < 8; ++i ) + S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); + + S->outlen = P->digest_length; + return 0; +} + + + +int blake2b_init( blake2b_state *S, size_t outlen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + P->digest_length = (uint8_t)outlen; + P->key_length = 0; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + return blake2b_init_param( S, P ); +} + + +int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen ) +{ + blake2b_param P[1]; + + if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1; + + if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1; + + P->digest_length = (uint8_t)outlen; + P->key_length = (uint8_t)keylen; + P->fanout = 1; + P->depth = 1; + store32( &P->leaf_length, 0 ); + store32( &P->node_offset, 0 ); + store32( &P->xof_length, 0 ); + P->node_depth = 0; + P->inner_length = 0; + memset( P->reserved, 0, sizeof( P->reserved ) ); + memset( P->salt, 0, sizeof( P->salt ) ); + memset( P->personal, 0, sizeof( P->personal ) ); + + if( blake2b_init_param( S, P ) < 0 ) return -1; + + { + uint8_t block[BLAKE2B_BLOCKBYTES]; + memset( block, 0, BLAKE2B_BLOCKBYTES ); + memcpy( block, key, keylen ); + blake2b_update( S, block, BLAKE2B_BLOCKBYTES ); + secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */ + } + return 0; +} + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + +static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) +{ + uint64_t m[16]; + uint64_t v[16]; + size_t i; + + for( i = 0; i < 16; ++i ) { + m[i] = load64( block + i * sizeof( m[i] ) ); + } + + for( i = 0; i < 8; ++i ) { + v[i] = S->h[i]; + } + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + + ROUND( 0 ); + ROUND( 1 ); + ROUND( 2 ); + ROUND( 3 ); + ROUND( 4 ); + ROUND( 5 ); + ROUND( 6 ); + ROUND( 7 ); + ROUND( 8 ); + ROUND( 9 ); + ROUND( 10 ); + ROUND( 11 ); + + for( i = 0; i < 8; ++i ) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } +} + +#undef G +#undef ROUND + +int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) +{ + const unsigned char * in = (const unsigned char *)pin; + if( inlen > 0 ) + { + size_t left = S->buflen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + if( inlen > fill ) + { + S->buflen = 0; + memcpy( S->buf + left, in, fill ); /* Fill buffer */ + blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); + blake2b_compress( S, S->buf ); /* Compress */ + in += fill; inlen -= fill; + while(inlen > BLAKE2B_BLOCKBYTES) { + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + blake2b_compress( S, in ); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } + } + memcpy( S->buf + S->buflen, in, inlen ); + S->buflen += inlen; + } + return 0; +} + +int blake2b_final( blake2b_state *S, void *out, size_t outlen ) +{ + uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; + size_t i; + + if( out == NULL || outlen < S->outlen ) + return -1; + + if( blake2b_is_lastblock( S ) ) + return -1; + + blake2b_increment_counter( S, S->buflen ); + blake2b_set_lastblock( S ); + memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ + blake2b_compress( S, S->buf ); + + for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ + store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); + + memcpy( out, buffer, S->outlen ); + secure_zero_memory(buffer, sizeof(buffer)); + return 0; +} + +/* inlen, at least, should be uint64_t. Others can be size_t. */ +int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) +{ + blake2b_state S[1]; + + /* Verify parameters */ + if ( NULL == in && inlen > 0 ) return -1; + + if ( NULL == out ) return -1; + + if( NULL == key && keylen > 0 ) return -1; + + if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1; + + if( keylen > BLAKE2B_KEYBYTES ) return -1; + + if( keylen > 0 ) + { + if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1; + } + else + { + if( blake2b_init( S, outlen ) < 0 ) return -1; + } + + blake2b_update( S, ( const uint8_t * )in, inlen ); + blake2b_final( S, out, outlen ); + return 0; +} + +int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) { + return blake2b(out, outlen, in, inlen, key, keylen); +} + +#if defined(SUPERCOP) +int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen ) +{ + return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 ); +} +#endif + +#if defined(BLAKE2B_SELFTEST) +#include +#include "blake2-kat.h" +int main( void ) +{ + uint8_t key[BLAKE2B_KEYBYTES]; + uint8_t buf[BLAKE2_KAT_LENGTH]; + size_t i, step; + + for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) + key[i] = ( uint8_t )i; + + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + buf[i] = ( uint8_t )i; + + /* Test simple API */ + for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) + { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES ); + + if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) ) + { + goto fail; + } + } + + /* Test streaming API */ + for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) { + for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) { + uint8_t hash[BLAKE2B_OUTBYTES]; + blake2b_state S; + uint8_t * p = buf; + size_t mlen = i; + int err = 0; + + if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) { + goto fail; + } + + while (mlen >= step) { + if ( (err = blake2b_update(&S, p, step)) < 0 ) { + goto fail; + } + mlen -= step; + p += step; + } + if ( (err = blake2b_update(&S, p, mlen)) < 0) { + goto fail; + } + if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) { + goto fail; + } + + if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) { + goto fail; + } + } + } + + puts( "ok" ); + return 0; +fail: + puts("error"); + return -1; +} +#endif diff --git a/src/cat.c b/src/cat.c new file mode 100644 index 0000000..ac39a48 --- /dev/null +++ b/src/cat.c @@ -0,0 +1,803 @@ +/* cat -- concatenate files and print on the standard output. + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Differences from the Unix cat: + * Always unbuffered, -u is ignored. + * Usually much faster than other versions of cat, the difference + is especially apparent when using the -v option. + + By tege@sics.se, Torbjörn Granlund, advised by rms, Richard Stallman. */ + +#include + +#include +#include +#include +#include + +#if HAVE_STROPTS_H +# include +#endif +#include + +#include "system.h" +#include "alignalloc.h" +#include "ioblksize.h" +#include "fadvise.h" +#include "full-write.h" +#include "safe-read.h" +#include "xbinary-io.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "cat" + +#define AUTHORS \ + proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name ("Richard M. Stallman") + +/* Name of input file. May be "-". */ +static char const *infile; + +/* Descriptor on which input file is open. */ +static int input_desc; + +/* Buffer for line numbers. + An 11 digit counter may overflow within an hour on a P2/466, + an 18 digit counter needs about 1000y */ +#define LINE_COUNTER_BUF_LEN 20 +static char line_buf[LINE_COUNTER_BUF_LEN] = + { + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', + '\t', '\0' + }; + +/* Position in 'line_buf' where printing starts. This will not change + unless the number of lines is larger than 999999. */ +static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8; + +/* Position of the first digit in 'line_buf'. */ +static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3; + +/* Position of the last digit in 'line_buf'. */ +static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3; + +/* Preserves the 'cat' function's local 'newlines' between invocations. */ +static int newlines2 = 0; + +/* Whether there is a pending CR to process. */ +static bool pending_cr = false; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Concatenate FILE(s) to standard output.\n\ +"), stdout); + + emit_stdin_note (); + + fputs (_("\ +\n\ + -A, --show-all equivalent to -vET\n\ + -b, --number-nonblank number nonempty output lines, overrides -n\n\ + -e equivalent to -vE\n\ + -E, --show-ends display $ at end of each line\n\ + -n, --number number all output lines\n\ + -s, --squeeze-blank suppress repeated empty output lines\n\ +"), stdout); + fputs (_("\ + -t equivalent to -vT\n\ + -T, --show-tabs display TAB characters as ^I\n\ + -u (ignored)\n\ + -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\ +\n\ +Examples:\n\ + %s f - g Output f's contents, then standard input, then g's contents.\n\ + %s Copy standard input to standard output.\n\ +"), + program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Compute the next line number. */ + +static void +next_line_num (void) +{ + char *endp = line_num_end; + do + { + if ((*endp)++ < '9') + return; + *endp-- = '0'; + } + while (endp >= line_num_start); + + if (line_num_start > line_buf) + *--line_num_start = '1'; + else + *line_buf = '>'; + if (line_num_start < line_num_print) + line_num_print--; +} + +/* Plain cat. Copy the file behind 'input_desc' to STDOUT_FILENO. + BUF (of size BUFSIZE) is the I/O buffer, used by reads and writes. + Return true if successful. */ + +static bool +simple_cat (char *buf, idx_t bufsize) +{ + /* Loop until the end of the file. */ + + while (true) + { + /* Read a block of input. */ + + size_t n_read = safe_read (input_desc, buf, bufsize); + if (n_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (infile)); + return false; + } + + /* End of this file? */ + + if (n_read == 0) + return true; + + /* Write this block out. */ + + if (full_write (STDOUT_FILENO, buf, n_read) != n_read) + write_error (); + } +} + +/* Write any pending output to STDOUT_FILENO. + Pending is defined to be the *BPOUT - OUTBUF bytes starting at OUTBUF. + Then set *BPOUT to OUTPUT if it's not already that value. */ + +static inline void +write_pending (char *outbuf, char **bpout) +{ + idx_t n_write = *bpout - outbuf; + if (0 < n_write) + { + if (full_write (STDOUT_FILENO, outbuf, n_write) != n_write) + write_error (); + *bpout = outbuf; + } +} + +/* Copy the file behind 'input_desc' to STDOUT_FILENO. + Use INBUF and read INSIZE with each call, + and OUTBUF and write OUTSIZE with each call. + (The buffers are a bit larger than the I/O sizes.) + The remaining boolean args say what 'cat' options to use. + + Return true if successful. + Called if any option more than -u was specified. + + A newline character is always put at the end of the buffer, to make + an explicit test for buffer end unnecessary. */ + +static bool +cat (char *inbuf, idx_t insize, char *outbuf, idx_t outsize, + bool show_nonprinting, bool show_tabs, bool number, bool number_nonblank, + bool show_ends, bool squeeze_blank) +{ + /* Last character read from the input buffer. */ + unsigned char ch; + + /* Determines how many consecutive newlines there have been in the + input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1, + etc. Initially 0 to indicate that we are at the beginning of a + new line. The "state" of the procedure is determined by + NEWLINES. */ + int newlines = newlines2; + +#ifdef FIONREAD + /* If nonzero, use the FIONREAD ioctl, as an optimization. + (On Ultrix, it is not supported on NFS file systems.) */ + bool use_fionread = true; +#endif + + /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input + is read immediately. */ + + /* Pointer to the first non-valid byte in the input buffer, i.e., the + current end of the buffer. */ + char *eob = inbuf; + + /* Pointer to the next character in the input buffer. */ + char *bpin = eob + 1; + + /* Pointer to the position where the next character shall be written. */ + char *bpout = outbuf; + + while (true) + { + do + { + /* Write if there are at least OUTSIZE bytes in OUTBUF. */ + + if (outbuf + outsize <= bpout) + { + char *wp = outbuf; + idx_t remaining_bytes; + do + { + if (full_write (STDOUT_FILENO, wp, outsize) != outsize) + write_error (); + wp += outsize; + remaining_bytes = bpout - wp; + } + while (outsize <= remaining_bytes); + + /* Move the remaining bytes to the beginning of the + buffer. */ + + memmove (outbuf, wp, remaining_bytes); + bpout = outbuf + remaining_bytes; + } + + /* Is INBUF empty? */ + + if (bpin > eob) + { + bool input_pending = false; +#ifdef FIONREAD + int n_to_read = 0; + + /* Is there any input to read immediately? + If not, we are about to wait, + so write all buffered output before waiting. */ + + if (use_fionread + && ioctl (input_desc, FIONREAD, &n_to_read) < 0) + { + /* Ultrix returns EOPNOTSUPP on NFS; + HP-UX returns ENOTTY on pipes. + SunOS returns EINVAL and + More/BSD returns ENODEV on special files + like /dev/null. + Irix-5 returns ENOSYS on pipes. */ + if (errno == EOPNOTSUPP || errno == ENOTTY + || errno == EINVAL || errno == ENODEV + || errno == ENOSYS) + use_fionread = false; + else + { + error (0, errno, _("cannot do ioctl on %s"), + quoteaf (infile)); + newlines2 = newlines; + return false; + } + } + if (n_to_read != 0) + input_pending = true; +#endif + + if (!input_pending) + write_pending (outbuf, &bpout); + + /* Read more input into INBUF. */ + + size_t n_read = safe_read (input_desc, inbuf, insize); + if (n_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (infile)); + write_pending (outbuf, &bpout); + newlines2 = newlines; + return false; + } + if (n_read == 0) + { + write_pending (outbuf, &bpout); + newlines2 = newlines; + return true; + } + + /* Update the pointers and insert a sentinel at the buffer + end. */ + + bpin = inbuf; + eob = bpin + n_read; + *eob = '\n'; + } + else + { + /* It was a real (not a sentinel) newline. */ + + /* Was the last line empty? + (i.e., have two or more consecutive newlines been read?) */ + + if (++newlines > 0) + { + if (newlines >= 2) + { + /* Limit this to 2 here. Otherwise, with lots of + consecutive newlines, the counter could wrap + around at INT_MAX. */ + newlines = 2; + + /* Are multiple adjacent empty lines to be substituted + by single ditto (-s), and this was the second empty + line? */ + if (squeeze_blank) + { + ch = *bpin++; + continue; + } + } + + /* Are line numbers to be written at empty lines (-n)? */ + + if (number && !number_nonblank) + { + next_line_num (); + bpout = stpcpy (bpout, line_num_print); + } + } + + /* Output a currency symbol if requested (-e). */ + if (show_ends) + { + if (pending_cr) + { + *bpout++ = '^'; + *bpout++ = 'M'; + pending_cr = false; + } + *bpout++ = '$'; + } + + /* Output the newline. */ + + *bpout++ = '\n'; + } + ch = *bpin++; + } + while (ch == '\n'); + + /* Here CH cannot contain a newline character. */ + + if (pending_cr) + { + *bpout++ = '\r'; + pending_cr = false; + } + + /* Are we at the beginning of a line, and line numbers are requested? */ + + if (newlines >= 0 && number) + { + next_line_num (); + bpout = stpcpy (bpout, line_num_print); + } + + /* The loops below continue until a newline character is found, + which means that the buffer is empty or that a proper newline + has been found. */ + + /* If quoting, i.e., at least one of -v, -e, or -t specified, + scan for chars that need conversion. */ + if (show_nonprinting) + { + while (true) + { + if (ch >= 32) + { + if (ch < 127) + *bpout++ = ch; + else if (ch == 127) + { + *bpout++ = '^'; + *bpout++ = '?'; + } + else + { + *bpout++ = 'M'; + *bpout++ = '-'; + if (ch >= 128 + 32) + { + if (ch < 128 + 127) + *bpout++ = ch - 128; + else + { + *bpout++ = '^'; + *bpout++ = '?'; + } + } + else + { + *bpout++ = '^'; + *bpout++ = ch - 128 + 64; + } + } + } + else if (ch == '\t' && !show_tabs) + *bpout++ = '\t'; + else if (ch == '\n') + { + newlines = -1; + break; + } + else + { + *bpout++ = '^'; + *bpout++ = ch + 64; + } + + ch = *bpin++; + } + } + else + { + /* Not quoting, neither of -v, -e, or -t specified. */ + while (true) + { + if (ch == '\t' && show_tabs) + { + *bpout++ = '^'; + *bpout++ = ch + 64; + } + else if (ch != '\n') + { + if (ch == '\r' && *bpin == '\n' && show_ends) + { + if (bpin == eob) + pending_cr = true; + else + { + *bpout++ = '^'; + *bpout++ = 'M'; + } + } + else + *bpout++ = ch; + } + else + { + newlines = -1; + break; + } + + ch = *bpin++; + } + } + } +} + +/* Copy data from input to output using copy_file_range if possible. + Return 1 if successful, 0 if ordinary read+write should be tried, + -1 if a serious problem has been diagnosed. */ + +static int +copy_cat (void) +{ + /* Copy at most COPY_MAX bytes at a time; this is min + (SSIZE_MAX, SIZE_MAX) truncated to a value that is + surely aligned well. */ + ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30; + + /* copy_file_range does not support some cases, and it + incorrectly returns 0 when reading from the proc file + system on the Linux kernel through at least 5.6.19 (2020), + so fall back on read+write if the copy_file_range is + unsupported or the input file seems empty. */ + + for (bool some_copied = false; ; some_copied = true) + switch (copy_file_range (input_desc, nullptr, STDOUT_FILENO, nullptr, + copy_max, 0)) + { + case 0: + return some_copied; + + case -1: + if (errno == ENOSYS || is_ENOTSUP (errno) || errno == EINVAL + || errno == EBADF || errno == EXDEV || errno == ETXTBSY + || errno == EPERM) + return 0; + error (0, errno, "%s", quotef (infile)); + return -1; + } +} + + +int +main (int argc, char **argv) +{ + /* Nonzero if we have ever read standard input. */ + bool have_read_stdin = false; + + struct stat stat_buf; + + /* Variables that are set according to the specified options. */ + bool number = false; + bool number_nonblank = false; + bool squeeze_blank = false; + bool show_ends = false; + bool show_nonprinting = false; + bool show_tabs = false; + int file_open_mode = O_RDONLY; + + static struct option const long_options[] = + { + {"number-nonblank", no_argument, nullptr, 'b'}, + {"number", no_argument, nullptr, 'n'}, + {"squeeze-blank", no_argument, nullptr, 's'}, + {"show-nonprinting", no_argument, nullptr, 'v'}, + {"show-ends", no_argument, nullptr, 'E'}, + {"show-tabs", no_argument, nullptr, 'T'}, + {"show-all", no_argument, nullptr, 'A'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} + }; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* Arrange to close stdout if we exit via the + case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code. + Normally STDOUT_FILENO is used rather than stdout, so + close_stdout does nothing. */ + atexit (close_stdout); + + /* Parse command line options. */ + + int c; + while ((c = getopt_long (argc, argv, "benstuvAET", long_options, nullptr)) + != -1) + { + switch (c) + { + case 'b': + number = true; + number_nonblank = true; + break; + + case 'e': + show_ends = true; + show_nonprinting = true; + break; + + case 'n': + number = true; + break; + + case 's': + squeeze_blank = true; + break; + + case 't': + show_tabs = true; + show_nonprinting = true; + break; + + case 'u': + /* We provide the -u feature unconditionally. */ + break; + + case 'v': + show_nonprinting = true; + break; + + case 'A': + show_nonprinting = true; + show_ends = true; + show_tabs = true; + break; + + case 'E': + show_ends = true; + break; + + case 'T': + show_tabs = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + /* Get device, i-node number, and optimal blocksize of output. */ + + if (fstat (STDOUT_FILENO, &stat_buf) < 0) + error (EXIT_FAILURE, errno, _("standard output")); + + /* Optimal size of i/o operations of output. */ + idx_t outsize = io_blksize (stat_buf); + + /* Device and I-node number of the output. */ + dev_t out_dev = stat_buf.st_dev; + ino_t out_ino = stat_buf.st_ino; + + /* True if the output is a regular file. */ + bool out_isreg = S_ISREG (stat_buf.st_mode) != 0; + + if (! (number || show_ends || squeeze_blank)) + { + file_open_mode |= O_BINARY; + xset_binary_mode (STDOUT_FILENO, O_BINARY); + } + + /* Main loop. */ + + infile = "-"; + int argind = optind; + bool ok = true; + idx_t page_size = getpagesize (); + + do + { + if (argind < argc) + infile = argv[argind]; + + bool reading_stdin = STREQ (infile, "-"); + if (reading_stdin) + { + have_read_stdin = true; + input_desc = STDIN_FILENO; + if (file_open_mode & O_BINARY) + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + else + { + input_desc = open (infile, file_open_mode); + if (input_desc < 0) + { + error (0, errno, "%s", quotef (infile)); + ok = false; + continue; + } + } + + if (fstat (input_desc, &stat_buf) < 0) + { + error (0, errno, "%s", quotef (infile)); + ok = false; + goto contin; + } + + /* Optimal size of i/o operations of input. */ + idx_t insize = io_blksize (stat_buf); + + fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL); + + /* Don't copy a nonempty regular file to itself, as that would + merely exhaust the output device. It's better to catch this + error earlier rather than later. */ + + if (out_isreg + && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino + && lseek (input_desc, 0, SEEK_CUR) < stat_buf.st_size) + { + error (0, 0, _("%s: input file is output file"), quotef (infile)); + ok = false; + goto contin; + } + + /* Pointer to the input buffer. */ + char *inbuf; + + /* Select which version of 'cat' to use. If any format-oriented + options were given use 'cat'; if not, use 'copy_cat' if it + works, 'simple_cat' otherwise. */ + + if (! (number || show_ends || show_nonprinting + || show_tabs || squeeze_blank)) + { + int copy_cat_status = + out_isreg && S_ISREG (stat_buf.st_mode) ? copy_cat () : 0; + if (copy_cat_status != 0) + { + inbuf = nullptr; + ok &= 0 < copy_cat_status; + } + else + { + insize = MAX (insize, outsize); + inbuf = xalignalloc (page_size, insize); + ok &= simple_cat (inbuf, insize); + } + } + else + { + /* Allocate, with an extra byte for a newline sentinel. */ + inbuf = xalignalloc (page_size, insize + 1); + + /* Why are + (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN) + bytes allocated for the output buffer? + + A test whether output needs to be written is done when the input + buffer empties or when a newline appears in the input. After + output is written, at most (OUTSIZE - 1) bytes will remain in the + buffer. Now INSIZE bytes of input is read. Each input character + may grow by a factor of 4 (by the prepending of M-^). If all + characters do, and no newlines appear in this block of input, we + will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer. + If the last character in the preceding block of input was a + newline, a line number may be written (according to the given + options) as the first thing in the output buffer. (Done after the + new input is read, but before processing of the input begins.) + A line number requires seldom more than LINE_COUNTER_BUF_LEN + positions. + + Align the output buffer to a page size boundary, for efficiency + on some paging implementations. */ + + idx_t bufsize; + if (ckd_mul (&bufsize, insize, 4) + || ckd_add (&bufsize, bufsize, outsize) + || ckd_add (&bufsize, bufsize, LINE_COUNTER_BUF_LEN - 1)) + xalloc_die (); + char *outbuf = xalignalloc (page_size, bufsize); + + ok &= cat (inbuf, insize, outbuf, outsize, show_nonprinting, + show_tabs, number, number_nonblank, show_ends, + squeeze_blank); + + alignfree (outbuf); + } + + alignfree (inbuf); + + contin: + if (!reading_stdin && close (input_desc) < 0) + { + error (0, errno, "%s", quotef (infile)); + ok = false; + } + } + while (++argind < argc); + + if (pending_cr) + { + if (full_write (STDOUT_FILENO, "\r", 1) != 1) + write_error (); + } + + if (have_read_stdin && close (STDIN_FILENO) < 0) + error (EXIT_FAILURE, errno, _("closing standard input")); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/chcon.c b/src/chcon.c new file mode 100644 index 0000000..41419b8 --- /dev/null +++ b/src/chcon.c @@ -0,0 +1,588 @@ +/* chcon -- change security context of files + Copyright (C) 2005-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "dev-ino.h" +#include "ignore-value.h" +#include "quote.h" +#include "root-dev-ino.h" +#include "selinux-at.h" +#include "xfts.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "chcon" + +#define AUTHORS \ + proper_name ("Russell Coker"), \ + proper_name ("Jim Meyering") + +/* If nonzero, and the systems has support for it, change the context + of symbolic links rather than any files they point to. */ +static bool affect_symlink_referent; + +/* If true, change the modes of directories recursively. */ +static bool recurse; + +/* Level of verbosity. */ +static bool verbose; + +/* Pointer to the device and inode numbers of '/', when --recursive. + Otherwise nullptr. */ +static struct dev_ino *root_dev_ino; + +/* The name of the context file is being given. */ +static char const *specified_context; + +/* Specific components of the context */ +static char const *specified_user; +static char const *specified_role; +static char const *specified_range; +static char const *specified_type; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEREFERENCE_OPTION = CHAR_MAX + 1, + NO_PRESERVE_ROOT, + PRESERVE_ROOT, + REFERENCE_FILE_OPTION +}; + +static struct option const long_options[] = +{ + {"recursive", no_argument, nullptr, 'R'}, + {"dereference", no_argument, nullptr, DEREFERENCE_OPTION}, + {"no-dereference", no_argument, nullptr, 'h'}, + {"no-preserve-root", no_argument, nullptr, NO_PRESERVE_ROOT}, + {"preserve-root", no_argument, nullptr, PRESERVE_ROOT}, + {"reference", required_argument, nullptr, REFERENCE_FILE_OPTION}, + {"user", required_argument, nullptr, 'u'}, + {"role", required_argument, nullptr, 'r'}, + {"type", required_argument, nullptr, 't'}, + {"range", required_argument, nullptr, 'l'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Given a security context, CONTEXT, derive a context_t (*RET), + setting any portions selected via the global variables, specified_user, + specified_role, etc. */ +static int +compute_context_from_mask (char const *context, context_t *ret) +{ + bool ok = true; + context_t new_context = context_new (context); + if (!new_context) + { + error (0, errno, _("failed to create security context: %s"), + quote (context)); + return 1; + } + +#define SET_COMPONENT(C, comp) \ + do \ + { \ + if (specified_ ## comp \ + && context_ ## comp ## _set ((C), specified_ ## comp)) \ + { \ + error (0, errno, \ + _("failed to set %s security context component to %s"), \ + #comp, quote (specified_ ## comp)); \ + ok = false; \ + } \ + } \ + while (0) + + SET_COMPONENT (new_context, user); + SET_COMPONENT (new_context, range); + SET_COMPONENT (new_context, role); + SET_COMPONENT (new_context, type); + + if (!ok) + { + int saved_errno = errno; + context_free (new_context); + errno = saved_errno; + return 1; + } + + *ret = new_context; + return 0; +} + +/* Change the context of FILE, using specified components. + If it is a directory and -R is given, recurse. + Return 0 if successful, 1 if errors occurred. */ + +static int +change_file_context (int fd, char const *file) +{ + char *file_context = nullptr; + context_t context IF_LINT (= 0); + char const * context_string; + int errors = 0; + + if (specified_context == nullptr) + { + int status = (affect_symlink_referent + ? getfileconat (fd, file, &file_context) + : lgetfileconat (fd, file, &file_context)); + + if (status < 0 && errno != ENODATA) + { + error (0, errno, _("failed to get security context of %s"), + quoteaf (file)); + return 1; + } + + /* If the file doesn't have a context, and we're not setting all of + the context components, there isn't really an obvious default. + Thus, we just give up. */ + if (file_context == nullptr) + { + error (0, 0, _("can't apply partial context to unlabeled file %s"), + quoteaf (file)); + return 1; + } + + if (compute_context_from_mask (file_context, &context)) + return 1; + + context_string = context_str (context); + } + else + { + context_string = specified_context; + } + + if (file_context == nullptr || ! STREQ (context_string, file_context)) + { + int fail = (affect_symlink_referent + ? setfileconat (fd, file, context_string) + : lsetfileconat (fd, file, context_string)); + + if (fail) + { + errors = 1; + error (0, errno, _("failed to change context of %s to %s"), + quoteaf_n (0, file), quote_n (1, context_string)); + } + } + + if (specified_context == nullptr) + { + context_free (context); + freecon (file_context); + } + + return errors; +} + +/* Change the context of FILE. + Return true if successful. This function is called + once for every file system object that fts encounters. */ + +static bool +process_file (FTS *fts, FTSENT *ent) +{ + char const *file_full_name = ent->fts_path; + char const *file = ent->fts_accpath; + const struct stat *file_stats = ent->fts_statp; + bool ok = true; + + switch (ent->fts_info) + { + case FTS_D: + if (recurse) + { + if (ROOT_DEV_INO_CHECK (root_dev_ino, ent->fts_statp)) + { + /* This happens e.g., with "chcon -R --preserve-root ... /" + and with "chcon -RH --preserve-root ... symlink-to-root". */ + ROOT_DEV_INO_WARN (file_full_name); + /* Tell fts not to traverse into this hierarchy. */ + fts_set (fts, ent, FTS_SKIP); + /* Ensure that we do not process "/" on the second visit. */ + ignore_value (fts_read (fts)); + return false; + } + return true; + } + break; + + case FTS_DP: + if (! recurse) + return true; + break; + + case FTS_NS: + /* For a top-level file or directory, this FTS_NS (stat failed) + indicator is determined at the time of the initial fts_open call. + With programs like chmod, chown, and chgrp, that modify + permissions, it is possible that the file in question is + accessible when control reaches this point. So, if this is + the first time we've seen the FTS_NS for this file, tell + fts_read to stat it "again". */ + if (ent->fts_level == 0 && ent->fts_number == 0) + { + ent->fts_number = 1; + fts_set (fts, ent, FTS_AGAIN); + return true; + } + error (0, ent->fts_errno, _("cannot access %s"), + quoteaf (file_full_name)); + ok = false; + break; + + case FTS_ERR: + error (0, ent->fts_errno, "%s", quotef (file_full_name)); + ok = false; + break; + + case FTS_DNR: + error (0, ent->fts_errno, _("cannot read directory %s"), + quoteaf (file_full_name)); + ok = false; + break; + + case FTS_DC: /* directory that causes cycles */ + if (cycle_warning_required (fts, ent)) + { + emit_cycle_warning (file_full_name); + return false; + } + break; + + default: + break; + } + + if (ent->fts_info == FTS_DP + && ok && ROOT_DEV_INO_CHECK (root_dev_ino, file_stats)) + { + ROOT_DEV_INO_WARN (file_full_name); + ok = false; + } + + if (ok) + { + if (verbose) + printf (_("changing security context of %s\n"), + quoteaf (file_full_name)); + + if (change_file_context (fts->fts_cwd_fd, file) != 0) + ok = false; + } + + if ( ! recurse) + fts_set (fts, ent, FTS_SKIP); + + return ok; +} + +/* Recursively operate on the specified FILES (the last entry + of which is null). BIT_FLAGS controls how fts works. + Return true if successful. */ + +static bool +process_files (char **files, int bit_flags) +{ + bool ok = true; + + FTS *fts = xfts_open (files, bit_flags, nullptr); + + while (true) + { + FTSENT *ent; + + ent = fts_read (fts); + if (ent == nullptr) + { + if (errno != 0) + { + /* FIXME: try to give a better message */ + error (0, errno, _("fts_read failed")); + ok = false; + } + break; + } + + ok &= process_file (fts, ent); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + ok = false; + } + + return ok; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... CONTEXT FILE...\n\ + or: %s [OPTION]... [-u USER] [-r ROLE] [-l RANGE] [-t TYPE] FILE...\n\ + or: %s [OPTION]... --reference=RFILE FILE...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Change the SELinux security context of each FILE to CONTEXT.\n\ +With --reference, change the security context of each FILE to that of RFILE.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + --dereference affect the referent of each symbolic link (this is\n\ + the default), rather than the symbolic link itself\n\ + -h, --no-dereference affect symbolic links instead of any referenced file\n\ +"), stdout); + fputs (_("\ + -u, --user=USER set user USER in the target security context\n\ + -r, --role=ROLE set role ROLE in the target security context\n\ + -t, --type=TYPE set type TYPE in the target security context\n\ + -l, --range=RANGE set range RANGE in the target security context\n\ +"), stdout); + fputs (_("\ + --no-preserve-root do not treat '/' specially (the default)\n\ + --preserve-root fail to operate recursively on '/'\n\ +"), stdout); + fputs (_("\ + --reference=RFILE use RFILE's security context rather than specifying\n\ + a CONTEXT value\n\ +"), stdout); + fputs (_("\ + -R, --recursive operate on files and directories recursively\n\ +"), stdout); + fputs (_("\ + -v, --verbose output a diagnostic for every file processed\n\ +"), stdout); + fputs (_("\ +\n\ +The following options modify how a hierarchy is traversed when the -R\n\ +option is also specified. If more than one is specified, only the final\n\ +one takes effect.\n\ +\n\ + -H if a command line argument is a symbolic link\n\ + to a directory, traverse it\n\ + -L traverse every symbolic link to a directory\n\ + encountered\n\ + -P do not traverse any symbolic links (default)\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + /* Bit flags that control how fts works. */ + int bit_flags = FTS_PHYSICAL; + + /* 1 if --dereference, 0 if --no-dereference, -1 if neither has been + specified. */ + int dereference = -1; + + bool ok; + bool preserve_root = false; + bool component_specified = false; + char *reference_file = nullptr; + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "HLPRhvu:r:t:l:", + long_options, nullptr)) + != -1) + { + switch (optc) + { + case 'H': /* Traverse command-line symlinks-to-directories. */ + bit_flags = FTS_COMFOLLOW | FTS_PHYSICAL; + break; + + case 'L': /* Traverse all symlinks-to-directories. */ + bit_flags = FTS_LOGICAL; + break; + + case 'P': /* Traverse no symlinks-to-directories. */ + bit_flags = FTS_PHYSICAL; + break; + + case 'h': /* --no-dereference: affect symlinks */ + dereference = 0; + break; + + case DEREFERENCE_OPTION: /* --dereference: affect the referent + of each symlink */ + dereference = 1; + break; + + case NO_PRESERVE_ROOT: + preserve_root = false; + break; + + case PRESERVE_ROOT: + preserve_root = true; + break; + + case REFERENCE_FILE_OPTION: + reference_file = optarg; + break; + + case 'R': + recurse = true; + break; + + case 'f': + /* ignore */ + break; + + case 'v': + verbose = true; + break; + + case 'u': + specified_user = optarg; + component_specified = true; + break; + + case 'r': + specified_role = optarg; + component_specified = true; + break; + + case 't': + specified_type = optarg; + component_specified = true; + break; + + case 'l': + specified_range = optarg; + component_specified = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (recurse) + { + if (bit_flags == FTS_PHYSICAL) + { + if (dereference == 1) + error (EXIT_FAILURE, 0, + _("-R --dereference requires either -H or -L")); + affect_symlink_referent = false; + } + else + { + if (dereference == 0) + error (EXIT_FAILURE, 0, _("-R -h requires -P")); + affect_symlink_referent = true; + } + } + else + { + bit_flags = FTS_PHYSICAL; + affect_symlink_referent = (dereference != 0); + } + + if (argc - optind < (reference_file || component_specified ? 1 : 2)) + { + if (argc <= optind) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + if (reference_file) + { + char *ref_context = nullptr; + + if (getfilecon (reference_file, &ref_context) < 0) + error (EXIT_FAILURE, errno, _("failed to get security context of %s"), + quoteaf (reference_file)); + + specified_context = ref_context; + } + else if (component_specified) + { + /* FIXME: it's already null, so this is a no-op. */ + specified_context = nullptr; + } + else + { + specified_context = argv[optind++]; + if (0 < is_selinux_enabled () + && security_check_context (specified_context) < 0) + error (EXIT_FAILURE, errno, _("invalid context: %s"), + quote (specified_context)); + } + + if (reference_file && component_specified) + { + error (0, 0, _("conflicting security context specifiers given")); + usage (EXIT_FAILURE); + } + + if (recurse && preserve_root) + { + static struct dev_ino dev_ino_buf; + root_dev_ino = get_root_dev_ino (&dev_ino_buf); + if (root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + } + else + { + root_dev_ino = nullptr; + } + + ok = process_files (argv + optind, bit_flags | FTS_NOSTAT); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/chgrp.c b/src/chgrp.c new file mode 100644 index 0000000..6ec3b4d --- /dev/null +++ b/src/chgrp.c @@ -0,0 +1,315 @@ +/* chgrp -- change group ownership of files + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie . */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "chown-core.h" +#include "fts_.h" +#include "quote.h" +#include "root-dev-ino.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "chgrp" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +#if ! HAVE_ENDGRENT +# define endgrent() ((void) 0) +#endif + +/* The argument to the --reference option. Use the group ID of this file. + This file must exist. */ +static char *reference_file; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEREFERENCE_OPTION = CHAR_MAX + 1, + NO_PRESERVE_ROOT, + PRESERVE_ROOT, + REFERENCE_FILE_OPTION +}; + +static struct option const long_options[] = +{ + {"recursive", no_argument, nullptr, 'R'}, + {"changes", no_argument, nullptr, 'c'}, + {"dereference", no_argument, nullptr, DEREFERENCE_OPTION}, + {"no-dereference", no_argument, nullptr, 'h'}, + {"no-preserve-root", no_argument, nullptr, NO_PRESERVE_ROOT}, + {"preserve-root", no_argument, nullptr, PRESERVE_ROOT}, + {"quiet", no_argument, nullptr, 'f'}, + {"silent", no_argument, nullptr, 'f'}, + {"reference", required_argument, nullptr, REFERENCE_FILE_OPTION}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Return the group ID of NAME, or -1 if no name was specified. */ + +static gid_t +parse_group (char const *name) +{ + gid_t gid = -1; + + if (*name) + { + struct group *grp = getgrnam (name); + if (grp) + gid = grp->gr_gid; + else + { + uintmax_t tmp; + if (! (xstrtoumax (name, nullptr, 10, &tmp, "") == LONGINT_OK + && tmp <= GID_T_MAX)) + error (EXIT_FAILURE, 0, _("invalid group: %s"), + quote (name)); + gid = tmp; + } + endgrent (); /* Save a file descriptor. */ + } + + return gid; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... GROUP FILE...\n\ + or: %s [OPTION]... --reference=RFILE FILE...\n\ +"), + program_name, program_name); + fputs (_("\ +Change the group of each FILE to GROUP.\n\ +With --reference, change the group of each FILE to that of RFILE.\n\ +\n\ +"), stdout); + fputs (_("\ + -c, --changes like verbose but report only when a change is made\n\ + -f, --silent, --quiet suppress most error messages\n\ + -v, --verbose output a diagnostic for every file processed\n\ +"), stdout); + fputs (_("\ + --dereference affect the referent of each symbolic link (this is\n\ + the default), rather than the symbolic link itself\n\ + -h, --no-dereference affect symbolic links instead of any referenced file\n\ +"), stdout); + fputs (_("\ + (useful only on systems that can change the\n\ + ownership of a symlink)\n\ +"), stdout); + fputs (_("\ + --no-preserve-root do not treat '/' specially (the default)\n\ + --preserve-root fail to operate recursively on '/'\n\ +"), stdout); + fputs (_("\ + --reference=RFILE use RFILE's group rather than specifying a GROUP.\n\ + RFILE is always dereferenced if a symbolic link.\n\ +"), stdout); + fputs (_("\ + -R, --recursive operate on files and directories recursively\n\ +"), stdout); + fputs (_("\ +\n\ +The following options modify how a hierarchy is traversed when the -R\n\ +option is also specified. If more than one is specified, only the final\n\ +one takes effect.\n\ +\n\ + -H if a command line argument is a symbolic link\n\ + to a directory, traverse it\n\ + -L traverse every symbolic link to a directory\n\ + encountered\n\ + -P do not traverse any symbolic links (default)\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\ +\n\ +Examples:\n\ + %s staff /u Change the group of /u to \"staff\".\n\ + %s -hR staff /u Change the group of /u and subfiles to \"staff\".\n\ +"), + program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + bool preserve_root = false; + gid_t gid; + + /* Bit flags that control how fts works. */ + int bit_flags = FTS_PHYSICAL; + + /* 1 if --dereference, 0 if --no-dereference, -1 if neither has been + specified. */ + int dereference = -1; + + struct Chown_option chopt; + bool ok; + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + chopt_init (&chopt); + + while ((optc = getopt_long (argc, argv, "HLPRcfhv", long_options, nullptr)) + != -1) + { + switch (optc) + { + case 'H': /* Traverse command-line symlinks-to-directories. */ + bit_flags = FTS_COMFOLLOW | FTS_PHYSICAL; + break; + + case 'L': /* Traverse all symlinks-to-directories. */ + bit_flags = FTS_LOGICAL; + break; + + case 'P': /* Traverse no symlinks-to-directories. */ + bit_flags = FTS_PHYSICAL; + break; + + case 'h': /* --no-dereference: affect symlinks */ + dereference = 0; + break; + + case DEREFERENCE_OPTION: /* --dereference: affect the referent + of each symlink */ + dereference = 1; + break; + + case NO_PRESERVE_ROOT: + preserve_root = false; + break; + + case PRESERVE_ROOT: + preserve_root = true; + break; + + case REFERENCE_FILE_OPTION: + reference_file = optarg; + break; + + case 'R': + chopt.recurse = true; + break; + + case 'c': + chopt.verbosity = V_changes_only; + break; + + case 'f': + chopt.force_silent = true; + break; + + case 'v': + chopt.verbosity = V_high; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (chopt.recurse) + { + if (bit_flags == FTS_PHYSICAL) + { + if (dereference == 1) + error (EXIT_FAILURE, 0, + _("-R --dereference requires either -H or -L")); + dereference = 0; + } + } + else + { + bit_flags = FTS_PHYSICAL; + } + chopt.affect_symlink_referent = (dereference != 0); + + if (argc - optind < (reference_file ? 1 : 2)) + { + if (argc <= optind) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + if (reference_file) + { + struct stat ref_stats; + if (stat (reference_file, &ref_stats)) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf (reference_file)); + + gid = ref_stats.st_gid; + chopt.group_name = gid_to_name (ref_stats.st_gid); + } + else + { + char *group_name = argv[optind++]; + chopt.group_name = (*group_name ? xstrdup (group_name) : nullptr); + gid = parse_group (group_name); + } + + if (chopt.recurse && preserve_root) + { + static struct dev_ino dev_ino_buf; + chopt.root_dev_ino = get_root_dev_ino (&dev_ino_buf); + if (chopt.root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + } + + bit_flags |= FTS_DEFER_STAT; + ok = chown_files (argv + optind, bit_flags, + (uid_t) -1, gid, + (uid_t) -1, (gid_t) -1, &chopt); + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/chmod.c b/src/chmod.c new file mode 100644 index 0000000..de0c5d0 --- /dev/null +++ b/src/chmod.c @@ -0,0 +1,571 @@ +/* chmod -- change permission modes of files + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie */ + +#include +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "dev-ino.h" +#include "filemode.h" +#include "ignore-value.h" +#include "modechange.h" +#include "quote.h" +#include "root-dev-ino.h" +#include "xfts.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "chmod" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +struct change_status +{ + enum + { + CH_NO_STAT, + CH_FAILED, + CH_NOT_APPLIED, + CH_NO_CHANGE_REQUESTED, + CH_SUCCEEDED + } + status; + mode_t old_mode; + mode_t new_mode; +}; + +enum Verbosity +{ + /* Print a message for each file that is processed. */ + V_high, + + /* Print a message for each file whose attributes we change. */ + V_changes_only, + + /* Do not be verbose. This is the default. */ + V_off +}; + +/* The desired change to the mode. */ +static struct mode_change *change; + +/* The initial umask value, if it might be needed. */ +static mode_t umask_value; + +/* If true, change the modes of directories recursively. */ +static bool recurse; + +/* If true, force silence (suppress most of error messages). */ +static bool force_silent; + +/* If true, diagnose surprises from naive misuses like "chmod -r file". + POSIX allows diagnostics here, as portable code is supposed to use + "chmod -- -r file". */ +static bool diagnose_surprises; + +/* Level of verbosity. */ +static enum Verbosity verbosity = V_off; + +/* Pointer to the device and inode numbers of '/', when --recursive. + Otherwise nullptr. */ +static struct dev_ino *root_dev_ino; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + NO_PRESERVE_ROOT = CHAR_MAX + 1, + PRESERVE_ROOT, + REFERENCE_FILE_OPTION +}; + +static struct option const long_options[] = +{ + {"changes", no_argument, nullptr, 'c'}, + {"recursive", no_argument, nullptr, 'R'}, + {"no-preserve-root", no_argument, nullptr, NO_PRESERVE_ROOT}, + {"preserve-root", no_argument, nullptr, PRESERVE_ROOT}, + {"quiet", no_argument, nullptr, 'f'}, + {"reference", required_argument, nullptr, REFERENCE_FILE_OPTION}, + {"silent", no_argument, nullptr, 'f'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Return true if the chmodable permission bits of FILE changed. + The old mode was OLD_MODE, but it was changed to NEW_MODE. */ + +static bool +mode_changed (int dir_fd, char const *file, char const *file_full_name, + mode_t old_mode, mode_t new_mode) +{ + if (new_mode & (S_ISUID | S_ISGID | S_ISVTX)) + { + /* The new mode contains unusual bits that the call to chmod may + have silently cleared. Check whether they actually changed. */ + + struct stat new_stats; + + if (fstatat (dir_fd, file, &new_stats, 0) != 0) + { + if (! force_silent) + error (0, errno, _("getting new attributes of %s"), + quoteaf (file_full_name)); + return false; + } + + new_mode = new_stats.st_mode; + } + + return ((old_mode ^ new_mode) & CHMOD_MODE_BITS) != 0; +} + +/* Tell the user how/if the MODE of FILE has been changed. + CH describes what (if anything) has happened. */ + +static void +describe_change (char const *file, struct change_status const *ch) +{ + char perms[12]; /* "-rwxrwxrwx" ls-style modes. */ + char old_perms[12]; + char const *fmt; + char const *quoted_file = quoteaf (file); + + switch (ch->status) + { + case CH_NOT_APPLIED: + printf (_("neither symbolic link %s nor referent has been changed\n"), + quoted_file); + return; + + case CH_NO_STAT: + printf (_("%s could not be accessed\n"), quoted_file); + return; + + default: + break; + } + + unsigned long int + old_m = ch->old_mode & CHMOD_MODE_BITS, + m = ch->new_mode & CHMOD_MODE_BITS; + + strmode (ch->new_mode, perms); + perms[10] = '\0'; /* Remove trailing space. */ + + strmode (ch->old_mode, old_perms); + old_perms[10] = '\0'; /* Remove trailing space. */ + + switch (ch->status) + { + case CH_SUCCEEDED: + fmt = _("mode of %s changed from %04lo (%s) to %04lo (%s)\n"); + break; + case CH_FAILED: + fmt = _("failed to change mode of %s from %04lo (%s) to %04lo (%s)\n"); + break; + case CH_NO_CHANGE_REQUESTED: + fmt = _("mode of %s retained as %04lo (%s)\n"); + printf (fmt, quoted_file, m, &perms[1]); + return; + default: + affirm (false); + } + printf (fmt, quoted_file, old_m, &old_perms[1], m, &perms[1]); +} + +/* Change the mode of FILE. + Return true if successful. This function is called + once for every file system object that fts encounters. */ + +static bool +process_file (FTS *fts, FTSENT *ent) +{ + char const *file_full_name = ent->fts_path; + char const *file = ent->fts_accpath; + const struct stat *file_stats = ent->fts_statp; + struct change_status ch = { 0, }; + ch.status = CH_NO_STAT; + + switch (ent->fts_info) + { + case FTS_DP: + return true; + + case FTS_NS: + /* For a top-level file or directory, this FTS_NS (stat failed) + indicator is determined at the time of the initial fts_open call. + With programs like chmod, chown, and chgrp, that modify + permissions, it is possible that the file in question is + accessible when control reaches this point. So, if this is + the first time we've seen the FTS_NS for this file, tell + fts_read to stat it "again". */ + if (ent->fts_level == 0 && ent->fts_number == 0) + { + ent->fts_number = 1; + fts_set (fts, ent, FTS_AGAIN); + return true; + } + if (! force_silent) + error (0, ent->fts_errno, _("cannot access %s"), + quoteaf (file_full_name)); + break; + + case FTS_ERR: + if (! force_silent) + error (0, ent->fts_errno, "%s", quotef (file_full_name)); + break; + + case FTS_DNR: + if (! force_silent) + error (0, ent->fts_errno, _("cannot read directory %s"), + quoteaf (file_full_name)); + break; + + case FTS_SLNONE: + if (! force_silent) + error (0, 0, _("cannot operate on dangling symlink %s"), + quoteaf (file_full_name)); + break; + + case FTS_DC: /* directory that causes cycles */ + if (cycle_warning_required (fts, ent)) + { + emit_cycle_warning (file_full_name); + return false; + } + FALLTHROUGH; + default: + ch.status = CH_NOT_APPLIED; + break; + } + + if (ch.status == CH_NOT_APPLIED + && ROOT_DEV_INO_CHECK (root_dev_ino, file_stats)) + { + ROOT_DEV_INO_WARN (file_full_name); + /* Tell fts not to traverse into this hierarchy. */ + fts_set (fts, ent, FTS_SKIP); + /* Ensure that we do not process "/" on the second visit. */ + ignore_value (fts_read (fts)); + return false; + } + + if (ch.status == CH_NOT_APPLIED && ! S_ISLNK (file_stats->st_mode)) + { + ch.old_mode = file_stats->st_mode; + ch.new_mode = mode_adjust (ch.old_mode, S_ISDIR (ch.old_mode) != 0, + umask_value, change, nullptr); + if (chmodat (fts->fts_cwd_fd, file, ch.new_mode) == 0) + ch.status = CH_SUCCEEDED; + else + { + if (! force_silent) + error (0, errno, _("changing permissions of %s"), + quoteaf (file_full_name)); + ch.status = CH_FAILED; + } + } + + if (verbosity != V_off) + { + if (ch.status == CH_SUCCEEDED + && !mode_changed (fts->fts_cwd_fd, file, file_full_name, + ch.old_mode, ch.new_mode)) + ch.status = CH_NO_CHANGE_REQUESTED; + + if (ch.status == CH_SUCCEEDED || verbosity == V_high) + describe_change (file_full_name, &ch); + } + + if (CH_NO_CHANGE_REQUESTED <= ch.status && diagnose_surprises) + { + mode_t naively_expected_mode = + mode_adjust (ch.old_mode, S_ISDIR (ch.old_mode) != 0, + 0, change, nullptr); + if (ch.new_mode & ~naively_expected_mode) + { + char new_perms[12]; + char naively_expected_perms[12]; + strmode (ch.new_mode, new_perms); + strmode (naively_expected_mode, naively_expected_perms); + new_perms[10] = naively_expected_perms[10] = '\0'; + error (0, 0, + _("%s: new permissions are %s, not %s"), + quotef (file_full_name), + new_perms + 1, naively_expected_perms + 1); + ch.status = CH_FAILED; + } + } + + if ( ! recurse) + fts_set (fts, ent, FTS_SKIP); + + return CH_NOT_APPLIED <= ch.status; +} + +/* Recursively change the modes of the specified FILES (the last entry + of which is null). BIT_FLAGS controls how fts works. + Return true if successful. */ + +static bool +process_files (char **files, int bit_flags) +{ + bool ok = true; + + FTS *fts = xfts_open (files, bit_flags, nullptr); + + while (true) + { + FTSENT *ent; + + ent = fts_read (fts); + if (ent == nullptr) + { + if (errno != 0) + { + /* FIXME: try to give a better message */ + if (! force_silent) + error (0, errno, _("fts_read failed")); + ok = false; + } + break; + } + + ok &= process_file (fts, ent); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + ok = false; + } + + return ok; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... MODE[,MODE]... FILE...\n\ + or: %s [OPTION]... OCTAL-MODE FILE...\n\ + or: %s [OPTION]... --reference=RFILE FILE...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Change the mode of each FILE to MODE.\n\ +With --reference, change the mode of each FILE to that of RFILE.\n\ +\n\ +"), stdout); + fputs (_("\ + -c, --changes like verbose but report only when a change is made\n\ + -f, --silent, --quiet suppress most error messages\n\ + -v, --verbose output a diagnostic for every file processed\n\ +"), stdout); + fputs (_("\ + --no-preserve-root do not treat '/' specially (the default)\n\ + --preserve-root fail to operate recursively on '/'\n\ +"), stdout); + fputs (_("\ + --reference=RFILE use RFILE's mode instead of specifying MODE values.\n\ + RFILE is always dereferenced if a symbolic link.\n\ +"), stdout); + fputs (_("\ + -R, --recursive change files and directories recursively\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Each MODE is of the form '[ugoa]*([-+=]([rwxXst]*|[ugo]))+|[-+=][0-7]+'.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Parse the ASCII mode given on the command line into a linked list + of 'struct mode_change' and apply that to each file argument. */ + +int +main (int argc, char **argv) +{ + char *mode = nullptr; + idx_t mode_len = 0; + idx_t mode_alloc = 0; + bool ok; + bool preserve_root = false; + char const *reference_file = nullptr; + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + recurse = force_silent = diagnose_surprises = false; + + while ((c = getopt_long (argc, argv, + ("Rcfvr::w::x::X::s::t::u::g::o::a::,::+::=::" + "0::1::2::3::4::5::6::7::"), + long_options, nullptr)) + != -1) + { + switch (c) + { + case 'r': + case 'w': + case 'x': + case 'X': + case 's': + case 't': + case 'u': + case 'g': + case 'o': + case 'a': + case ',': + case '+': + case '=': + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + /* Support non-portable uses like "chmod -w", but diagnose + surprises due to umask confusion. Even though "--", "--r", + etc., are valid modes, there is no "case '-'" here since + getopt_long reserves leading "--" for long options. */ + { + /* Allocate a mode string (e.g., "-rwx") by concatenating + the argument containing this option. If a previous mode + string was given, concatenate the previous string, a + comma, and the new string (e.g., "-s,-rwx"). */ + + char const *arg = argv[optind - 1]; + idx_t arg_len = strlen (arg); + idx_t mode_comma_len = mode_len + !!mode_len; + idx_t new_mode_len = mode_comma_len + arg_len; + assume (0 <= new_mode_len); /* Pacify GCC bug #109613. */ + if (mode_alloc <= new_mode_len) + mode = xpalloc (mode, &mode_alloc, + new_mode_len + 1 - mode_alloc, -1, 1); + mode[mode_len] = ','; + memcpy (mode + mode_comma_len, arg, arg_len + 1); + mode_len = new_mode_len; + + diagnose_surprises = true; + } + break; + case NO_PRESERVE_ROOT: + preserve_root = false; + break; + case PRESERVE_ROOT: + preserve_root = true; + break; + case REFERENCE_FILE_OPTION: + reference_file = optarg; + break; + case 'R': + recurse = true; + break; + case 'c': + verbosity = V_changes_only; + break; + case 'f': + force_silent = true; + break; + case 'v': + verbosity = V_high; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (reference_file) + { + if (mode) + { + error (0, 0, _("cannot combine mode and --reference options")); + usage (EXIT_FAILURE); + } + } + else + { + if (!mode) + mode = argv[optind++]; + } + + if (optind >= argc) + { + if (!mode || mode != argv[optind - 1]) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + if (reference_file) + { + change = mode_create_from_ref (reference_file); + if (!change) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf (reference_file)); + } + else + { + change = mode_compile (mode); + if (!change) + { + error (0, 0, _("invalid mode: %s"), quote (mode)); + usage (EXIT_FAILURE); + } + umask_value = umask (0); + } + + if (recurse && preserve_root) + { + static struct dev_ino dev_ino_buf; + root_dev_ino = get_root_dev_ino (&dev_ino_buf); + if (root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + } + else + { + root_dev_ino = nullptr; + } + + ok = process_files (argv + optind, + FTS_COMFOLLOW | FTS_PHYSICAL | FTS_DEFER_STAT); + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/chown-core.c b/src/chown-core.c new file mode 100644 index 0000000..be86072 --- /dev/null +++ b/src/chown-core.c @@ -0,0 +1,584 @@ +/* chown-core.c -- core functions for changing ownership. + Copyright (C) 2000-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from chown.c/chgrp.c and librarified by Jim Meyering. */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "chown-core.h" +#include "ignore-value.h" +#include "root-dev-ino.h" +#include "xfts.h" + +#define FTSENT_IS_DIRECTORY(E) \ + ((E)->fts_info == FTS_D \ + || (E)->fts_info == FTS_DC \ + || (E)->fts_info == FTS_DP \ + || (E)->fts_info == FTS_DNR) + +enum RCH_status + { + /* we called fchown and close, and both succeeded */ + RC_ok = 2, + + /* required_uid and/or required_gid are specified, but don't match */ + RC_excluded, + + /* SAME_INODE check failed */ + RC_inode_changed, + + /* open/fchown isn't needed, isn't safe, or doesn't work due to + permissions problems; fall back on chown */ + RC_do_ordinary_chown, + + /* open, fstat, fchown, or close failed */ + RC_error + }; + +extern void +chopt_init (struct Chown_option *chopt) +{ + chopt->verbosity = V_off; + chopt->root_dev_ino = nullptr; + chopt->affect_symlink_referent = true; + chopt->recurse = false; + chopt->force_silent = false; + chopt->user_name = nullptr; + chopt->group_name = nullptr; +} + +extern void +chopt_free (struct Chown_option *chopt) +{ + free (chopt->user_name); + free (chopt->group_name); +} + +/* Convert the numeric user-id, UID, to a string stored in xmalloc'd memory, + and return it. Use the decimal representation of the ID. */ + +static char * +uid_to_str (uid_t uid) +{ + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + return xstrdup (TYPE_SIGNED (uid_t) ? imaxtostr (uid, buf) + : umaxtostr (uid, buf)); +} + +/* Convert the numeric group-id, GID, to a string stored in xmalloc'd memory, + and return it. Use the decimal representation of the ID. */ + +static char * +gid_to_str (gid_t gid) +{ + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + return xstrdup (TYPE_SIGNED (gid_t) ? imaxtostr (gid, buf) + : umaxtostr (gid, buf)); +} + +/* Convert the numeric group-id, GID, to a string stored in xmalloc'd memory, + and return it. If there's no corresponding group name, use the decimal + representation of the ID. */ + +extern char * +gid_to_name (gid_t gid) +{ + struct group *grp = getgrgid (gid); + return grp ? xstrdup (grp->gr_name) : gid_to_str (gid); +} + +/* Convert the numeric user-id, UID, to a string stored in xmalloc'd memory, + and return it. If there's no corresponding user name, use the decimal + representation of the ID. */ + +extern char * +uid_to_name (uid_t uid) +{ + struct passwd *pwd = getpwuid (uid); + return pwd ? xstrdup (pwd->pw_name) : uid_to_str (uid); +} + +/* Allocate a string representing USER and GROUP. */ + +static char * +user_group_str (char const *user, char const *group) +{ + char *spec = nullptr; + + if (user) + { + if (group) + { + spec = xmalloc (strlen (user) + 1 + strlen (group) + 1); + stpcpy (stpcpy (stpcpy (spec, user), ":"), group); + } + else + { + spec = xstrdup (user); + } + } + else if (group) + { + spec = xstrdup (group); + } + + return spec; +} + +/* Tell the user how/if the user and group of FILE have been changed. + If USER is null, give the group-oriented messages. + CHANGED describes what (if anything) has happened. */ + +static void +describe_change (char const *file, enum Change_status changed, + char const *old_user, char const *old_group, + char const *user, char const *group) +{ + char const *fmt; + char *old_spec; + char *spec; + + if (changed == CH_NOT_APPLIED) + { + printf (_("neither symbolic link %s nor referent has been changed\n"), + quoteaf (file)); + return; + } + + spec = user_group_str (user, group); + old_spec = user_group_str (user ? old_user : nullptr, + group ? old_group : nullptr); + + switch (changed) + { + case CH_SUCCEEDED: + fmt = (user ? _("changed ownership of %s from %s to %s\n") + : group ? _("changed group of %s from %s to %s\n") + : _("no change to ownership of %s\n")); + break; + case CH_FAILED: + if (old_spec) + { + fmt = (user ? _("failed to change ownership of %s from %s to %s\n") + : group ? _("failed to change group of %s from %s to %s\n") + : _("failed to change ownership of %s\n")); + } + else + { + fmt = (user ? _("failed to change ownership of %s to %s\n") + : group ? _("failed to change group of %s to %s\n") + : _("failed to change ownership of %s\n")); + free (old_spec); + old_spec = spec; + spec = nullptr; + } + break; + case CH_NO_CHANGE_REQUESTED: + fmt = (user ? _("ownership of %s retained as %s\n") + : group ? _("group of %s retained as %s\n") + : _("ownership of %s retained\n")); + break; + default: + affirm (false); + } + + printf (fmt, quoteaf (file), old_spec, spec); + + free (old_spec); + free (spec); +} + +/* Change the owner and/or group of the FILE to UID and/or GID (safely) + only if REQUIRED_UID and REQUIRED_GID match the owner and group IDs + of FILE. ORIG_ST must be the result of 'stat'ing FILE. + + The 'safely' part above means that we can't simply use chown(2), + since FILE might be replaced with some other file between the time + of the preceding stat/lstat and this chown call. So here we open + FILE and do everything else via the resulting file descriptor. + We first call fstat and verify that the dev/inode match those from + the preceding stat call, and only then, if appropriate (given the + required_uid and required_gid constraints) do we call fchown. + + Return RC_do_ordinary_chown if we can't open FILE, or if FILE is a + special file that might have undesirable side effects when opening. + In this case the caller can use the less-safe ordinary chown. + + Return one of the RCH_status values. */ + +static enum RCH_status +restricted_chown (int cwd_fd, char const *file, + struct stat const *orig_st, + uid_t uid, gid_t gid, + uid_t required_uid, gid_t required_gid) +{ + enum RCH_status status = RC_ok; + struct stat st; + int open_flags = O_NONBLOCK | O_NOCTTY; + int fd; + + if (required_uid == (uid_t) -1 && required_gid == (gid_t) -1) + return RC_do_ordinary_chown; + + if (! S_ISREG (orig_st->st_mode)) + { + if (S_ISDIR (orig_st->st_mode)) + open_flags |= O_DIRECTORY; + else + return RC_do_ordinary_chown; + } + + fd = openat (cwd_fd, file, O_RDONLY | open_flags); + if (! (0 <= fd + || (errno == EACCES && S_ISREG (orig_st->st_mode) + && 0 <= (fd = openat (cwd_fd, file, O_WRONLY | open_flags))))) + return (errno == EACCES ? RC_do_ordinary_chown : RC_error); + + if (fstat (fd, &st) != 0) + status = RC_error; + else if (! SAME_INODE (*orig_st, st)) + status = RC_inode_changed; + else if ((required_uid == (uid_t) -1 || required_uid == st.st_uid) + && (required_gid == (gid_t) -1 || required_gid == st.st_gid)) + { + if (fchown (fd, uid, gid) == 0) + { + status = (close (fd) == 0 + ? RC_ok : RC_error); + return status; + } + else + { + status = RC_error; + } + } + + int saved_errno = errno; + close (fd); + errno = saved_errno; + return status; +} + +/* Change the owner and/or group of the file specified by FTS and ENT + to UID and/or GID as appropriate. + If REQUIRED_UID is not -1, then skip files with any other user ID. + If REQUIRED_GID is not -1, then skip files with any other group ID. + CHOPT specifies additional options. + Return true if successful. */ +static bool +change_file_owner (FTS *fts, FTSENT *ent, + uid_t uid, gid_t gid, + uid_t required_uid, gid_t required_gid, + struct Chown_option const *chopt) +{ + char const *file_full_name = ent->fts_path; + char const *file = ent->fts_accpath; + struct stat const *file_stats; + struct stat stat_buf; + bool ok = true; + bool do_chown; + bool symlink_changed = true; + + switch (ent->fts_info) + { + case FTS_D: + if (chopt->recurse) + { + if (ROOT_DEV_INO_CHECK (chopt->root_dev_ino, ent->fts_statp)) + { + /* This happens e.g., with "chown -R --preserve-root 0 /" + and with "chown -RH --preserve-root 0 symlink-to-root". */ + ROOT_DEV_INO_WARN (file_full_name); + /* Tell fts not to traverse into this hierarchy. */ + fts_set (fts, ent, FTS_SKIP); + /* Ensure that we do not process "/" on the second visit. */ + ignore_value (fts_read (fts)); + return false; + } + return true; + } + break; + + case FTS_DP: + if (! chopt->recurse) + return true; + break; + + case FTS_NS: + /* For a top-level file or directory, this FTS_NS (stat failed) + indicator is determined at the time of the initial fts_open call. + With programs like chmod, chown, and chgrp, that modify + permissions, it is possible that the file in question is + accessible when control reaches this point. So, if this is + the first time we've seen the FTS_NS for this file, tell + fts_read to stat it "again". */ + if (ent->fts_level == 0 && ent->fts_number == 0) + { + ent->fts_number = 1; + fts_set (fts, ent, FTS_AGAIN); + return true; + } + if (! chopt->force_silent) + error (0, ent->fts_errno, _("cannot access %s"), + quoteaf (file_full_name)); + ok = false; + break; + + case FTS_ERR: + if (! chopt->force_silent) + error (0, ent->fts_errno, "%s", quotef (file_full_name)); + ok = false; + break; + + case FTS_DNR: + if (! chopt->force_silent) + error (0, ent->fts_errno, _("cannot read directory %s"), + quoteaf (file_full_name)); + ok = false; + break; + + case FTS_DC: /* directory that causes cycles */ + if (cycle_warning_required (fts, ent)) + { + emit_cycle_warning (file_full_name); + return false; + } + break; + + default: + break; + } + + if (!ok) + { + do_chown = false; + file_stats = nullptr; + } + else if (required_uid == (uid_t) -1 && required_gid == (gid_t) -1 + && chopt->verbosity == V_off + && ! chopt->root_dev_ino + && ! chopt->affect_symlink_referent) + { + do_chown = true; + file_stats = ent->fts_statp; + } + else + { + file_stats = ent->fts_statp; + + /* If this is a symlink and we're dereferencing them, + stat it to get info on the referent. */ + if (chopt->affect_symlink_referent && S_ISLNK (file_stats->st_mode)) + { + if (fstatat (fts->fts_cwd_fd, file, &stat_buf, 0) != 0) + { + if (! chopt->force_silent) + error (0, errno, _("cannot dereference %s"), + quoteaf (file_full_name)); + ok = false; + } + + file_stats = &stat_buf; + } + + do_chown = (ok + && (required_uid == (uid_t) -1 + || required_uid == file_stats->st_uid) + && (required_gid == (gid_t) -1 + || required_gid == file_stats->st_gid)); + } + + /* This happens when chown -LR --preserve-root encounters a symlink-to-/. */ + if (ok + && FTSENT_IS_DIRECTORY (ent) + && ROOT_DEV_INO_CHECK (chopt->root_dev_ino, file_stats)) + { + ROOT_DEV_INO_WARN (file_full_name); + return false; + } + + if (do_chown) + { + if ( ! chopt->affect_symlink_referent) + { + ok = (lchownat (fts->fts_cwd_fd, file, uid, gid) == 0); + + /* Ignore any error due to lack of support; POSIX requires + this behavior for top-level symbolic links with -h, and + implies that it's required for all symbolic links. */ + if (!ok && errno == EOPNOTSUPP) + { + ok = true; + symlink_changed = false; + } + } + else + { + /* If possible, avoid a race condition with --from=O:G and without the + (-h) --no-dereference option. If fts's stat call determined + that the uid/gid of FILE matched the --from=O:G-selected + owner and group IDs, blindly using chown(2) here could lead + chown(1) or chgrp(1) mistakenly to dereference a *symlink* + to an arbitrary file that an attacker had moved into the + place of FILE during the window between the stat and + chown(2) calls. If FILE is a regular file or a directory + that can be opened, this race condition can be avoided safely. */ + + enum RCH_status err + = restricted_chown (fts->fts_cwd_fd, file, file_stats, uid, gid, + required_uid, required_gid); + switch (err) + { + case RC_ok: + break; + + case RC_do_ordinary_chown: + ok = (chownat (fts->fts_cwd_fd, file, uid, gid) == 0); + break; + + case RC_error: + ok = false; + break; + + case RC_inode_changed: + /* FIXME: give a diagnostic in this case? */ + case RC_excluded: + do_chown = false; + ok = false; + break; + + default: + unreachable (); + } + } + + /* On some systems (e.g., GNU/Linux 2.4.x), + the chown function resets the 'special' permission bits. + Do *not* restore those bits; doing so would open a window in + which a malicious user, M, could subvert a chown command run + by some other user and operating on files in a directory + where M has write access. */ + + if (do_chown && !ok && ! chopt->force_silent) + error (0, errno, (uid != (uid_t) -1 + ? _("changing ownership of %s") + : _("changing group of %s")), + quoteaf (file_full_name)); + } + + if (chopt->verbosity != V_off) + { + bool changed = + ((do_chown && ok && symlink_changed) + && ! ((uid == (uid_t) -1 || uid == file_stats->st_uid) + && (gid == (gid_t) -1 || gid == file_stats->st_gid))); + + if (changed || chopt->verbosity == V_high) + { + enum Change_status ch_status = + (!ok ? CH_FAILED + : !symlink_changed ? CH_NOT_APPLIED + : !changed ? CH_NO_CHANGE_REQUESTED + : CH_SUCCEEDED); + char *old_usr = (file_stats + ? uid_to_name (file_stats->st_uid) : nullptr); + char *old_grp = (file_stats + ? gid_to_name (file_stats->st_gid) : nullptr); + char *new_usr = chopt->user_name + ? chopt->user_name : uid != -1 + ? uid_to_str (uid) : nullptr; + char *new_grp = chopt->group_name + ? chopt->group_name : gid != -1 + ? gid_to_str (gid) : nullptr; + describe_change (file_full_name, ch_status, + old_usr, old_grp, + new_usr, new_grp); + free (old_usr); + free (old_grp); + if (new_usr != chopt->user_name) + free (new_usr); + if (new_grp != chopt->group_name) + free (new_grp); + } + } + + if ( ! chopt->recurse) + fts_set (fts, ent, FTS_SKIP); + + return ok; +} + +/* Change the owner and/or group of the specified FILES. + BIT_FLAGS specifies how to treat each symlink-to-directory + that is encountered during a recursive traversal. + CHOPT specifies additional options. + If UID is not -1, then change the owner id of each file to UID. + If GID is not -1, then change the group id of each file to GID. + If REQUIRED_UID and/or REQUIRED_GID is not -1, then change only + files with user ID and group ID that match the non-(-1) value(s). + Return true if successful. */ +extern bool +chown_files (char **files, int bit_flags, + uid_t uid, gid_t gid, + uid_t required_uid, gid_t required_gid, + struct Chown_option const *chopt) +{ + bool ok = true; + + /* Use lstat and stat only if they're needed. */ + int stat_flags = ((required_uid != (uid_t) -1 || required_gid != (gid_t) -1 + || chopt->affect_symlink_referent + || chopt->verbosity != V_off) + ? 0 + : FTS_NOSTAT); + + FTS *fts = xfts_open (files, bit_flags | stat_flags, nullptr); + + while (true) + { + FTSENT *ent; + + ent = fts_read (fts); + if (ent == nullptr) + { + if (errno != 0) + { + /* FIXME: try to give a better message */ + if (! chopt->force_silent) + error (0, errno, _("fts_read failed")); + ok = false; + } + break; + } + + ok &= change_file_owner (fts, ent, uid, gid, + required_uid, required_gid, chopt); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + ok = false; + } + + return ok; +} diff --git a/src/chown-core.h b/src/chown-core.h new file mode 100644 index 0000000..8afdc0b --- /dev/null +++ b/src/chown-core.h @@ -0,0 +1,92 @@ +/* chown-core.h -- types and prototypes shared by chown and chgrp. + + Copyright (C) 2000-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef CHOWN_CORE_H +# define CHOWN_CORE_H + +# include "dev-ino.h" +# include + +enum Change_status +{ + CH_NOT_APPLIED = 1, + CH_SUCCEEDED, + CH_FAILED, + CH_NO_CHANGE_REQUESTED +}; + +enum Verbosity +{ + /* Print a message for each file that is processed. */ + V_high, + + /* Print a message for each file whose attributes we change. */ + V_changes_only, + + /* Do not be verbose. This is the default. */ + V_off +}; + +struct Chown_option +{ + /* Level of verbosity. */ + enum Verbosity verbosity; + + /* If nonzero, change the ownership of directories recursively. */ + bool recurse; + + /* Pointer to the device and inode numbers of '/', when --recursive. + Need not be freed. Otherwise nullptr. */ + struct dev_ino *root_dev_ino; + + /* This corresponds to the --dereference (opposite of -h) option. */ + bool affect_symlink_referent; + + /* If nonzero, force silence (no error messages). */ + bool force_silent; + + /* The name of the user to which ownership of the files is being given. */ + char *user_name; + + /* The name of the group to which ownership of the files is being given. */ + char *group_name; +}; + +void +chopt_init (struct Chown_option *); + +void +chopt_free (struct Chown_option *); + +char * +gid_to_name (gid_t) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE + _GL_ATTRIBUTE_RETURNS_NONNULL; + +char * +uid_to_name (uid_t) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE + _GL_ATTRIBUTE_RETURNS_NONNULL; + +bool +chown_files (char **files, int bit_flags, + uid_t uid, gid_t gid, + uid_t required_uid, gid_t required_gid, + struct Chown_option const *chopt) + _GL_ATTRIBUTE_NONNULL (); + +#endif /* CHOWN_CORE_H */ diff --git a/src/chown.c b/src/chown.c new file mode 100644 index 0000000..4e0e1c3 --- /dev/null +++ b/src/chown.c @@ -0,0 +1,330 @@ +/* chown -- change user and group ownership of files + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie . */ + +#include +#include +#include +#include + +#include "system.h" +#include "chown-core.h" +#include "fts_.h" +#include "quote.h" +#include "root-dev-ino.h" +#include "userspec.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "chown" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +/* The argument to the --reference option. Use the owner and group IDs + of this file. This file must exist. */ +static char *reference_file; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEREFERENCE_OPTION = CHAR_MAX + 1, + FROM_OPTION, + NO_PRESERVE_ROOT, + PRESERVE_ROOT, + REFERENCE_FILE_OPTION +}; + +static struct option const long_options[] = +{ + {"recursive", no_argument, nullptr, 'R'}, + {"changes", no_argument, nullptr, 'c'}, + {"dereference", no_argument, nullptr, DEREFERENCE_OPTION}, + {"from", required_argument, nullptr, FROM_OPTION}, + {"no-dereference", no_argument, nullptr, 'h'}, + {"no-preserve-root", no_argument, nullptr, NO_PRESERVE_ROOT}, + {"preserve-root", no_argument, nullptr, PRESERVE_ROOT}, + {"quiet", no_argument, nullptr, 'f'}, + {"silent", no_argument, nullptr, 'f'}, + {"reference", required_argument, nullptr, REFERENCE_FILE_OPTION}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [OWNER][:[GROUP]] FILE...\n\ + or: %s [OPTION]... --reference=RFILE FILE...\n\ +"), + program_name, program_name); + fputs (_("\ +Change the owner and/or group of each FILE to OWNER and/or GROUP.\n\ +With --reference, change the owner and group of each FILE to those of RFILE.\n\ +\n\ +"), stdout); + fputs (_("\ + -c, --changes like verbose but report only when a change is made\n\ + -f, --silent, --quiet suppress most error messages\n\ + -v, --verbose output a diagnostic for every file processed\n\ +"), stdout); + fputs (_("\ + --dereference affect the referent of each symbolic link (this is\n\ + the default), rather than the symbolic link itself\n\ + -h, --no-dereference affect symbolic links instead of any referenced file\n\ +"), stdout); + fputs (_("\ + (useful only on systems that can change the\n\ + ownership of a symlink)\n\ +"), stdout); + fputs (_("\ + --from=CURRENT_OWNER:CURRENT_GROUP\n\ + change the owner and/or group of each file only if\n\ + its current owner and/or group match those specified\n\ + here. Either may be omitted, in which case a match\n\ + is not required for the omitted attribute\n\ +"), stdout); + fputs (_("\ + --no-preserve-root do not treat '/' specially (the default)\n\ + --preserve-root fail to operate recursively on '/'\n\ +"), stdout); + fputs (_("\ + --reference=RFILE use RFILE's owner and group rather than specifying\n\ + OWNER:GROUP values. RFILE is always dereferenced.\n\ +"), stdout); + fputs (_("\ + -R, --recursive operate on files and directories recursively\n\ +"), stdout); + fputs (_("\ +\n\ +The following options modify how a hierarchy is traversed when the -R\n\ +option is also specified. If more than one is specified, only the final\n\ +one takes effect.\n\ +\n\ + -H if a command line argument is a symbolic link\n\ + to a directory, traverse it\n\ + -L traverse every symbolic link to a directory\n\ + encountered\n\ + -P do not traverse any symbolic links (default)\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Owner is unchanged if missing. Group is unchanged if missing, but changed\n\ +to login group if implied by a ':' following a symbolic OWNER.\n\ +OWNER and GROUP may be numeric as well as symbolic.\n\ +"), stdout); + printf (_("\ +\n\ +Examples:\n\ + %s root /u Change the owner of /u to \"root\".\n\ + %s root:staff /u Likewise, but also change its group to \"staff\".\n\ + %s -hR root /u Change the owner of /u and subfiles to \"root\".\n\ +"), + program_name, program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + bool preserve_root = false; + + uid_t uid = -1; /* Specified uid; -1 if not to be changed. */ + gid_t gid = -1; /* Specified gid; -1 if not to be changed. */ + + /* Change the owner (group) of a file only if it has this uid (gid). + -1 means there's no restriction. */ + uid_t required_uid = -1; + gid_t required_gid = -1; + + /* Bit flags that control how fts works. */ + int bit_flags = FTS_PHYSICAL; + + /* 1 if --dereference, 0 if --no-dereference, -1 if neither has been + specified. */ + int dereference = -1; + + struct Chown_option chopt; + bool ok; + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + chopt_init (&chopt); + + while ((optc = getopt_long (argc, argv, "HLPRcfhv", long_options, nullptr)) + != -1) + { + switch (optc) + { + case 'H': /* Traverse command-line symlinks-to-directories. */ + bit_flags = FTS_COMFOLLOW | FTS_PHYSICAL; + break; + + case 'L': /* Traverse all symlinks-to-directories. */ + bit_flags = FTS_LOGICAL; + break; + + case 'P': /* Traverse no symlinks-to-directories. */ + bit_flags = FTS_PHYSICAL; + break; + + case 'h': /* --no-dereference: affect symlinks */ + dereference = 0; + break; + + case DEREFERENCE_OPTION: /* --dereference: affect the referent + of each symlink */ + dereference = 1; + break; + + case NO_PRESERVE_ROOT: + preserve_root = false; + break; + + case PRESERVE_ROOT: + preserve_root = true; + break; + + case REFERENCE_FILE_OPTION: + reference_file = optarg; + break; + + case FROM_OPTION: + { + bool warn; + char const *e = parse_user_spec_warn (optarg, + &required_uid, &required_gid, + nullptr, nullptr, &warn); + if (e) + error (warn ? 0 : EXIT_FAILURE, 0, "%s: %s", e, quote (optarg)); + break; + } + + case 'R': + chopt.recurse = true; + break; + + case 'c': + chopt.verbosity = V_changes_only; + break; + + case 'f': + chopt.force_silent = true; + break; + + case 'v': + chopt.verbosity = V_high; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (chopt.recurse) + { + if (bit_flags == FTS_PHYSICAL) + { + if (dereference == 1) + error (EXIT_FAILURE, 0, + _("-R --dereference requires either -H or -L")); + dereference = 0; + } + } + else + { + bit_flags = FTS_PHYSICAL; + } + chopt.affect_symlink_referent = (dereference != 0); + + if (argc - optind < (reference_file ? 1 : 2)) + { + if (argc <= optind) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + if (reference_file) + { + struct stat ref_stats; + if (stat (reference_file, &ref_stats)) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf (reference_file)); + + uid = ref_stats.st_uid; + gid = ref_stats.st_gid; + chopt.user_name = uid_to_name (ref_stats.st_uid); + chopt.group_name = gid_to_name (ref_stats.st_gid); + } + else + { + bool warn; + char const *e = parse_user_spec_warn (argv[optind], &uid, &gid, + &chopt.user_name, + &chopt.group_name, &warn); + if (e) + error (warn ? 0 : EXIT_FAILURE, 0, "%s: %s", e, quote (argv[optind])); + + /* If a group is specified but no user, set the user name to the + empty string so that diagnostics say "ownership :GROUP" + rather than "group GROUP". */ + if (!chopt.user_name && chopt.group_name) + chopt.user_name = xstrdup (""); + + optind++; + } + + if (chopt.recurse && preserve_root) + { + static struct dev_ino dev_ino_buf; + chopt.root_dev_ino = get_root_dev_ino (&dev_ino_buf); + if (chopt.root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + } + + bit_flags |= FTS_DEFER_STAT; + ok = chown_files (argv + optind, bit_flags, + uid, gid, + required_uid, required_gid, &chopt); + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/chroot.c b/src/chroot.c new file mode 100644 index 0000000..6150af5 --- /dev/null +++ b/src/chroot.c @@ -0,0 +1,431 @@ +/* chroot -- run command or shell with special root directory + Copyright (C) 1995-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Roland McGrath. */ + +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "ignore-value.h" +#include "mgetgroups.h" +#include "quote.h" +#include "root-dev-ino.h" +#include "userspec.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "chroot" + +#define AUTHORS proper_name ("Roland McGrath") + +#ifndef MAXGID +# define MAXGID GID_T_MAX +#endif + +static inline bool uid_unset (uid_t uid) { return uid == (uid_t) -1; } +static inline bool gid_unset (gid_t gid) { return gid == (gid_t) -1; } +#define uid_set(x) (!uid_unset (x)) +#define gid_set(x) (!gid_unset (x)) + +enum +{ + GROUPS = UCHAR_MAX + 1, + USERSPEC, + SKIP_CHDIR +}; + +static struct option const long_opts[] = +{ + {"groups", required_argument, nullptr, GROUPS}, + {"userspec", required_argument, nullptr, USERSPEC}, + {"skip-chdir", no_argument, nullptr, SKIP_CHDIR}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +#if ! HAVE_SETGROUPS +/* At least Interix lacks supplemental group support. */ +static int +setgroups (size_t size, MAYBE_UNUSED gid_t const *list) +{ + if (size == 0) + { + /* Return success when clearing supplemental groups + as ! HAVE_SETGROUPS should only be the case on + platforms that don't support supplemental groups. */ + return 0; + } + else + { + errno = ENOTSUP; + return -1; + } +} +#endif + +/* Determine the group IDs for the specified supplementary GROUPS, + which is a comma separated list of supplementary groups (names or numbers). + Allocate an array for the parsed IDs and store it in PGIDS, + which may be allocated even on parse failure. + Update the number of parsed groups in PN_GIDS on success. + Upon any failure return nonzero, and issue diagnostic if SHOW_ERRORS is true. + Otherwise return zero. */ + +static int +parse_additional_groups (char const *groups, GETGROUPS_T **pgids, + size_t *pn_gids, bool show_errors) +{ + GETGROUPS_T *gids = nullptr; + size_t n_gids_allocated = 0; + size_t n_gids = 0; + char *buffer = xstrdup (groups); + char const *tmp; + int ret = 0; + + for (tmp = strtok (buffer, ","); tmp; tmp = strtok (nullptr, ",")) + { + struct group *g; + uintmax_t value; + + if (xstrtoumax (tmp, nullptr, 10, &value, "") == LONGINT_OK + && value <= MAXGID) + { + while (isspace (to_uchar (*tmp))) + tmp++; + if (*tmp != '+') + { + /* Handle the case where the name is numeric. */ + g = getgrnam (tmp); + if (g != nullptr) + value = g->gr_gid; + } + /* Flag that we've got a group from the number. */ + g = (struct group *) (intptr_t) ! nullptr; + } + else + { + g = getgrnam (tmp); + if (g != nullptr) + value = g->gr_gid; + } + + if (g == nullptr) + { + ret = -1; + + if (show_errors) + { + error (0, errno, _("invalid group %s"), quote (tmp)); + continue; + } + + break; + } + + if (n_gids == n_gids_allocated) + gids = X2NREALLOC (gids, &n_gids_allocated); + gids[n_gids++] = value; + } + + if (ret == 0 && n_gids == 0) + { + if (show_errors) + error (0, 0, _("invalid group list %s"), quote (groups)); + ret = -1; + } + + *pgids = gids; + + if (ret == 0) + *pn_gids = n_gids; + + free (buffer); + return ret; +} + +/* Return whether the passed path is equivalent to "/". + Note we don't compare against get_root_dev_ino() as "/" + could be bind mounted to a separate location. */ + +static bool +is_root (char const *dir) +{ + char *resolved = canonicalize_file_name (dir); + bool is_res_root = resolved && STREQ ("/", resolved); + free (resolved); + return is_res_root; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION] NEWROOT [COMMAND [ARG]...]\n\ + or: %s OPTION\n\ +"), program_name, program_name); + + fputs (_("\ +Run COMMAND with root directory set to NEWROOT.\n\ +\n\ +"), stdout); + + fputs (_("\ + --groups=G_LIST specify supplementary groups as g1,g2,..,gN\n\ +"), stdout); + fputs (_("\ + --userspec=USER:GROUP specify user and group (ID or name) to use\n\ +"), stdout); + printf (_("\ + --skip-chdir do not change working directory to %s\n\ +"), quoteaf ("/")); + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +If no command is given, run '\"$SHELL\" -i' (default: '/bin/sh -i').\n\ +"), stdout); + emit_exec_status (PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int c; + + /* Input user and groups spec. */ + char *userspec = nullptr; + char const *username = nullptr; + char const *groups = nullptr; + bool skip_chdir = false; + + /* Parsed user and group IDs. */ + uid_t uid = -1; + gid_t gid = -1; + GETGROUPS_T *out_gids = nullptr; + size_t n_gids = 0; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXIT_CANCELED); + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "+", long_opts, nullptr)) != -1) + { + switch (c) + { + case USERSPEC: + { + userspec = optarg; + /* Treat 'user:' just like 'user' + as we lookup the primary group by default + (and support doing so for UIDs as well as names. */ + size_t userlen = strlen (userspec); + if (userlen && userspec[userlen - 1] == ':') + userspec[userlen - 1] = '\0'; + break; + } + + case GROUPS: + groups = optarg; + break; + + case SKIP_CHDIR: + skip_chdir = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_CANCELED); + } + } + + if (argc <= optind) + { + error (0, 0, _("missing operand")); + usage (EXIT_CANCELED); + } + + char const *newroot = argv[optind]; + bool is_oldroot = is_root (newroot); + + if (! is_oldroot && skip_chdir) + { + error (0, 0, _("option --skip-chdir only permitted if NEWROOT is old %s"), + quoteaf ("/")); + usage (EXIT_CANCELED); + } + + if (! is_oldroot) + { + /* We have to look up users and groups twice. + - First, outside the chroot to load potentially necessary passwd/group + parsing plugins (e.g. NSS); + - Second, inside chroot to redo parsing in case IDs are different. + Within chroot lookup is the main justification for having + the --user option supported by the chroot command itself. */ + if (userspec) + ignore_value (parse_user_spec (userspec, &uid, &gid, nullptr, nullptr)); + + /* If no gid is supplied or looked up, do so now. + Also lookup the username for use with getgroups. */ + if (uid_set (uid) && (! groups || gid_unset (gid))) + { + const struct passwd *pwd; + if ((pwd = getpwuid (uid))) + { + if (gid_unset (gid)) + gid = pwd->pw_gid; + username = pwd->pw_name; + } + } + + if (groups && *groups) + ignore_value (parse_additional_groups (groups, &out_gids, &n_gids, + false)); +#if HAVE_SETGROUPS + else if (! groups && gid_set (gid) && username) + { + int ngroups = xgetgroups (username, gid, &out_gids); + if (0 < ngroups) + n_gids = ngroups; + } +#endif + } + + if (chroot (newroot) != 0) + error (EXIT_CANCELED, errno, _("cannot change root directory to %s"), + quoteaf (newroot)); + + if (! skip_chdir && chdir ("/")) + error (EXIT_CANCELED, errno, _("cannot chdir to root directory")); + + if (argc == optind + 1) + { + /* No command. Run an interactive shell. */ + char *shell = getenv ("SHELL"); + if (shell == nullptr) + shell = bad_cast ("/bin/sh"); + argv[0] = shell; + argv[1] = bad_cast ("-i"); + argv[2] = nullptr; + } + else + { + /* The following arguments give the command. */ + argv += optind + 1; + } + + /* Attempt to set all three: supplementary groups, group ID, user ID. + Diagnose any failures. If any have failed, exit before execvp. */ + if (userspec) + { + bool warn; + char const *err = parse_user_spec_warn (userspec, &uid, &gid, + nullptr, nullptr, &warn); + if (err) + error (warn ? 0 : EXIT_CANCELED, 0, "%s", (err)); + } + + /* If no gid is supplied or looked up, do so now. + Also lookup the username for use with getgroups. */ + if (uid_set (uid) && (! groups || gid_unset (gid))) + { + const struct passwd *pwd; + if ((pwd = getpwuid (uid))) + { + if (gid_unset (gid)) + gid = pwd->pw_gid; + username = pwd->pw_name; + } + else if (gid_unset (gid)) + { + error (EXIT_CANCELED, errno, + _("no group specified for unknown uid: %d"), (int) uid); + } + } + + GETGROUPS_T *gids = out_gids; + GETGROUPS_T *in_gids = nullptr; + if (groups && *groups) + { + if (parse_additional_groups (groups, &in_gids, &n_gids, !n_gids) != 0) + { + if (! n_gids) + return EXIT_CANCELED; + /* else look-up outside the chroot worked, then go with those. */ + } + else + gids = in_gids; + } +#if HAVE_SETGROUPS + else if (! groups && gid_set (gid) && username) + { + int ngroups = xgetgroups (username, gid, &in_gids); + if (ngroups <= 0) + { + if (! n_gids) + error (EXIT_CANCELED, errno, + _("failed to get supplemental groups")); + /* else look-up outside the chroot worked, then go with those. */ + } + else + { + n_gids = ngroups; + gids = in_gids; + } + } +#endif + + if ((uid_set (uid) || groups) && setgroups (n_gids, gids) != 0) + error (EXIT_CANCELED, errno, _("failed to set supplemental groups")); + + free (in_gids); + free (out_gids); + + if (gid_set (gid) && setgid (gid)) + error (EXIT_CANCELED, errno, _("failed to set group-ID")); + + if (uid_set (uid) && setuid (uid)) + error (EXIT_CANCELED, errno, _("failed to set user-ID")); + + /* Execute the given command. */ + execvp (argv[0], argv); + + int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; + error (0, errno, _("failed to run command %s"), quote (argv[0])); + return exit_status; +} diff --git a/src/cksum.c b/src/cksum.c new file mode 100644 index 0000000..41146d2 --- /dev/null +++ b/src/cksum.c @@ -0,0 +1,270 @@ +/* cksum -- calculate and print POSIX checksums and sizes of files + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Q. Frank Xia, qx@math.columbia.edu. + Cosmetic changes and reorganization by David MacKenzie, djm@gnu.ai.mit.edu. + + Usage: cksum [file...] + + The code segment between "#ifdef CRCTAB" and "#else" is the code + which calculates the "crctab". It is included for those who want + verify the correctness of the "crctab". To recreate the "crctab", + do something like the following: + + cc -I../lib -DCRCTAB -o crctab cksum.c + crctab > crctab.c + + This software is compatible with neither the System V nor the BSD + 'sum' program. It is supposed to conform to POSIX, except perhaps + for foreign language support. Any inconsistency with the standard + (other than foreign language support) is a bug. */ + +#include + +#include +#include +#include +#include "system.h" + +#include +#ifdef WORDS_BIGENDIAN +# define SWAP(n) (n) +#else +# define SWAP(n) bswap_32 (n) +#endif + +#ifdef CRCTAB + +# define BIT(x) ((uint_fast32_t) 1 << (x)) +# define SBIT BIT (31) + +/* The generating polynomial is + + 32 26 23 22 16 12 11 10 8 7 5 4 2 1 + G(X)=X + X + X + X + X + X + X + X + X + X + X + X + X + X + 1 + + The i bit in GEN is set if X^i is a summand of G(X) except X^32. */ + +# define GEN (BIT (26) | BIT (23) | BIT (22) | BIT (16) | BIT (12) \ + | BIT (11) | BIT (10) | BIT (8) | BIT (7) | BIT (5) \ + | BIT (4) | BIT (2) | BIT (1) | BIT (0)) + +static uint_fast32_t r[8]; + +static void +fill_r (void) +{ + r[0] = GEN; + for (int i = 1; i < 8; i++) + r[i] = (r[i - 1] << 1) ^ ((r[i - 1] & SBIT) ? GEN : 0); +} + +static uint_fast32_t +crc_remainder (int m) +{ + uint_fast32_t rem = 0; + + for (int i = 0; i < 8; i++) + if (BIT (i) & m) + rem ^= r[i]; + + return rem & 0xFFFFFFFF; /* Make it run on 64-bit machine. */ +} + +int +main (void) +{ + int i; + static uint_fast32_t crctab[8][256]; + + fill_r (); + + for (i = 0; i < 256; i++) + { + crctab[0][i] = crc_remainder (i); + } + + /* CRC(0x11 0x22 0x33 0x44) + is equal to + CRC(0x11 0x00 0x00 0x00) XOR CRC(0x22 0x00 0x00) XOR + CRC(0x33 0x00) XOR CRC(0x44) + We precompute the CRC values for the offset values into + separate CRC tables. We can then use them to speed up + CRC calculation by processing multiple bytes at the time. */ + for (i = 0; i < 256; i++) + { + uint32_t crc = 0; + + crc = (crc << 8) ^ crctab[0][((crc >> 24) ^ (i & 0xFF)) & 0xFF]; + for (idx_t offset = 1; offset < 8; offset++) + { + crc = (crc << 8) ^ crctab[0][((crc >> 24) ^ 0x00) & 0xFF]; + crctab[offset][i] = crc; + } + } + + printf ("#include \n"); + printf ("#include \n"); + printf ("\nuint_fast32_t const crctab[8][256] = {\n"); + for (int y = 0; y < 8; y++) + { + printf ("{\n 0x%08x", crctab[y][0]); + for (i = 0; i < 51; i++) + { + printf (",\n 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x", + crctab[y][i * 5 + 1], crctab[y][i * 5 + 2], + crctab[y][i * 5 + 3], crctab[y][i * 5 + 4], + crctab[y][i * 5 + 5]); + } + printf ("\n},\n"); + } + printf ("};\n"); + return EXIT_SUCCESS; +} + +#else /* !CRCTAB */ + +# include "cksum.h" + +/* Number of bytes to read at once. */ +# define BUFLEN (1 << 16) + +# if USE_PCLMUL_CRC32 +static bool +pclmul_supported (void) +{ + bool pclmul_enabled = (0 < __builtin_cpu_supports ("pclmul") + && 0 < __builtin_cpu_supports ("avx")); + + if (cksum_debug) + error (0, 0, "%s", + (pclmul_enabled + ? _("using pclmul hardware support") + : _("pclmul support not detected"))); + + return pclmul_enabled; +} +# endif /* USE_PCLMUL_CRC32 */ + +static bool +cksum_slice8 (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out) +{ + uint32_t buf[BUFLEN / sizeof (uint32_t)]; + uint_fast32_t crc = 0; + uintmax_t length = 0; + size_t bytes_read; + + if (!fp || !crc_out || !length_out) + return false; + + while ((bytes_read = fread (buf, 1, BUFLEN, fp)) > 0) + { + uint32_t *datap; + + if (length + bytes_read < length) + { + errno = EOVERFLOW; + return false; + } + length += bytes_read; + + /* Process multiples of 8 bytes */ + datap = (uint32_t *)buf; + while (bytes_read >= 8) + { + uint32_t first = *datap++, second = *datap++; + crc ^= SWAP (first); + second = SWAP (second); + crc = (crctab[7][(crc >> 24) & 0xFF] + ^ crctab[6][(crc >> 16) & 0xFF] + ^ crctab[5][(crc >> 8) & 0xFF] + ^ crctab[4][(crc) & 0xFF] + ^ crctab[3][(second >> 24) & 0xFF] + ^ crctab[2][(second >> 16) & 0xFF] + ^ crctab[1][(second >> 8) & 0xFF] + ^ crctab[0][(second) & 0xFF]); + bytes_read -= 8; + } + + /* And finish up last 0-7 bytes in a byte by byte fashion */ + unsigned char *cp = (unsigned char *)datap; + while (bytes_read--) + crc = (crc << 8) ^ crctab[0][((crc >> 24) ^ *cp++) & 0xFF]; + if (feof (fp)) + break; + } + + *crc_out = crc; + *length_out = length; + + return !ferror (fp); +} + +/* Calculate the checksum and length in bytes of stream STREAM. + Return -1 on error, 0 on success. */ + +int +crc_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + uintmax_t total_bytes = 0; + uint_fast32_t crc = 0; + +# if USE_PCLMUL_CRC32 + static bool (*cksum_fp) (FILE *, uint_fast32_t *, uintmax_t *); + if (! cksum_fp) + cksum_fp = pclmul_supported () ? cksum_pclmul : cksum_slice8; +# else + bool (*cksum_fp) (FILE *, uint_fast32_t *, uintmax_t *) = cksum_slice8; +# endif + + if (! cksum_fp (stream, &crc, &total_bytes)) + return -1; + + *length = total_bytes; + + for (; total_bytes; total_bytes >>= 8) + crc = (crc << 8) ^ crctab[0][((crc >> 24) ^ total_bytes) & 0xFF]; + crc = ~crc & 0xFFFFFFFF; + + unsigned int crc_out = crc; + memcpy (resstream, &crc_out, sizeof crc_out); + + return 0; +} + +/* Print the checksum and size to stdout. + If ARGS is true, also print the FILE name. */ + +void +output_crc (char const *file, int binary_file, void const *digest, bool raw, + bool tagged, unsigned char delim, bool args, uintmax_t length) +{ + if (raw) + { + /* Output in network byte order (big endian). */ + uint32_t out_int = SWAP (*(uint32_t *)digest); + fwrite (&out_int, 1, 32/8, stdout); + return; + } + + char length_buf[INT_BUFSIZE_BOUND (uintmax_t)]; + printf ("%u %s", *(unsigned int *)digest, umaxtostr (length, length_buf)); + if (args) + printf (" %s", file); + putchar (delim); +} + +#endif /* !CRCTAB */ diff --git a/src/cksum.h b/src/cksum.h new file mode 100644 index 0000000..58e9310 --- /dev/null +++ b/src/cksum.h @@ -0,0 +1,19 @@ +#ifndef __CKSUM_H__ +# define __CKSUM_H__ + +extern bool cksum_debug; + +extern int +crc_sum_stream (FILE *stream, void *resstream, uintmax_t *length); + +extern void +output_crc (char const *file, int binary_file, void const *digest, bool raw, + bool tagged, unsigned char delim, bool args, uintmax_t length) + _GL_ATTRIBUTE_NONNULL ((3)); + +extern bool +cksum_pclmul (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out); + +extern uint_fast32_t const crctab[8][256]; + +#endif diff --git a/src/cksum_pclmul.c b/src/cksum_pclmul.c new file mode 100644 index 0000000..9dba1c9 --- /dev/null +++ b/src/cksum_pclmul.c @@ -0,0 +1,189 @@ +/* cksum -- calculate and print POSIX checksums and sizes of files + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include +#include +#include +#include +#include "system.h" + +/* Number of bytes to read at once. */ +#define BUFLEN (1 << 16) + +extern uint_fast32_t const crctab[8][256]; + +extern bool +cksum_pclmul (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out); + +/* Calculate CRC32 using PCLMULQDQ CPU instruction found in x86/x64 CPUs */ + +bool +cksum_pclmul (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out) +{ + __m128i buf[BUFLEN / sizeof (__m128i)]; + uint_fast32_t crc = 0; + uintmax_t length = 0; + size_t bytes_read; + __m128i single_mult_constant; + __m128i four_mult_constant; + __m128i shuffle_constant; + + if (!fp || !crc_out || !length_out) + return false; + + /* These constants and general algorithms are taken from the Intel whitepaper + "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" + */ + single_mult_constant = _mm_set_epi64x (0xC5B9CD4C, 0xE8A45605); + four_mult_constant = _mm_set_epi64x (0x8833794C, 0xE6228B11); + + /* Constant to byteswap a full SSE register */ + shuffle_constant = _mm_set_epi8 (0, 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15); + + while ((bytes_read = fread (buf, 1, BUFLEN, fp)) > 0) + { + __m128i *datap; + __m128i data; + __m128i data2; + __m128i data3; + __m128i data4; + __m128i data5; + __m128i data6; + __m128i data7; + __m128i data8; + __m128i fold_data; + __m128i xor_crc; + + if (length + bytes_read < length) + { + errno = EOVERFLOW; + return false; + } + length += bytes_read; + + datap = (__m128i *)buf; + + /* Fold in parallel eight 16-byte blocks into four 16-byte blocks */ + if (bytes_read >= 16 * 8) + { + data = _mm_loadu_si128 (datap); + data = _mm_shuffle_epi8 (data, shuffle_constant); + /* XOR in initial CRC value (for us 0 so no effect), or CRC value + calculated for previous BUFLEN buffer from fread */ + xor_crc = _mm_set_epi32 (crc, 0, 0, 0); + crc = 0; + data = _mm_xor_si128 (data, xor_crc); + data3 = _mm_loadu_si128 (datap + 1); + data3 = _mm_shuffle_epi8 (data3, shuffle_constant); + data5 = _mm_loadu_si128 (datap + 2); + data5 = _mm_shuffle_epi8 (data5, shuffle_constant); + data7 = _mm_loadu_si128 (datap + 3); + data7 = _mm_shuffle_epi8 (data7, shuffle_constant); + + + while (bytes_read >= 16 * 8) + { + datap += 4; + + /* Do multiplication here for four consecutive 16 byte blocks */ + data2 = _mm_clmulepi64_si128 (data, four_mult_constant, 0x00); + data = _mm_clmulepi64_si128 (data, four_mult_constant, 0x11); + data4 = _mm_clmulepi64_si128 (data3, four_mult_constant, 0x00); + data3 = _mm_clmulepi64_si128 (data3, four_mult_constant, 0x11); + data6 = _mm_clmulepi64_si128 (data5, four_mult_constant, 0x00); + data5 = _mm_clmulepi64_si128 (data5, four_mult_constant, 0x11); + data8 = _mm_clmulepi64_si128 (data7, four_mult_constant, 0x00); + data7 = _mm_clmulepi64_si128 (data7, four_mult_constant, 0x11); + + /* Now multiplication results for the four blocks is xor:ed with + next four 16 byte blocks from the buffer. This effectively + "consumes" the first four blocks from the buffer. + Keep xor result in variables for multiplication in next + round of loop. */ + data = _mm_xor_si128 (data, data2); + data2 = _mm_loadu_si128 (datap); + data2 = _mm_shuffle_epi8 (data2, shuffle_constant); + data = _mm_xor_si128 (data, data2); + + data3 = _mm_xor_si128 (data3, data4); + data4 = _mm_loadu_si128 (datap + 1); + data4 = _mm_shuffle_epi8 (data4, shuffle_constant); + data3 = _mm_xor_si128 (data3, data4); + + data5 = _mm_xor_si128 (data5, data6); + data6 = _mm_loadu_si128 (datap + 2); + data6 = _mm_shuffle_epi8 (data6, shuffle_constant); + data5 = _mm_xor_si128 (data5, data6); + + data7 = _mm_xor_si128 (data7, data8); + data8 = _mm_loadu_si128 (datap + 3); + data8 = _mm_shuffle_epi8 (data8, shuffle_constant); + data7 = _mm_xor_si128 (data7, data8); + + bytes_read -= (16 * 4); + } + /* At end of loop we write out results from variables back into + the buffer, for use in single fold loop */ + data = _mm_shuffle_epi8 (data, shuffle_constant); + _mm_storeu_si128 (datap, data); + data3 = _mm_shuffle_epi8 (data3, shuffle_constant); + _mm_storeu_si128 (datap + 1, data3); + data5 = _mm_shuffle_epi8 (data5, shuffle_constant); + _mm_storeu_si128 (datap + 2, data5); + data7 = _mm_shuffle_epi8 (data7, shuffle_constant); + _mm_storeu_si128 (datap + 3, data7); + } + + /* Fold two 16-byte blocks into one 16-byte block */ + if (bytes_read >= 32) + { + data = _mm_loadu_si128 (datap); + data = _mm_shuffle_epi8 (data, shuffle_constant); + xor_crc = _mm_set_epi32 (crc, 0, 0, 0); + crc = 0; + data = _mm_xor_si128 (data, xor_crc); + while (bytes_read >= 32) + { + datap++; + + data2 = _mm_clmulepi64_si128 (data, single_mult_constant, 0x00); + data = _mm_clmulepi64_si128 (data, single_mult_constant, 0x11); + fold_data = _mm_loadu_si128 (datap); + fold_data = _mm_shuffle_epi8 (fold_data, shuffle_constant); + data = _mm_xor_si128 (data, data2); + data = _mm_xor_si128 (data, fold_data); + bytes_read -= 16; + } + data = _mm_shuffle_epi8 (data, shuffle_constant); + _mm_storeu_si128 (datap, data); + } + + /* And finish up last 0-31 bytes in a byte by byte fashion */ + unsigned char *cp = (unsigned char *)datap; + while (bytes_read--) + crc = (crc << 8) ^ crctab[0][((crc >> 24) ^ *cp++) & 0xFF]; + if (feof (fp)) + break; + } + + *crc_out = crc; + *length_out = length; + + return !ferror (fp); +} diff --git a/src/comm.c b/src/comm.c new file mode 100644 index 0000000..5cb2410 --- /dev/null +++ b/src/comm.c @@ -0,0 +1,512 @@ +/* comm -- compare two sorted files line by line. + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Richard Stallman and David MacKenzie. */ + +#include + +#include +#include +#include "system.h" +#include "linebuffer.h" +#include "fadvise.h" +#include "hard-locale.h" +#include "quote.h" +#include "stdio--.h" +#include "memcmp2.h" +#include "xmemcoll.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "comm" + +#define AUTHORS \ + proper_name ("Richard M. Stallman"), \ + proper_name ("David MacKenzie") + +/* Undefine, to avoid warning about redefinition on some systems. */ +#undef min +#define min(x, y) ((x) < (y) ? (x) : (y)) + +/* True if the LC_COLLATE locale is hard. */ +static bool hard_LC_COLLATE; + +/* If true, print lines that are found only in file 1. */ +static bool only_file_1; + +/* If true, print lines that are found only in file 2. */ +static bool only_file_2; + +/* If true, print lines that are found in both files. */ +static bool both; + +/* If nonzero, we have seen at least one unpairable line. */ +static bool seen_unpairable; + +/* If nonzero, we have warned about disorder in that file. */ +static bool issued_disorder_warning[2]; + +/* line delimiter. */ +static unsigned char delim = '\n'; + +/* If true, print a summary. */ +static bool total_option; + +/* If nonzero, check that the input is correctly ordered. */ +static enum + { + CHECK_ORDER_DEFAULT, + CHECK_ORDER_ENABLED, + CHECK_ORDER_DISABLED + } check_input_order; + +/* Output columns will be delimited with this string, which may be set + on the command-line with --output-delimiter=STR. */ +static char const *col_sep = "\t"; +static size_t col_sep_len = 0; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + CHECK_ORDER_OPTION = CHAR_MAX + 1, + NOCHECK_ORDER_OPTION, + OUTPUT_DELIMITER_OPTION, + TOTAL_OPTION +}; + +static struct option const long_options[] = +{ + {"check-order", no_argument, nullptr, CHECK_ORDER_OPTION}, + {"nocheck-order", no_argument, nullptr, NOCHECK_ORDER_OPTION}, + {"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION}, + {"total", no_argument, nullptr, TOTAL_OPTION}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... FILE1 FILE2\n\ +"), + program_name); + fputs (_("\ +Compare sorted files FILE1 and FILE2 line by line.\n\ +"), stdout); + fputs (_("\ +\n\ +When FILE1 or FILE2 (not both) is -, read standard input.\n\ +"), stdout); + fputs (_("\ +\n\ +With no options, produce three-column output. Column one contains\n\ +lines unique to FILE1, column two contains lines unique to FILE2,\n\ +and column three contains lines common to both files.\n\ +"), stdout); + fputs (_("\ +\n\ + -1 suppress column 1 (lines unique to FILE1)\n\ + -2 suppress column 2 (lines unique to FILE2)\n\ + -3 suppress column 3 (lines that appear in both files)\n\ +"), stdout); + fputs (_("\ +\n\ + --check-order check that the input is correctly sorted, even\n\ + if all input lines are pairable\n\ + --nocheck-order do not check that the input is correctly sorted\n\ +"), stdout); + fputs (_("\ + --output-delimiter=STR separate columns with STR\n\ +"), stdout); + fputs (_("\ + --total output a summary\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\ +"), stdout); + printf (_("\ +\n\ +Examples:\n\ + %s -12 file1 file2 Print only lines present in both file1 and file2.\n\ + %s -3 file1 file2 Print lines in file1 not in file2, and vice versa.\n\ +"), + program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Output the line in linebuffer LINE to stdout + provided the switches say it should be output. + CLASS is 1 for a line found only in file 1, + 2 for a line only in file 2, 3 for a line in both. */ + +static void +writeline (struct linebuffer const *line, int class) +{ + switch (class) + { + case 1: + if (!only_file_1) + return; + break; + + case 2: + if (!only_file_2) + return; + if (only_file_1) + fwrite (col_sep, 1, col_sep_len, stdout); + break; + + case 3: + if (!both) + return; + if (only_file_1) + fwrite (col_sep, 1, col_sep_len, stdout); + if (only_file_2) + fwrite (col_sep, 1, col_sep_len, stdout); + break; + } + + fwrite (line->buffer, sizeof (char), line->length, stdout); + + if (ferror (stdout)) + write_error (); +} + +/* Check that successive input lines PREV and CURRENT from input file + WHATFILE are presented in order. + + If the user specified --nocheck-order, the check is not made. + If the user specified --check-order, the problem is fatal. + Otherwise (the default), the message is simply a warning. + + A message is printed at most once per input file. + + This function was copied (nearly) verbatim from 'src/join.c'. */ + +static void +check_order (struct linebuffer const *prev, + struct linebuffer const *current, + int whatfile) +{ + + if (check_input_order != CHECK_ORDER_DISABLED + && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) + { + if (!issued_disorder_warning[whatfile - 1]) + { + int order; + + if (hard_LC_COLLATE) + order = xmemcoll (prev->buffer, prev->length - 1, + current->buffer, current->length - 1); + else + order = memcmp2 (prev->buffer, prev->length - 1, + current->buffer, current->length - 1); + + if (0 < order) + { + error ((check_input_order == CHECK_ORDER_ENABLED + ? EXIT_FAILURE : 0), + 0, _("file %d is not in sorted order"), whatfile); + + /* If we get to here, the message was just a warning, but we + want only to issue it once. */ + issued_disorder_warning[whatfile - 1] = true; + } + } + } +} + +/* Compare INFILES[0] and INFILES[1]. + If either is "-", use the standard input for that file. + Assume that each input file is sorted; + merge them and output the result. + Exit the program when done. */ + +static _Noreturn void +compare_files (char **infiles) +{ + /* For each file, we have four linebuffers in lba. */ + struct linebuffer lba[2][4]; + + /* thisline[i] points to the linebuffer holding the next available line + in file i, or is null if there are no lines left in that file. */ + struct linebuffer *thisline[2]; + + /* all_line[i][alt[i][0]] also points to the linebuffer holding the + current line in file i. We keep two buffers of history around so we + can look two lines back when we get to the end of a file. */ + struct linebuffer *all_line[2][4]; + + /* This is used to rotate through the buffers for each input file. */ + int alt[2][3]; + + /* streams[i] holds the input stream for file i. */ + FILE *streams[2]; + + /* Counters for the summary. */ + uintmax_t total[] = {0, 0, 0}; + + int i, j; + + /* Initialize the storage. */ + for (i = 0; i < 2; i++) + { + for (j = 0; j < 4; j++) + { + initbuffer (&lba[i][j]); + all_line[i][j] = &lba[i][j]; + } + alt[i][0] = 0; + alt[i][1] = 0; + alt[i][2] = 0; + streams[i] = (STREQ (infiles[i], "-") ? stdin : fopen (infiles[i], "r")); + if (!streams[i]) + error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); + + fadvise (streams[i], FADVISE_SEQUENTIAL); + + thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], streams[i], + delim); + if (ferror (streams[i])) + error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); + } + + while (thisline[0] || thisline[1]) + { + int order; + bool fill_up[2] = { false, false }; + + /* Compare the next available lines of the two files. */ + + if (!thisline[0]) + order = 1; + else if (!thisline[1]) + order = -1; + else + { + if (hard_LC_COLLATE) + order = xmemcoll (thisline[0]->buffer, thisline[0]->length - 1, + thisline[1]->buffer, thisline[1]->length - 1); + else + { + size_t len = min (thisline[0]->length, thisline[1]->length) - 1; + order = memcmp (thisline[0]->buffer, thisline[1]->buffer, len); + if (order == 0) + order = ((thisline[0]->length > thisline[1]->length) + - (thisline[0]->length < thisline[1]->length)); + } + } + + /* Output the line that is lesser. */ + if (order == 0) + { + /* Line is seen in both files. */ + total[2]++; + writeline (thisline[1], 3); + } + else + { + seen_unpairable = true; + if (order <= 0) + { + /* Line is seen in file 1 only. */ + total[0]++; + writeline (thisline[0], 1); + } + else + { + /* Line is seen in file 2 only. */ + total[1]++; + writeline (thisline[1], 2); + } + } + + /* Step the file the line came from. + If the files match, step both files. */ + if (0 <= order) + fill_up[1] = true; + if (order <= 0) + fill_up[0] = true; + + for (i = 0; i < 2; i++) + if (fill_up[i]) + { + /* Rotate the buffers for this file. */ + alt[i][2] = alt[i][1]; + alt[i][1] = alt[i][0]; + alt[i][0] = (alt[i][0] + 1) & 0x03; + + thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], + streams[i], delim); + + if (thisline[i]) + check_order (all_line[i][alt[i][1]], thisline[i], i + 1); + + /* If this is the end of the file we may need to re-check + the order of the previous two lines, since we might have + discovered an unpairable match since we checked before. */ + else if (all_line[i][alt[i][2]]->buffer) + check_order (all_line[i][alt[i][2]], + all_line[i][alt[i][1]], i + 1); + + if (ferror (streams[i])) + error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); + + fill_up[i] = false; + } + } + + for (i = 0; i < 2; i++) + if (fclose (streams[i]) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); + + if (total_option) + { + /* Print the summary, minding the column and line delimiters. */ + char buf1[INT_BUFSIZE_BOUND (uintmax_t)]; + char buf2[INT_BUFSIZE_BOUND (uintmax_t)]; + char buf3[INT_BUFSIZE_BOUND (uintmax_t)]; + if (col_sep_len == 1) + { /* Separate to handle NUL char. */ + printf ("%s%c%s%c%s%c%s%c", + umaxtostr (total[0], buf1), *col_sep, + umaxtostr (total[1], buf2), *col_sep, + umaxtostr (total[2], buf3), *col_sep, + _("total"), delim); + } + else + { + printf ("%s%s%s%s%s%s%s%c", + umaxtostr (total[0], buf1), col_sep, + umaxtostr (total[1], buf2), col_sep, + umaxtostr (total[2], buf3), col_sep, + _("total"), delim); + } + } + + if (issued_disorder_warning[0] || issued_disorder_warning[1]) + error (EXIT_FAILURE, 0, _("input is not in sorted order")); + + /* Exit here to pacify gcc -fsanitizer=leak. */ + exit (EXIT_SUCCESS); +} + +int +main (int argc, char **argv) +{ + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + hard_LC_COLLATE = hard_locale (LC_COLLATE); + + atexit (close_stdout); + + only_file_1 = true; + only_file_2 = true; + both = true; + + seen_unpairable = false; + issued_disorder_warning[0] = issued_disorder_warning[1] = false; + check_input_order = CHECK_ORDER_DEFAULT; + total_option = false; + + while ((c = getopt_long (argc, argv, "123z", long_options, nullptr)) != -1) + switch (c) + { + case '1': + only_file_1 = false; + break; + + case '2': + only_file_2 = false; + break; + + case '3': + both = false; + break; + + case 'z': + delim = '\0'; + break; + + case NOCHECK_ORDER_OPTION: + check_input_order = CHECK_ORDER_DISABLED; + break; + + case CHECK_ORDER_OPTION: + check_input_order = CHECK_ORDER_ENABLED; + break; + + case OUTPUT_DELIMITER_OPTION: + if (col_sep_len && !STREQ (col_sep, optarg)) + error (EXIT_FAILURE, 0, _("multiple output delimiters specified")); + col_sep = optarg; + col_sep_len = *optarg ? strlen (optarg) : 1; + break; + + case TOTAL_OPTION: + total_option = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + + if (! col_sep_len) + col_sep_len = 1; + + if (argc - optind < 2) + { + if (argc <= optind) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + if (2 < argc - optind) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 2])); + usage (EXIT_FAILURE); + } + + compare_files (argv + optind); +} diff --git a/src/copy.c b/src/copy.c new file mode 100644 index 0000000..b9fff03 --- /dev/null +++ b/src/copy.c @@ -0,0 +1,3492 @@ +/* copy.c -- core functions for copying files and directories + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from cp.c and librarified by Jim Meyering. */ + +#include +#include +#include +#include +#include +#include + +#if HAVE_HURD_H +# include +#endif +#if HAVE_PRIV_H +# include +#endif + +#include "system.h" +#include "acl.h" +#include "alignalloc.h" +#include "assure.h" +#include "backupfile.h" +#include "buffer-lcm.h" +#include "canonicalize.h" +#include "copy.h" +#include "cp-hash.h" +#include "fadvise.h" +#include "fcntl--.h" +#include "file-set.h" +#include "filemode.h" +#include "filenamecat.h" +#include "force-link.h" +#include "full-write.h" +#include "hash.h" +#include "hash-triple.h" +#include "ignore-value.h" +#include "ioblksize.h" +#include "quote.h" +#include "renameatu.h" +#include "root-uid.h" +#include "same.h" +#include "savedir.h" +#include "stat-size.h" +#include "stat-time.h" +#include "utimecmp.h" +#include "utimens.h" +#include "write-any-file.h" +#include "areadlink.h" +#include "yesno.h" +#include "selinux.h" + +#ifndef USE_XATTR +# define USE_XATTR false +#endif + +#if USE_XATTR +# include +# include +# include +# include "verror.h" +#endif + +#if HAVE_LINUX_FALLOC_H +# include +#endif + +/* See HAVE_FALLOCATE workaround when including this file. */ +#ifdef HAVE_LINUX_FS_H +# include +#endif + +#if !defined FICLONE && defined __linux__ +# define FICLONE _IOW (0x94, 9, int) +#endif + +#if HAVE_FCLONEFILEAT && !USE_XATTR +# include +#endif + +#ifndef HAVE_FCHOWN +# define HAVE_FCHOWN false +# define fchown(fd, uid, gid) (-1) +#endif + +#ifndef USE_ACL +# define USE_ACL 0 +#endif + +#define SAME_OWNER(A, B) ((A).st_uid == (B).st_uid) +#define SAME_GROUP(A, B) ((A).st_gid == (B).st_gid) +#define SAME_OWNER_AND_GROUP(A, B) (SAME_OWNER (A, B) && SAME_GROUP (A, B)) + +/* LINK_FOLLOWS_SYMLINKS is tri-state; if it is -1, we don't know + how link() behaves, so assume we can't hardlink symlinks in that case. */ +#if (defined HAVE_LINKAT && ! LINKAT_SYMLINK_NOTSUP) || ! LINK_FOLLOWS_SYMLINKS +# define CAN_HARDLINK_SYMLINKS 1 +#else +# define CAN_HARDLINK_SYMLINKS 0 +#endif + +struct dir_list +{ + struct dir_list *parent; + ino_t ino; + dev_t dev; +}; + +/* Initial size of the cp.dest_info hash table. */ +#define DEST_INFO_INITIAL_CAPACITY 61 + +static bool copy_internal (char const *src_name, char const *dst_name, + int dst_dirfd, char const *dst_relname, + int nonexistent_dst, struct stat const *parent, + struct dir_list *ancestors, + const struct cp_options *x, + bool command_line_arg, + bool *first_dir_created_per_command_line_arg, + bool *copy_into_self, + bool *rename_succeeded); +static bool owner_failure_ok (struct cp_options const *x); + +/* Pointers to the file names: they're used in the diagnostic that is issued + when we detect the user is trying to copy a directory into itself. */ +static char const *top_level_src_name; +static char const *top_level_dst_name; + +enum copy_debug_val + { + COPY_DEBUG_UNKNOWN, + COPY_DEBUG_NO, + COPY_DEBUG_YES, + COPY_DEBUG_EXTERNAL, + COPY_DEBUG_EXTERNAL_INTERNAL, + COPY_DEBUG_AVOIDED, + COPY_DEBUG_UNSUPPORTED, + }; + +/* debug info about the last file copy. */ +static struct copy_debug +{ + enum copy_debug_val offload; + enum copy_debug_val reflink; + enum copy_debug_val sparse_detection; +} copy_debug; + +static const char* +copy_debug_string (enum copy_debug_val debug_val) +{ + switch (debug_val) + { + case COPY_DEBUG_NO: return "no"; + case COPY_DEBUG_YES: return "yes"; + case COPY_DEBUG_AVOIDED: return "avoided"; + case COPY_DEBUG_UNSUPPORTED: return "unsupported"; + default: return "unknown"; + } +} + +static const char* +copy_debug_sparse_string (enum copy_debug_val debug_val) +{ + switch (debug_val) + { + case COPY_DEBUG_NO: return "no"; + case COPY_DEBUG_YES: return "zeros"; + case COPY_DEBUG_EXTERNAL: return "SEEK_HOLE"; + case COPY_DEBUG_EXTERNAL_INTERNAL: return "SEEK_HOLE + zeros"; + default: return "unknown"; + } +} + +/* Print --debug output on standard output. */ +static void +emit_debug (const struct cp_options *x) +{ + if (! x->hard_link && ! x->symbolic_link && x->data_copy_required) + printf ("copy offload: %s, reflink: %s, sparse detection: %s\n", + copy_debug_string (copy_debug.offload), + copy_debug_string (copy_debug.reflink), + copy_debug_sparse_string (copy_debug.sparse_detection)); +} + +#ifndef DEV_FD_MIGHT_BE_CHR +# define DEV_FD_MIGHT_BE_CHR false +#endif + +/* Act like fstat (DIRFD, FILENAME, ST, FLAGS), except when following + symbolic links on Solaris-like systems, treat any character-special + device like /dev/fd/0 as if it were the file it is open on. */ +static int +follow_fstatat (int dirfd, char const *filename, struct stat *st, int flags) +{ + int result = fstatat (dirfd, filename, st, flags); + + if (DEV_FD_MIGHT_BE_CHR && result == 0 && !(flags & AT_SYMLINK_NOFOLLOW) + && S_ISCHR (st->st_mode)) + { + static dev_t stdin_rdev; + static signed char stdin_rdev_status; + if (stdin_rdev_status == 0) + { + struct stat stdin_st; + if (stat ("/dev/stdin", &stdin_st) == 0 && S_ISCHR (stdin_st.st_mode) + && minor (stdin_st.st_rdev) == STDIN_FILENO) + { + stdin_rdev = stdin_st.st_rdev; + stdin_rdev_status = 1; + } + else + stdin_rdev_status = -1; + } + if (0 < stdin_rdev_status && major (stdin_rdev) == major (st->st_rdev)) + result = fstat (minor (st->st_rdev), st); + } + + return result; +} + +/* Attempt to punch a hole to avoid any permanent + speculative preallocation on file systems such as XFS. + Return values as per fallocate(2) except ENOSYS etc. are ignored. */ + +static int +punch_hole (int fd, off_t offset, off_t length) +{ + int ret = 0; +/* +0 is to work around older defining HAVE_FALLOCATE to empty. */ +#if HAVE_FALLOCATE + 0 +# if defined FALLOC_FL_PUNCH_HOLE && defined FALLOC_FL_KEEP_SIZE + ret = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, length); + if (ret < 0 && (is_ENOTSUP (errno) || errno == ENOSYS)) + ret = 0; +# endif +#endif + return ret; +} + +/* Create a hole at the end of a file, + avoiding preallocation if requested. */ + +static bool +create_hole (int fd, char const *name, bool punch_holes, off_t size) +{ + off_t file_end = lseek (fd, size, SEEK_CUR); + + if (file_end < 0) + { + error (0, errno, _("cannot lseek %s"), quoteaf (name)); + return false; + } + + /* Some file systems (like XFS) preallocate when write extending a file. + I.e., a previous write() may have preallocated extra space + that the seek above will not discard. A subsequent write() could + then make this allocation permanent. */ + if (punch_holes && punch_hole (fd, file_end - size, size) < 0) + { + error (0, errno, _("error deallocating %s"), quoteaf (name)); + return false; + } + + return true; +} + + +/* Whether an errno value ERR, set by FICLONE or copy_file_range, + indicates that the copying operation has terminally failed, even + though it was invoked correctly (so that, e.g, EBADF cannot occur) + and even though !is_CLONENOTSUP (ERR). */ + +static bool +is_terminal_error (int err) +{ + return err == EIO || err == ENOMEM || err == ENOSPC || err == EDQUOT; +} + +/* Similarly, whether ERR indicates that the copying operation is not + supported or allowed for this file or process, even though the + operation was invoked correctly. */ + +static bool +is_CLONENOTSUP (int err) +{ + return err == ENOSYS || err == ENOTTY || is_ENOTSUP (err) + || err == EINVAL || err == EBADF + || err == EXDEV || err == ETXTBSY + || err == EPERM || err == EACCES; +} + + +/* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, + honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer + *ABUF for temporary storage, allocating it lazily if *ABUF is null. + Copy no more than MAX_N_READ bytes. + Return true upon successful completion; + print a diagnostic and return false upon error. + Note that for best results, BUF should be "well"-aligned. + Set *LAST_WRITE_MADE_HOLE to true if the final operation on + DEST_FD introduced a hole. Set *TOTAL_N_READ to the number of + bytes read. */ +static bool +sparse_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size, + size_t hole_size, bool punch_holes, bool allow_reflink, + char const *src_name, char const *dst_name, + uintmax_t max_n_read, off_t *total_n_read, + bool *last_write_made_hole) +{ + *last_write_made_hole = false; + *total_n_read = 0; + + if (copy_debug.sparse_detection == COPY_DEBUG_UNKNOWN) + copy_debug.sparse_detection = hole_size ? COPY_DEBUG_YES : COPY_DEBUG_NO; + else if (hole_size && copy_debug.sparse_detection == COPY_DEBUG_EXTERNAL) + copy_debug.sparse_detection = COPY_DEBUG_EXTERNAL_INTERNAL; + + /* If not looking for holes, use copy_file_range if functional, + but don't use if reflink disallowed as that may be implicit. */ + if (!hole_size && allow_reflink) + while (max_n_read) + { + /* Copy at most COPY_MAX bytes at a time; this is min + (SSIZE_MAX, SIZE_MAX) truncated to a value that is + surely aligned well. */ + ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30; + ssize_t n_copied = copy_file_range (src_fd, nullptr, dest_fd, nullptr, + MIN (max_n_read, copy_max), 0); + if (n_copied == 0) + { + /* copy_file_range incorrectly returns 0 when reading from + the proc file system on the Linux kernel through at + least 5.6.19 (2020), so fall back on 'read' if the + input file seems empty. */ + if (*total_n_read == 0) + break; + copy_debug.offload = COPY_DEBUG_YES; + return true; + } + if (n_copied < 0) + { + copy_debug.offload = COPY_DEBUG_UNSUPPORTED; + + /* Consider operation unsupported only if no data copied. + For example, EPERM could occur if copy_file_range not enabled + in seccomp filters, so retry with a standard copy. EPERM can + also occur for immutable files, but that would only be in the + edge case where the file is made immutable after creating, + in which case the (more accurate) error is still shown. */ + if (*total_n_read == 0 && is_CLONENOTSUP (errno)) + break; + + /* ENOENT was seen sometimes across CIFS shares, resulting in + no data being copied, but subsequent standard copies succeed. */ + if (*total_n_read == 0 && errno == ENOENT) + break; + + if (errno == EINTR) + n_copied = 0; + else + { + error (0, errno, _("error copying %s to %s"), + quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); + return false; + } + } + copy_debug.offload = COPY_DEBUG_YES; + max_n_read -= n_copied; + *total_n_read += n_copied; + } + else + copy_debug.offload = COPY_DEBUG_AVOIDED; + + + bool make_hole = false; + off_t psize = 0; + + while (max_n_read) + { + if (!*abuf) + *abuf = xalignalloc (getpagesize (), buf_size); + char *buf = *abuf; + ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size)); + if (n_read < 0) + { + if (errno == EINTR) + continue; + error (0, errno, _("error reading %s"), quoteaf (src_name)); + return false; + } + if (n_read == 0) + break; + max_n_read -= n_read; + *total_n_read += n_read; + + /* Loop over the input buffer in chunks of hole_size. */ + size_t csize = hole_size ? hole_size : buf_size; + char *cbuf = buf; + char *pbuf = buf; + + while (n_read) + { + bool prev_hole = make_hole; + csize = MIN (csize, n_read); + + if (hole_size && csize) + make_hole = is_nul (cbuf, csize); + + bool transition = (make_hole != prev_hole) && psize; + bool last_chunk = (n_read == csize && ! make_hole) || ! csize; + + if (transition || last_chunk) + { + if (! transition) + psize += csize; + + if (! prev_hole) + { + if (full_write (dest_fd, pbuf, psize) != psize) + { + error (0, errno, _("error writing %s"), + quoteaf (dst_name)); + return false; + } + } + else + { + if (! create_hole (dest_fd, dst_name, punch_holes, psize)) + return false; + } + + pbuf = cbuf; + psize = csize; + + if (last_chunk) + { + if (! csize) + n_read = 0; /* Finished processing buffer. */ + + if (transition) + csize = 0; /* Loop again to deal with last chunk. */ + else + psize = 0; /* Reset for next read loop. */ + } + } + else /* Coalesce writes/seeks. */ + { + if (ckd_add (&psize, psize, csize)) + { + error (0, 0, _("overflow reading %s"), quoteaf (src_name)); + return false; + } + } + + n_read -= csize; + cbuf += csize; + } + + *last_write_made_hole = make_hole; + + /* It's tempting to break early here upon a short read from + a regular file. That would save the final read syscall + for each file. Unfortunately that doesn't work for + certain files in /proc or /sys with linux kernels. */ + } + + /* Ensure a trailing hole is created, so that subsequent + calls of sparse_copy() start at the correct offset. */ + if (make_hole && ! create_hole (dest_fd, dst_name, punch_holes, psize)) + return false; + else + return true; +} + +/* Perform the O(1) btrfs clone operation, if possible. + Upon success, return 0. Otherwise, return -1 and set errno. */ +static inline int +clone_file (int dest_fd, int src_fd) +{ +#ifdef FICLONE + return ioctl (dest_fd, FICLONE, src_fd); +#else + (void) dest_fd; + (void) src_fd; + errno = ENOTSUP; + return -1; +#endif +} + +/* Write N_BYTES zero bytes to file descriptor FD. Return true if successful. + Upon write failure, set errno and return false. */ +static bool +write_zeros (int fd, off_t n_bytes) +{ + static char *zeros; + static size_t nz = IO_BUFSIZE; + + /* Attempt to use a relatively large calloc'd source buffer for + efficiency, but if that allocation fails, resort to a smaller + statically allocated one. */ + if (zeros == nullptr) + { + static char fallback[1024]; + zeros = calloc (nz, 1); + if (zeros == nullptr) + { + zeros = fallback; + nz = sizeof fallback; + } + } + + while (n_bytes) + { + size_t n = MIN (nz, n_bytes); + if ((full_write (fd, zeros, n)) != n) + return false; + n_bytes -= n; + } + + return true; +} + +#ifdef SEEK_HOLE +/* Perform an efficient extent copy, if possible. This avoids + the overhead of detecting holes in hole-introducing/preserving + copy, and thus makes copying sparse files much more efficient. + Copy from SRC_FD to DEST_FD, using *ABUF (of size BUF_SIZE) for a buffer. + Allocate *ABUF lazily if *ABUF is null. + Look for holes of size HOLE_SIZE in the input. + The input file is of size SRC_TOTAL_SIZE. + Use SPARSE_MODE to determine whether to create holes in the output. + SRC_NAME and DST_NAME are the input and output file names. + Return true if successful, false (with a diagnostic) otherwise. */ + +static bool +lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size, + size_t hole_size, off_t ext_start, off_t src_total_size, + enum Sparse_type sparse_mode, + bool allow_reflink, + char const *src_name, char const *dst_name) +{ + off_t last_ext_start = 0; + off_t last_ext_len = 0; + off_t dest_pos = 0; + bool wrote_hole_at_eof = true; + + copy_debug.sparse_detection = COPY_DEBUG_EXTERNAL; + + while (0 <= ext_start) + { + off_t ext_end = lseek (src_fd, ext_start, SEEK_HOLE); + if (ext_end < 0) + { + if (errno != ENXIO) + goto cannot_lseek; + ext_end = src_total_size; + if (ext_end <= ext_start) + { + /* The input file grew; get its current size. */ + src_total_size = lseek (src_fd, 0, SEEK_END); + if (src_total_size < 0) + goto cannot_lseek; + + /* If the input file shrank after growing, stop copying. */ + if (src_total_size <= ext_start) + break; + + ext_end = src_total_size; + } + } + /* If the input file must have grown, increase its measured size. */ + if (src_total_size < ext_end) + src_total_size = ext_end; + + if (lseek (src_fd, ext_start, SEEK_SET) < 0) + goto cannot_lseek; + + wrote_hole_at_eof = false; + off_t ext_hole_size = ext_start - last_ext_start - last_ext_len; + + if (ext_hole_size) + { + if (sparse_mode != SPARSE_NEVER) + { + if (! create_hole (dest_fd, dst_name, + sparse_mode == SPARSE_ALWAYS, + ext_hole_size)) + return false; + wrote_hole_at_eof = true; + } + else + { + /* When not inducing holes and when there is a hole between + the end of the previous extent and the beginning of the + current one, write zeros to the destination file. */ + if (! write_zeros (dest_fd, ext_hole_size)) + { + error (0, errno, _("%s: write failed"), + quotef (dst_name)); + return false; + } + } + } + + off_t ext_len = ext_end - ext_start; + last_ext_start = ext_start; + last_ext_len = ext_len; + + /* Copy this extent, looking for further opportunities to not + bother to write zeros if --sparse=always, since SEEK_HOLE + is conservative and may miss some holes. */ + off_t n_read; + bool read_hole; + if ( ! sparse_copy (src_fd, dest_fd, abuf, buf_size, + sparse_mode != SPARSE_ALWAYS ? 0 : hole_size, + true, allow_reflink, src_name, dst_name, + ext_len, &n_read, &read_hole)) + return false; + + dest_pos = ext_start + n_read; + if (n_read) + wrote_hole_at_eof = read_hole; + if (n_read < ext_len) + { + /* The input file shrank. */ + src_total_size = dest_pos; + break; + } + + ext_start = lseek (src_fd, dest_pos, SEEK_DATA); + if (ext_start < 0 && errno != ENXIO) + goto cannot_lseek; + } + + /* When the source file ends with a hole, we have to do a little more work, + since the above copied only up to and including the final extent. + In order to complete the copy, we may have to insert a hole or write + zeros in the destination corresponding to the source file's hole-at-EOF. + + In addition, if the final extent was a block of zeros at EOF and we've + just converted them to a hole in the destination, we must call ftruncate + here in order to record the proper length in the destination. */ + if ((dest_pos < src_total_size || wrote_hole_at_eof) + && ! (sparse_mode == SPARSE_NEVER + ? write_zeros (dest_fd, src_total_size - dest_pos) + : ftruncate (dest_fd, src_total_size) == 0)) + { + error (0, errno, _("failed to extend %s"), quoteaf (dst_name)); + return false; + } + + if (sparse_mode == SPARSE_ALWAYS && dest_pos < src_total_size + && punch_hole (dest_fd, dest_pos, src_total_size - dest_pos) < 0) + { + error (0, errno, _("error deallocating %s"), quoteaf (dst_name)); + return false; + } + + return true; + + cannot_lseek: + error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); + return false; +} +#endif + +/* FIXME: describe */ +/* FIXME: rewrite this to use a hash table so we avoid the quadratic + performance hit that's probably noticeable only on trees deeper + than a few hundred levels. See use of active_dir_map in remove.c */ + +ATTRIBUTE_PURE +static bool +is_ancestor (const struct stat *sb, const struct dir_list *ancestors) +{ + while (ancestors != 0) + { + if (ancestors->ino == sb->st_ino && ancestors->dev == sb->st_dev) + return true; + ancestors = ancestors->parent; + } + return false; +} + +static bool +errno_unsupported (int err) +{ + return err == ENOTSUP || err == ENODATA; +} + +#if USE_XATTR +ATTRIBUTE_FORMAT ((printf, 2, 3)) +static void +copy_attr_error (MAYBE_UNUSED struct error_context *ctx, + char const *fmt, ...) +{ + if (!errno_unsupported (errno)) + { + int err = errno; + va_list ap; + + /* use verror module to print error message */ + va_start (ap, fmt); + verror (0, err, fmt, ap); + va_end (ap); + } +} + +ATTRIBUTE_FORMAT ((printf, 2, 3)) +static void +copy_attr_allerror (MAYBE_UNUSED struct error_context *ctx, + char const *fmt, ...) +{ + int err = errno; + va_list ap; + + /* use verror module to print error message */ + va_start (ap, fmt); + verror (0, err, fmt, ap); + va_end (ap); +} + +static char const * +copy_attr_quote (MAYBE_UNUSED struct error_context *ctx, char const *str) +{ + return quoteaf (str); +} + +static void +copy_attr_free (MAYBE_UNUSED struct error_context *ctx, + MAYBE_UNUSED char const *str) +{ +} + +/* Exclude SELinux extended attributes that are otherwise handled, + and are problematic to copy again. Also honor attributes + configured for exclusion in /etc/xattr.conf. + FIXME: Should we handle POSIX ACLs similarly? + Return zero to skip. */ +static int +check_selinux_attr (char const *name, struct error_context *ctx) +{ + return STRNCMP_LIT (name, "security.selinux") + && attr_copy_check_permissions (name, ctx); +} + +/* If positive SRC_FD and DST_FD descriptors are passed, + then copy by fd, otherwise copy by name. */ + +static bool +copy_attr (char const *src_path, int src_fd, + char const *dst_path, int dst_fd, struct cp_options const *x) +{ + bool all_errors = (!x->data_copy_required || x->require_preserve_xattr); + bool some_errors = (!all_errors && !x->reduce_diagnostics); + int (*check) (char const *, struct error_context *) + = (x->preserve_security_context || x->set_security_context + ? check_selinux_attr : nullptr); + +# if 4 < __GNUC__ + (8 <= __GNUC_MINOR__) + /* Pacify gcc -Wsuggest-attribute=format through at least GCC 11.2.1. */ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsuggest-attribute=format" +# endif + struct error_context *ctx + = (all_errors || some_errors + ? (&(struct error_context) { + .error = all_errors ? copy_attr_allerror : copy_attr_error, + .quote = copy_attr_quote, + .quote_free = copy_attr_free + }) + : nullptr); +# if 4 < __GNUC__ + (8 <= __GNUC_MINOR__) +# pragma GCC diagnostic pop +# endif + + return ! (0 <= src_fd && 0 <= dst_fd + ? attr_copy_fd (src_path, src_fd, dst_path, dst_fd, check, ctx) + : attr_copy_file (src_path, dst_path, check, ctx)); +} +#else /* USE_XATTR */ + +static bool +copy_attr (MAYBE_UNUSED char const *src_path, + MAYBE_UNUSED int src_fd, + MAYBE_UNUSED char const *dst_path, + MAYBE_UNUSED int dst_fd, + MAYBE_UNUSED struct cp_options const *x) +{ + return true; +} +#endif /* USE_XATTR */ + +/* Read the contents of the directory SRC_NAME_IN, and recursively + copy the contents to DST_NAME_IN aka DST_DIRFD+DST_RELNAME_IN. + NEW_DST is true if DST_NAME_IN is a directory + that was created previously in the recursion. + SRC_SB and ANCESTORS describe SRC_NAME_IN. + Set *COPY_INTO_SELF if SRC_NAME_IN is a parent of + (or the same as) DST_NAME_IN; otherwise, clear it. + Propagate *FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG from + caller to each invocation of copy_internal. Be careful to + pass the address of a temporary, and to update + *FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG only upon completion. + Return true if successful. */ + +static bool +copy_dir (char const *src_name_in, char const *dst_name_in, + int dst_dirfd, char const *dst_relname_in, bool new_dst, + const struct stat *src_sb, struct dir_list *ancestors, + const struct cp_options *x, + bool *first_dir_created_per_command_line_arg, + bool *copy_into_self) +{ + char *name_space; + char *namep; + struct cp_options non_command_line_options = *x; + bool ok = true; + + name_space = savedir (src_name_in, SAVEDIR_SORT_FASTREAD); + if (name_space == nullptr) + { + /* This diagnostic is a bit vague because savedir can fail in + several different ways. */ + error (0, errno, _("cannot access %s"), quoteaf (src_name_in)); + return false; + } + + /* For cp's -H option, dereference command line arguments, but do not + dereference symlinks that are found via recursive traversal. */ + if (x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) + non_command_line_options.dereference = DEREF_NEVER; + + bool new_first_dir_created = false; + namep = name_space; + while (*namep != '\0') + { + bool local_copy_into_self; + char *src_name = file_name_concat (src_name_in, namep, nullptr); + char *dst_name = file_name_concat (dst_name_in, namep, nullptr); + bool first_dir_created = *first_dir_created_per_command_line_arg; + bool rename_succeeded; + + ok &= copy_internal (src_name, dst_name, dst_dirfd, + dst_name + (dst_relname_in - dst_name_in), + new_dst, src_sb, + ancestors, &non_command_line_options, false, + &first_dir_created, + &local_copy_into_self, &rename_succeeded); + *copy_into_self |= local_copy_into_self; + + free (dst_name); + free (src_name); + + /* If we're copying into self, there's no point in continuing, + and in fact, that would even infloop, now that we record only + the first created directory per command line argument. */ + if (local_copy_into_self) + break; + + new_first_dir_created |= first_dir_created; + namep += strlen (namep) + 1; + } + free (name_space); + *first_dir_created_per_command_line_arg = new_first_dir_created; + + return ok; +} + +/* Set the owner and owning group of DEST_DESC to the st_uid and + st_gid fields of SRC_SB. If DEST_DESC is undefined (-1), set + the owner and owning group of DST_NAME aka DST_DIRFD+DST_RELNAME + instead; for safety prefer lchownat since no + symbolic links should be involved. DEST_DESC must + refer to the same file as DST_NAME if defined. + Upon failure to set both UID and GID, try to set only the GID. + NEW_DST is true if the file was newly created; otherwise, + DST_SB is the status of the destination. + Return 1 if the initial syscall succeeds, 0 if it fails but it's OK + not to preserve ownership, -1 otherwise. */ + +static int +set_owner (const struct cp_options *x, char const *dst_name, + int dst_dirfd, char const *dst_relname, int dest_desc, + struct stat const *src_sb, bool new_dst, + struct stat const *dst_sb) +{ + uid_t uid = src_sb->st_uid; + gid_t gid = src_sb->st_gid; + + /* Naively changing the ownership of an already-existing file before + changing its permissions would create a window of vulnerability if + the file's old permissions are too generous for the new owner and + group. Avoid the window by first changing to a restrictive + temporary mode if necessary. */ + + if (!new_dst && (x->preserve_mode || x->move_mode || x->set_mode)) + { + mode_t old_mode = dst_sb->st_mode; + mode_t new_mode = + (x->preserve_mode || x->move_mode ? src_sb->st_mode : x->mode); + mode_t restrictive_temp_mode = old_mode & new_mode & S_IRWXU; + + if ((USE_ACL + || (old_mode & CHMOD_MODE_BITS + & (~new_mode | S_ISUID | S_ISGID | S_ISVTX))) + && qset_acl (dst_name, dest_desc, restrictive_temp_mode) != 0) + { + if (! owner_failure_ok (x)) + error (0, errno, _("clearing permissions for %s"), + quoteaf (dst_name)); + return -x->require_preserve; + } + } + + if (HAVE_FCHOWN && dest_desc != -1) + { + if (fchown (dest_desc, uid, gid) == 0) + return 1; + if (errno == EPERM || errno == EINVAL) + { + /* We've failed to set *both*. Now, try to set just the group + ID, but ignore any failure here, and don't change errno. */ + int saved_errno = errno; + ignore_value (fchown (dest_desc, -1, gid)); + errno = saved_errno; + } + } + else + { + if (lchownat (dst_dirfd, dst_relname, uid, gid) == 0) + return 1; + if (errno == EPERM || errno == EINVAL) + { + /* We've failed to set *both*. Now, try to set just the group + ID, but ignore any failure here, and don't change errno. */ + int saved_errno = errno; + ignore_value (lchownat (dst_dirfd, dst_relname, -1, gid)); + errno = saved_errno; + } + } + + if (! chown_failure_ok (x)) + { + error (0, errno, _("failed to preserve ownership for %s"), + quoteaf (dst_name)); + if (x->require_preserve) + return -1; + } + + return 0; +} + +/* Set the st_author field of DEST_DESC to the st_author field of + SRC_SB. If DEST_DESC is undefined (-1), set the st_author field + of DST_NAME instead. DEST_DESC must refer to the same file as + DST_NAME if defined. */ + +static void +set_author (char const *dst_name, int dest_desc, const struct stat *src_sb) +{ +#if HAVE_STRUCT_STAT_ST_AUTHOR + /* FIXME: Modify the following code so that it does not + follow symbolic links. */ + + /* Preserve the st_author field. */ + file_t file = (dest_desc < 0 + ? file_name_lookup (dst_name, 0, 0) + : getdport (dest_desc)); + if (file == MACH_PORT_NULL) + error (0, errno, _("failed to lookup file %s"), quoteaf (dst_name)); + else + { + error_t err = file_chauthor (file, src_sb->st_author); + if (err) + error (0, err, _("failed to preserve authorship for %s"), + quoteaf (dst_name)); + mach_port_deallocate (mach_task_self (), file); + } +#else + (void) dst_name; + (void) dest_desc; + (void) src_sb; +#endif +} + +/* Set the default security context for the process. New files will + have this security context set. Also existing files can have their + context adjusted based on this process context, by + set_file_security_ctx() called with PROCESS_LOCAL=true. + This should be called before files are created so there is no race + where a file may be present without an appropriate security context. + Based on CP_OPTIONS, diagnose warnings and fail when appropriate. + Return FALSE on failure, TRUE on success. */ + +bool +set_process_security_ctx (char const *src_name, char const *dst_name, + mode_t mode, bool new_dst, const struct cp_options *x) +{ + if (x->preserve_security_context) + { + /* Set the default context for the process to match the source. */ + bool all_errors = !x->data_copy_required || x->require_preserve_context; + bool some_errors = !all_errors && !x->reduce_diagnostics; + char *con; + + if (0 <= lgetfilecon (src_name, &con)) + { + if (setfscreatecon (con) < 0) + { + if (all_errors || (some_errors && !errno_unsupported (errno))) + error (0, errno, + _("failed to set default file creation context to %s"), + quote (con)); + if (x->require_preserve_context) + { + freecon (con); + return false; + } + } + freecon (con); + } + else + { + if (all_errors || (some_errors && !errno_unsupported (errno))) + { + error (0, errno, + _("failed to get security context of %s"), + quoteaf (src_name)); + } + if (x->require_preserve_context) + return false; + } + } + else if (x->set_security_context) + { + /* With -Z, adjust the default context for the process + to have the type component adjusted as per the destination path. */ + if (new_dst && defaultcon (x->set_security_context, dst_name, mode) < 0 + && ! ignorable_ctx_err (errno)) + { + error (0, errno, + _("failed to set default file creation context for %s"), + quoteaf (dst_name)); + } + } + + return true; +} + +/* Reset the security context of DST_NAME, to that already set + as the process default if !X->set_security_context. Otherwise + adjust the type component of DST_NAME's security context as + per the system default for that path. Issue warnings upon + failure, when allowed by various settings in X. + Return false on failure, true on success. */ + +bool +set_file_security_ctx (char const *dst_name, + bool recurse, const struct cp_options *x) +{ + bool all_errors = (!x->data_copy_required + || x->require_preserve_context); + bool some_errors = !all_errors && !x->reduce_diagnostics; + + if (! restorecon (x->set_security_context, dst_name, recurse)) + { + if (all_errors || (some_errors && !errno_unsupported (errno))) + error (0, errno, _("failed to set the security context of %s"), + quoteaf_n (0, dst_name)); + return false; + } + + return true; +} + +/* Change the file mode bits of the file identified by DESC or + DIRFD+NAME to MODE. Use DESC if DESC is valid and fchmod is + available, DIRFD+NAME otherwise. */ + +static int +fchmod_or_lchmod (int desc, int dirfd, char const *name, mode_t mode) +{ +#if HAVE_FCHMOD + if (0 <= desc) + return fchmod (desc, mode); +#endif + return lchmodat (dirfd, name, mode); +} + +#ifndef HAVE_STRUCT_STAT_ST_BLOCKS +# define HAVE_STRUCT_STAT_ST_BLOCKS 0 +#endif + +/* Type of scan being done on the input when looking for sparseness. */ +enum scantype + { + /* An error was found when determining scantype. */ + ERROR_SCANTYPE, + + /* No fancy scanning; just read and write. */ + PLAIN_SCANTYPE, + + /* Read and examine data looking for zero blocks; useful when + attempting to create sparse output. */ + ZERO_SCANTYPE, + + /* lseek information is available. */ + LSEEK_SCANTYPE, + }; + +/* Result of infer_scantype. */ +union scan_inference +{ + /* Used if infer_scantype returns LSEEK_SCANTYPE. This is the + offset of the first data block, or -1 if the file has no data. */ + off_t ext_start; +}; + +/* Return how to scan a file with descriptor FD and stat buffer SB. + *SCAN_INFERENCE is set to a valid value if returning LSEEK_SCANTYPE. */ +static enum scantype +infer_scantype (int fd, struct stat const *sb, + union scan_inference *scan_inference) +{ + scan_inference->ext_start = -1; /* avoid -Wmaybe-uninitialized */ + + /* Only attempt SEEK_HOLE if this heuristic + suggests the file is sparse. */ + if (! (HAVE_STRUCT_STAT_ST_BLOCKS + && S_ISREG (sb->st_mode) + && ST_NBLOCKS (*sb) < sb->st_size / ST_NBLOCKSIZE)) + return PLAIN_SCANTYPE; + +#ifdef SEEK_HOLE + off_t ext_start = lseek (fd, 0, SEEK_DATA); + if (0 <= ext_start || errno == ENXIO) + { + scan_inference->ext_start = ext_start; + return LSEEK_SCANTYPE; + } + else if (errno != EINVAL && !is_ENOTSUP (errno)) + return ERROR_SCANTYPE; +#endif + + return ZERO_SCANTYPE; +} + +#if HAVE_FCLONEFILEAT && !USE_XATTR +# include +/* Return true if FD has a nontrivial ACL. */ +static bool +fd_has_acl (int fd) +{ + /* Every platform with fclonefileat (macOS 10.12 or later) also has + acl_get_fd_np. */ + bool has_acl = false; + acl_t acl = acl_get_fd_np (fd, ACL_TYPE_EXTENDED); + if (acl) + { + acl_entry_t ace; + has_acl = 0 <= acl_get_entry (acl, ACL_FIRST_ENTRY, &ace); + acl_free (acl); + } + return has_acl; +} +#endif + +/* Handle failure from FICLONE or fclonefileat. + Return FALSE if it's a terminal failure for this file. */ + +static bool +handle_clone_fail (int dst_dirfd, char const *dst_relname, + char const *src_name, char const *dst_name, + int dest_desc, bool new_dst, enum Reflink_type reflink_mode) +{ + /* When the clone operation fails, report failure only with errno values + known to mean trouble when the clone is supported and called properly. + Do not report failure merely because !is_CLONENOTSUP (errno), + as systems may yield oddball errno values here with FICLONE, + and is_CLONENOTSUP is not appropriate for fclonefileat. */ + bool report_failure = is_terminal_error (errno); + + if (reflink_mode == REFLINK_ALWAYS || report_failure) + error (0, errno, _("failed to clone %s from %s"), + quoteaf_n (0, dst_name), quoteaf_n (1, src_name)); + + /* Remove the destination if cp --reflink=always created it + but cloned no data. */ + if (new_dst /* currently not for fclonefileat(). */ + && reflink_mode == REFLINK_ALWAYS + && ((! report_failure) || lseek (dest_desc, 0, SEEK_END) == 0) + && unlinkat (dst_dirfd, dst_relname, 0) != 0 && errno != ENOENT) + error (0, errno, _("cannot remove %s"), quoteaf (dst_name)); + + if (! report_failure) + copy_debug.reflink = COPY_DEBUG_UNSUPPORTED; + + if (reflink_mode == REFLINK_ALWAYS || report_failure) + return false; + + return true; +} + + +/* Copy a regular file from SRC_NAME to DST_NAME aka DST_DIRFD+DST_RELNAME. + If the source file contains holes, copies holes and blocks of zeros + in the source file as holes in the destination file. + (Holes are read as zeroes by the 'read' system call.) + When creating the destination, use DST_MODE & ~OMITTED_PERMISSIONS + as the third argument in the call to open, adding + OMITTED_PERMISSIONS after copying as needed. + X provides many option settings. + Return true if successful. + *NEW_DST is initially as in copy_internal. + If successful, set *NEW_DST to true if the destination file was created and + to false otherwise; if unsuccessful, perhaps set *NEW_DST to some value. + SRC_SB is the result of calling follow_fstatat on SRC_NAME. */ + +static bool +copy_reg (char const *src_name, char const *dst_name, + int dst_dirfd, char const *dst_relname, + const struct cp_options *x, + mode_t dst_mode, mode_t omitted_permissions, bool *new_dst, + struct stat const *src_sb) +{ + char *buf = nullptr; + int dest_desc; + int dest_errno; + int source_desc; + mode_t src_mode = src_sb->st_mode; + mode_t extra_permissions; + struct stat sb; + struct stat src_open_sb; + union scan_inference scan_inference; + bool return_val = true; + bool data_copy_required = x->data_copy_required; + bool preserve_xattr = USE_XATTR & x->preserve_xattr; + + copy_debug.offload = COPY_DEBUG_UNKNOWN; + copy_debug.reflink = x->reflink_mode ? COPY_DEBUG_UNKNOWN : COPY_DEBUG_NO; + copy_debug.sparse_detection = COPY_DEBUG_UNKNOWN; + + source_desc = open (src_name, + (O_RDONLY | O_BINARY + | (x->dereference == DEREF_NEVER ? O_NOFOLLOW : 0))); + if (source_desc < 0) + { + error (0, errno, _("cannot open %s for reading"), quoteaf (src_name)); + return false; + } + + if (fstat (source_desc, &src_open_sb) != 0) + { + error (0, errno, _("cannot fstat %s"), quoteaf (src_name)); + return_val = false; + goto close_src_desc; + } + + /* Compare the source dev/ino from the open file to the incoming, + saved ones obtained via a previous call to stat. */ + if (! SAME_INODE (*src_sb, src_open_sb)) + { + error (0, 0, + _("skipping file %s, as it was replaced while being copied"), + quoteaf (src_name)); + return_val = false; + goto close_src_desc; + } + + /* The semantics of the following open calls are mandated + by the specs for both cp and mv. */ + if (! *new_dst) + { + int open_flags = + O_WRONLY | O_BINARY | (data_copy_required ? O_TRUNC : 0); + dest_desc = openat (dst_dirfd, dst_relname, open_flags); + dest_errno = errno; + + /* When using cp --preserve=context to copy to an existing destination, + reset the context as per the default context, which has already been + set according to the src. + When using the mutually exclusive -Z option, then adjust the type of + the existing context according to the system default for the dest. + Note we set the context here, _after_ the file is opened, lest the + new context disallow that. */ + if (0 <= dest_desc + && (x->set_security_context || x->preserve_security_context)) + { + if (! set_file_security_ctx (dst_name, false, x)) + { + if (x->require_preserve_context) + { + return_val = false; + goto close_src_and_dst_desc; + } + } + } + + if (dest_desc < 0 && dest_errno != ENOENT + && x->unlink_dest_after_failed_open) + { + if (unlinkat (dst_dirfd, dst_relname, 0) == 0) + { + if (x->verbose) + printf (_("removed %s\n"), quoteaf (dst_name)); + } + else if (errno != ENOENT) + { + error (0, errno, _("cannot remove %s"), quoteaf (dst_name)); + return_val = false; + goto close_src_desc; + } + + dest_errno = ENOENT; + } + + if (dest_desc < 0 && dest_errno == ENOENT) + { + /* Ensure there is no race where a file may be left without + an appropriate security context. */ + if (x->set_security_context) + { + if (! set_process_security_ctx (src_name, dst_name, dst_mode, + true, x)) + { + return_val = false; + goto close_src_desc; + } + } + + /* Tell caller that the destination file is created. */ + *new_dst = true; + } + } + + if (*new_dst) + { +#if HAVE_FCLONEFILEAT && !USE_XATTR +# ifndef CLONE_ACL +# define CLONE_ACL 0 /* Added in macOS 12.6. */ +# endif +# ifndef CLONE_NOOWNERCOPY +# define CLONE_NOOWNERCOPY 0 /* Added in macOS 10.13. */ +# endif + /* Try fclonefileat if copying data in reflink mode. + Use CLONE_NOFOLLOW to avoid security issues that could occur + if writing through dangling symlinks. Although the circa + 2023 macOS documentation doesn't say so, CLONE_NOFOLLOW + affects the destination file too. */ + if (data_copy_required && x->reflink_mode + && (CLONE_NOOWNERCOPY || x->preserve_ownership)) + { + /* Try fclonefileat so long as it won't create the + destination with unwanted permissions, which could lead + to a security race. */ + mode_t cloned_mode_bits = S_ISVTX | S_IRWXUGO; + mode_t cloned_mode = src_mode & cloned_mode_bits; + mode_t desired_mode + = (x->preserve_mode ? src_mode & CHMOD_MODE_BITS + : x->set_mode ? x->mode + : ((x->explicit_no_preserve_mode ? MODE_RW_UGO : dst_mode) + & ~ cached_umask ())); + if (! (cloned_mode & ~desired_mode)) + { + int fc_flags + = (CLONE_NOFOLLOW + | (x->preserve_mode ? CLONE_ACL : 0) + | (x->preserve_ownership ? 0 : CLONE_NOOWNERCOPY)); + int s = fclonefileat (source_desc, dst_dirfd, dst_relname, + fc_flags); + if (s != 0 && (fc_flags & CLONE_ACL) && errno == EINVAL) + { + fc_flags &= ~CLONE_ACL; + s = fclonefileat (source_desc, dst_dirfd, dst_relname, + fc_flags); + } + if (s == 0) + { + copy_debug.reflink = COPY_DEBUG_YES; + + /* Update the clone's timestamps and permissions + as needed. */ + + if (!x->preserve_timestamps) + { + struct timespec timespec[2]; + timespec[0].tv_nsec = timespec[1].tv_nsec = UTIME_NOW; + if (utimensat (dst_dirfd, dst_relname, timespec, + AT_SYMLINK_NOFOLLOW) + != 0) + { + error (0, errno, _("updating times for %s"), + quoteaf (dst_name)); + return_val = false; + goto close_src_desc; + } + } + + extra_permissions = desired_mode & ~cloned_mode; + if (!extra_permissions + && (!x->preserve_mode || (fc_flags & CLONE_ACL) + || !fd_has_acl (source_desc))) + { + goto close_src_desc; + } + + /* Either some desired permissions were not cloned, + or ACLs were not cloned despite that being requested. */ + omitted_permissions = 0; + dest_desc = -1; + goto set_dest_mode; + } + if (! handle_clone_fail (dst_dirfd, dst_relname, src_name, + dst_name, + -1, false /* We didn't create dst */, + x->reflink_mode)) + { + return_val = false; + goto close_src_desc; + } + } + else + copy_debug.reflink = COPY_DEBUG_AVOIDED; + } + else if (data_copy_required && x->reflink_mode) + { + if (! CLONE_NOOWNERCOPY) + copy_debug.reflink = COPY_DEBUG_AVOIDED; + } +#endif + + /* To allow copying xattrs on read-only files, create with u+w. + This satisfies an inode permission check done by + xattr_permission in fs/xattr.c of the GNU/Linux kernel. */ + mode_t open_mode = + ((dst_mode & ~omitted_permissions) + | (preserve_xattr && !x->owner_privileges ? S_IWUSR : 0)); + extra_permissions = open_mode & ~dst_mode; /* either 0 or S_IWUSR */ + + int open_flags = O_WRONLY | O_CREAT | O_BINARY; + dest_desc = openat (dst_dirfd, dst_relname, open_flags | O_EXCL, + open_mode); + dest_errno = errno; + + /* When trying to copy through a dangling destination symlink, + the above open fails with EEXIST. If that happens, and + readlinkat shows that it is a symlink, then we + have a problem: trying to resolve this dangling symlink to + a directory/destination-entry pair is fundamentally racy, + so punt. If x->open_dangling_dest_symlink is set (cp sets + that when POSIXLY_CORRECT is set in the environment), simply + call open again, but without O_EXCL (potentially dangerous). + If not, fail with a diagnostic. These shenanigans are necessary + only when copying, i.e., not in move_mode. */ + if (dest_desc < 0 && dest_errno == EEXIST && ! x->move_mode) + { + char dummy[1]; + if (0 <= readlinkat (dst_dirfd, dst_relname, dummy, sizeof dummy)) + { + if (x->open_dangling_dest_symlink) + { + dest_desc = openat (dst_dirfd, dst_relname, + open_flags, open_mode); + dest_errno = errno; + } + else + { + error (0, 0, _("not writing through dangling symlink %s"), + quoteaf (dst_name)); + return_val = false; + goto close_src_desc; + } + } + } + + /* Improve quality of diagnostic when a nonexistent dst_name + ends in a slash and open fails with errno == EISDIR. */ + if (dest_desc < 0 && dest_errno == EISDIR + && *dst_name && dst_name[strlen (dst_name) - 1] == '/') + dest_errno = ENOTDIR; + } + else + { + omitted_permissions = extra_permissions = 0; + } + + if (dest_desc < 0) + { + error (0, dest_errno, _("cannot create regular file %s"), + quoteaf (dst_name)); + return_val = false; + goto close_src_desc; + } + + /* --attributes-only overrides --reflink. */ + if (data_copy_required && x->reflink_mode) + { + if (clone_file (dest_desc, source_desc) == 0) + { + data_copy_required = false; + copy_debug.reflink = COPY_DEBUG_YES; + } + else + { + if (! handle_clone_fail (dst_dirfd, dst_relname, src_name, dst_name, + dest_desc, *new_dst, x->reflink_mode)) + { + return_val = false; + goto close_src_and_dst_desc; + } + } + } + + if (! (data_copy_required | x->preserve_ownership | extra_permissions)) + sb.st_mode = 0; + else if (fstat (dest_desc, &sb) != 0) + { + error (0, errno, _("cannot fstat %s"), quoteaf (dst_name)); + return_val = false; + goto close_src_and_dst_desc; + } + + /* If extra permissions needed for copy_xattr didn't happen (e.g., + due to umask) chmod to add them temporarily; if that fails give + up with extra permissions, letting copy_attr fail later. */ + mode_t temporary_mode = sb.st_mode | extra_permissions; + if (temporary_mode != sb.st_mode + && (fchmod_or_lchmod (dest_desc, dst_dirfd, dst_relname, temporary_mode) + != 0)) + extra_permissions = 0; + + if (data_copy_required) + { + /* Choose a suitable buffer size; it may be adjusted later. */ + size_t buf_size = io_blksize (sb); + size_t hole_size = ST_BLKSIZE (sb); + + /* Deal with sparse files. */ + enum scantype scantype = infer_scantype (source_desc, &src_open_sb, + &scan_inference); + if (scantype == ERROR_SCANTYPE) + { + error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); + return_val = false; + goto close_src_and_dst_desc; + } + bool make_holes + = (S_ISREG (sb.st_mode) + && (x->sparse_mode == SPARSE_ALWAYS + || (x->sparse_mode == SPARSE_AUTO + && scantype != PLAIN_SCANTYPE))); + + fdadvise (source_desc, 0, 0, FADVISE_SEQUENTIAL); + + /* If not making a sparse file, try to use a more-efficient + buffer size. */ + if (! make_holes) + { + /* Compute the least common multiple of the input and output + buffer sizes, adjusting for outlandish values. + Note we read in multiples of the reported block size + to support (unusual) devices that have this constraint. */ + size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX); + size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size, + blcm_max); + + /* Do not bother with a buffer larger than the input file, plus one + byte to make sure the file has not grown while reading it. */ + if (S_ISREG (src_open_sb.st_mode) && src_open_sb.st_size < buf_size) + buf_size = src_open_sb.st_size + 1; + + /* However, stick with a block size that is a positive multiple of + blcm, overriding the above adjustments. Watch out for + overflow. */ + buf_size += blcm - 1; + buf_size -= buf_size % blcm; + if (buf_size == 0 || blcm_max < buf_size) + buf_size = blcm; + } + + off_t n_read; + bool wrote_hole_at_eof = false; + if (! ( +#ifdef SEEK_HOLE + scantype == LSEEK_SCANTYPE + ? lseek_copy (source_desc, dest_desc, &buf, buf_size, hole_size, + scan_inference.ext_start, src_open_sb.st_size, + make_holes ? x->sparse_mode : SPARSE_NEVER, + x->reflink_mode != REFLINK_NEVER, + src_name, dst_name) + : +#endif + sparse_copy (source_desc, dest_desc, &buf, buf_size, + make_holes ? hole_size : 0, + x->sparse_mode == SPARSE_ALWAYS, + x->reflink_mode != REFLINK_NEVER, + src_name, dst_name, UINTMAX_MAX, &n_read, + &wrote_hole_at_eof))) + { + return_val = false; + goto close_src_and_dst_desc; + } + else if (wrote_hole_at_eof && ftruncate (dest_desc, n_read) < 0) + { + error (0, errno, _("failed to extend %s"), quoteaf (dst_name)); + return_val = false; + goto close_src_and_dst_desc; + } + } + + if (x->preserve_timestamps) + { + struct timespec timespec[2]; + timespec[0] = get_stat_atime (src_sb); + timespec[1] = get_stat_mtime (src_sb); + + if (fdutimensat (dest_desc, dst_dirfd, dst_relname, timespec, 0) != 0) + { + error (0, errno, _("preserving times for %s"), quoteaf (dst_name)); + if (x->require_preserve) + { + return_val = false; + goto close_src_and_dst_desc; + } + } + } + + /* Set ownership before xattrs as changing owners will + clear capabilities. */ + if (x->preserve_ownership && ! SAME_OWNER_AND_GROUP (*src_sb, sb)) + { + switch (set_owner (x, dst_name, dst_dirfd, dst_relname, dest_desc, + src_sb, *new_dst, &sb)) + { + case -1: + return_val = false; + goto close_src_and_dst_desc; + + case 0: + src_mode &= ~ (S_ISUID | S_ISGID | S_ISVTX); + break; + } + } + + if (preserve_xattr) + { + if (!copy_attr (src_name, source_desc, dst_name, dest_desc, x) + && x->require_preserve_xattr) + return_val = false; + } + + set_author (dst_name, dest_desc, src_sb); + +#if HAVE_FCLONEFILEAT && !USE_XATTR +set_dest_mode: +#endif + if (x->preserve_mode || x->move_mode) + { + if (copy_acl (src_name, source_desc, dst_name, dest_desc, src_mode) != 0 + && x->require_preserve) + return_val = false; + } + else if (x->set_mode) + { + if (set_acl (dst_name, dest_desc, x->mode) != 0) + return_val = false; + } + else if (x->explicit_no_preserve_mode && *new_dst) + { + if (set_acl (dst_name, dest_desc, MODE_RW_UGO & ~cached_umask ()) != 0) + return_val = false; + } + else if (omitted_permissions | extra_permissions) + { + omitted_permissions &= ~ cached_umask (); + if ((omitted_permissions | extra_permissions) + && (fchmod_or_lchmod (dest_desc, dst_dirfd, dst_relname, + dst_mode & ~ cached_umask ()) + != 0)) + { + error (0, errno, _("preserving permissions for %s"), + quoteaf (dst_name)); + if (x->require_preserve) + return_val = false; + } + } + + if (dest_desc < 0) + goto close_src_desc; + +close_src_and_dst_desc: + if (close (dest_desc) < 0) + { + error (0, errno, _("failed to close %s"), quoteaf (dst_name)); + return_val = false; + } +close_src_desc: + if (close (source_desc) < 0) + { + error (0, errno, _("failed to close %s"), quoteaf (src_name)); + return_val = false; + } + + /* Output debug info for data copying operations. */ + if (x->debug) + emit_debug (x); + + alignfree (buf); + return return_val; +} + +/* Return whether it's OK that two files are the "same" by some measure. + The first file is SRC_NAME and has status SRC_SB. + The second is DST_DIRFD+DST_RELNAME and has status DST_SB. + The copying options are X. The goal is to avoid + making the 'copy' operation remove both copies of the file + in that case, while still allowing the user to e.g., move or + copy a regular file onto a symlink that points to it. + Try to minimize the cost of this function in the common case. + Set *RETURN_NOW if we've determined that the caller has no more + work to do and should return successfully, right away. */ + +static bool +same_file_ok (char const *src_name, struct stat const *src_sb, + int dst_dirfd, char const *dst_relname, struct stat const *dst_sb, + const struct cp_options *x, bool *return_now) +{ + const struct stat *src_sb_link; + const struct stat *dst_sb_link; + struct stat tmp_dst_sb; + struct stat tmp_src_sb; + + bool same_link; + bool same = SAME_INODE (*src_sb, *dst_sb); + + *return_now = false; + + /* FIXME: this should (at the very least) be moved into the following + if-block. More likely, it should be removed, because it inhibits + making backups. But removing it will result in a change in behavior + that will probably have to be documented -- and tests will have to + be updated. */ + if (same && x->hard_link) + { + *return_now = true; + return true; + } + + if (x->dereference == DEREF_NEVER) + { + same_link = same; + + /* If both the source and destination files are symlinks (and we'll + know this here IFF preserving symlinks), then it's usually ok + when they are distinct. */ + if (S_ISLNK (src_sb->st_mode) && S_ISLNK (dst_sb->st_mode)) + { + bool sn = same_nameat (AT_FDCWD, src_name, dst_dirfd, dst_relname); + if ( ! sn) + { + /* It's fine when we're making any type of backup. */ + if (x->backup_type != no_backups) + return true; + + /* Here we have two symlinks that are hard-linked together, + and we're not making backups. In this unusual case, simply + returning true would lead to mv calling "rename(A,B)", + which would do nothing and return 0. */ + if (same_link) + { + *return_now = true; + return ! x->move_mode; + } + } + + return ! sn; + } + + src_sb_link = src_sb; + dst_sb_link = dst_sb; + } + else + { + if (!same) + return true; + + if (fstatat (dst_dirfd, dst_relname, &tmp_dst_sb, + AT_SYMLINK_NOFOLLOW) != 0 + || lstat (src_name, &tmp_src_sb) != 0) + return true; + + src_sb_link = &tmp_src_sb; + dst_sb_link = &tmp_dst_sb; + + same_link = SAME_INODE (*src_sb_link, *dst_sb_link); + + /* If both are symlinks, then it's ok, but only if the destination + will be unlinked before being opened. This is like the test + above, but with the addition of the unlink_dest_before_opening + conjunct because otherwise, with two symlinks to the same target, + we'd end up truncating the source file. */ + if (S_ISLNK (src_sb_link->st_mode) && S_ISLNK (dst_sb_link->st_mode) + && x->unlink_dest_before_opening) + return true; + } + + /* The backup code ensures there's a copy, so it's usually ok to + remove any destination file. One exception is when both + source and destination are the same directory entry. In that + case, moving the destination file aside (in making the backup) + would also rename the source file and result in an error. */ + if (x->backup_type != no_backups) + { + if (!same_link) + { + /* In copy mode when dereferencing symlinks, if the source is a + symlink and the dest is not, then backing up the destination + (moving it aside) would make it a dangling symlink, and the + subsequent attempt to open it in copy_reg would fail with + a misleading diagnostic. Avoid that by returning zero in + that case so the caller can make cp (or mv when it has to + resort to reading the source file) fail now. */ + + /* FIXME-note: even with the following kludge, we can still provoke + the offending diagnostic. It's just a little harder to do :-) + $ rm -f a b c; touch c; ln -s c b; ln -s b a; cp -b a b + cp: cannot open 'a' for reading: No such file or directory + That's misleading, since a subsequent 'ls' shows that 'a' + is still there. + One solution would be to open the source file *before* moving + aside the destination, but that'd involve a big rewrite. */ + if ( ! x->move_mode + && x->dereference != DEREF_NEVER + && S_ISLNK (src_sb_link->st_mode) + && ! S_ISLNK (dst_sb_link->st_mode)) + return false; + + return true; + } + + /* FIXME: What about case insensitive file systems ? */ + return ! same_nameat (AT_FDCWD, src_name, dst_dirfd, dst_relname); + } + +#if 0 + /* FIXME: use or remove */ + + /* If we're making a backup, we'll detect the problem case in + copy_reg because SRC_NAME will no longer exist. Allowing + the test to be deferred lets cp do some useful things. + But when creating hardlinks and SRC_NAME is a symlink + but DST_RELNAME is not we must test anyway. */ + if (x->hard_link + || !S_ISLNK (src_sb_link->st_mode) + || S_ISLNK (dst_sb_link->st_mode)) + return true; + + if (x->dereference != DEREF_NEVER) + return true; +#endif + + if (x->move_mode || x->unlink_dest_before_opening) + { + /* They may refer to the same file if we're in move mode and the + target is a symlink. That is ok, since we remove any existing + destination file before opening it -- via 'rename' if they're on + the same file system, via unlinkat otherwise. */ + if (S_ISLNK (dst_sb_link->st_mode)) + return true; + + /* It's not ok if they're distinct hard links to the same file as + this causes a race condition and we may lose data in this case. */ + if (same_link + && 1 < dst_sb_link->st_nlink + && ! same_nameat (AT_FDCWD, src_name, dst_dirfd, dst_relname)) + return ! x->move_mode; + } + + /* If neither is a symlink, then it's ok as long as they aren't + hard links to the same file. */ + if (!S_ISLNK (src_sb_link->st_mode) && !S_ISLNK (dst_sb_link->st_mode)) + { + if (!SAME_INODE (*src_sb_link, *dst_sb_link)) + return true; + + /* If they are the same file, it's ok if we're making hard links. */ + if (x->hard_link) + { + *return_now = true; + return true; + } + } + + /* At this point, it is normally an error (data loss) to move a symlink + onto its referent, but in at least one narrow case, it is not: + In move mode, when + 1) src is a symlink, + 2) dest has a link count of 2 or more and + 3) dest and the referent of src are not the same directory entry, + then it's ok, since while we'll lose one of those hard links, + src will still point to a remaining link. + Note that technically, condition #3 obviates condition #2, but we + retain the 1 < st_nlink condition because that means fewer invocations + of the more expensive #3. + + Given this, + $ touch f && ln f l && ln -s f s + $ ls -og f l s + -rw-------. 2 0 Jan 4 22:46 f + -rw-------. 2 0 Jan 4 22:46 l + lrwxrwxrwx. 1 1 Jan 4 22:46 s -> f + this must fail: mv s f + this must succeed: mv s l */ + if (x->move_mode + && S_ISLNK (src_sb->st_mode) + && 1 < dst_sb_link->st_nlink) + { + char *abs_src = canonicalize_file_name (src_name); + if (abs_src) + { + bool result = ! same_nameat (AT_FDCWD, abs_src, + dst_dirfd, dst_relname); + free (abs_src); + return result; + } + } + + /* It's ok to recreate a destination symlink. */ + if (x->symbolic_link && S_ISLNK (dst_sb_link->st_mode)) + return true; + + if (x->dereference == DEREF_NEVER) + { + if ( ! S_ISLNK (src_sb_link->st_mode)) + tmp_src_sb = *src_sb_link; + else if (stat (src_name, &tmp_src_sb) != 0) + return true; + + if ( ! S_ISLNK (dst_sb_link->st_mode)) + tmp_dst_sb = *dst_sb_link; + else if (fstatat (dst_dirfd, dst_relname, &tmp_dst_sb, 0) != 0) + return true; + + if ( ! SAME_INODE (tmp_src_sb, tmp_dst_sb)) + return true; + + if (x->hard_link) + { + /* It's ok to attempt to hardlink the same file, + and return early if not replacing a symlink. + Note we need to return early to avoid a later + unlink() of DST (when SRC is a symlink). */ + *return_now = ! S_ISLNK (dst_sb_link->st_mode); + return true; + } + } + + return false; +} + +/* Return whether DST_DIRFD+DST_RELNAME, with mode MODE, + is writable in the sense of 'mv'. + Always consider a symbolic link to be writable. */ +static bool +writable_destination (int dst_dirfd, char const *dst_relname, mode_t mode) +{ + return (S_ISLNK (mode) + || can_write_any_file () + || faccessat (dst_dirfd, dst_relname, W_OK, AT_EACCESS) == 0); +} + +static bool +overwrite_ok (struct cp_options const *x, char const *dst_name, + int dst_dirfd, char const *dst_relname, + struct stat const *dst_sb) +{ + if (! writable_destination (dst_dirfd, dst_relname, dst_sb->st_mode)) + { + char perms[12]; /* "-rwxrwxrwx " ls-style modes. */ + strmode (dst_sb->st_mode, perms); + perms[10] = '\0'; + fprintf (stderr, + (x->move_mode || x->unlink_dest_before_opening + || x->unlink_dest_after_failed_open) + ? _("%s: replace %s, overriding mode %04lo (%s)? ") + : _("%s: unwritable %s (mode %04lo, %s); try anyway? "), + program_name, quoteaf (dst_name), + (unsigned long int) (dst_sb->st_mode & CHMOD_MODE_BITS), + &perms[1]); + } + else + { + fprintf (stderr, _("%s: overwrite %s? "), + program_name, quoteaf (dst_name)); + } + + return yesno (); +} + +/* Initialize the hash table implementing a set of F_triple entries + corresponding to destination files. */ +extern void +dest_info_init (struct cp_options *x) +{ + x->dest_info + = hash_initialize (DEST_INFO_INITIAL_CAPACITY, + nullptr, + triple_hash, + triple_compare, + triple_free); + if (! x->dest_info) + xalloc_die (); +} + +/* Initialize the hash table implementing a set of F_triple entries + corresponding to source files listed on the command line. */ +extern void +src_info_init (struct cp_options *x) +{ + + /* Note that we use triple_hash_no_name here. + Contrast with the use of triple_hash above. + That is necessary because a source file may be specified + in many different ways. We want to warn about this + cp a a d/ + as well as this: + cp a ./a d/ + */ + x->src_info + = hash_initialize (DEST_INFO_INITIAL_CAPACITY, + nullptr, + triple_hash_no_name, + triple_compare, + triple_free); + if (! x->src_info) + xalloc_die (); +} + +/* When effecting a move (e.g., for mv(1)), and given the name DST_NAME + aka DST_DIRFD+DST_RELNAME + of the destination and a corresponding stat buffer, DST_SB, return + true if the logical 'move' operation should _not_ proceed. + Otherwise, return false. + Depending on options specified in X, this code may issue an + interactive prompt asking whether it's ok to overwrite DST_NAME. */ +static bool +abandon_move (const struct cp_options *x, + char const *dst_name, + int dst_dirfd, char const *dst_relname, + struct stat const *dst_sb) +{ + affirm (x->move_mode); + return (x->interactive == I_ALWAYS_NO + || x->interactive == I_ALWAYS_SKIP + || ((x->interactive == I_ASK_USER + || (x->interactive == I_UNSPECIFIED + && x->stdin_tty + && ! writable_destination (dst_dirfd, dst_relname, + dst_sb->st_mode))) + && ! overwrite_ok (x, dst_name, dst_dirfd, dst_relname, dst_sb))); +} + +/* Print --verbose output on standard output, e.g. 'new' -> 'old'. + If BACKUP_DST_NAME is non-null, then also indicate that it is + the name of a backup file. */ +static void +emit_verbose (char const *src, char const *dst, char const *backup_dst_name) +{ + printf ("%s -> %s", quoteaf_n (0, src), quoteaf_n (1, dst)); + if (backup_dst_name) + printf (_(" (backup: %s)"), quoteaf (backup_dst_name)); + putchar ('\n'); +} + +/* A wrapper around "setfscreatecon (nullptr)" that exits upon failure. */ +static void +restore_default_fscreatecon_or_die (void) +{ + if (setfscreatecon (nullptr) != 0) + error (EXIT_FAILURE, errno, + _("failed to restore the default file creation context")); +} + +/* Return a newly-allocated string that is like STR + except replace its suffix SUFFIX with NEWSUFFIX. */ +static char * +subst_suffix (char const *str, char const *suffix, char const *newsuffix) +{ + idx_t prefixlen = suffix - str; + idx_t newsuffixsize = strlen (newsuffix) + 1; + char *r = ximalloc (prefixlen + newsuffixsize); + memcpy (r + prefixlen, newsuffix, newsuffixsize); + return memcpy (r, str, prefixlen); +} + +/* Create a hard link to SRC_NAME aka SRC_DIRFD+SRC_RELNAME; + the new link is at DST_NAME aka DST_DIRFD+DST_RELNAME. + A null SRC_NAME stands for the file whose name is like DST_NAME + except with DST_RELNAME replaced with SRC_RELNAME. + Honor the REPLACE, VERBOSE and DEREFERENCE settings. + Return true upon success. Otherwise, diagnose the + failure and return false. If SRC_NAME is a symbolic link, then it will not + be followed unless DEREFERENCE is true. + If the system doesn't support hard links to symbolic links, then DST_NAME + will be created as a symbolic link to SRC_NAME. */ +static bool +create_hard_link (char const *src_name, int src_dirfd, char const *src_relname, + char const *dst_name, int dst_dirfd, char const *dst_relname, + bool replace, bool verbose, bool dereference) +{ + int err = force_linkat (src_dirfd, src_relname, dst_dirfd, dst_relname, + dereference ? AT_SYMLINK_FOLLOW : 0, + replace, -1); + if (0 < err) + { + + char *a_src_name = nullptr; + if (!src_name) + src_name = a_src_name = subst_suffix (dst_name, dst_relname, + src_relname); + error (0, err, _("cannot create hard link %s to %s"), + quoteaf_n (0, dst_name), quoteaf_n (1, src_name)); + free (a_src_name); + return false; + } + if (err < 0 && verbose) + printf (_("removed %s\n"), quoteaf (dst_name)); + return true; +} + +/* Return true if the current file should be (tried to be) dereferenced: + either for DEREF_ALWAYS or for DEREF_COMMAND_LINE_ARGUMENTS in the case + where the current file is a COMMAND_LINE_ARG; otherwise return false. */ +ATTRIBUTE_PURE +static inline bool +should_dereference (const struct cp_options *x, bool command_line_arg) +{ + return x->dereference == DEREF_ALWAYS + || (x->dereference == DEREF_COMMAND_LINE_ARGUMENTS + && command_line_arg); +} + +/* Return true if the source file with basename SRCBASE and status SRC_ST + is likely to be the simple backup file for DST_DIRFD+DST_RELNAME. */ +static bool +source_is_dst_backup (char const *srcbase, struct stat const *src_st, + int dst_dirfd, char const *dst_relname) +{ + size_t srcbaselen = strlen (srcbase); + char const *dstbase = last_component (dst_relname); + size_t dstbaselen = strlen (dstbase); + size_t suffixlen = strlen (simple_backup_suffix); + if (! (srcbaselen == dstbaselen + suffixlen + && memcmp (srcbase, dstbase, dstbaselen) == 0 + && STREQ (srcbase + dstbaselen, simple_backup_suffix))) + return false; + char *dst_back = subst_suffix (dst_relname, + dst_relname + strlen (dst_relname), + simple_backup_suffix); + struct stat dst_back_sb; + int dst_back_status = fstatat (dst_dirfd, dst_back, &dst_back_sb, 0); + free (dst_back); + return dst_back_status == 0 && SAME_INODE (*src_st, dst_back_sb); +} + +/* Copy the file SRC_NAME to the file DST_NAME aka DST_DIRFD+DST_RELNAME. + If NONEXISTENT_DST is positive, DST_NAME does not exist even as a + dangling symlink; if negative, it does not exist except possibly + as a dangling symlink; if zero, its existence status is unknown. + A non-null PARENT describes the parent directory. + ANCESTORS points to a linked, null terminated list of + devices and inodes of parent directories of SRC_NAME. + X summarizes the command-line options. + COMMAND_LINE_ARG means SRC_NAME was specified on the command line. + FIRST_DIR_CREATED_PER_COMMAND_LINE_ARG is both input and output. + Set *COPY_INTO_SELF if SRC_NAME is a parent of (or the + same as) DST_NAME; otherwise, clear it. + If X->move_mode, set *RENAME_SUCCEEDED according to whether + the source was simply renamed to the destination. + Return true if successful. */ +static bool +copy_internal (char const *src_name, char const *dst_name, + int dst_dirfd, char const *dst_relname, + int nonexistent_dst, + struct stat const *parent, + struct dir_list *ancestors, + const struct cp_options *x, + bool command_line_arg, + bool *first_dir_created_per_command_line_arg, + bool *copy_into_self, + bool *rename_succeeded) +{ + struct stat src_sb; + struct stat dst_sb; + mode_t src_mode IF_LINT ( = 0); + mode_t dst_mode IF_LINT ( = 0); + mode_t dst_mode_bits; + mode_t omitted_permissions; + bool restore_dst_mode = false; + char *earlier_file = nullptr; + char *dst_backup = nullptr; + char const *drelname = *dst_relname ? dst_relname : "."; + bool delayed_ok; + bool copied_as_regular = false; + bool dest_is_symlink = false; + bool have_dst_lstat = false; + + /* Whether the destination is (or was) known to be new, updated as + more info comes in. This may become true if the destination is a + dangling symlink, in contexts where dangling symlinks should be + treated the same as nonexistent files. */ + bool new_dst = 0 < nonexistent_dst; + + *copy_into_self = false; + + int rename_errno = x->rename_errno; + if (x->move_mode) + { + if (rename_errno < 0) + rename_errno = (renameatu (AT_FDCWD, src_name, dst_dirfd, drelname, + RENAME_NOREPLACE) + ? errno : 0); + nonexistent_dst = *rename_succeeded = new_dst = rename_errno == 0; + } + + if (rename_errno == 0 + ? !x->last_file + : rename_errno != EEXIST + || (x->interactive != I_ALWAYS_NO && x->interactive != I_ALWAYS_SKIP)) + { + char const *name = rename_errno == 0 ? dst_name : src_name; + int dirfd = rename_errno == 0 ? dst_dirfd : AT_FDCWD; + char const *relname = rename_errno == 0 ? drelname : src_name; + int fstatat_flags + = x->dereference == DEREF_NEVER ? AT_SYMLINK_NOFOLLOW : 0; + if (follow_fstatat (dirfd, relname, &src_sb, fstatat_flags) != 0) + { + error (0, errno, _("cannot stat %s"), quoteaf (name)); + return false; + } + + src_mode = src_sb.st_mode; + + if (S_ISDIR (src_mode) && !x->recursive) + { + error (0, 0, ! x->install_mode /* cp */ + ? _("-r not specified; omitting directory %s") + : _("omitting directory %s"), + quoteaf (src_name)); + return false; + } + } + else + { +#if defined lint && (defined __clang__ || defined __COVERITY__) + affirm (x->move_mode); + memset (&src_sb, 0, sizeof src_sb); +#endif + } + + /* Detect the case in which the same source file appears more than + once on the command line and no backup option has been selected. + If so, simply warn and don't copy it the second time. + This check is enabled only if x->src_info is non-null. */ + if (command_line_arg && x->src_info) + { + if ( ! S_ISDIR (src_mode) + && x->backup_type == no_backups + && seen_file (x->src_info, src_name, &src_sb)) + { + error (0, 0, _("warning: source file %s specified more than once"), + quoteaf (src_name)); + return true; + } + + record_file (x->src_info, src_name, &src_sb); + } + + bool dereference = should_dereference (x, command_line_arg); + + if (nonexistent_dst <= 0) + { + if (! (rename_errno == EEXIST + && (x->interactive == I_ALWAYS_NO + || x->interactive == I_ALWAYS_SKIP))) + { + /* Regular files can be created by writing through symbolic + links, but other files cannot. So use stat on the + destination when copying a regular file, and lstat otherwise. + However, if we intend to unlink or remove the destination + first, use lstat, since a copy won't actually be made to the + destination in that case. */ + bool use_lstat + = ((! S_ISREG (src_mode) + && (! x->copy_as_regular + || S_ISDIR (src_mode) || S_ISLNK (src_mode))) + || x->move_mode || x->symbolic_link || x->hard_link + || x->backup_type != no_backups + || x->unlink_dest_before_opening); + int fstatat_flags = use_lstat ? AT_SYMLINK_NOFOLLOW : 0; + if (!use_lstat && nonexistent_dst < 0) + new_dst = true; + else if (follow_fstatat (dst_dirfd, drelname, &dst_sb, fstatat_flags) + == 0) + { + have_dst_lstat = use_lstat; + rename_errno = EEXIST; + } + else + { + if (errno == ELOOP && x->unlink_dest_after_failed_open) + /* leave new_dst=false so we unlink later. */; + else if (errno != ENOENT) + { + error (0, errno, _("cannot stat %s"), quoteaf (dst_name)); + return false; + } + else + new_dst = true; + } + } + + if (rename_errno == EEXIST) + { + bool return_now = false; + bool return_val = true; + bool skipped = false; + + if ((x->interactive != I_ALWAYS_NO && x->interactive != I_ALWAYS_SKIP) + && ! same_file_ok (src_name, &src_sb, dst_dirfd, drelname, + &dst_sb, x, &return_now)) + { + error (0, 0, _("%s and %s are the same file"), + quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); + return false; + } + + if (x->update && !S_ISDIR (src_mode)) + { + /* When preserving timestamps (but not moving within a file + system), don't worry if the destination timestamp is + less than the source merely because of timestamp + truncation. */ + int options = ((x->preserve_timestamps + && ! (x->move_mode + && dst_sb.st_dev == src_sb.st_dev)) + ? UTIMECMP_TRUNCATE_SOURCE + : 0); + + if (0 <= utimecmpat (dst_dirfd, dst_relname, &dst_sb, + &src_sb, options)) + { + /* We're using --update and the destination is not older + than the source, so do not copy or move. Pretend the + rename succeeded, so the caller (if it's mv) doesn't + end up removing the source file. */ + if (rename_succeeded) + *rename_succeeded = true; + + /* However, we still must record that we've processed + this src/dest pair, in case this source file is + hard-linked to another one. In that case, we'll use + the mapping information to link the corresponding + destination names. */ + earlier_file = remember_copied (dst_relname, src_sb.st_ino, + src_sb.st_dev); + if (earlier_file) + { + /* Note we currently replace DST_NAME unconditionally, + even if it was a newer separate file. */ + if (! create_hard_link (nullptr, dst_dirfd, earlier_file, + dst_name, dst_dirfd, dst_relname, + true, + x->verbose, dereference)) + { + goto un_backup; + } + } + + skipped = true; + goto skip; + } + } + + /* When there is an existing destination file, we may end up + returning early, and hence not copying/moving the file. + This may be due to an interactive 'negative' reply to the + prompt about the existing file. It may also be due to the + use of the --no-clobber option. + + cp and mv treat -i and -f differently. */ + if (x->move_mode) + { + if (abandon_move (x, dst_name, dst_dirfd, drelname, &dst_sb)) + { + /* Pretend the rename succeeded, so the caller (mv) + doesn't end up removing the source file. */ + if (rename_succeeded) + *rename_succeeded = true; + + skipped = true; + return_val = x->interactive == I_ALWAYS_SKIP; + } + } + else + { + if (! S_ISDIR (src_mode) + && (x->interactive == I_ALWAYS_NO + || x->interactive == I_ALWAYS_SKIP + || (x->interactive == I_ASK_USER + && ! overwrite_ok (x, dst_name, dst_dirfd, + dst_relname, &dst_sb)))) + { + skipped = true; + return_val = x->interactive == I_ALWAYS_SKIP; + } + } + +skip: + if (skipped) + { + if (x->interactive == I_ALWAYS_NO) + error (0, 0, _("not replacing %s"), quoteaf (dst_name)); + else if (x->debug) + printf (_("skipped %s\n"), quoteaf (dst_name)); + + return_now = true; + } + + if (return_now) + return return_val; + + if (!S_ISDIR (dst_sb.st_mode)) + { + if (S_ISDIR (src_mode)) + { + if (x->move_mode && x->backup_type != no_backups) + { + /* Moving a directory onto an existing + non-directory is ok only with --backup. */ + } + else + { + error (0, 0, + _("cannot overwrite non-directory %s with directory %s"), + quoteaf_n (0, dst_name), quoteaf_n (1, src_name)); + return false; + } + } + + /* Don't let the user destroy their data, even if they try hard: + This mv command must fail (likewise for cp): + rm -rf a b c; mkdir a b c; touch a/f b/f; mv a/f b/f c + Otherwise, the contents of b/f would be lost. + In the case of 'cp', b/f would be lost if the user simulated + a move using cp and rm. + Note that it works fine if you use --backup=numbered. */ + if (command_line_arg + && x->backup_type != numbered_backups + && seen_file (x->dest_info, dst_relname, &dst_sb)) + { + error (0, 0, + _("will not overwrite just-created %s with %s"), + quoteaf_n (0, dst_name), quoteaf_n (1, src_name)); + return false; + } + } + + if (!S_ISDIR (src_mode)) + { + if (S_ISDIR (dst_sb.st_mode)) + { + if (x->move_mode && x->backup_type != no_backups) + { + /* Moving a non-directory onto an existing + directory is ok only with --backup. */ + } + else + { + error (0, 0, + _("cannot overwrite directory %s with non-directory"), + quoteaf (dst_name)); + return false; + } + } + } + + if (x->move_mode) + { + /* Don't allow user to move a directory onto a non-directory. */ + if (S_ISDIR (src_sb.st_mode) && !S_ISDIR (dst_sb.st_mode) + && x->backup_type == no_backups) + { + error (0, 0, + _("cannot move directory onto non-directory: %s -> %s"), + quotef_n (0, src_name), quotef_n (0, dst_name)); + return false; + } + } + + char const *srcbase; + if (x->backup_type != no_backups + /* Don't try to back up a destination if the last + component of src_name is "." or "..". */ + && ! dot_or_dotdot (srcbase = last_component (src_name)) + /* Create a backup of each destination directory in move mode, + but not in copy mode. FIXME: it might make sense to add an + option to suppress backup creation also for move mode. + That would let one use mv to merge new content into an + existing hierarchy. */ + && (x->move_mode || ! S_ISDIR (dst_sb.st_mode))) + { + /* Fail if creating the backup file would likely destroy + the source file. Otherwise, the commands: + cd /tmp; rm -f a a~; : > a; echo A > a~; cp --b=simple a~ a + would leave two zero-length files: a and a~. */ + if (x->backup_type != numbered_backups + && source_is_dst_backup (srcbase, &src_sb, + dst_dirfd, dst_relname)) + { + char const *fmt; + fmt = (x->move_mode + ? _("backing up %s might destroy source; %s not moved") + : _("backing up %s might destroy source; %s not copied")); + error (0, 0, fmt, + quoteaf_n (0, dst_name), + quoteaf_n (1, src_name)); + return false; + } + + char *tmp_backup = backup_file_rename (dst_dirfd, dst_relname, + x->backup_type); + + /* FIXME: use fts: + Using alloca for a file name that may be arbitrarily + long is not recommended. In fact, even forming such a name + should be discouraged. Eventually, this code will be rewritten + to use fts, so using alloca here will be less of a problem. */ + if (tmp_backup) + { + idx_t dirlen = dst_relname - dst_name; + idx_t backupsize = strlen (tmp_backup) + 1; + dst_backup = alloca (dirlen + backupsize); + memcpy (mempcpy (dst_backup, dst_name, dirlen), + tmp_backup, backupsize); + free (tmp_backup); + } + else if (errno != ENOENT) + { + error (0, errno, _("cannot backup %s"), quoteaf (dst_name)); + return false; + } + new_dst = true; + } + else if (! S_ISDIR (dst_sb.st_mode) + /* Never unlink dst_name when in move mode. */ + && ! x->move_mode + && (x->unlink_dest_before_opening + || (x->data_copy_required + && ((x->preserve_links && 1 < dst_sb.st_nlink) + || (x->dereference == DEREF_NEVER + && ! S_ISREG (src_sb.st_mode)))) + )) + { + if (unlinkat (dst_dirfd, dst_relname, 0) != 0 && errno != ENOENT) + { + error (0, errno, _("cannot remove %s"), quoteaf (dst_name)); + return false; + } + new_dst = true; + if (x->verbose) + printf (_("removed %s\n"), quoteaf (dst_name)); + } + } + } + + /* Ensure we don't try to copy through a symlink that was + created by a prior call to this function. */ + if (command_line_arg + && x->dest_info + && ! x->move_mode + && x->backup_type == no_backups) + { + bool lstat_ok = true; + struct stat tmp_buf; + struct stat *dst_lstat_sb; + + /* If we did not follow symlinks above, good: use that data. + Otherwise, use AT_SYMLINK_NOFOLLOW, in case dst_name is a symlink. */ + if (have_dst_lstat) + dst_lstat_sb = &dst_sb; + else if (fstatat (dst_dirfd, drelname, &tmp_buf, AT_SYMLINK_NOFOLLOW) + == 0) + dst_lstat_sb = &tmp_buf; + else + lstat_ok = false; + + /* Never copy through a symlink we've just created. */ + if (lstat_ok + && S_ISLNK (dst_lstat_sb->st_mode) + && seen_file (x->dest_info, dst_relname, dst_lstat_sb)) + { + error (0, 0, + _("will not copy %s through just-created symlink %s"), + quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); + return false; + } + } + + /* If the source is a directory, we don't always create the destination + directory. So --verbose should not announce anything until we're + sure we'll create a directory. Also don't announce yet when moving + so we can distinguish renames versus copies. */ + if (x->verbose && !x->move_mode && !S_ISDIR (src_mode)) + emit_verbose (src_name, dst_name, dst_backup); + + /* Associate the destination file name with the source device and inode + so that if we encounter a matching dev/ino pair in the source tree + we can arrange to create a hard link between the corresponding names + in the destination tree. + + When using the --link (-l) option, there is no need to take special + measures, because (barring race conditions) files that are hard-linked + in the source tree will also be hard-linked in the destination tree. + + Sometimes, when preserving links, we have to record dev/ino even + though st_nlink == 1: + - when in move_mode, since we may be moving a group of N hard-linked + files (via two or more command line arguments) to a different + partition; the links may be distributed among the command line + arguments (possibly hierarchies) so that the link count of + the final, once-linked source file is reduced to 1 when it is + considered below. But in this case (for mv) we don't need to + incur the expense of recording the dev/ino => name mapping; all we + really need is a lookup, to see if the dev/ino pair has already + been copied. + - when using -H and processing a command line argument; + that command line argument could be a symlink pointing to another + command line argument. With 'cp -H --preserve=link', we hard-link + those two destination files. + - likewise for -L except that it applies to all files, not just + command line arguments. + + Also, with --recursive, record dev/ino of each command-line directory. + We'll use that info to detect this problem: cp -R dir dir. */ + + if (rename_errno == 0) + earlier_file = nullptr; + else if (x->recursive && S_ISDIR (src_mode)) + { + if (command_line_arg) + earlier_file = remember_copied (dst_relname, + src_sb.st_ino, src_sb.st_dev); + else + earlier_file = src_to_dest_lookup (src_sb.st_ino, src_sb.st_dev); + } + else if (x->move_mode && src_sb.st_nlink == 1) + { + earlier_file = src_to_dest_lookup (src_sb.st_ino, src_sb.st_dev); + } + else if (x->preserve_links + && !x->hard_link + && (1 < src_sb.st_nlink + || (command_line_arg + && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS) + || x->dereference == DEREF_ALWAYS)) + { + earlier_file = remember_copied (dst_relname, + src_sb.st_ino, src_sb.st_dev); + } + + /* Did we copy this inode somewhere else (in this command line argument) + and therefore this is a second hard link to the inode? */ + + if (earlier_file) + { + /* Avoid damaging the destination file system by refusing to preserve + hard-linked directories (which are found at least in Netapp snapshot + directories). */ + if (S_ISDIR (src_mode)) + { + /* If src_name and earlier_file refer to the same directory entry, + then warn about copying a directory into itself. */ + if (same_nameat (AT_FDCWD, src_name, dst_dirfd, earlier_file)) + { + error (0, 0, _("cannot copy a directory, %s, into itself, %s"), + quoteaf_n (0, top_level_src_name), + quoteaf_n (1, top_level_dst_name)); + *copy_into_self = true; + goto un_backup; + } + else if (same_nameat (dst_dirfd, dst_relname, + dst_dirfd, earlier_file)) + { + error (0, 0, _("warning: source directory %s " + "specified more than once"), + quoteaf (top_level_src_name)); + /* In move mode, if a previous rename succeeded, then + we won't be in this path as the source is missing. If the + rename previously failed, then that has been handled, so + pretend this attempt succeeded so the source isn't removed. */ + if (x->move_mode && rename_succeeded) + *rename_succeeded = true; + /* We only do backups in move mode, and for non directories. + So just ignore this repeated entry. */ + return true; + } + else if (x->dereference == DEREF_ALWAYS + || (command_line_arg + && x->dereference == DEREF_COMMAND_LINE_ARGUMENTS)) + { + /* This happens when e.g., encountering a directory for the + second or subsequent time via symlinks when cp is invoked + with -R and -L. E.g., + rm -rf a b c d; mkdir a b c d; ln -s ../c a; ln -s ../c b; + cp -RL a b d + */ + } + else + { + char *earlier = subst_suffix (dst_name, dst_relname, + earlier_file); + error (0, 0, _("will not create hard link %s to directory %s"), + quoteaf_n (0, dst_name), quoteaf_n (1, earlier)); + free (earlier); + goto un_backup; + } + } + else + { + if (! create_hard_link (nullptr, dst_dirfd, earlier_file, + dst_name, dst_dirfd, dst_relname, + true, x->verbose, dereference)) + goto un_backup; + + return true; + } + } + + if (x->move_mode) + { + if (rename_errno == EEXIST) + rename_errno = (renameat (AT_FDCWD, src_name, dst_dirfd, drelname) == 0 + ? 0 : errno); + + if (rename_errno == 0) + { + if (x->verbose) + { + printf (_("renamed ")); + emit_verbose (src_name, dst_name, dst_backup); + } + + if (x->set_security_context) + { + /* -Z failures are only warnings currently. */ + (void) set_file_security_ctx (dst_name, true, x); + } + + if (rename_succeeded) + *rename_succeeded = true; + + if (command_line_arg && !x->last_file) + { + /* Record destination dev/ino/name, so that if we are asked + to overwrite that file again, we can detect it and fail. */ + /* It's fine to use the _source_ stat buffer (src_sb) to get the + _destination_ dev/ino, since the rename above can't have + changed those, and 'mv' always uses lstat. + We could limit it further by operating + only on non-directories. */ + record_file (x->dest_info, dst_relname, &src_sb); + } + + return true; + } + + /* FIXME: someday, consider what to do when moving a directory into + itself but when source and destination are on different devices. */ + + /* This happens when attempting to rename a directory to a + subdirectory of itself. */ + if (rename_errno == EINVAL) + { + /* FIXME: this is a little fragile in that it relies on rename(2) + failing with a specific errno value. Expect problems on + non-POSIX systems. */ + error (0, 0, _("cannot move %s to a subdirectory of itself, %s"), + quoteaf_n (0, top_level_src_name), + quoteaf_n (1, top_level_dst_name)); + + /* Note that there is no need to call forget_created here, + (compare with the other calls in this file) since the + destination directory didn't exist before. */ + + *copy_into_self = true; + /* FIXME-cleanup: Don't return true here; adjust mv.c accordingly. + The only caller that uses this code (mv.c) ends up setting its + exit status to nonzero when copy_into_self is nonzero. */ + return true; + } + + /* WARNING: there probably exist systems for which an inter-device + rename fails with a value of errno not handled here. + If/as those are reported, add them to the condition below. + If this happens to you, please do the following and send the output + to the bug-reporting address (e.g., in the output of cp --help): + touch k; perl -e 'rename "k","/tmp/k" or print "$!(",$!+0,")\n"' + where your current directory is on one partition and /tmp is the other. + Also, please try to find the E* errno macro name corresponding to + the diagnostic and parenthesized integer, and include that in your + e-mail. One way to do that is to run a command like this + find /usr/include/. -type f \ + | xargs grep 'define.*\.*\<18\>' /dev/null + where you'd replace '18' with the integer in parentheses that + was output from the perl one-liner above. + If necessary, of course, change '/tmp' to some other directory. */ + if (rename_errno != EXDEV || x->no_copy) + { + /* There are many ways this can happen due to a race condition. + When something happens between the initial follow_fstatat and the + subsequent rename, we can get many different types of errors. + For example, if the destination is initially a non-directory + or non-existent, but it is created as a directory, the rename + fails. If two 'mv' commands try to rename the same file at + about the same time, one will succeed and the other will fail. + If the permissions on the directory containing the source or + destination file are made too restrictive, the rename will + fail. Etc. */ + char const *quoted_dst_name = quoteaf_n (1, dst_name); + switch (rename_errno) + { + case EDQUOT: case EEXIST: case EISDIR: case EMLINK: + case ENOSPC: case ETXTBSY: +#if ENOTEMPTY != EEXIST + case ENOTEMPTY: +#endif + /* The destination must be the problem. Don't mention + the source as that is more likely to confuse the user + than be helpful. */ + error (0, rename_errno, _("cannot overwrite %s"), + quoted_dst_name); + break; + + default: + error (0, rename_errno, _("cannot move %s to %s"), + quoteaf_n (0, src_name), quoted_dst_name); + break; + } + forget_created (src_sb.st_ino, src_sb.st_dev); + return false; + } + + /* The rename attempt has failed. Remove any existing destination + file so that a cross-device 'mv' acts as if it were really using + the rename syscall. Note both src and dst must both be directories + or not, and this is enforced above. Therefore we check the src_mode + and operate on dst_name here as a tighter constraint and also because + src_mode is readily available here. */ + if ((unlinkat (dst_dirfd, drelname, + S_ISDIR (src_mode) ? AT_REMOVEDIR : 0) + != 0) + && errno != ENOENT) + { + error (0, errno, + _("inter-device move failed: %s to %s; unable to remove target"), + quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); + forget_created (src_sb.st_ino, src_sb.st_dev); + return false; + } + + if (x->verbose && !S_ISDIR (src_mode)) + { + printf (_("copied ")); + emit_verbose (src_name, dst_name, dst_backup); + } + new_dst = true; + } + + /* If the ownership might change, or if it is a directory (whose + special mode bits may change after the directory is created), + omit some permissions at first, so unauthorized users cannot nip + in before the file is ready. */ + dst_mode_bits = (x->set_mode ? x->mode : src_mode) & CHMOD_MODE_BITS; + omitted_permissions = + (dst_mode_bits + & (x->preserve_ownership ? S_IRWXG | S_IRWXO + : S_ISDIR (src_mode) ? S_IWGRP | S_IWOTH + : 0)); + + delayed_ok = true; + + /* If required, set the default security context for new files. + Also for existing files this is used as a reference + when copying the context with --preserve=context. + FIXME: Do we need to consider dst_mode_bits here? */ + if (! set_process_security_ctx (src_name, dst_name, src_mode, new_dst, x)) + return false; + + if (S_ISDIR (src_mode)) + { + struct dir_list *dir; + + /* If this directory has been copied before during the + recursion, there is a symbolic link to an ancestor + directory of the symbolic link. It is impossible to + continue to copy this, unless we've got an infinite file system. */ + + if (is_ancestor (&src_sb, ancestors)) + { + error (0, 0, _("cannot copy cyclic symbolic link %s"), + quoteaf (src_name)); + goto un_backup; + } + + /* Insert the current directory in the list of parents. */ + + dir = alloca (sizeof *dir); + dir->parent = ancestors; + dir->ino = src_sb.st_ino; + dir->dev = src_sb.st_dev; + + if (new_dst || !S_ISDIR (dst_sb.st_mode)) + { + /* POSIX says mkdir's behavior is implementation-defined when + (src_mode & ~S_IRWXUGO) != 0. However, common practice is + to ask mkdir to copy all the CHMOD_MODE_BITS, letting mkdir + decide what to do with S_ISUID | S_ISGID | S_ISVTX. */ + mode_t mode = dst_mode_bits & ~omitted_permissions; + if (mkdirat (dst_dirfd, drelname, mode) != 0) + { + error (0, errno, _("cannot create directory %s"), + quoteaf (dst_name)); + goto un_backup; + } + + /* We need search and write permissions to the new directory + for writing the directory's contents. Check if these + permissions are there. */ + + if (fstatat (dst_dirfd, drelname, &dst_sb, AT_SYMLINK_NOFOLLOW) != 0) + { + error (0, errno, _("cannot stat %s"), quoteaf (dst_name)); + goto un_backup; + } + else if ((dst_sb.st_mode & S_IRWXU) != S_IRWXU) + { + /* Make the new directory searchable and writable. */ + + dst_mode = dst_sb.st_mode; + restore_dst_mode = true; + + if (lchmodat (dst_dirfd, drelname, dst_mode | S_IRWXU) != 0) + { + error (0, errno, _("setting permissions for %s"), + quoteaf (dst_name)); + goto un_backup; + } + } + + /* Record the created directory's inode and device numbers into + the search structure, so that we can avoid copying it again. + Do this only for the first directory that is created for each + source command line argument. */ + if (!*first_dir_created_per_command_line_arg) + { + remember_copied (dst_relname, dst_sb.st_ino, dst_sb.st_dev); + *first_dir_created_per_command_line_arg = true; + } + + if (x->verbose) + { + if (x->move_mode) + printf (_("created directory %s\n"), quoteaf (dst_name)); + else + emit_verbose (src_name, dst_name, nullptr); + } + } + else + { + omitted_permissions = 0; + + /* For directories, the process global context could be reset for + descendants, so use it to set the context for existing dirs here. + This will also give earlier indication of failure to set ctx. */ + if (x->set_security_context || x->preserve_security_context) + if (! set_file_security_ctx (dst_name, false, x)) + { + if (x->require_preserve_context) + goto un_backup; + } + } + + /* Decide whether to copy the contents of the directory. */ + if (x->one_file_system && parent && parent->st_dev != src_sb.st_dev) + { + /* Here, we are crossing a file system boundary and cp's -x option + is in effect: so don't copy the contents of this directory. */ + } + else + { + /* Copy the contents of the directory. Don't just return if + this fails -- otherwise, the failure to read a single file + in a source directory would cause the containing destination + directory not to have owner/perms set properly. */ + delayed_ok = copy_dir (src_name, dst_name, dst_dirfd, dst_relname, + new_dst, &src_sb, dir, x, + first_dir_created_per_command_line_arg, + copy_into_self); + } + } + else if (x->symbolic_link) + { + dest_is_symlink = true; + if (*src_name != '/') + { + /* Check that DST_NAME denotes a file in the current directory. */ + struct stat dot_sb; + struct stat dst_parent_sb; + char *dst_parent; + bool in_current_dir; + + dst_parent = dir_name (dst_relname); + + in_current_dir = ((dst_dirfd == AT_FDCWD && STREQ (".", dst_parent)) + /* If either stat call fails, it's ok not to report + the failure and say dst_name is in the current + directory. Other things will fail later. */ + || stat (".", &dot_sb) != 0 + || (fstatat (dst_dirfd, dst_parent, &dst_parent_sb, + 0) != 0) + || SAME_INODE (dot_sb, dst_parent_sb)); + free (dst_parent); + + if (! in_current_dir) + { + error (0, 0, + _("%s: can make relative symbolic links only in current directory"), + quotef (dst_name)); + goto un_backup; + } + } + + int err = force_symlinkat (src_name, dst_dirfd, dst_relname, + x->unlink_dest_after_failed_open, -1); + if (0 < err) + { + error (0, err, _("cannot create symbolic link %s to %s"), + quoteaf_n (0, dst_name), quoteaf_n (1, src_name)); + goto un_backup; + } + } + + /* POSIX 2008 states that it is implementation-defined whether + link() on a symlink creates a hard-link to the symlink, or only + to the referent (effectively dereferencing the symlink) (POSIX + 2001 required the latter behavior, although many systems provided + the former). Yet cp, invoked with '--link --no-dereference', + should not follow the link. We can approximate the desired + behavior by skipping this hard-link creating block and instead + copying the symlink, via the 'S_ISLNK'- copying code below. + + Note gnulib's linkat module, guarantees that the symlink is not + dereferenced. However its emulation currently doesn't maintain + timestamps or ownership so we only call it when we know the + emulation will not be needed. */ + else if (x->hard_link + && !(! CAN_HARDLINK_SYMLINKS && S_ISLNK (src_mode) + && x->dereference == DEREF_NEVER)) + { + bool replace = (x->unlink_dest_after_failed_open + || x->interactive == I_ASK_USER); + if (! create_hard_link (src_name, AT_FDCWD, src_name, + dst_name, dst_dirfd, dst_relname, + replace, false, dereference)) + goto un_backup; + } + else if (S_ISREG (src_mode) + || (x->copy_as_regular && !S_ISLNK (src_mode))) + { + copied_as_regular = true; + /* POSIX says the permission bits of the source file must be + used as the 3rd argument in the open call. Historical + practice passed all the source mode bits to 'open', but the extra + bits were ignored, so it should be the same either way. + + This call uses DST_MODE_BITS, not SRC_MODE. These are + normally the same, and the exception (where x->set_mode) is + used only by 'install', which POSIX does not specify and + where DST_MODE_BITS is what's wanted. */ + if (! copy_reg (src_name, dst_name, dst_dirfd, dst_relname, + x, dst_mode_bits & S_IRWXUGO, + omitted_permissions, &new_dst, &src_sb)) + goto un_backup; + } + else if (S_ISFIFO (src_mode)) + { + /* Use mknodat, rather than mkfifoat, because the former preserves + the special mode bits of a fifo on Solaris 10, while mkfifoat + does not. But fall back on mkfifoat, because on some BSD systems, + mknodat always fails when asked to create a FIFO. */ + mode_t mode = src_mode & ~omitted_permissions; + if (mknodat (dst_dirfd, dst_relname, mode, 0) != 0) + if (mkfifoat (dst_dirfd, dst_relname, mode & ~S_IFIFO) != 0) + { + error (0, errno, _("cannot create fifo %s"), quoteaf (dst_name)); + goto un_backup; + } + } + else if (S_ISBLK (src_mode) || S_ISCHR (src_mode) || S_ISSOCK (src_mode)) + { + mode_t mode = src_mode & ~omitted_permissions; + if (mknodat (dst_dirfd, dst_relname, mode, src_sb.st_rdev) != 0) + { + error (0, errno, _("cannot create special file %s"), + quoteaf (dst_name)); + goto un_backup; + } + } + else if (S_ISLNK (src_mode)) + { + char *src_link_val = areadlink_with_size (src_name, src_sb.st_size); + dest_is_symlink = true; + if (src_link_val == nullptr) + { + error (0, errno, _("cannot read symbolic link %s"), + quoteaf (src_name)); + goto un_backup; + } + + int symlink_err = force_symlinkat (src_link_val, dst_dirfd, dst_relname, + x->unlink_dest_after_failed_open, -1); + if (0 < symlink_err && x->update && !new_dst && S_ISLNK (dst_sb.st_mode) + && dst_sb.st_size == strlen (src_link_val)) + { + /* See if the destination is already the desired symlink. + FIXME: This behavior isn't documented, and seems wrong + in some cases, e.g., if the destination symlink has the + wrong ownership, permissions, or timestamps. */ + char *dest_link_val = + areadlinkat_with_size (dst_dirfd, dst_relname, dst_sb.st_size); + if (dest_link_val) + { + if (STREQ (dest_link_val, src_link_val)) + symlink_err = 0; + free (dest_link_val); + } + } + free (src_link_val); + if (0 < symlink_err) + { + error (0, symlink_err, _("cannot create symbolic link %s"), + quoteaf (dst_name)); + goto un_backup; + } + + if (x->preserve_security_context) + restore_default_fscreatecon_or_die (); + + if (x->preserve_ownership) + { + /* Preserve the owner and group of the just-'copied' + symbolic link, if possible. */ + if (HAVE_LCHOWN + && (lchownat (dst_dirfd, dst_relname, + src_sb.st_uid, src_sb.st_gid) + != 0) + && ! chown_failure_ok (x)) + { + error (0, errno, _("failed to preserve ownership for %s"), + dst_name); + if (x->require_preserve) + goto un_backup; + } + else + { + /* Can't preserve ownership of symlinks. + FIXME: maybe give a warning or even error for symlinks + in directories with the sticky bit set -- there, not + preserving owner/group is a potential security problem. */ + } + } + } + else + { + error (0, 0, _("%s has unknown file type"), quoteaf (src_name)); + goto un_backup; + } + + /* With -Z or --preserve=context, set the context for existing files. + Note this is done already for copy_reg() for reasons described therein. */ + if (!new_dst && !x->copy_as_regular && !S_ISDIR (src_mode) + && (x->set_security_context || x->preserve_security_context)) + { + if (! set_file_security_ctx (dst_name, false, x)) + { + if (x->require_preserve_context) + goto un_backup; + } + } + + if (command_line_arg && x->dest_info) + { + /* Now that the destination file is very likely to exist, + add its info to the set. */ + struct stat sb; + if (fstatat (dst_dirfd, drelname, &sb, AT_SYMLINK_NOFOLLOW) == 0) + record_file (x->dest_info, dst_relname, &sb); + } + + /* If we've just created a hard-link due to cp's --link option, + we're done. */ + if (x->hard_link && ! S_ISDIR (src_mode) + && !(! CAN_HARDLINK_SYMLINKS && S_ISLNK (src_mode) + && x->dereference == DEREF_NEVER)) + return delayed_ok; + + if (copied_as_regular) + return delayed_ok; + + /* POSIX says that 'cp -p' must restore the following: + - permission bits + - setuid, setgid bits + - owner and group + If it fails to restore any of those, we may give a warning but + the destination must not be removed. + FIXME: implement the above. */ + + /* Adjust the times (and if possible, ownership) for the copy. + chown turns off set[ug]id bits for non-root, + so do the chmod last. */ + + if (x->preserve_timestamps) + { + struct timespec timespec[2]; + timespec[0] = get_stat_atime (&src_sb); + timespec[1] = get_stat_mtime (&src_sb); + + int utimensat_flags = dest_is_symlink ? AT_SYMLINK_NOFOLLOW : 0; + if (utimensat (dst_dirfd, drelname, timespec, utimensat_flags) != 0) + { + error (0, errno, _("preserving times for %s"), quoteaf (dst_name)); + if (x->require_preserve) + return false; + } + } + + /* Avoid calling chown if we know it's not necessary. */ + if (!dest_is_symlink && x->preserve_ownership + && (new_dst || !SAME_OWNER_AND_GROUP (src_sb, dst_sb))) + { + switch (set_owner (x, dst_name, dst_dirfd, drelname, -1, + &src_sb, new_dst, &dst_sb)) + { + case -1: + return false; + + case 0: + src_mode &= ~ (S_ISUID | S_ISGID | S_ISVTX); + break; + } + } + + /* Set xattrs after ownership as changing owners will clear capabilities. */ + if (x->preserve_xattr && ! copy_attr (src_name, -1, dst_name, -1, x) + && x->require_preserve_xattr) + return false; + + /* The operations beyond this point may dereference a symlink. */ + if (dest_is_symlink) + return delayed_ok; + + set_author (dst_name, -1, &src_sb); + + if (x->preserve_mode || x->move_mode) + { + if (copy_acl (src_name, -1, dst_name, -1, src_mode) != 0 + && x->require_preserve) + return false; + } + else if (x->set_mode) + { + if (set_acl (dst_name, -1, x->mode) != 0) + return false; + } + else if (x->explicit_no_preserve_mode && new_dst) + { + int default_permissions = S_ISDIR (src_mode) || S_ISSOCK (src_mode) + ? S_IRWXUGO : MODE_RW_UGO; + if (set_acl (dst_name, -1, default_permissions & ~cached_umask ()) != 0) + return false; + } + else + { + if (omitted_permissions) + { + omitted_permissions &= ~ cached_umask (); + + if (omitted_permissions && !restore_dst_mode) + { + /* Permissions were deliberately omitted when the file + was created due to security concerns. See whether + they need to be re-added now. It'd be faster to omit + the lstat, but deducing the current destination mode + is tricky in the presence of implementation-defined + rules for special mode bits. */ + if (new_dst && (fstatat (dst_dirfd, drelname, &dst_sb, + AT_SYMLINK_NOFOLLOW) + != 0)) + { + error (0, errno, _("cannot stat %s"), quoteaf (dst_name)); + return false; + } + dst_mode = dst_sb.st_mode; + if (omitted_permissions & ~dst_mode) + restore_dst_mode = true; + } + } + + if (restore_dst_mode) + { + if (lchmodat (dst_dirfd, drelname, dst_mode | omitted_permissions) + != 0) + { + error (0, errno, _("preserving permissions for %s"), + quoteaf (dst_name)); + if (x->require_preserve) + return false; + } + } + } + + return delayed_ok; + +un_backup: + + if (x->preserve_security_context) + restore_default_fscreatecon_or_die (); + + /* We have failed to create the destination file. + If we've just added a dev/ino entry via the remember_copied + call above (i.e., unless we've just failed to create a hard link), + remove the entry associating the source dev/ino with the + destination file name, so we don't try to 'preserve' a link + to a file we didn't create. */ + if (earlier_file == nullptr) + forget_created (src_sb.st_ino, src_sb.st_dev); + + if (dst_backup) + { + char const *dst_relbackup = &dst_backup[dst_relname - dst_name]; + if (renameat (dst_dirfd, dst_relbackup, dst_dirfd, drelname) != 0) + error (0, errno, _("cannot un-backup %s"), quoteaf (dst_name)); + else + { + if (x->verbose) + printf (_("%s -> %s (unbackup)\n"), + quoteaf_n (0, dst_backup), quoteaf_n (1, dst_name)); + } + } + return false; +} + +static void +valid_options (const struct cp_options *co) +{ + affirm (VALID_BACKUP_TYPE (co->backup_type)); + affirm (VALID_SPARSE_MODE (co->sparse_mode)); + affirm (VALID_REFLINK_MODE (co->reflink_mode)); + affirm (!(co->hard_link && co->symbolic_link)); + affirm (! + (co->reflink_mode == REFLINK_ALWAYS + && co->sparse_mode != SPARSE_AUTO)); +} + +/* Copy the file SRC_NAME to the file DST_NAME aka DST_DIRFD+DST_RELNAME. + If NONEXISTENT_DST is positive, DST_NAME does not exist even as a + dangling symlink; if negative, it does not exist except possibly + as a dangling symlink; if zero, its existence status is unknown. + OPTIONS summarizes the command-line options. + Set *COPY_INTO_SELF if SRC_NAME is a parent of (or the + same as) DST_NAME; otherwise, set clear it. + If X->move_mode, set *RENAME_SUCCEEDED according to whether + the source was simply renamed to the destination. + Return true if successful. */ + +extern bool +copy (char const *src_name, char const *dst_name, + int dst_dirfd, char const *dst_relname, + int nonexistent_dst, const struct cp_options *options, + bool *copy_into_self, bool *rename_succeeded) +{ + valid_options (options); + + /* Record the file names: they're used in case of error, when copying + a directory into itself. I don't like to make these tools do *any* + extra work in the common case when that work is solely to handle + exceptional cases, but in this case, I don't see a way to derive the + top level source and destination directory names where they're used. + An alternative is to use COPY_INTO_SELF and print the diagnostic + from every caller -- but I don't want to do that. */ + top_level_src_name = src_name; + top_level_dst_name = dst_name; + + bool first_dir_created_per_command_line_arg = false; + return copy_internal (src_name, dst_name, dst_dirfd, dst_relname, + nonexistent_dst, nullptr, nullptr, + options, true, + &first_dir_created_per_command_line_arg, + copy_into_self, rename_succeeded); +} + +/* Set *X to the default options for a value of type struct cp_options. */ + +extern void +cp_options_default (struct cp_options *x) +{ + memset (x, 0, sizeof *x); +#ifdef PRIV_FILE_CHOWN + { + priv_set_t *pset = priv_allocset (); + if (!pset) + xalloc_die (); + if (getppriv (PRIV_EFFECTIVE, pset) == 0) + { + x->chown_privileges = priv_ismember (pset, PRIV_FILE_CHOWN); + x->owner_privileges = priv_ismember (pset, PRIV_FILE_OWNER); + } + priv_freeset (pset); + } +#else + x->chown_privileges = x->owner_privileges = (geteuid () == ROOT_UID); +#endif + x->rename_errno = -1; +} + +/* Return true if it's OK for chown to fail, where errno is + the error number that chown failed with and X is the copying + option set. */ + +extern bool +chown_failure_ok (struct cp_options const *x) +{ + /* If non-root uses -p, it's ok if we can't preserve ownership. + But root probably wants to know, e.g. if NFS disallows it, + or if the target system doesn't support file ownership. */ + + return ((errno == EPERM || errno == EINVAL) && !x->chown_privileges); +} + +/* Similarly, return true if it's OK for chmod and similar operations + to fail, where errno is the error number that chmod failed with and + X is the copying option set. */ + +static bool +owner_failure_ok (struct cp_options const *x) +{ + return ((errno == EPERM || errno == EINVAL) && !x->owner_privileges); +} + +/* Return the user's umask, caching the result. + + FIXME: If the destination's parent directory has has a default ACL, + some operating systems (e.g., GNU/Linux's "POSIX" ACLs) use that + ACL's mask rather than the process umask. Currently, the callers + of cached_umask incorrectly assume that this situation cannot occur. */ +extern mode_t +cached_umask (void) +{ + static mode_t mask = (mode_t) -1; + if (mask == (mode_t) -1) + { + mask = umask (0); + umask (mask); + } + return mask; +} diff --git a/src/copy.h b/src/copy.h new file mode 100644 index 0000000..1c43ea3 --- /dev/null +++ b/src/copy.h @@ -0,0 +1,332 @@ +/* core functions for copying files and directories + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from cp.c and librarified by Jim Meyering. */ + +#ifndef COPY_H +# define COPY_H + +# include "hash.h" + +struct selabel_handle; + +/* Control creation of sparse files (files with holes). */ +enum Sparse_type +{ + SPARSE_UNUSED, + + /* Never create holes in DEST. */ + SPARSE_NEVER, + + /* This is the default. Use a crude (and sometimes inaccurate) + heuristic to determine if SOURCE has holes. If so, try to create + holes in DEST. */ + SPARSE_AUTO, + + /* For every sufficiently long sequence of bytes in SOURCE, try to + create a corresponding hole in DEST. There is a performance penalty + here because CP has to search for holes in SRC. But if the holes are + big enough, that penalty can be offset by the decrease in the amount + of data written to the file system. */ + SPARSE_ALWAYS +}; + +/* Control creation of COW files. */ +enum Reflink_type +{ + /* Do a standard copy. */ + REFLINK_NEVER, + + /* Try a COW copy and fall back to a standard copy; this is the default. */ + REFLINK_AUTO, + + /* Require a COW copy and fail if not available. */ + REFLINK_ALWAYS +}; + +/* Control how existing destination files are updated. */ +enum Update_type +{ + /* Always update.. */ + UPDATE_ALL, + + /* Update if dest older. */ + UPDATE_OLDER, + + /* Leave existing files. */ + UPDATE_NONE, +}; + +/* This type is used to help mv (via copy.c) distinguish these cases. */ +enum Interactive +{ + I_ALWAYS_YES = 1, + I_ALWAYS_NO, /* Skip and fail. */ + I_ALWAYS_SKIP, /* Skip and ignore. */ + I_ASK_USER, + I_UNSPECIFIED +}; + +/* How to handle symbolic links. */ +enum Dereference_symlink +{ + DEREF_UNDEFINED = 1, + + /* Copy the symbolic link itself. -P */ + DEREF_NEVER, + + /* If the symbolic is a command line argument, then copy + its referent. Otherwise, copy the symbolic link itself. -H */ + DEREF_COMMAND_LINE_ARGUMENTS, + + /* Copy the referent of the symbolic link. -L */ + DEREF_ALWAYS +}; + +# define VALID_SPARSE_MODE(Mode) \ + ((Mode) == SPARSE_NEVER \ + || (Mode) == SPARSE_AUTO \ + || (Mode) == SPARSE_ALWAYS) + +# define VALID_REFLINK_MODE(Mode) \ + ((Mode) == REFLINK_NEVER \ + || (Mode) == REFLINK_AUTO \ + || (Mode) == REFLINK_ALWAYS) + +/* These options control how files are copied by at least the + following programs: mv (when rename doesn't work), cp, install. + So, if you add a new member, be sure to initialize it in + mv.c, cp.c, and install.c. */ +struct cp_options +{ + enum backup_type backup_type; + + /* How to handle symlinks in the source. */ + enum Dereference_symlink dereference; + + /* This value is used to determine whether to prompt before removing + each existing destination file. It works differently depending on + whether move_mode is set. See code/comments in copy.c. */ + enum Interactive interactive; + + /* Control creation of sparse files. */ + enum Sparse_type sparse_mode; + + /* Set the mode of the destination file to exactly this value + if SET_MODE is nonzero. */ + mode_t mode; + + /* If true, copy all files except (directories and, if not dereferencing + them, symbolic links,) as if they were regular files. */ + bool copy_as_regular; + + /* If true, remove each existing destination nondirectory before + trying to open it. */ + bool unlink_dest_before_opening; + + /* If true, first try to open each existing destination nondirectory, + then, if the open fails, unlink and try again. + This option must be set for 'cp -f', in case the destination file + exists when the open is attempted. It is irrelevant to 'mv' since + any destination is sure to be removed before the open. */ + bool unlink_dest_after_failed_open; + + /* If true, create hard links instead of copying files. + Create destination directories as usual. */ + bool hard_link; + + /* If MOVE_MODE, first try to rename. + If that fails and NO_COPY, fail instead of copying. */ + bool move_mode, no_copy; + + /* If true, install(1) is the caller. */ + bool install_mode; + + /* Whether this process has appropriate privileges to chown a file + whose owner is not the effective user ID. */ + bool chown_privileges; + + /* Whether this process has appropriate privileges to do the + following operations on a file even when it is owned by some + other user: set the file's atime, mtime, mode, or ACL; remove or + rename an entry in the file even though it is a sticky directory, + or to mount on the file. */ + bool owner_privileges; + + /* If true, when copying recursively, skip any subdirectories that are + on different file systems from the one we started on. */ + bool one_file_system; + + /* If true, attempt to give the copies the original files' permissions, + owner, group, and timestamps. */ + bool preserve_ownership; + bool preserve_mode; + bool preserve_timestamps; + bool explicit_no_preserve_mode; + + /* If non-null, attempt to set specified security context */ + struct selabel_handle *set_security_context; + + /* Enabled for mv, and for cp by the --preserve=links option. + If true, attempt to preserve in the destination files any + logical hard links between the source files. If used with cp's + --no-dereference option, and copying two hard-linked files, + the two corresponding destination files will also be hard linked. + + If used with cp's --dereference (-L) option, then, as that option implies, + hard links are *not* preserved. However, when copying a file F and + a symlink S to F, the resulting S and F in the destination directory + will be hard links to the same file (a copy of F). */ + bool preserve_links; + + /* Optionally don't copy the data, either with CoW reflink files or + explicitly with the --attributes-only option. */ + bool data_copy_required; + + /* If true and any of the above (for preserve) file attributes cannot + be applied to a destination file, treat it as a failure and return + nonzero immediately. E.g. for cp -p this must be true, for mv it + must be false. */ + bool require_preserve; + + /* If true, attempt to preserve the SELinux security context, too. + Set this only if the kernel is SELinux enabled. */ + bool preserve_security_context; + + /* Useful only when preserve_context is true. + If true, a failed attempt to preserve file's security context + propagates failure "out" to the caller, along with full diagnostics. + If false, a failure to preserve file's security context does not + change the invoking application's exit status, but may output diagnostics. + For example, with 'cp --preserve=context' this flag is "true", + while with 'cp --preserve=all' or 'cp -a', it is "false". */ + bool require_preserve_context; + + /* If true, attempt to preserve extended attributes using libattr. + Ignored if coreutils are compiled without xattr support. */ + bool preserve_xattr; + + /* Useful only when preserve_xattr is true. + If true, a failed attempt to preserve file's extended attributes + propagates failure "out" to the caller, along with full diagnostics. + If false, a failure to preserve file's extended attributes does not + change the invoking application's exit status, but may output diagnostics. + For example, with 'cp --preserve=xattr' this flag is "true", + while with 'cp --preserve=all' or 'cp -a', it is "false". */ + bool require_preserve_xattr; + + /* This allows us to output warnings in cases 2 and 4 below, + while being quiet for case 1 (when reduce_diagnostics is true). + 1. cp -a try to copy xattrs with no errors + 2. cp --preserve=all copy xattrs with all but ENOTSUP warnings + 3. cp --preserve=xattr,context copy xattrs with all errors + 4. mv copy xattrs with all but ENOTSUP warnings + */ + bool reduce_diagnostics; + + /* If true, copy directories recursively and copy special files + as themselves rather than copying their contents. */ + bool recursive; + + /* If true, set file mode to value of MODE. Otherwise, + set it based on current umask modified by UMASK_KILL. */ + bool set_mode; + + /* If true, create symbolic links instead of copying files. + Create destination directories as usual. */ + bool symbolic_link; + + /* If true, do not copy a nondirectory that has an existing destination + with the same or newer modification time. */ + bool update; + + /* If true, display the names of the files before copying them. */ + bool verbose; + + /* If true, display details of how files were copied. */ + bool debug; + + /* If true, stdin is a tty. */ + bool stdin_tty; + + /* If true, open a dangling destination symlink when not in move_mode. + Otherwise, copy_reg gives a diagnostic (it refuses to write through + such a symlink) and returns false. */ + bool open_dangling_dest_symlink; + + /* If true, this is the last filed to be copied. mv uses this to + avoid some unnecessary work. */ + bool last_file; + + /* Zero if the source has already been renamed to the destination; a + positive errno number if this failed with the given errno; -1 if + no attempt has been made to rename. Always -1, except for mv. */ + int rename_errno; + + /* Control creation of COW files. */ + enum Reflink_type reflink_mode; + + /* This is a set of destination name/inode/dev triples. Each such triple + represents a file we have created corresponding to a source file name + that was specified on the command line. Use it to avoid clobbering + source files in commands like this: + rm -rf a b c; mkdir a b c; touch a/f b/f; mv a/f b/f c + For now, it protects only regular files when copying (i.e., not renaming). + When renaming, it protects all non-directories. + Use dest_info_init to initialize it, or set it to nullptr to disable + this feature. */ + Hash_table *dest_info; + + /* FIXME */ + Hash_table *src_info; +}; + +/* Arrange to make rename calls go through the wrapper function + on systems with a rename function that fails for a source file name + specified with a trailing slash. */ +# if RENAME_TRAILING_SLASH_BUG +int rpl_rename (char const *, char const *); +# undef rename +# define rename rpl_rename +# endif + +bool copy (char const *src_name, char const *dst_name, + int dst_dirfd, char const *dst_relname, + int nonexistent_dst, const struct cp_options *options, + bool *copy_into_self, bool *rename_succeeded) + _GL_ATTRIBUTE_NONNULL ((1, 2, 4, 6, 7)); + +extern bool set_process_security_ctx (char const *src_name, + char const *dst_name, + mode_t mode, bool new_dst, + const struct cp_options *x) + _GL_ATTRIBUTE_NONNULL (); + +extern bool set_file_security_ctx (char const *dst_name, + bool recurse, const struct cp_options *x) + _GL_ATTRIBUTE_NONNULL (); + +void dest_info_init (struct cp_options *) _GL_ATTRIBUTE_NONNULL (); +void dest_info_free (struct cp_options *) _GL_ATTRIBUTE_NONNULL (); +void src_info_init (struct cp_options *) _GL_ATTRIBUTE_NONNULL (); +void src_info_free (struct cp_options *) _GL_ATTRIBUTE_NONNULL (); + +void cp_options_default (struct cp_options *) _GL_ATTRIBUTE_NONNULL (); +bool chown_failure_ok (struct cp_options const *) + _GL_ATTRIBUTE_NONNULL () _GL_ATTRIBUTE_PURE; +mode_t cached_umask (void); + +#endif diff --git a/src/coreutils-arch.c b/src/coreutils-arch.c new file mode 100644 index 0000000..232b09f --- /dev/null +++ b/src/coreutils-arch.c @@ -0,0 +1,33 @@ +/* arch -- wrapper to uname with the right uname_mode. + Copyright (C) 2014-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Alex Deymo . */ + +#include +#include "system.h" + +#include "uname.h" +/* Ensure that the main for uname is declared even if the tool is not being + built in this single-binary. */ +int single_binary_main_uname (int argc, char **argv); +int single_binary_main_arch (int argc, char **argv); + +int +single_binary_main_arch (int argc, char **argv) +{ + uname_mode = UNAME_ARCH; + return single_binary_main_uname (argc, argv); +} diff --git a/src/coreutils-dir.c b/src/coreutils-dir.c new file mode 100644 index 0000000..b772218 --- /dev/null +++ b/src/coreutils-dir.c @@ -0,0 +1,33 @@ +/* dir -- wrapper to ls with the right ls_mode. + Copyright (C) 2014-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Alex Deymo . */ + +#include +#include "system.h" + +#include "ls.h" +/* Ensure that the main for ls is declared even if the tool is not being built + in this single-binary. */ +int single_binary_main_ls (int argc, char **argv); +int single_binary_main_dir (int argc, char **argv); + +int +single_binary_main_dir (int argc, char **argv) +{ + ls_mode = LS_MULTI_COL; + return single_binary_main_ls (argc, argv); +} diff --git a/src/coreutils-vdir.c b/src/coreutils-vdir.c new file mode 100644 index 0000000..aa2065b --- /dev/null +++ b/src/coreutils-vdir.c @@ -0,0 +1,33 @@ +/* vdir -- wrapper to ls with the right ls_mode. + Copyright (C) 2014-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Alex Deymo . */ + +#include +#include "system.h" + +#include "ls.h" +/* Ensure that the main for ls is declared even if the tool is not being built + in this single-binary. */ +int single_binary_main_ls (int argc, char **argv); +int single_binary_main_vdir (int argc, char **argv); + +int +single_binary_main_vdir (int argc, char **argv) +{ + ls_mode = LS_LONG_FORMAT; + return single_binary_main_ls (argc, argv); +} diff --git a/src/coreutils.c b/src/coreutils.c new file mode 100644 index 0000000..92ba41b --- /dev/null +++ b/src/coreutils.c @@ -0,0 +1,206 @@ +/* Copyright (C) 2014-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* coreutils.c aggregates the functionality of every other tool into a single + binary multiplexed by the value of argv[0]. This is enabled by passing + --enable-single-binary to configure. + + Written by Alex Deymo . */ + +#include +#include +#include +#if HAVE_PRCTL +# include +#endif + +#include "system.h" +#include "quote.h" + +#ifdef SINGLE_BINARY +/* Declare the main function on each one of the selected tools. This name + needs to match the one passed as CFLAGS on single-binary.mk (generated + by gen-single-binary.sh). */ +# define SINGLE_BINARY_PROGRAM(prog_name_str, main_name) \ + int single_binary_main_##main_name (int, char **); +# include "coreutils.h" +# undef SINGLE_BINARY_PROGRAM +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "coreutils" + +#define AUTHORS \ + proper_name ("Alex Deymo") + +static struct option const long_options[] = +{ + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s --coreutils-prog=PROGRAM_NAME [PARAMETERS]... \n"), + program_name); + fputs (_("\ +Execute the PROGRAM_NAME built-in program with the given PARAMETERS.\n\ +\n"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + +#ifdef SINGLE_BINARY +/* XXX: Ideally we'd like to present "install" here, not "ginstall". */ + char const *prog_name_list = +# define SINGLE_BINARY_PROGRAM(prog_name_str, main_name) " " prog_name_str +# include "coreutils.h" +# undef SINGLE_BINARY_PROGRAM + ; + printf ("\n\ +Built-in programs:\n\ +%s\n", prog_name_list); +#endif + + printf (_("\ +\n\ +Use: '%s --coreutils-prog=PROGRAM_NAME --help' for individual program help.\n"), + program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +launch_program (char const *prog_name, int prog_argc, char **prog_argv) +{ + int (*prog_main) (int, char **) = nullptr; + + /* Ensure that at least one parameter was passed. */ + if (!prog_argc || !prog_argv || !prog_argv[0] || !prog_name) + return; + +#ifdef SINGLE_BINARY + if (false); + /* Look up the right main program. */ +# define SINGLE_BINARY_PROGRAM(prog_name_str, main_name) \ + else if (STREQ (prog_name_str, prog_name)) \ + prog_main = single_binary_main_##main_name; +# include "coreutils.h" +# undef SINGLE_BINARY_PROGRAM +#endif + + if (! prog_main) + return; + +#if HAVE_PRCTL && defined PR_SET_NAME + /* Not being able to set the program name is not a fatal error. */ + prctl (PR_SET_NAME, prog_argv[0]); +#endif +#if HAVE_PRCTL && defined PR_SET_MM_ARG_START + /* Shift the beginning of the command line to prog_argv[0] (if set) so + /proc/$pid/cmdline reflects a more specific value. Note one needs + CAP_SYS_RESOURCE or root privileges for this to succeed. */ + prctl (PR_SET_MM, PR_SET_MM_ARG_START, prog_argv[0], 0, 0); +#endif + + exit (prog_main (prog_argc, prog_argv)); +} + +int +main (int argc, char **argv) +{ + char *prog_name = last_component (argv[0]); + int optc; + + /* Map external name to internal name. */ + char ginstall[] = "ginstall"; + if (STREQ (prog_name, "install")) + prog_name = ginstall; + + /* If this program is called directly as "coreutils" or if the value of + argv[0] is an unknown tool (which "coreutils" is), we proceed and parse + the options. */ + launch_program (prog_name, argc, argv); + + /* No known program was selected via argv[0]. Try parsing the first + argument as --coreutils-prog=PROGRAM to determine the program. The + invocation for this case should be: + path/to/coreutils --coreutils-prog=someprog someprog ... + The third argument is what the program will see as argv[0]. */ + + if (argc >= 2) + { + size_t nskip = 0; + char *arg_name = nullptr; + + /* If calling coreutils directly, the "script" name isn't passed. + Distinguish the two cases with a -shebang suffix. */ + if (STRPREFIX (argv[1], "--coreutils-prog=")) + { + nskip = 1; + arg_name = prog_name = argv[1] + strlen ("--coreutils-prog="); + } + else if (STRPREFIX (argv[1], "--coreutils-prog-shebang=")) + { + nskip = 2; + prog_name = argv[1] + strlen ("--coreutils-prog-shebang="); + if (argc >= 3) + arg_name = last_component (argv[2]); + else + arg_name = prog_name; + } + + if (nskip) + { + argv[nskip] = arg_name; /* XXX: Discards any specified path. */ + launch_program (prog_name, argc - nskip, argv + nskip); + error (EXIT_FAILURE, 0, _("unknown program %s"), + quote (prog_name)); + } + } + + /* No known program was selected. From here on, we behave like any other + coreutils program. */ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + atexit (close_stdout); + + if ((optc = getopt_long (argc, argv, "", long_options, nullptr)) != -1) + switch (optc) + { + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + } + + /* Only print the error message when no options have been passed + to coreutils. */ + if (optind == 1 && prog_name && !STREQ (prog_name, "coreutils")) + error (0, 0, _("unknown program %s"), + quote (prog_name)); + + usage (EXIT_FAILURE); +} diff --git a/src/cp-hash.c b/src/cp-hash.c new file mode 100644 index 0000000..b481274 --- /dev/null +++ b/src/cp-hash.c @@ -0,0 +1,155 @@ +/* cp-hash.c -- file copying (hash search routines) + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Torbjörn Granlund, Sweden (tege@sics.se). + Rewritten to use lib/hash.c by Jim Meyering. */ + +#include + +#include +#include "system.h" + +#include "hash.h" +#include "cp-hash.h" + +/* Use ST_DEV and ST_INO as the key, FILENAME as the value. + These are used e.g., in copy.c to associate the destination name with + the source device/inode pair so that if we encounter a matching dev/ino + pair in the source tree we can arrange to create a hard link between + the corresponding names in the destination tree. */ +struct Src_to_dest +{ + ino_t st_ino; + dev_t st_dev; + /* Destination file name (of non-directory or pre-existing directory) + corresponding to the dev/ino of a copied file, or the destination file + name corresponding to a dev/ino pair for a newly-created directory. */ + char *name; +}; + +/* This table maps source dev/ino to destination file name. + We use it to preserve hard links when copying. */ +static Hash_table *src_to_dest; + +/* Initial size of the above hash table. */ +#define INITIAL_TABLE_SIZE 103 + +static size_t +src_to_dest_hash (void const *x, size_t table_size) +{ + struct Src_to_dest const *p = x; + + /* Ignoring the device number here should be fine. */ + /* The cast to uintmax_t prevents negative remainders + if st_ino is negative. */ + return (uintmax_t) p->st_ino % table_size; +} + +/* Compare two Src_to_dest entries. + Return true if their keys are judged 'equal'. */ +static bool +src_to_dest_compare (void const *x, void const *y) +{ + struct Src_to_dest const *a = x; + struct Src_to_dest const *b = y; + return SAME_INODE (*a, *b) ? true : false; +} + +static void +src_to_dest_free (void *x) +{ + struct Src_to_dest *a = x; + free (a->name); + free (x); +} + +/* Remove the entry matching INO/DEV from the table + that maps source ino/dev to destination file name. */ +extern void +forget_created (ino_t ino, dev_t dev) +{ + struct Src_to_dest probe; + struct Src_to_dest *ent; + + probe.st_ino = ino; + probe.st_dev = dev; + probe.name = nullptr; + + ent = hash_remove (src_to_dest, &probe); + if (ent) + src_to_dest_free (ent); +} + +/* If INO/DEV correspond to an already-copied source file, return the + name of the corresponding destination file. Otherwise, return nullptr. */ + +extern char * +src_to_dest_lookup (ino_t ino, dev_t dev) +{ + struct Src_to_dest ent; + struct Src_to_dest const *e; + ent.st_ino = ino; + ent.st_dev = dev; + e = hash_lookup (src_to_dest, &ent); + return e ? e->name : nullptr; +} + +/* Add file NAME, copied from inode number INO and device number DEV, + to the list of files we have copied. + Return nullptr if inserted, otherwise a non-null pointer. */ + +extern char * +remember_copied (char const *name, ino_t ino, dev_t dev) +{ + struct Src_to_dest *ent; + struct Src_to_dest *ent_from_table; + + ent = xmalloc (sizeof *ent); + ent->name = xstrdup (name); + ent->st_ino = ino; + ent->st_dev = dev; + + ent_from_table = hash_insert (src_to_dest, ent); + if (ent_from_table == nullptr) + { + /* Insertion failed due to lack of memory. */ + xalloc_die (); + } + + /* Determine whether there was already an entry in the table + with a matching key. If so, free ENT (it wasn't inserted) and + return the 'name' from the table entry. */ + if (ent_from_table != ent) + { + src_to_dest_free (ent); + return (char *) ent_from_table->name; + } + + /* New key; insertion succeeded. */ + return nullptr; +} + +/* Initialize the hash table. */ +extern void +hash_init (void) +{ + src_to_dest = hash_initialize (INITIAL_TABLE_SIZE, nullptr, + src_to_dest_hash, + src_to_dest_compare, + src_to_dest_free); + if (src_to_dest == nullptr) + xalloc_die (); +} diff --git a/src/cp-hash.h b/src/cp-hash.h new file mode 100644 index 0000000..989fcc5 --- /dev/null +++ b/src/cp-hash.h @@ -0,0 +1,5 @@ +void hash_init (void); +void forget_created (ino_t ino, dev_t dev); +char *remember_copied (char const *node, ino_t ino, dev_t dev) + _GL_ATTRIBUTE_NONNULL (); +char *src_to_dest_lookup (ino_t ino, dev_t dev); diff --git a/src/cp.c b/src/cp.c new file mode 100644 index 0000000..04a5cbe --- /dev/null +++ b/src/cp.c @@ -0,0 +1,1290 @@ +/* cp.c -- file copying (main routines) + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Torbjörn Granlund, David MacKenzie, and Jim Meyering. */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "backupfile.h" +#include "copy.h" +#include "cp-hash.h" +#include "filenamecat.h" +#include "ignore-value.h" +#include "quote.h" +#include "stat-time.h" +#include "targetdir.h" +#include "utimens.h" +#include "acl.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "cp" + +#define AUTHORS \ + proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +/* Used by do_copy, make_dir_parents_private, and re_protect + to keep a list of leading directories whose protections + need to be fixed after copying. */ +struct dir_attr +{ + struct stat st; + bool restore_mode; + size_t slash_offset; + struct dir_attr *next; +}; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + ATTRIBUTES_ONLY_OPTION = CHAR_MAX + 1, + COPY_CONTENTS_OPTION, + DEBUG_OPTION, + NO_PRESERVE_ATTRIBUTES_OPTION, + PARENTS_OPTION, + PRESERVE_ATTRIBUTES_OPTION, + REFLINK_OPTION, + SPARSE_OPTION, + STRIP_TRAILING_SLASHES_OPTION, + UNLINK_DEST_BEFORE_OPENING +}; + +/* True if the kernel is SELinux enabled. */ +static bool selinux_enabled; + +/* If true, the command "cp x/e_file e_dir" uses "e_dir/x/e_file" + as its destination instead of the usual "e_dir/e_file." */ +static bool parents_option = false; + +/* Remove any trailing slashes from each SOURCE argument. */ +static bool remove_trailing_slashes; + +static char const *const sparse_type_string[] = +{ + "never", "auto", "always", nullptr +}; +static enum Sparse_type const sparse_type[] = +{ + SPARSE_NEVER, SPARSE_AUTO, SPARSE_ALWAYS +}; +ARGMATCH_VERIFY (sparse_type_string, sparse_type); + +static char const *const reflink_type_string[] = +{ + "auto", "always", "never", nullptr +}; +static enum Reflink_type const reflink_type[] = +{ + REFLINK_AUTO, REFLINK_ALWAYS, REFLINK_NEVER +}; +ARGMATCH_VERIFY (reflink_type_string, reflink_type); + +static char const *const update_type_string[] = +{ + "all", "none", "older", nullptr +}; +static enum Update_type const update_type[] = +{ + UPDATE_ALL, UPDATE_NONE, UPDATE_OLDER, +}; +ARGMATCH_VERIFY (update_type_string, update_type); + +static struct option const long_opts[] = +{ + {"archive", no_argument, nullptr, 'a'}, + {"attributes-only", no_argument, nullptr, ATTRIBUTES_ONLY_OPTION}, + {"backup", optional_argument, nullptr, 'b'}, + {"copy-contents", no_argument, nullptr, COPY_CONTENTS_OPTION}, + {"debug", no_argument, nullptr, DEBUG_OPTION}, + {"dereference", no_argument, nullptr, 'L'}, + {"force", no_argument, nullptr, 'f'}, + {"interactive", no_argument, nullptr, 'i'}, + {"link", no_argument, nullptr, 'l'}, + {"no-clobber", no_argument, nullptr, 'n'}, + {"no-dereference", no_argument, nullptr, 'P'}, + {"no-preserve", required_argument, nullptr, NO_PRESERVE_ATTRIBUTES_OPTION}, + {"no-target-directory", no_argument, nullptr, 'T'}, + {"one-file-system", no_argument, nullptr, 'x'}, + {"parents", no_argument, nullptr, PARENTS_OPTION}, + {"path", no_argument, nullptr, PARENTS_OPTION}, /* Deprecated. */ + {"preserve", optional_argument, nullptr, PRESERVE_ATTRIBUTES_OPTION}, + {"recursive", no_argument, nullptr, 'R'}, + {"remove-destination", no_argument, nullptr, UNLINK_DEST_BEFORE_OPENING}, + {"sparse", required_argument, nullptr, SPARSE_OPTION}, + {"reflink", optional_argument, nullptr, REFLINK_OPTION}, + {"strip-trailing-slashes", no_argument, nullptr, + STRIP_TRAILING_SLASHES_OPTION}, + {"suffix", required_argument, nullptr, 'S'}, + {"symbolic-link", no_argument, nullptr, 's'}, + {"target-directory", required_argument, nullptr, 't'}, + {"update", optional_argument, nullptr, 'u'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_SELINUX_CONTEXT_OPTION_DECL}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [-T] SOURCE DEST\n\ + or: %s [OPTION]... SOURCE... DIRECTORY\n\ + or: %s [OPTION]... -t DIRECTORY SOURCE...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Copy SOURCE to DEST, or multiple SOURCE(s) to DIRECTORY.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -a, --archive same as -dR --preserve=all\n\ + --attributes-only don't copy the file data, just the attributes\n\ + --backup[=CONTROL] make a backup of each existing destination file\ +\n\ + -b like --backup but does not accept an argument\n\ + --copy-contents copy contents of special files when recursive\n\ + -d same as --no-dereference --preserve=links\n\ +"), stdout); + fputs (_("\ + --debug explain how a file is copied. Implies -v\n\ +"), stdout); + fputs (_("\ + -f, --force if an existing destination file cannot be\n\ + opened, remove it and try again (this option\n\ + is ignored when the -n option is also used)\n\ + -i, --interactive prompt before overwrite (overrides a previous -n\ +\n\ + option)\n\ + -H follow command-line symbolic links in SOURCE\n\ +"), stdout); + fputs (_("\ + -l, --link hard link files instead of copying\n\ + -L, --dereference always follow symbolic links in SOURCE\n\ +"), stdout); + fputs (_("\ + -n, --no-clobber do not overwrite an existing file (overrides a\n\ + -u or previous -i option). See also --update\n\ +"), stdout); + fputs (_("\ + -P, --no-dereference never follow symbolic links in SOURCE\n\ +"), stdout); + fputs (_("\ + -p same as --preserve=mode,ownership,timestamps\n\ + --preserve[=ATTR_LIST] preserve the specified attributes\n\ +"), stdout); + fputs (_("\ + --no-preserve=ATTR_LIST don't preserve the specified attributes\n\ + --parents use full source file name under DIRECTORY\n\ +"), stdout); + fputs (_("\ + -R, -r, --recursive copy directories recursively\n\ + --reflink[=WHEN] control clone/CoW copies. See below\n\ + --remove-destination remove each existing destination file before\n\ + attempting to open it (contrast with --force)\ +\n"), stdout); + fputs (_("\ + --sparse=WHEN control creation of sparse files. See below\n\ + --strip-trailing-slashes remove any trailing slashes from each SOURCE\n\ + argument\n\ +"), stdout); + fputs (_("\ + -s, --symbolic-link make symbolic links instead of copying\n\ + -S, --suffix=SUFFIX override the usual backup suffix\n\ + -t, --target-directory=DIRECTORY copy all SOURCE arguments into DIRECTORY\n\ + -T, --no-target-directory treat DEST as a normal file\n\ +"), stdout); + fputs (_("\ + --update[=UPDATE] control which existing files are updated;\n\ + UPDATE={all,none,older(default)}. See below\n\ + -u equivalent to --update[=older]\n\ +"), stdout); + fputs (_("\ + -v, --verbose explain what is being done\n\ +"), stdout); + fputs (_("\ + -x, --one-file-system stay on this file system\n\ +"), stdout); + fputs (_("\ + -Z set SELinux security context of destination\n\ + file to default type\n\ + --context[=CTX] like -Z, or if CTX is specified then set the\n\ + SELinux or SMACK security context to CTX\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +ATTR_LIST is a comma-separated list of attributes. Attributes are 'mode' for\n\ +permissions (including any ACL and xattr permissions), 'ownership' for user\n\ +and group, 'timestamps' for file timestamps, 'links' for hard links, 'context'\ +\nfor security context, 'xattr' for extended attributes, and 'all' for all\n\ +attributes.\n\ +"), stdout); + fputs (_("\ +\n\ +By default, sparse SOURCE files are detected by a crude heuristic and the\n\ +corresponding DEST file is made sparse as well. That is the behavior\n\ +selected by --sparse=auto. Specify --sparse=always to create a sparse DEST\n\ +file whenever the SOURCE file contains a long enough sequence of zero bytes.\n\ +Use --sparse=never to inhibit creation of sparse files.\n\ +"), stdout); + emit_update_parameters_note (); + fputs (_("\ +\n\ +When --reflink[=always] is specified, perform a lightweight copy, where the\n\ +data blocks are copied only when modified. If this is not possible the copy\n\ +fails, or if --reflink=auto is specified, fall back to a standard copy.\n\ +Use --reflink=never to ensure a standard copy is performed.\n\ +"), stdout); + emit_backup_suffix_note (); + fputs (_("\ +\n\ +As a special case, cp makes a backup of SOURCE when the force and backup\n\ +options are given and SOURCE and DEST are the same name for an existing,\n\ +regular file.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Ensure that parents of CONST_DST_NAME have correct protections, for + the --parents option. This is done after all copying has been + completed, to allow permissions that don't include user write/execute. + + DST_SRC_NAME is the suffix of CONST_DST_NAME that is the source file name, + DST_DIRFD+DST_RELNAME is equivalent to CONST_DST_NAME, and + DST_RELNAME equals DST_SRC_NAME after skipping any leading '/'s. + + ATTR_LIST is a null-terminated linked list of structures that + indicates the end of the filename of each intermediate directory + in CONST_DST_NAME that may need to have its attributes changed. + The command 'cp --parents --preserve a/b/c d/e_dir' changes the + attributes of the directories d/e_dir/a and d/e_dir/a/b to match + the corresponding source directories regardless of whether they + existed before the 'cp' command was given. + + Return true if the parent of CONST_DST_NAME and any intermediate + directories specified by ATTR_LIST have the proper permissions + when done. */ + +static bool +re_protect (char const *const_dst_name, char const *dst_src_name, + int dst_dirfd, char const *dst_relname, + struct dir_attr *attr_list, const struct cp_options *x) +{ + struct dir_attr *p; + char *dst_name; /* A copy of CONST_DST_NAME we can change. */ + + ASSIGN_STRDUPA (dst_name, const_dst_name); + + /* The suffix of DST_NAME that is a copy of the source file name, + possibly truncated to name a parent directory. */ + char const *src_name = dst_name + (dst_src_name - const_dst_name); + + /* Likewise, but with any leading '/'s skipped. */ + char const *relname = dst_name + (dst_relname - const_dst_name); + + for (p = attr_list; p; p = p->next) + { + dst_name[p->slash_offset] = '\0'; + + /* Adjust the times (and if possible, ownership) for the copy. + chown turns off set[ug]id bits for non-root, + so do the chmod last. */ + + if (x->preserve_timestamps) + { + struct timespec timespec[2]; + + timespec[0] = get_stat_atime (&p->st); + timespec[1] = get_stat_mtime (&p->st); + + if (utimensat (dst_dirfd, relname, timespec, 0)) + { + error (0, errno, _("failed to preserve times for %s"), + quoteaf (dst_name)); + return false; + } + } + + if (x->preserve_ownership) + { + if (lchownat (dst_dirfd, relname, p->st.st_uid, p->st.st_gid) + != 0) + { + if (! chown_failure_ok (x)) + { + error (0, errno, _("failed to preserve ownership for %s"), + quoteaf (dst_name)); + return false; + } + /* Failing to preserve ownership is OK. Still, try to preserve + the group, but ignore the possible error. */ + ignore_value (lchownat (dst_dirfd, relname, -1, p->st.st_gid)); + } + } + + if (x->preserve_mode) + { + if (copy_acl (src_name, -1, dst_name, -1, p->st.st_mode) != 0) + return false; + } + else if (p->restore_mode) + { + if (lchmodat (dst_dirfd, relname, p->st.st_mode) != 0) + { + error (0, errno, _("failed to preserve permissions for %s"), + quoteaf (dst_name)); + return false; + } + } + + dst_name[p->slash_offset] = '/'; + } + return true; +} + +/* Ensure that the parent directory of CONST_DIR exists, for + the --parents option. + + SRC_OFFSET is the index in CONST_DIR (which is a destination + directory) of the beginning of the source directory name. + Create any leading directories that don't already exist. + DST_DIRFD is a file descriptor for the target directory. + If VERBOSE_FMT_STRING is nonzero, use it as a printf format + string for printing a message after successfully making a directory. + The format should take two string arguments: the names of the + source and destination directories. + Creates a linked list of attributes of intermediate directories, + *ATTR_LIST, for re_protect to use after calling copy. + Sets *NEW_DST if this function creates parent of CONST_DIR. + + Return true if parent of CONST_DIR exists as a directory with the proper + permissions when done. */ + +/* FIXME: Synch this function with the one in ../lib/mkdir-p.c. */ + +static bool +make_dir_parents_private (char const *const_dir, size_t src_offset, + int dst_dirfd, + char const *verbose_fmt_string, + struct dir_attr **attr_list, bool *new_dst, + const struct cp_options *x) +{ + struct stat stats; + char *dir; /* A copy of CONST_DIR we can change. */ + char *src; /* Source name in DIR. */ + char *dst_dir; /* Leading directory of DIR. */ + idx_t dirlen = dir_len (const_dir); + + *attr_list = nullptr; + + /* Succeed immediately if the parent of CONST_DIR must already exist, + as the target directory has already been checked. */ + if (dirlen <= src_offset) + return true; + + ASSIGN_STRDUPA (dir, const_dir); + + src = dir + src_offset; + + dst_dir = alloca (dirlen + 1); + memcpy (dst_dir, dir, dirlen); + dst_dir[dirlen] = '\0'; + char const *dst_reldir = dst_dir + src_offset; + while (*dst_reldir == '/') + dst_reldir++; + + /* XXX: If all dirs are present at the destination, + no permissions or security contexts will be updated. */ + if (fstatat (dst_dirfd, dst_reldir, &stats, 0) != 0) + { + /* A parent of CONST_DIR does not exist. + Make all missing intermediate directories. */ + char *slash; + + slash = src; + while (*slash == '/') + slash++; + dst_reldir = slash; + + while ((slash = strchr (slash, '/'))) + { + struct dir_attr *new; + bool missing_dir; + + *slash = '\0'; + missing_dir = fstatat (dst_dirfd, dst_reldir, &stats, 0) != 0; + + if (missing_dir || x->preserve_ownership || x->preserve_mode + || x->preserve_timestamps) + { + /* Add this directory to the list of directories whose + modes might need fixing later. */ + struct stat src_st; + int src_errno = (stat (src, &src_st) != 0 + ? errno + : S_ISDIR (src_st.st_mode) + ? 0 + : ENOTDIR); + if (src_errno) + { + error (0, src_errno, _("failed to get attributes of %s"), + quoteaf (src)); + return false; + } + + new = xmalloc (sizeof *new); + new->st = src_st; + new->slash_offset = slash - dir; + new->restore_mode = false; + new->next = *attr_list; + *attr_list = new; + } + + /* If required set the default context for created dirs. */ + if (! set_process_security_ctx (src, dir, + missing_dir ? new->st.st_mode : 0, + missing_dir, x)) + return false; + + if (missing_dir) + { + mode_t src_mode; + mode_t omitted_permissions; + mode_t mkdir_mode; + + /* This component does not exist. We must set + *new_dst and new->st.st_mode inside this loop because, + for example, in the command 'cp --parents ../a/../b/c e_dir', + make_dir_parents_private creates only e_dir/../a if + ./b already exists. */ + *new_dst = true; + src_mode = new->st.st_mode; + + /* If the ownership or special mode bits might change, + omit some permissions at first, so unauthorized users + cannot nip in before the file is ready. */ + omitted_permissions = (src_mode + & (x->preserve_ownership + ? S_IRWXG | S_IRWXO + : x->preserve_mode + ? S_IWGRP | S_IWOTH + : 0)); + + /* POSIX says mkdir's behavior is implementation-defined when + (src_mode & ~S_IRWXUGO) != 0. However, common practice is + to ask mkdir to copy all the CHMOD_MODE_BITS, letting mkdir + decide what to do with S_ISUID | S_ISGID | S_ISVTX. */ + mkdir_mode = x->explicit_no_preserve_mode ? S_IRWXUGO : src_mode; + mkdir_mode &= CHMOD_MODE_BITS & ~omitted_permissions; + if (mkdirat (dst_dirfd, dst_reldir, mkdir_mode) != 0) + { + error (0, errno, _("cannot make directory %s"), + quoteaf (dir)); + return false; + } + else + { + if (verbose_fmt_string != nullptr) + printf (verbose_fmt_string, src, dir); + } + + /* We need search and write permissions to the new directory + for writing the directory's contents. Check if these + permissions are there. */ + + if (fstatat (dst_dirfd, dst_reldir, &stats, AT_SYMLINK_NOFOLLOW)) + { + error (0, errno, _("failed to get attributes of %s"), + quoteaf (dir)); + return false; + } + + + if (! x->preserve_mode) + { + if (omitted_permissions & ~stats.st_mode) + omitted_permissions &= ~ cached_umask (); + if (omitted_permissions & ~stats.st_mode + || (stats.st_mode & S_IRWXU) != S_IRWXU) + { + new->st.st_mode = stats.st_mode | omitted_permissions; + new->restore_mode = true; + } + } + + mode_t accessible = stats.st_mode | S_IRWXU; + if (stats.st_mode != accessible) + { + /* Make the new directory searchable and writable. + The original permissions will be restored later. */ + + if (lchmodat (dst_dirfd, dst_reldir, accessible) != 0) + { + error (0, errno, _("setting permissions for %s"), + quoteaf (dir)); + return false; + } + } + } + else if (!S_ISDIR (stats.st_mode)) + { + error (0, 0, _("%s exists but is not a directory"), + quoteaf (dir)); + return false; + } + else + *new_dst = false; + + /* For existing dirs, set the security context as per that already + set for the process global context. */ + if (! *new_dst + && (x->set_security_context || x->preserve_security_context)) + { + if (! set_file_security_ctx (dir, false, x) + && x->require_preserve_context) + return false; + } + + *slash++ = '/'; + + /* Avoid unnecessary calls to 'stat' when given + file names containing multiple adjacent slashes. */ + while (*slash == '/') + slash++; + } + } + + /* We get here if the parent of DIR already exists. */ + + else if (!S_ISDIR (stats.st_mode)) + { + error (0, 0, _("%s exists but is not a directory"), quoteaf (dst_dir)); + return false; + } + else + { + *new_dst = false; + } + return true; +} + +/* Scan the arguments, and copy each by calling copy. + Return true if successful. */ + +static bool +do_copy (int n_files, char **file, char const *target_directory, + bool no_target_directory, struct cp_options *x) +{ + struct stat sb; + bool new_dst = false; + bool ok = true; + + if (n_files <= !target_directory) + { + if (n_files <= 0) + error (0, 0, _("missing file operand")); + else + error (0, 0, _("missing destination file operand after %s"), + quoteaf (file[0])); + usage (EXIT_FAILURE); + } + + sb.st_mode = 0; + int target_dirfd = AT_FDCWD; + if (no_target_directory) + { + if (target_directory) + error (EXIT_FAILURE, 0, + _("cannot combine --target-directory (-t) " + "and --no-target-directory (-T)")); + if (2 < n_files) + { + error (0, 0, _("extra operand %s"), quoteaf (file[2])); + usage (EXIT_FAILURE); + } + } + else if (target_directory) + { + target_dirfd = target_directory_operand (target_directory, &sb); + if (! target_dirfd_valid (target_dirfd)) + error (EXIT_FAILURE, errno, _("target directory %s"), + quoteaf (target_directory)); + } + else + { + char const *lastfile = file[n_files - 1]; + int fd = target_directory_operand (lastfile, &sb); + if (target_dirfd_valid (fd)) + { + target_dirfd = fd; + target_directory = lastfile; + n_files--; + } + else + { + int err = errno; + if (err == ENOENT) + new_dst = true; + + /* The last operand LASTFILE cannot be opened as a directory. + If there are more than two operands, report an error. + + Also, report an error if LASTFILE is known to be a directory + even though it could not be opened, which can happen if + opening failed with EACCES on a platform lacking O_PATH. + In this case use stat to test whether LASTFILE is a + directory, in case opening a non-directory with (O_SEARCH + | O_DIRECTORY) failed with EACCES not ENOTDIR. */ + if (2 < n_files + || (O_PATHSEARCH == O_SEARCH && err == EACCES + && (sb.st_mode || stat (lastfile, &sb) == 0) + && S_ISDIR (sb.st_mode))) + error (EXIT_FAILURE, err, _("target %s"), quoteaf (lastfile)); + } + } + + if (target_directory) + { + /* cp file1...filen edir + Copy the files 'file1' through 'filen' + to the existing directory 'edir'. */ + + /* Initialize these hash tables only if we'll need them. + The problems they're used to detect can arise only if + there are two or more files to copy. */ + if (2 <= n_files) + { + dest_info_init (x); + src_info_init (x); + } + + for (int i = 0; i < n_files; i++) + { + char *dst_name; + bool parent_exists = true; /* True if dir_name (dst_name) exists. */ + struct dir_attr *attr_list; + char *arg_in_concat; + char *arg = file[i]; + + /* Trailing slashes are meaningful (i.e., maybe worth preserving) + only in the source file names. */ + if (remove_trailing_slashes) + strip_trailing_slashes (arg); + + if (parents_option) + { + char *arg_no_trailing_slash; + + /* Use 'arg' without trailing slashes in constructing destination + file names. Otherwise, we can end up trying to create a + directory using a name with trailing slash, which fails on + NetBSD 1.[34] systems. */ + ASSIGN_STRDUPA (arg_no_trailing_slash, arg); + strip_trailing_slashes (arg_no_trailing_slash); + + /* Append all of 'arg' (minus any trailing slash) to 'dest'. */ + dst_name = file_name_concat (target_directory, + arg_no_trailing_slash, + &arg_in_concat); + + /* For --parents, we have to make sure that the directory + dir_name (dst_name) exists. We may have to create a few + leading directories. */ + parent_exists = + (make_dir_parents_private + (dst_name, arg_in_concat - dst_name, target_dirfd, + (x->verbose ? "%s -> %s\n" : nullptr), + &attr_list, &new_dst, x)); + } + else + { + char *arg_base; + /* Append the last component of 'arg' to 'target_directory'. */ + ASSIGN_STRDUPA (arg_base, last_component (arg)); + strip_trailing_slashes (arg_base); + /* For 'cp -R source/.. dest', don't copy into 'dest/..'. */ + arg_base += STREQ (arg_base, ".."); + dst_name = file_name_concat (target_directory, arg_base, + &arg_in_concat); + } + + if (!parent_exists) + { + /* make_dir_parents_private failed, so don't even + attempt the copy. */ + ok = false; + } + else + { + char const *dst_relname = arg_in_concat; + while (*dst_relname == '/') + dst_relname++; + + bool copy_into_self; + ok &= copy (arg, dst_name, target_dirfd, dst_relname, + new_dst, x, ©_into_self, nullptr); + + if (parents_option) + ok &= re_protect (dst_name, arg_in_concat, target_dirfd, + dst_relname, attr_list, x); + } + + if (parents_option) + { + while (attr_list) + { + struct dir_attr *p = attr_list; + attr_list = attr_list->next; + free (p); + } + } + + free (dst_name); + } + } + else /* !target_directory */ + { + char const *source = file[0]; + char const *dest = file[1]; + bool unused; + + if (parents_option) + { + error (0, 0, + _("with --parents, the destination must be a directory")); + usage (EXIT_FAILURE); + } + + /* When the force and backup options have been specified and + the source and destination are the same name for an existing + regular file, convert the user's command, e.g., + 'cp --force --backup foo foo' to 'cp --force foo fooSUFFIX' + where SUFFIX is determined by any version control options used. */ + + if (x->unlink_dest_after_failed_open + && x->backup_type != no_backups + && STREQ (source, dest) + && !new_dst + && (sb.st_mode != 0 || stat (dest, &sb) == 0) && S_ISREG (sb.st_mode)) + { + static struct cp_options x_tmp; + + dest = find_backup_file_name (AT_FDCWD, dest, x->backup_type); + /* Set x->backup_type to 'no_backups' so that the normal backup + mechanism is not used when performing the actual copy. + backup_type must be set to 'no_backups' only *after* the above + call to find_backup_file_name -- that function uses + backup_type to determine the suffix it applies. */ + x_tmp = *x; + x_tmp.backup_type = no_backups; + x = &x_tmp; + } + + ok = copy (source, dest, AT_FDCWD, dest, -new_dst, x, &unused, nullptr); + } + + return ok; +} + +static void +cp_option_init (struct cp_options *x) +{ + cp_options_default (x); + x->copy_as_regular = true; + x->dereference = DEREF_UNDEFINED; + x->unlink_dest_before_opening = false; + x->unlink_dest_after_failed_open = false; + x->hard_link = false; + x->interactive = I_UNSPECIFIED; + x->move_mode = false; + x->install_mode = false; + x->one_file_system = false; + x->reflink_mode = REFLINK_AUTO; + + x->preserve_ownership = false; + x->preserve_links = false; + x->preserve_mode = false; + x->preserve_timestamps = false; + x->explicit_no_preserve_mode = false; + x->preserve_security_context = false; /* -a or --preserve=context. */ + x->require_preserve_context = false; /* --preserve=context. */ + x->set_security_context = nullptr; /* -Z, set sys default context. */ + x->preserve_xattr = false; + x->reduce_diagnostics = false; + x->require_preserve_xattr = false; + + x->data_copy_required = true; + x->require_preserve = false; + x->recursive = false; + x->sparse_mode = SPARSE_AUTO; + x->symbolic_link = false; + x->set_mode = false; + x->mode = 0; + + /* Not used. */ + x->stdin_tty = false; + + x->update = false; + x->verbose = false; + + /* By default, refuse to open a dangling destination symlink, because + in general one cannot do that safely, give the current semantics of + open's O_EXCL flag, (which POSIX doesn't even allow cp to use, btw). + But POSIX requires it. */ + x->open_dangling_dest_symlink = getenv ("POSIXLY_CORRECT") != nullptr; + + x->dest_info = nullptr; + x->src_info = nullptr; +} + +/* Given a string, ARG, containing a comma-separated list of arguments + to the --preserve option, set the appropriate fields of X to ON_OFF. */ +static void +decode_preserve_arg (char const *arg, struct cp_options *x, bool on_off) +{ + enum File_attribute + { + PRESERVE_MODE, + PRESERVE_TIMESTAMPS, + PRESERVE_OWNERSHIP, + PRESERVE_LINK, + PRESERVE_CONTEXT, + PRESERVE_XATTR, + PRESERVE_ALL + }; + static enum File_attribute const preserve_vals[] = + { + PRESERVE_MODE, PRESERVE_TIMESTAMPS, + PRESERVE_OWNERSHIP, PRESERVE_LINK, PRESERVE_CONTEXT, PRESERVE_XATTR, + PRESERVE_ALL + }; + /* Valid arguments to the '--preserve' option. */ + static char const *const preserve_args[] = + { + "mode", "timestamps", + "ownership", "links", "context", "xattr", "all", nullptr + }; + ARGMATCH_VERIFY (preserve_args, preserve_vals); + + char *arg_writable = xstrdup (arg); + char *s = arg_writable; + do + { + /* find next comma */ + char *comma = strchr (s, ','); + enum File_attribute val; + + /* If we found a comma, put a NUL in its place and advance. */ + if (comma) + *comma++ = 0; + + /* process S. */ + val = XARGMATCH (on_off ? "--preserve" : "--no-preserve", + s, preserve_args, preserve_vals); + switch (val) + { + case PRESERVE_MODE: + x->preserve_mode = on_off; + x->explicit_no_preserve_mode = !on_off; + break; + + case PRESERVE_TIMESTAMPS: + x->preserve_timestamps = on_off; + break; + + case PRESERVE_OWNERSHIP: + x->preserve_ownership = on_off; + break; + + case PRESERVE_LINK: + x->preserve_links = on_off; + break; + + case PRESERVE_CONTEXT: + x->require_preserve_context = on_off; + x->preserve_security_context = on_off; + break; + + case PRESERVE_XATTR: + x->preserve_xattr = on_off; + x->require_preserve_xattr = on_off; + break; + + case PRESERVE_ALL: + x->preserve_mode = on_off; + x->preserve_timestamps = on_off; + x->preserve_ownership = on_off; + x->preserve_links = on_off; + x->explicit_no_preserve_mode = !on_off; + if (selinux_enabled) + x->preserve_security_context = on_off; + x->preserve_xattr = on_off; + break; + + default: + affirm (false); + } + s = comma; + } + while (s); + + free (arg_writable); +} + +int +main (int argc, char **argv) +{ + int c; + bool ok; + bool make_backups = false; + char const *backup_suffix = nullptr; + char *version_control_string = nullptr; + struct cp_options x; + bool copy_contents = false; + char *target_directory = nullptr; + bool no_target_directory = false; + char const *scontext = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdin); + + selinux_enabled = (0 < is_selinux_enabled ()); + cp_option_init (&x); + + while ((c = getopt_long (argc, argv, "abdfHilLnprst:uvxPRS:TZ", + long_opts, nullptr)) + != -1) + { + switch (c) + { + case SPARSE_OPTION: + x.sparse_mode = XARGMATCH ("--sparse", optarg, + sparse_type_string, sparse_type); + break; + + case REFLINK_OPTION: + if (optarg == nullptr) + x.reflink_mode = REFLINK_ALWAYS; + else + x.reflink_mode = XARGMATCH ("--reflink", optarg, + reflink_type_string, reflink_type); + break; + + case 'a': + /* Like -dR --preserve=all with reduced failure diagnostics. */ + x.dereference = DEREF_NEVER; + x.preserve_links = true; + x.preserve_ownership = true; + x.preserve_mode = true; + x.preserve_timestamps = true; + x.require_preserve = true; + if (selinux_enabled) + x.preserve_security_context = true; + x.preserve_xattr = true; + x.reduce_diagnostics = true; + x.recursive = true; + break; + + case 'b': + make_backups = true; + if (optarg) + version_control_string = optarg; + break; + + case ATTRIBUTES_ONLY_OPTION: + x.data_copy_required = false; + break; + + case DEBUG_OPTION: + x.debug = x.verbose = true; + break; + + case COPY_CONTENTS_OPTION: + copy_contents = true; + break; + + case 'd': + x.preserve_links = true; + x.dereference = DEREF_NEVER; + break; + + case 'f': + x.unlink_dest_after_failed_open = true; + break; + + case 'H': + x.dereference = DEREF_COMMAND_LINE_ARGUMENTS; + break; + + case 'i': + x.interactive = I_ASK_USER; + break; + + case 'l': + x.hard_link = true; + break; + + case 'L': + x.dereference = DEREF_ALWAYS; + break; + + case 'n': + x.interactive = I_ALWAYS_NO; + break; + + case 'P': + x.dereference = DEREF_NEVER; + break; + + case NO_PRESERVE_ATTRIBUTES_OPTION: + decode_preserve_arg (optarg, &x, false); + break; + + case PRESERVE_ATTRIBUTES_OPTION: + if (optarg == nullptr) + { + /* Fall through to the case for 'p' below. */ + } + else + { + decode_preserve_arg (optarg, &x, true); + x.require_preserve = true; + break; + } + FALLTHROUGH; + + case 'p': + x.preserve_ownership = true; + x.preserve_mode = true; + x.preserve_timestamps = true; + x.require_preserve = true; + break; + + case PARENTS_OPTION: + parents_option = true; + break; + + case 'r': + case 'R': + x.recursive = true; + break; + + case UNLINK_DEST_BEFORE_OPENING: + x.unlink_dest_before_opening = true; + break; + + case STRIP_TRAILING_SLASHES_OPTION: + remove_trailing_slashes = true; + break; + + case 's': + x.symbolic_link = true; + break; + + case 't': + if (target_directory) + error (EXIT_FAILURE, 0, + _("multiple target directories specified")); + target_directory = optarg; + break; + + case 'T': + no_target_directory = true; + break; + + case 'u': + if (optarg == nullptr) + x.update = true; + else if (x.interactive != I_ALWAYS_NO) /* -n takes precedence. */ + { + enum Update_type update_opt; + update_opt = XARGMATCH ("--update", optarg, + update_type_string, update_type); + if (update_opt == UPDATE_ALL) + { + /* Default cp operation. */ + x.update = false; + x.interactive = I_UNSPECIFIED; + } + else if (update_opt == UPDATE_NONE) + { + x.update = false; + x.interactive = I_ALWAYS_SKIP; + } + else if (update_opt == UPDATE_OLDER) + { + x.update = true; + x.interactive = I_UNSPECIFIED; + } + } + break; + + case 'v': + x.verbose = true; + break; + + case 'x': + x.one_file_system = true; + break; + + case 'Z': + /* politely decline if we're not on a selinux-enabled kernel. */ + if (selinux_enabled) + { + if (optarg) + scontext = optarg; + else + { + x.set_security_context = selabel_open (SELABEL_CTX_FILE, + nullptr, 0); + if (! x.set_security_context) + error (0, errno, _("warning: ignoring --context")); + } + } + else if (optarg) + { + error (0, 0, + _("warning: ignoring --context; " + "it requires an SELinux-enabled kernel")); + } + break; + + case 'S': + make_backups = true; + backup_suffix = optarg; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + /* With --sparse=never, disable reflinking so we create a non sparse copy. + This will also have the effect of disabling copy offload as that may + propagate holes. For e.g. FreeBSD documents that copy_file_range() + will try to propagate holes. */ + if (x.reflink_mode == REFLINK_AUTO && x.sparse_mode == SPARSE_NEVER) + x.reflink_mode = REFLINK_NEVER; + + if (x.hard_link && x.symbolic_link) + { + error (0, 0, _("cannot make both hard and symbolic links")); + usage (EXIT_FAILURE); + } + + if (x.interactive == I_ALWAYS_NO) + x.update = false; + + if (make_backups && x.interactive == I_ALWAYS_NO) + { + error (0, 0, + _("options --backup and --no-clobber are mutually exclusive")); + usage (EXIT_FAILURE); + } + + if (x.reflink_mode == REFLINK_ALWAYS && x.sparse_mode != SPARSE_AUTO) + { + error (0, 0, _("--reflink can be used only with --sparse=auto")); + usage (EXIT_FAILURE); + } + + x.backup_type = (make_backups + ? xget_version (_("backup type"), + version_control_string) + : no_backups); + set_simple_backup_suffix (backup_suffix); + + if (x.dereference == DEREF_UNDEFINED) + { + if (x.recursive && ! x.hard_link) + /* This is compatible with FreeBSD. */ + x.dereference = DEREF_NEVER; + else + x.dereference = DEREF_ALWAYS; + } + + if (x.recursive) + x.copy_as_regular = copy_contents; + + /* Ensure -Z overrides -a. */ + if ((x.set_security_context || scontext) + && ! x.require_preserve_context) + x.preserve_security_context = false; + + if (x.preserve_security_context && (x.set_security_context || scontext)) + error (EXIT_FAILURE, 0, + _("cannot set target context and preserve it")); + + if (x.require_preserve_context && ! selinux_enabled) + error (EXIT_FAILURE, 0, + _("cannot preserve security context " + "without an SELinux-enabled kernel")); + + /* FIXME: This handles new files. But what about existing files? + I.e., if updating a tree, new files would have the specified context, + but shouldn't existing files be updated for consistency like this? + if (scontext && !restorecon (nullptr, dst_path, 0)) + error (...); + */ + if (scontext && setfscreatecon (scontext) < 0) + error (EXIT_FAILURE, errno, + _("failed to set default file creation context to %s"), + quote (scontext)); + +#if !USE_XATTR + if (x.require_preserve_xattr) + error (EXIT_FAILURE, 0, _("cannot preserve extended attributes, cp is " + "built without xattr support")); +#endif + + /* Allocate space for remembering copied and created files. */ + + hash_init (); + + ok = do_copy (argc - optind, argv + optind, + target_directory, no_target_directory, &x); + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/crctab.c b/src/crctab.c new file mode 100644 index 0000000..fdf2fdf --- /dev/null +++ b/src/crctab.c @@ -0,0 +1,437 @@ +#include +#include + +uint_fast32_t const crctab[8][256] = { +{ + 0x00000000, + 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, + 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, + 0x2b4bcb61, 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, + 0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9, 0x5f15adac, + 0x5bd4b01b, 0x569796c2, 0x52568b75, 0x6a1936c8, 0x6ed82b7f, + 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 0x709f7b7a, + 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, + 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, + 0xbaea46ef, 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, + 0xa4ad16ea, 0xa06c0b5d, 0xd4326d90, 0xd0f37027, 0xddb056fe, + 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95, + 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, + 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, + 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, + 0x2ac12072, 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, + 0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca, 0x7897ab07, + 0x7c56b6b0, 0x71159069, 0x75d48dde, 0x6b93dddb, 0x6f52c06c, + 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, + 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, + 0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, + 0xbb60adfc, 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, + 0x832f1041, 0x87ee0df6, 0x99a95df3, 0x9d684044, 0x902b669d, + 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e, + 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 0xc6bcf05f, + 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, + 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, + 0x644fc637, 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, + 0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f, 0x5c007b8a, + 0x58c1663d, 0x558240e4, 0x51435d53, 0x251d3b9e, 0x21dc2629, + 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 0x3f9b762c, + 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, + 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, + 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, + 0xeba91bbc, 0xef68060b, 0xd727bbb6, 0xd3e6a601, 0xdea580d8, + 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, + 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 0xae3afba2, + 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, + 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, + 0x857130c3, 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, + 0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c, 0x7b827d21, + 0x7f436096, 0x7200464f, 0x76c15bf8, 0x68860bfd, 0x6c47164a, + 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 0x18197087, + 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, + 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, + 0x2056cd3a, 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, + 0xcc2b1d17, 0xc8ea00a0, 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, + 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18, + 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 0x89b8fd09, + 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, + 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, + 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 +}, +{ + 0x00000000, + 0xd219c1dc, 0xa0f29e0f, 0x72eb5fd3, 0x452421a9, 0x973de075, + 0xe5d6bfa6, 0x37cf7e7a, 0x8a484352, 0x5851828e, 0x2abadd5d, + 0xf8a31c81, 0xcf6c62fb, 0x1d75a327, 0x6f9efcf4, 0xbd873d28, + 0x10519b13, 0xc2485acf, 0xb0a3051c, 0x62bac4c0, 0x5575baba, + 0x876c7b66, 0xf58724b5, 0x279ee569, 0x9a19d841, 0x4800199d, + 0x3aeb464e, 0xe8f28792, 0xdf3df9e8, 0x0d243834, 0x7fcf67e7, + 0xadd6a63b, 0x20a33626, 0xf2baf7fa, 0x8051a829, 0x524869f5, + 0x6587178f, 0xb79ed653, 0xc5758980, 0x176c485c, 0xaaeb7574, + 0x78f2b4a8, 0x0a19eb7b, 0xd8002aa7, 0xefcf54dd, 0x3dd69501, + 0x4f3dcad2, 0x9d240b0e, 0x30f2ad35, 0xe2eb6ce9, 0x9000333a, + 0x4219f2e6, 0x75d68c9c, 0xa7cf4d40, 0xd5241293, 0x073dd34f, + 0xbabaee67, 0x68a32fbb, 0x1a487068, 0xc851b1b4, 0xff9ecfce, + 0x2d870e12, 0x5f6c51c1, 0x8d75901d, 0x41466c4c, 0x935fad90, + 0xe1b4f243, 0x33ad339f, 0x04624de5, 0xd67b8c39, 0xa490d3ea, + 0x76891236, 0xcb0e2f1e, 0x1917eec2, 0x6bfcb111, 0xb9e570cd, + 0x8e2a0eb7, 0x5c33cf6b, 0x2ed890b8, 0xfcc15164, 0x5117f75f, + 0x830e3683, 0xf1e56950, 0x23fca88c, 0x1433d6f6, 0xc62a172a, + 0xb4c148f9, 0x66d88925, 0xdb5fb40d, 0x094675d1, 0x7bad2a02, + 0xa9b4ebde, 0x9e7b95a4, 0x4c625478, 0x3e890bab, 0xec90ca77, + 0x61e55a6a, 0xb3fc9bb6, 0xc117c465, 0x130e05b9, 0x24c17bc3, + 0xf6d8ba1f, 0x8433e5cc, 0x562a2410, 0xebad1938, 0x39b4d8e4, + 0x4b5f8737, 0x994646eb, 0xae893891, 0x7c90f94d, 0x0e7ba69e, + 0xdc626742, 0x71b4c179, 0xa3ad00a5, 0xd1465f76, 0x035f9eaa, + 0x3490e0d0, 0xe689210c, 0x94627edf, 0x467bbf03, 0xfbfc822b, + 0x29e543f7, 0x5b0e1c24, 0x8917ddf8, 0xbed8a382, 0x6cc1625e, + 0x1e2a3d8d, 0xcc33fc51, 0x828cd898, 0x50951944, 0x227e4697, + 0xf067874b, 0xc7a8f931, 0x15b138ed, 0x675a673e, 0xb543a6e2, + 0x08c49bca, 0xdadd5a16, 0xa83605c5, 0x7a2fc419, 0x4de0ba63, + 0x9ff97bbf, 0xed12246c, 0x3f0be5b0, 0x92dd438b, 0x40c48257, + 0x322fdd84, 0xe0361c58, 0xd7f96222, 0x05e0a3fe, 0x770bfc2d, + 0xa5123df1, 0x189500d9, 0xca8cc105, 0xb8679ed6, 0x6a7e5f0a, + 0x5db12170, 0x8fa8e0ac, 0xfd43bf7f, 0x2f5a7ea3, 0xa22feebe, + 0x70362f62, 0x02dd70b1, 0xd0c4b16d, 0xe70bcf17, 0x35120ecb, + 0x47f95118, 0x95e090c4, 0x2867adec, 0xfa7e6c30, 0x889533e3, + 0x5a8cf23f, 0x6d438c45, 0xbf5a4d99, 0xcdb1124a, 0x1fa8d396, + 0xb27e75ad, 0x6067b471, 0x128ceba2, 0xc0952a7e, 0xf75a5404, + 0x254395d8, 0x57a8ca0b, 0x85b10bd7, 0x383636ff, 0xea2ff723, + 0x98c4a8f0, 0x4add692c, 0x7d121756, 0xaf0bd68a, 0xdde08959, + 0x0ff94885, 0xc3cab4d4, 0x11d37508, 0x63382adb, 0xb121eb07, + 0x86ee957d, 0x54f754a1, 0x261c0b72, 0xf405caae, 0x4982f786, + 0x9b9b365a, 0xe9706989, 0x3b69a855, 0x0ca6d62f, 0xdebf17f3, + 0xac544820, 0x7e4d89fc, 0xd39b2fc7, 0x0182ee1b, 0x7369b1c8, + 0xa1707014, 0x96bf0e6e, 0x44a6cfb2, 0x364d9061, 0xe45451bd, + 0x59d36c95, 0x8bcaad49, 0xf921f29a, 0x2b383346, 0x1cf74d3c, + 0xceee8ce0, 0xbc05d333, 0x6e1c12ef, 0xe36982f2, 0x3170432e, + 0x439b1cfd, 0x9182dd21, 0xa64da35b, 0x74546287, 0x06bf3d54, + 0xd4a6fc88, 0x6921c1a0, 0xbb38007c, 0xc9d35faf, 0x1bca9e73, + 0x2c05e009, 0xfe1c21d5, 0x8cf77e06, 0x5eeebfda, 0xf33819e1, + 0x2121d83d, 0x53ca87ee, 0x81d34632, 0xb61c3848, 0x6405f994, + 0x16eea647, 0xc4f7679b, 0x79705ab3, 0xab699b6f, 0xd982c4bc, + 0x0b9b0560, 0x3c547b1a, 0xee4dbac6, 0x9ca6e515, 0x4ebf24c9 +}, +{ + 0x00000000, + 0x01d8ac87, 0x03b1590e, 0x0269f589, 0x0762b21c, 0x06ba1e9b, + 0x04d3eb12, 0x050b4795, 0x0ec56438, 0x0f1dc8bf, 0x0d743d36, + 0x0cac91b1, 0x09a7d624, 0x087f7aa3, 0x0a168f2a, 0x0bce23ad, + 0x1d8ac870, 0x1c5264f7, 0x1e3b917e, 0x1fe33df9, 0x1ae87a6c, + 0x1b30d6eb, 0x19592362, 0x18818fe5, 0x134fac48, 0x129700cf, + 0x10fef546, 0x112659c1, 0x142d1e54, 0x15f5b2d3, 0x179c475a, + 0x1644ebdd, 0x3b1590e0, 0x3acd3c67, 0x38a4c9ee, 0x397c6569, + 0x3c7722fc, 0x3daf8e7b, 0x3fc67bf2, 0x3e1ed775, 0x35d0f4d8, + 0x3408585f, 0x3661add6, 0x37b90151, 0x32b246c4, 0x336aea43, + 0x31031fca, 0x30dbb34d, 0x269f5890, 0x2747f417, 0x252e019e, + 0x24f6ad19, 0x21fdea8c, 0x2025460b, 0x224cb382, 0x23941f05, + 0x285a3ca8, 0x2982902f, 0x2beb65a6, 0x2a33c921, 0x2f388eb4, + 0x2ee02233, 0x2c89d7ba, 0x2d517b3d, 0x762b21c0, 0x77f38d47, + 0x759a78ce, 0x7442d449, 0x714993dc, 0x70913f5b, 0x72f8cad2, + 0x73206655, 0x78ee45f8, 0x7936e97f, 0x7b5f1cf6, 0x7a87b071, + 0x7f8cf7e4, 0x7e545b63, 0x7c3daeea, 0x7de5026d, 0x6ba1e9b0, + 0x6a794537, 0x6810b0be, 0x69c81c39, 0x6cc35bac, 0x6d1bf72b, + 0x6f7202a2, 0x6eaaae25, 0x65648d88, 0x64bc210f, 0x66d5d486, + 0x670d7801, 0x62063f94, 0x63de9313, 0x61b7669a, 0x606fca1d, + 0x4d3eb120, 0x4ce61da7, 0x4e8fe82e, 0x4f5744a9, 0x4a5c033c, + 0x4b84afbb, 0x49ed5a32, 0x4835f6b5, 0x43fbd518, 0x4223799f, + 0x404a8c16, 0x41922091, 0x44996704, 0x4541cb83, 0x47283e0a, + 0x46f0928d, 0x50b47950, 0x516cd5d7, 0x5305205e, 0x52dd8cd9, + 0x57d6cb4c, 0x560e67cb, 0x54679242, 0x55bf3ec5, 0x5e711d68, + 0x5fa9b1ef, 0x5dc04466, 0x5c18e8e1, 0x5913af74, 0x58cb03f3, + 0x5aa2f67a, 0x5b7a5afd, 0xec564380, 0xed8eef07, 0xefe71a8e, + 0xee3fb609, 0xeb34f19c, 0xeaec5d1b, 0xe885a892, 0xe95d0415, + 0xe29327b8, 0xe34b8b3f, 0xe1227eb6, 0xe0fad231, 0xe5f195a4, + 0xe4293923, 0xe640ccaa, 0xe798602d, 0xf1dc8bf0, 0xf0042777, + 0xf26dd2fe, 0xf3b57e79, 0xf6be39ec, 0xf766956b, 0xf50f60e2, + 0xf4d7cc65, 0xff19efc8, 0xfec1434f, 0xfca8b6c6, 0xfd701a41, + 0xf87b5dd4, 0xf9a3f153, 0xfbca04da, 0xfa12a85d, 0xd743d360, + 0xd69b7fe7, 0xd4f28a6e, 0xd52a26e9, 0xd021617c, 0xd1f9cdfb, + 0xd3903872, 0xd24894f5, 0xd986b758, 0xd85e1bdf, 0xda37ee56, + 0xdbef42d1, 0xdee40544, 0xdf3ca9c3, 0xdd555c4a, 0xdc8df0cd, + 0xcac91b10, 0xcb11b797, 0xc978421e, 0xc8a0ee99, 0xcdaba90c, + 0xcc73058b, 0xce1af002, 0xcfc25c85, 0xc40c7f28, 0xc5d4d3af, + 0xc7bd2626, 0xc6658aa1, 0xc36ecd34, 0xc2b661b3, 0xc0df943a, + 0xc10738bd, 0x9a7d6240, 0x9ba5cec7, 0x99cc3b4e, 0x981497c9, + 0x9d1fd05c, 0x9cc77cdb, 0x9eae8952, 0x9f7625d5, 0x94b80678, + 0x9560aaff, 0x97095f76, 0x96d1f3f1, 0x93dab464, 0x920218e3, + 0x906bed6a, 0x91b341ed, 0x87f7aa30, 0x862f06b7, 0x8446f33e, + 0x859e5fb9, 0x8095182c, 0x814db4ab, 0x83244122, 0x82fceda5, + 0x8932ce08, 0x88ea628f, 0x8a839706, 0x8b5b3b81, 0x8e507c14, + 0x8f88d093, 0x8de1251a, 0x8c39899d, 0xa168f2a0, 0xa0b05e27, + 0xa2d9abae, 0xa3010729, 0xa60a40bc, 0xa7d2ec3b, 0xa5bb19b2, + 0xa463b535, 0xafad9698, 0xae753a1f, 0xac1ccf96, 0xadc46311, + 0xa8cf2484, 0xa9178803, 0xab7e7d8a, 0xaaa6d10d, 0xbce23ad0, + 0xbd3a9657, 0xbf5363de, 0xbe8bcf59, 0xbb8088cc, 0xba58244b, + 0xb831d1c2, 0xb9e97d45, 0xb2275ee8, 0xb3fff26f, 0xb19607e6, + 0xb04eab61, 0xb545ecf4, 0xb49d4073, 0xb6f4b5fa, 0xb72c197d +}, +{ + 0x00000000, + 0xdc6d9ab7, 0xbc1a28d9, 0x6077b26e, 0x7cf54c05, 0xa098d6b2, + 0xc0ef64dc, 0x1c82fe6b, 0xf9ea980a, 0x258702bd, 0x45f0b0d3, + 0x999d2a64, 0x851fd40f, 0x59724eb8, 0x3905fcd6, 0xe5686661, + 0xf7142da3, 0x2b79b714, 0x4b0e057a, 0x97639fcd, 0x8be161a6, + 0x578cfb11, 0x37fb497f, 0xeb96d3c8, 0x0efeb5a9, 0xd2932f1e, + 0xb2e49d70, 0x6e8907c7, 0x720bf9ac, 0xae66631b, 0xce11d175, + 0x127c4bc2, 0xeae946f1, 0x3684dc46, 0x56f36e28, 0x8a9ef49f, + 0x961c0af4, 0x4a719043, 0x2a06222d, 0xf66bb89a, 0x1303defb, + 0xcf6e444c, 0xaf19f622, 0x73746c95, 0x6ff692fe, 0xb39b0849, + 0xd3ecba27, 0x0f812090, 0x1dfd6b52, 0xc190f1e5, 0xa1e7438b, + 0x7d8ad93c, 0x61082757, 0xbd65bde0, 0xdd120f8e, 0x017f9539, + 0xe417f358, 0x387a69ef, 0x580ddb81, 0x84604136, 0x98e2bf5d, + 0x448f25ea, 0x24f89784, 0xf8950d33, 0xd1139055, 0x0d7e0ae2, + 0x6d09b88c, 0xb164223b, 0xade6dc50, 0x718b46e7, 0x11fcf489, + 0xcd916e3e, 0x28f9085f, 0xf49492e8, 0x94e32086, 0x488eba31, + 0x540c445a, 0x8861deed, 0xe8166c83, 0x347bf634, 0x2607bdf6, + 0xfa6a2741, 0x9a1d952f, 0x46700f98, 0x5af2f1f3, 0x869f6b44, + 0xe6e8d92a, 0x3a85439d, 0xdfed25fc, 0x0380bf4b, 0x63f70d25, + 0xbf9a9792, 0xa31869f9, 0x7f75f34e, 0x1f024120, 0xc36fdb97, + 0x3bfad6a4, 0xe7974c13, 0x87e0fe7d, 0x5b8d64ca, 0x470f9aa1, + 0x9b620016, 0xfb15b278, 0x277828cf, 0xc2104eae, 0x1e7dd419, + 0x7e0a6677, 0xa267fcc0, 0xbee502ab, 0x6288981c, 0x02ff2a72, + 0xde92b0c5, 0xcceefb07, 0x108361b0, 0x70f4d3de, 0xac994969, + 0xb01bb702, 0x6c762db5, 0x0c019fdb, 0xd06c056c, 0x3504630d, + 0xe969f9ba, 0x891e4bd4, 0x5573d163, 0x49f12f08, 0x959cb5bf, + 0xf5eb07d1, 0x29869d66, 0xa6e63d1d, 0x7a8ba7aa, 0x1afc15c4, + 0xc6918f73, 0xda137118, 0x067eebaf, 0x660959c1, 0xba64c376, + 0x5f0ca517, 0x83613fa0, 0xe3168dce, 0x3f7b1779, 0x23f9e912, + 0xff9473a5, 0x9fe3c1cb, 0x438e5b7c, 0x51f210be, 0x8d9f8a09, + 0xede83867, 0x3185a2d0, 0x2d075cbb, 0xf16ac60c, 0x911d7462, + 0x4d70eed5, 0xa81888b4, 0x74751203, 0x1402a06d, 0xc86f3ada, + 0xd4edc4b1, 0x08805e06, 0x68f7ec68, 0xb49a76df, 0x4c0f7bec, + 0x9062e15b, 0xf0155335, 0x2c78c982, 0x30fa37e9, 0xec97ad5e, + 0x8ce01f30, 0x508d8587, 0xb5e5e3e6, 0x69887951, 0x09ffcb3f, + 0xd5925188, 0xc910afe3, 0x157d3554, 0x750a873a, 0xa9671d8d, + 0xbb1b564f, 0x6776ccf8, 0x07017e96, 0xdb6ce421, 0xc7ee1a4a, + 0x1b8380fd, 0x7bf43293, 0xa799a824, 0x42f1ce45, 0x9e9c54f2, + 0xfeebe69c, 0x22867c2b, 0x3e048240, 0xe26918f7, 0x821eaa99, + 0x5e73302e, 0x77f5ad48, 0xab9837ff, 0xcbef8591, 0x17821f26, + 0x0b00e14d, 0xd76d7bfa, 0xb71ac994, 0x6b775323, 0x8e1f3542, + 0x5272aff5, 0x32051d9b, 0xee68872c, 0xf2ea7947, 0x2e87e3f0, + 0x4ef0519e, 0x929dcb29, 0x80e180eb, 0x5c8c1a5c, 0x3cfba832, + 0xe0963285, 0xfc14ccee, 0x20795659, 0x400ee437, 0x9c637e80, + 0x790b18e1, 0xa5668256, 0xc5113038, 0x197caa8f, 0x05fe54e4, + 0xd993ce53, 0xb9e47c3d, 0x6589e68a, 0x9d1cebb9, 0x4171710e, + 0x2106c360, 0xfd6b59d7, 0xe1e9a7bc, 0x3d843d0b, 0x5df38f65, + 0x819e15d2, 0x64f673b3, 0xb89be904, 0xd8ec5b6a, 0x0481c1dd, + 0x18033fb6, 0xc46ea501, 0xa419176f, 0x78748dd8, 0x6a08c61a, + 0xb6655cad, 0xd612eec3, 0x0a7f7474, 0x16fd8a1f, 0xca9010a8, + 0xaae7a2c6, 0x768a3871, 0x93e25e10, 0x4f8fc4a7, 0x2ff876c9, + 0xf395ec7e, 0xef171215, 0x337a88a2, 0x530d3acc, 0x8f60a07b +}, +{ + 0x00000000, + 0x490d678d, 0x921acf1a, 0xdb17a897, 0x20f48383, 0x69f9e40e, + 0xb2ee4c99, 0xfbe32b14, 0x41e90706, 0x08e4608b, 0xd3f3c81c, + 0x9afeaf91, 0x611d8485, 0x2810e308, 0xf3074b9f, 0xba0a2c12, + 0x83d20e0c, 0xcadf6981, 0x11c8c116, 0x58c5a69b, 0xa3268d8f, + 0xea2bea02, 0x313c4295, 0x78312518, 0xc23b090a, 0x8b366e87, + 0x5021c610, 0x192ca19d, 0xe2cf8a89, 0xabc2ed04, 0x70d54593, + 0x39d8221e, 0x036501af, 0x4a686622, 0x917fceb5, 0xd872a938, + 0x2391822c, 0x6a9ce5a1, 0xb18b4d36, 0xf8862abb, 0x428c06a9, + 0x0b816124, 0xd096c9b3, 0x999bae3e, 0x6278852a, 0x2b75e2a7, + 0xf0624a30, 0xb96f2dbd, 0x80b70fa3, 0xc9ba682e, 0x12adc0b9, + 0x5ba0a734, 0xa0438c20, 0xe94eebad, 0x3259433a, 0x7b5424b7, + 0xc15e08a5, 0x88536f28, 0x5344c7bf, 0x1a49a032, 0xe1aa8b26, + 0xa8a7ecab, 0x73b0443c, 0x3abd23b1, 0x06ca035e, 0x4fc764d3, + 0x94d0cc44, 0xddddabc9, 0x263e80dd, 0x6f33e750, 0xb4244fc7, + 0xfd29284a, 0x47230458, 0x0e2e63d5, 0xd539cb42, 0x9c34accf, + 0x67d787db, 0x2edae056, 0xf5cd48c1, 0xbcc02f4c, 0x85180d52, + 0xcc156adf, 0x1702c248, 0x5e0fa5c5, 0xa5ec8ed1, 0xece1e95c, + 0x37f641cb, 0x7efb2646, 0xc4f10a54, 0x8dfc6dd9, 0x56ebc54e, + 0x1fe6a2c3, 0xe40589d7, 0xad08ee5a, 0x761f46cd, 0x3f122140, + 0x05af02f1, 0x4ca2657c, 0x97b5cdeb, 0xdeb8aa66, 0x255b8172, + 0x6c56e6ff, 0xb7414e68, 0xfe4c29e5, 0x444605f7, 0x0d4b627a, + 0xd65ccaed, 0x9f51ad60, 0x64b28674, 0x2dbfe1f9, 0xf6a8496e, + 0xbfa52ee3, 0x867d0cfd, 0xcf706b70, 0x1467c3e7, 0x5d6aa46a, + 0xa6898f7e, 0xef84e8f3, 0x34934064, 0x7d9e27e9, 0xc7940bfb, + 0x8e996c76, 0x558ec4e1, 0x1c83a36c, 0xe7608878, 0xae6deff5, + 0x757a4762, 0x3c7720ef, 0x0d9406bc, 0x44996131, 0x9f8ec9a6, + 0xd683ae2b, 0x2d60853f, 0x646de2b2, 0xbf7a4a25, 0xf6772da8, + 0x4c7d01ba, 0x05706637, 0xde67cea0, 0x976aa92d, 0x6c898239, + 0x2584e5b4, 0xfe934d23, 0xb79e2aae, 0x8e4608b0, 0xc74b6f3d, + 0x1c5cc7aa, 0x5551a027, 0xaeb28b33, 0xe7bfecbe, 0x3ca84429, + 0x75a523a4, 0xcfaf0fb6, 0x86a2683b, 0x5db5c0ac, 0x14b8a721, + 0xef5b8c35, 0xa656ebb8, 0x7d41432f, 0x344c24a2, 0x0ef10713, + 0x47fc609e, 0x9cebc809, 0xd5e6af84, 0x2e058490, 0x6708e31d, + 0xbc1f4b8a, 0xf5122c07, 0x4f180015, 0x06156798, 0xdd02cf0f, + 0x940fa882, 0x6fec8396, 0x26e1e41b, 0xfdf64c8c, 0xb4fb2b01, + 0x8d23091f, 0xc42e6e92, 0x1f39c605, 0x5634a188, 0xadd78a9c, + 0xe4daed11, 0x3fcd4586, 0x76c0220b, 0xccca0e19, 0x85c76994, + 0x5ed0c103, 0x17dda68e, 0xec3e8d9a, 0xa533ea17, 0x7e244280, + 0x3729250d, 0x0b5e05e2, 0x4253626f, 0x9944caf8, 0xd049ad75, + 0x2baa8661, 0x62a7e1ec, 0xb9b0497b, 0xf0bd2ef6, 0x4ab702e4, + 0x03ba6569, 0xd8adcdfe, 0x91a0aa73, 0x6a438167, 0x234ee6ea, + 0xf8594e7d, 0xb15429f0, 0x888c0bee, 0xc1816c63, 0x1a96c4f4, + 0x539ba379, 0xa878886d, 0xe175efe0, 0x3a624777, 0x736f20fa, + 0xc9650ce8, 0x80686b65, 0x5b7fc3f2, 0x1272a47f, 0xe9918f6b, + 0xa09ce8e6, 0x7b8b4071, 0x328627fc, 0x083b044d, 0x413663c0, + 0x9a21cb57, 0xd32cacda, 0x28cf87ce, 0x61c2e043, 0xbad548d4, + 0xf3d82f59, 0x49d2034b, 0x00df64c6, 0xdbc8cc51, 0x92c5abdc, + 0x692680c8, 0x202be745, 0xfb3c4fd2, 0xb231285f, 0x8be90a41, + 0xc2e46dcc, 0x19f3c55b, 0x50fea2d6, 0xab1d89c2, 0xe210ee4f, + 0x390746d8, 0x700a2155, 0xca000d47, 0x830d6aca, 0x581ac25d, + 0x1117a5d0, 0xeaf48ec4, 0xa3f9e949, 0x78ee41de, 0x31e32653 +}, +{ + 0x00000000, + 0x1b280d78, 0x36501af0, 0x2d781788, 0x6ca035e0, 0x77883898, + 0x5af02f10, 0x41d82268, 0xd9406bc0, 0xc26866b8, 0xef107130, + 0xf4387c48, 0xb5e05e20, 0xaec85358, 0x83b044d0, 0x989849a8, + 0xb641ca37, 0xad69c74f, 0x8011d0c7, 0x9b39ddbf, 0xdae1ffd7, + 0xc1c9f2af, 0xecb1e527, 0xf799e85f, 0x6f01a1f7, 0x7429ac8f, + 0x5951bb07, 0x4279b67f, 0x03a19417, 0x1889996f, 0x35f18ee7, + 0x2ed9839f, 0x684289d9, 0x736a84a1, 0x5e129329, 0x453a9e51, + 0x04e2bc39, 0x1fcab141, 0x32b2a6c9, 0x299aabb1, 0xb102e219, + 0xaa2aef61, 0x8752f8e9, 0x9c7af591, 0xdda2d7f9, 0xc68ada81, + 0xebf2cd09, 0xf0dac071, 0xde0343ee, 0xc52b4e96, 0xe853591e, + 0xf37b5466, 0xb2a3760e, 0xa98b7b76, 0x84f36cfe, 0x9fdb6186, + 0x0743282e, 0x1c6b2556, 0x311332de, 0x2a3b3fa6, 0x6be31dce, + 0x70cb10b6, 0x5db3073e, 0x469b0a46, 0xd08513b2, 0xcbad1eca, + 0xe6d50942, 0xfdfd043a, 0xbc252652, 0xa70d2b2a, 0x8a753ca2, + 0x915d31da, 0x09c57872, 0x12ed750a, 0x3f956282, 0x24bd6ffa, + 0x65654d92, 0x7e4d40ea, 0x53355762, 0x481d5a1a, 0x66c4d985, + 0x7decd4fd, 0x5094c375, 0x4bbcce0d, 0x0a64ec65, 0x114ce11d, + 0x3c34f695, 0x271cfbed, 0xbf84b245, 0xa4acbf3d, 0x89d4a8b5, + 0x92fca5cd, 0xd32487a5, 0xc80c8add, 0xe5749d55, 0xfe5c902d, + 0xb8c79a6b, 0xa3ef9713, 0x8e97809b, 0x95bf8de3, 0xd467af8b, + 0xcf4fa2f3, 0xe237b57b, 0xf91fb803, 0x6187f1ab, 0x7aaffcd3, + 0x57d7eb5b, 0x4cffe623, 0x0d27c44b, 0x160fc933, 0x3b77debb, + 0x205fd3c3, 0x0e86505c, 0x15ae5d24, 0x38d64aac, 0x23fe47d4, + 0x622665bc, 0x790e68c4, 0x54767f4c, 0x4f5e7234, 0xd7c63b9c, + 0xccee36e4, 0xe196216c, 0xfabe2c14, 0xbb660e7c, 0xa04e0304, + 0x8d36148c, 0x961e19f4, 0xa5cb3ad3, 0xbee337ab, 0x939b2023, + 0x88b32d5b, 0xc96b0f33, 0xd243024b, 0xff3b15c3, 0xe41318bb, + 0x7c8b5113, 0x67a35c6b, 0x4adb4be3, 0x51f3469b, 0x102b64f3, + 0x0b03698b, 0x267b7e03, 0x3d53737b, 0x138af0e4, 0x08a2fd9c, + 0x25daea14, 0x3ef2e76c, 0x7f2ac504, 0x6402c87c, 0x497adff4, + 0x5252d28c, 0xcaca9b24, 0xd1e2965c, 0xfc9a81d4, 0xe7b28cac, + 0xa66aaec4, 0xbd42a3bc, 0x903ab434, 0x8b12b94c, 0xcd89b30a, + 0xd6a1be72, 0xfbd9a9fa, 0xe0f1a482, 0xa12986ea, 0xba018b92, + 0x97799c1a, 0x8c519162, 0x14c9d8ca, 0x0fe1d5b2, 0x2299c23a, + 0x39b1cf42, 0x7869ed2a, 0x6341e052, 0x4e39f7da, 0x5511faa2, + 0x7bc8793d, 0x60e07445, 0x4d9863cd, 0x56b06eb5, 0x17684cdd, + 0x0c4041a5, 0x2138562d, 0x3a105b55, 0xa28812fd, 0xb9a01f85, + 0x94d8080d, 0x8ff00575, 0xce28271d, 0xd5002a65, 0xf8783ded, + 0xe3503095, 0x754e2961, 0x6e662419, 0x431e3391, 0x58363ee9, + 0x19ee1c81, 0x02c611f9, 0x2fbe0671, 0x34960b09, 0xac0e42a1, + 0xb7264fd9, 0x9a5e5851, 0x81765529, 0xc0ae7741, 0xdb867a39, + 0xf6fe6db1, 0xedd660c9, 0xc30fe356, 0xd827ee2e, 0xf55ff9a6, + 0xee77f4de, 0xafafd6b6, 0xb487dbce, 0x99ffcc46, 0x82d7c13e, + 0x1a4f8896, 0x016785ee, 0x2c1f9266, 0x37379f1e, 0x76efbd76, + 0x6dc7b00e, 0x40bfa786, 0x5b97aafe, 0x1d0ca0b8, 0x0624adc0, + 0x2b5cba48, 0x3074b730, 0x71ac9558, 0x6a849820, 0x47fc8fa8, + 0x5cd482d0, 0xc44ccb78, 0xdf64c600, 0xf21cd188, 0xe934dcf0, + 0xa8ecfe98, 0xb3c4f3e0, 0x9ebce468, 0x8594e910, 0xab4d6a8f, + 0xb06567f7, 0x9d1d707f, 0x86357d07, 0xc7ed5f6f, 0xdcc55217, + 0xf1bd459f, 0xea9548e7, 0x720d014f, 0x69250c37, 0x445d1bbf, + 0x5f7516c7, 0x1ead34af, 0x058539d7, 0x28fd2e5f, 0x33d52327 +}, +{ + 0x00000000, + 0x4f576811, 0x9eaed022, 0xd1f9b833, 0x399cbdf3, 0x76cbd5e2, + 0xa7326dd1, 0xe86505c0, 0x73397be6, 0x3c6e13f7, 0xed97abc4, + 0xa2c0c3d5, 0x4aa5c615, 0x05f2ae04, 0xd40b1637, 0x9b5c7e26, + 0xe672f7cc, 0xa9259fdd, 0x78dc27ee, 0x378b4fff, 0xdfee4a3f, + 0x90b9222e, 0x41409a1d, 0x0e17f20c, 0x954b8c2a, 0xda1ce43b, + 0x0be55c08, 0x44b23419, 0xacd731d9, 0xe38059c8, 0x3279e1fb, + 0x7d2e89ea, 0xc824f22f, 0x87739a3e, 0x568a220d, 0x19dd4a1c, + 0xf1b84fdc, 0xbeef27cd, 0x6f169ffe, 0x2041f7ef, 0xbb1d89c9, + 0xf44ae1d8, 0x25b359eb, 0x6ae431fa, 0x8281343a, 0xcdd65c2b, + 0x1c2fe418, 0x53788c09, 0x2e5605e3, 0x61016df2, 0xb0f8d5c1, + 0xffafbdd0, 0x17cab810, 0x589dd001, 0x89646832, 0xc6330023, + 0x5d6f7e05, 0x12381614, 0xc3c1ae27, 0x8c96c636, 0x64f3c3f6, + 0x2ba4abe7, 0xfa5d13d4, 0xb50a7bc5, 0x9488f9e9, 0xdbdf91f8, + 0x0a2629cb, 0x457141da, 0xad14441a, 0xe2432c0b, 0x33ba9438, + 0x7cedfc29, 0xe7b1820f, 0xa8e6ea1e, 0x791f522d, 0x36483a3c, + 0xde2d3ffc, 0x917a57ed, 0x4083efde, 0x0fd487cf, 0x72fa0e25, + 0x3dad6634, 0xec54de07, 0xa303b616, 0x4b66b3d6, 0x0431dbc7, + 0xd5c863f4, 0x9a9f0be5, 0x01c375c3, 0x4e941dd2, 0x9f6da5e1, + 0xd03acdf0, 0x385fc830, 0x7708a021, 0xa6f11812, 0xe9a67003, + 0x5cac0bc6, 0x13fb63d7, 0xc202dbe4, 0x8d55b3f5, 0x6530b635, + 0x2a67de24, 0xfb9e6617, 0xb4c90e06, 0x2f957020, 0x60c21831, + 0xb13ba002, 0xfe6cc813, 0x1609cdd3, 0x595ea5c2, 0x88a71df1, + 0xc7f075e0, 0xbadefc0a, 0xf589941b, 0x24702c28, 0x6b274439, + 0x834241f9, 0xcc1529e8, 0x1dec91db, 0x52bbf9ca, 0xc9e787ec, + 0x86b0effd, 0x574957ce, 0x181e3fdf, 0xf07b3a1f, 0xbf2c520e, + 0x6ed5ea3d, 0x2182822c, 0x2dd0ee65, 0x62878674, 0xb37e3e47, + 0xfc295656, 0x144c5396, 0x5b1b3b87, 0x8ae283b4, 0xc5b5eba5, + 0x5ee99583, 0x11befd92, 0xc04745a1, 0x8f102db0, 0x67752870, + 0x28224061, 0xf9dbf852, 0xb68c9043, 0xcba219a9, 0x84f571b8, + 0x550cc98b, 0x1a5ba19a, 0xf23ea45a, 0xbd69cc4b, 0x6c907478, + 0x23c71c69, 0xb89b624f, 0xf7cc0a5e, 0x2635b26d, 0x6962da7c, + 0x8107dfbc, 0xce50b7ad, 0x1fa90f9e, 0x50fe678f, 0xe5f41c4a, + 0xaaa3745b, 0x7b5acc68, 0x340da479, 0xdc68a1b9, 0x933fc9a8, + 0x42c6719b, 0x0d91198a, 0x96cd67ac, 0xd99a0fbd, 0x0863b78e, + 0x4734df9f, 0xaf51da5f, 0xe006b24e, 0x31ff0a7d, 0x7ea8626c, + 0x0386eb86, 0x4cd18397, 0x9d283ba4, 0xd27f53b5, 0x3a1a5675, + 0x754d3e64, 0xa4b48657, 0xebe3ee46, 0x70bf9060, 0x3fe8f871, + 0xee114042, 0xa1462853, 0x49232d93, 0x06744582, 0xd78dfdb1, + 0x98da95a0, 0xb958178c, 0xf60f7f9d, 0x27f6c7ae, 0x68a1afbf, + 0x80c4aa7f, 0xcf93c26e, 0x1e6a7a5d, 0x513d124c, 0xca616c6a, + 0x8536047b, 0x54cfbc48, 0x1b98d459, 0xf3fdd199, 0xbcaab988, + 0x6d5301bb, 0x220469aa, 0x5f2ae040, 0x107d8851, 0xc1843062, + 0x8ed35873, 0x66b65db3, 0x29e135a2, 0xf8188d91, 0xb74fe580, + 0x2c139ba6, 0x6344f3b7, 0xb2bd4b84, 0xfdea2395, 0x158f2655, + 0x5ad84e44, 0x8b21f677, 0xc4769e66, 0x717ce5a3, 0x3e2b8db2, + 0xefd23581, 0xa0855d90, 0x48e05850, 0x07b73041, 0xd64e8872, + 0x9919e063, 0x02459e45, 0x4d12f654, 0x9ceb4e67, 0xd3bc2676, + 0x3bd923b6, 0x748e4ba7, 0xa577f394, 0xea209b85, 0x970e126f, + 0xd8597a7e, 0x09a0c24d, 0x46f7aa5c, 0xae92af9c, 0xe1c5c78d, + 0x303c7fbe, 0x7f6b17af, 0xe4376989, 0xab600198, 0x7a99b9ab, + 0x35ced1ba, 0xddabd47a, 0x92fcbc6b, 0x43050458, 0x0c526c49 +}, +{ + 0x00000000, + 0x5ba1dcca, 0xb743b994, 0xece2655e, 0x6a466e9f, 0x31e7b255, + 0xdd05d70b, 0x86a40bc1, 0xd48cdd3e, 0x8f2d01f4, 0x63cf64aa, + 0x386eb860, 0xbecab3a1, 0xe56b6f6b, 0x09890a35, 0x5228d6ff, + 0xadd8a7cb, 0xf6797b01, 0x1a9b1e5f, 0x413ac295, 0xc79ec954, + 0x9c3f159e, 0x70dd70c0, 0x2b7cac0a, 0x79547af5, 0x22f5a63f, + 0xce17c361, 0x95b61fab, 0x1312146a, 0x48b3c8a0, 0xa451adfe, + 0xfff07134, 0x5f705221, 0x04d18eeb, 0xe833ebb5, 0xb392377f, + 0x35363cbe, 0x6e97e074, 0x8275852a, 0xd9d459e0, 0x8bfc8f1f, + 0xd05d53d5, 0x3cbf368b, 0x671eea41, 0xe1bae180, 0xba1b3d4a, + 0x56f95814, 0x0d5884de, 0xf2a8f5ea, 0xa9092920, 0x45eb4c7e, + 0x1e4a90b4, 0x98ee9b75, 0xc34f47bf, 0x2fad22e1, 0x740cfe2b, + 0x262428d4, 0x7d85f41e, 0x91679140, 0xcac64d8a, 0x4c62464b, + 0x17c39a81, 0xfb21ffdf, 0xa0802315, 0xbee0a442, 0xe5417888, + 0x09a31dd6, 0x5202c11c, 0xd4a6cadd, 0x8f071617, 0x63e57349, + 0x3844af83, 0x6a6c797c, 0x31cda5b6, 0xdd2fc0e8, 0x868e1c22, + 0x002a17e3, 0x5b8bcb29, 0xb769ae77, 0xecc872bd, 0x13380389, + 0x4899df43, 0xa47bba1d, 0xffda66d7, 0x797e6d16, 0x22dfb1dc, + 0xce3dd482, 0x959c0848, 0xc7b4deb7, 0x9c15027d, 0x70f76723, + 0x2b56bbe9, 0xadf2b028, 0xf6536ce2, 0x1ab109bc, 0x4110d576, + 0xe190f663, 0xba312aa9, 0x56d34ff7, 0x0d72933d, 0x8bd698fc, + 0xd0774436, 0x3c952168, 0x6734fda2, 0x351c2b5d, 0x6ebdf797, + 0x825f92c9, 0xd9fe4e03, 0x5f5a45c2, 0x04fb9908, 0xe819fc56, + 0xb3b8209c, 0x4c4851a8, 0x17e98d62, 0xfb0be83c, 0xa0aa34f6, + 0x260e3f37, 0x7dafe3fd, 0x914d86a3, 0xcaec5a69, 0x98c48c96, + 0xc365505c, 0x2f873502, 0x7426e9c8, 0xf282e209, 0xa9233ec3, + 0x45c15b9d, 0x1e608757, 0x79005533, 0x22a189f9, 0xce43eca7, + 0x95e2306d, 0x13463bac, 0x48e7e766, 0xa4058238, 0xffa45ef2, + 0xad8c880d, 0xf62d54c7, 0x1acf3199, 0x416eed53, 0xc7cae692, + 0x9c6b3a58, 0x70895f06, 0x2b2883cc, 0xd4d8f2f8, 0x8f792e32, + 0x639b4b6c, 0x383a97a6, 0xbe9e9c67, 0xe53f40ad, 0x09dd25f3, + 0x527cf939, 0x00542fc6, 0x5bf5f30c, 0xb7179652, 0xecb64a98, + 0x6a124159, 0x31b39d93, 0xdd51f8cd, 0x86f02407, 0x26700712, + 0x7dd1dbd8, 0x9133be86, 0xca92624c, 0x4c36698d, 0x1797b547, + 0xfb75d019, 0xa0d40cd3, 0xf2fcda2c, 0xa95d06e6, 0x45bf63b8, + 0x1e1ebf72, 0x98bab4b3, 0xc31b6879, 0x2ff90d27, 0x7458d1ed, + 0x8ba8a0d9, 0xd0097c13, 0x3ceb194d, 0x674ac587, 0xe1eece46, + 0xba4f128c, 0x56ad77d2, 0x0d0cab18, 0x5f247de7, 0x0485a12d, + 0xe867c473, 0xb3c618b9, 0x35621378, 0x6ec3cfb2, 0x8221aaec, + 0xd9807626, 0xc7e0f171, 0x9c412dbb, 0x70a348e5, 0x2b02942f, + 0xada69fee, 0xf6074324, 0x1ae5267a, 0x4144fab0, 0x136c2c4f, + 0x48cdf085, 0xa42f95db, 0xff8e4911, 0x792a42d0, 0x228b9e1a, + 0xce69fb44, 0x95c8278e, 0x6a3856ba, 0x31998a70, 0xdd7bef2e, + 0x86da33e4, 0x007e3825, 0x5bdfe4ef, 0xb73d81b1, 0xec9c5d7b, + 0xbeb48b84, 0xe515574e, 0x09f73210, 0x5256eeda, 0xd4f2e51b, + 0x8f5339d1, 0x63b15c8f, 0x38108045, 0x9890a350, 0xc3317f9a, + 0x2fd31ac4, 0x7472c60e, 0xf2d6cdcf, 0xa9771105, 0x4595745b, + 0x1e34a891, 0x4c1c7e6e, 0x17bda2a4, 0xfb5fc7fa, 0xa0fe1b30, + 0x265a10f1, 0x7dfbcc3b, 0x9119a965, 0xcab875af, 0x3548049b, + 0x6ee9d851, 0x820bbd0f, 0xd9aa61c5, 0x5f0e6a04, 0x04afb6ce, + 0xe84dd390, 0xb3ec0f5a, 0xe1c4d9a5, 0xba65056f, 0x56876031, + 0x0d26bcfb, 0x8b82b73a, 0xd0236bf0, 0x3cc10eae, 0x6760d264 +}, +}; diff --git a/src/csplit.c b/src/csplit.c new file mode 100644 index 0000000..dca525a --- /dev/null +++ b/src/csplit.c @@ -0,0 +1,1486 @@ +/* csplit - split a file into sections determined by context lines + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au. + Modified by David MacKenzie, djm@gnu.ai.mit.edu. */ + +#include + +#include +#include +#include +#include + +#include "system.h" + +#include + +#include "fd-reopen.h" +#include "quote.h" +#include "safe-read.h" +#include "stdio--.h" +#include "xdectoint.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "csplit" + +#define AUTHORS \ + proper_name ("Stuart Kemp"), \ + proper_name ("David MacKenzie") + +/* The default prefix for output file names. */ +#define DEFAULT_PREFIX "xx" + +/* A compiled pattern arg. */ +struct control +{ + intmax_t offset; /* Offset from regexp to split at. */ + intmax_t lines_required; /* Number of lines required. */ + intmax_t repeat; /* Repeat count. */ + int argnum; /* ARGV index. */ + bool repeat_forever; /* True if '*' used as a repeat count. */ + bool ignore; /* If true, produce no output (for regexp). */ + bool regexpr; /* True if regular expression was used. */ + struct re_pattern_buffer re_compiled; /* Compiled regular expression. */ +}; + +/* Initial size of data area in buffers. */ +#define START_SIZE 8191 + +/* Number of lines kept in each node in line list. */ +#define CTRL_SIZE 80 + +#ifdef DEBUG +/* Some small values to test the algorithms. */ +# define START_SIZE 200 +# define CTRL_SIZE 1 +#endif + +/* A string with a length count. */ +struct cstring +{ + idx_t len; + char *str; +}; + +/* Pointers to the beginnings of lines in the buffer area. + These structures are linked together if needed. */ +struct line +{ + idx_t used; /* Number of offsets used in this struct. */ + idx_t insert_index; /* Next offset to use when inserting line. */ + idx_t retrieve_index; /* Next index to use when retrieving line. */ + struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */ + struct line *next; /* Next in linked list. */ +}; + +/* The structure to hold the input lines. + Contains a pointer to the data area and a list containing + pointers to the individual lines. */ +struct buffer_record +{ + idx_t bytes_alloc; /* Size of the buffer area. */ + idx_t bytes_used; /* Bytes used in the buffer area. */ + intmax_t start_line; /* First line number in this buffer. */ + intmax_t first_available; /* First line that can be retrieved. */ + idx_t num_lines; /* Number of complete lines in this buffer. */ + char *buffer; /* Data area. */ + struct line *line_start; /* Head of list of pointers to lines. */ + struct line *curr_line; /* The line start record currently in use. */ + struct buffer_record *next; +}; + +static void close_output_file (void); +static void create_output_file (void); +static void delete_all_files (bool); +static void save_line_to_file (const struct cstring *line); + +/* Start of buffer list. */ +static struct buffer_record *head = nullptr; + +/* Partially read line. */ +static char *hold_area = nullptr; + +/* Number of bytes in 'hold_area'. */ +static idx_t hold_count = 0; + +/* Number of the last line in the buffers. */ +static intmax_t last_line_number = 0; + +/* Number of the line currently being examined. */ +static intmax_t current_line = 0; + +/* If true, we have read EOF. */ +static bool have_read_eof = false; + +/* Name of output files. */ +static char *volatile filename_space = nullptr; + +/* Prefix part of output file names. */ +static char const *volatile prefix = nullptr; + +/* Suffix part of output file names. */ +static char *volatile suffix = nullptr; + +/* Number of digits to use in output file names. */ +static int volatile digits = 2; + +/* Number of files created so far. */ +static int volatile files_created = 0; + +/* Number of bytes written to current file. */ +static intmax_t bytes_written; + +/* Output file pointer. */ +static FILE *output_stream = nullptr; + +/* Output file name. */ +static char *output_filename = nullptr; + +/* Perhaps it would be cleaner to pass arg values instead of indexes. */ +static char **global_argv; + +/* If true, do not print the count of bytes in each output file. */ +static bool suppress_count; + +/* If true, remove output files on error. */ +static bool volatile remove_files; + +/* If true, remove all output files which have a zero length. */ +static bool elide_empty_files; + +/* If true, suppress the lines that match the PATTERN */ +static bool suppress_matched; + +/* The compiled pattern arguments, which determine how to split + the input file. */ +static struct control *controls; + +/* Number of elements in 'controls'. */ +static idx_t control_used; + +/* The set of signals that are caught. */ +static sigset_t caught_signals; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + {"digits", required_argument, nullptr, 'n'}, + {"quiet", no_argument, nullptr, 'q'}, + {"silent", no_argument, nullptr, 's'}, + {"keep-files", no_argument, nullptr, 'k'}, + {"elide-empty-files", no_argument, nullptr, 'z'}, + {"prefix", required_argument, nullptr, 'f'}, + {"suffix-format", required_argument, nullptr, 'b'}, + {"suppress-matched", no_argument, nullptr, SUPPRESS_MATCHED_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Optionally remove files created so far; then exit. + Called when an error detected. */ + +static void +cleanup (void) +{ + sigset_t oldset; + + close_output_file (); + + sigprocmask (SIG_BLOCK, &caught_signals, &oldset); + delete_all_files (false); + sigprocmask (SIG_SETMASK, &oldset, nullptr); +} + +static _Noreturn void +cleanup_fatal (void) +{ + cleanup (); + exit (EXIT_FAILURE); +} + +extern void +xalloc_die (void) +{ + error (0, 0, "%s", _("memory exhausted")); + cleanup_fatal (); +} + +static void +interrupt_handler (int sig) +{ + delete_all_files (true); + signal (sig, SIG_DFL); + /* The signal has been reset to SIG_DFL, but blocked during this + handler. Force the default action of this signal once the + handler returns and the block is removed. */ + raise (sig); +} + +/* Keep track of NUM bytes of a partial line in buffer START. + These bytes will be retrieved later when another large buffer is read. */ + +static void +save_to_hold_area (char *start, idx_t num) +{ + free (hold_area); + hold_area = start; + hold_count = num; +} + +/* Read up to MAX_N_BYTES bytes from the input stream into DEST. + Return the number of bytes read. */ + +static idx_t +read_input (char *dest, idx_t max_n_bytes) +{ + idx_t bytes_read; + + if (max_n_bytes == 0) + return 0; + + bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes); + + if (bytes_read == 0) + have_read_eof = true; + + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("read error")); + cleanup_fatal (); + } + + return bytes_read; +} + +/* Initialize existing line record P. */ + +static void +clear_line_control (struct line *p) +{ + p->used = 0; + p->insert_index = 0; + p->retrieve_index = 0; +} + +/* Return a new, initialized line record. */ + +static struct line * +new_line_control (void) +{ + struct line *p = xmalloc (sizeof *p); + + p->next = nullptr; + clear_line_control (p); + + return p; +} + +/* Record LINE_START, which is the address of the start of a line + of length LINE_LEN in the large buffer, in the lines buffer of B. */ + +static void +keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len) +{ + struct line *l; + + /* If there is no existing area to keep line info, get some. */ + if (b->line_start == nullptr) + b->line_start = b->curr_line = new_line_control (); + + /* If existing area for lines is full, get more. */ + if (b->curr_line->used == CTRL_SIZE) + { + b->curr_line->next = new_line_control (); + b->curr_line = b->curr_line->next; + } + + l = b->curr_line; + + /* Record the start of the line, and update counters. */ + l->starts[l->insert_index].str = line_start; + l->starts[l->insert_index].len = line_len; + l->used++; + l->insert_index++; +} + +/* Scan the buffer in B for newline characters + and record the line start locations and lengths in B. + Return the number of lines found in this buffer. + + There may be an incomplete line at the end of the buffer; + a pointer is kept to this area, which will be used when + the next buffer is filled. */ + +static idx_t +record_line_starts (struct buffer_record *b) +{ + char *line_start; /* Start of current line. */ + idx_t lines; /* Number of lines found. */ + idx_t line_length; /* Length of each line found. */ + + if (b->bytes_used == 0) + return 0; + + lines = 0; + line_start = b->buffer; + char *buffer_end = line_start + b->bytes_used; + *buffer_end = '\n'; + + while (true) + { + char *line_end = rawmemchr (line_start, '\n'); + if (line_end == buffer_end) + break; + line_length = line_end - line_start + 1; + keep_new_line (b, line_start, line_length); + line_start = line_end + 1; + lines++; + } + + /* Check for an incomplete last line. */ + idx_t bytes_left = buffer_end - line_start; + if (bytes_left) + { + if (have_read_eof) + { + keep_new_line (b, line_start, bytes_left); + lines++; + } + else + save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left); + } + + b->num_lines = lines; + b->first_available = b->start_line = last_line_number + 1; + last_line_number += lines; + + return lines; +} + +/* Work around . */ +#if 13 <= __GNUC__ +# pragma GCC diagnostic ignored "-Wanalyzer-mismatching-deallocation" +# pragma GCC diagnostic ignored "-Wanalyzer-use-after-free" +# pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value" +#endif + +static void +free_buffer (struct buffer_record *buf) +{ + for (struct line *l = buf->line_start; l;) + { + struct line *n = l->next; + free (l); + l = n; + } + free (buf->buffer); + free (buf); +} + +/* Return a new buffer of at least MINSIZE bytes. */ + +static ATTRIBUTE_DEALLOC (free_buffer, 1) +struct buffer_record * +get_new_buffer (idx_t min_size) +{ + struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer); + new_buffer->bytes_alloc = 0; + new_buffer->buffer = xpalloc (nullptr, &new_buffer->bytes_alloc, min_size, + -1, 1); + new_buffer->bytes_used = 0; + new_buffer->start_line = new_buffer->first_available = last_line_number + 1; + new_buffer->num_lines = 0; + new_buffer->line_start = new_buffer->curr_line = nullptr; + new_buffer->next = nullptr; + + return new_buffer; +} + +/* Append buffer BUF to the linked list of buffers that contain + some data yet to be processed. */ + +static void +save_buffer (struct buffer_record *buf) +{ + struct buffer_record *p; + + buf->next = nullptr; + buf->curr_line = buf->line_start; + + if (head == nullptr) + head = buf; + else + { + for (p = head; p->next; p = p->next) + /* Do nothing. */ ; + p->next = buf; + } +} + +/* Fill a buffer of input. + + Set the initial size of the buffer to a default. + Fill the buffer (from the hold area and input stream) + and find the individual lines. + If no lines are found (the buffer is too small to hold the next line), + release the current buffer (whose contents would have been put in the + hold area) and repeat the process with another large buffer until at least + one entire line has been read. + + Return true if a new buffer was obtained, otherwise false + (in which case end-of-file must have been encountered). */ + +static bool +load_buffer (void) +{ + if (have_read_eof) + return false; + + /* We must make the buffer at least as large as the amount of data + in the partial line left over from the last call, + plus room for a sentinel '\n'. */ + idx_t bytes_wanted = MAX (START_SIZE, hold_count + 1); + + while (true) + { + struct buffer_record *b = get_new_buffer (bytes_wanted); + idx_t bytes_alloc = b->bytes_alloc; + idx_t bytes_avail = bytes_alloc; + char *p = b->buffer; + + /* First check the 'holding' area for a partial line. */ + if (hold_count) + { + p = mempcpy (p, hold_area, hold_count); + b->bytes_used += hold_count; + bytes_avail -= hold_count; + hold_count = 0; + } + + b->bytes_used += read_input (p, bytes_avail - 1); + + if (record_line_starts (b) != 0) + { + save_buffer (b); + return true; + } + + free_buffer (b); + if (have_read_eof) + return false; + if (ckd_add (&bytes_wanted, bytes_alloc, bytes_alloc >> 1)) + xalloc_die (); + } +} + +/* Return the line number of the first line that has not yet been retrieved. */ + +static intmax_t +get_first_line_in_buffer (void) +{ + if (head == nullptr && !load_buffer ()) + error (EXIT_FAILURE, errno, _("input disappeared")); + + return head->first_available; +} + +/* Return a pointer to the logical first line in the buffer and make the + next line the logical first line. + Return nullptr if there is no more input. */ + +static struct cstring * +remove_line (void) +{ + /* If non-null, this is the buffer for which the previous call + returned the final line. So now, presuming that line has been + processed, we can free the buffer and reset this pointer. */ + static struct buffer_record *prev_buf = nullptr; + + struct cstring *line; /* Return value. */ + struct line *l; /* For convenience. */ + + if (prev_buf) + { + free_buffer (prev_buf); + prev_buf = nullptr; + } + + if (head == nullptr && !load_buffer ()) + return nullptr; + + if (current_line < head->first_available) + current_line = head->first_available; + + ++(head->first_available); + + l = head->curr_line; + + line = &l->starts[l->retrieve_index]; + + /* Advance index to next line. */ + if (++l->retrieve_index == l->used) + { + /* Go on to the next line record. */ + head->curr_line = l->next; + if (head->curr_line == nullptr || head->curr_line->used == 0) + { + /* Go on to the next data block. + but first record the current one so we can free it + once the line we're returning has been processed. */ + prev_buf = head; + head = head->next; + } + } + + return line; +} + +/* Search the buffers for line LINENUM, reading more input if necessary. + Return a pointer to the line, or nullptr if it is not found in the file. */ + +static struct cstring * +find_line (intmax_t linenum) +{ + struct buffer_record *b; + + if (head == nullptr && !load_buffer ()) + return nullptr; + + if (linenum < head->start_line) + return nullptr; + + for (b = head;;) + { + if (linenum < b->start_line + b->num_lines) + { + /* The line is in this buffer. */ + struct line *l; + idx_t offset; /* How far into the buffer the line is. */ + + l = b->line_start; + offset = linenum - b->start_line; + /* Find the control record. */ + while (offset >= CTRL_SIZE) + { + l = l->next; + offset -= CTRL_SIZE; + } + return &l->starts[offset]; + } + if (b->next == nullptr && !load_buffer ()) + return nullptr; + b = b->next; /* Try the next data block. */ + } +} + +/* Return true if at least one more line is available for input. */ + +static bool +no_more_lines (void) +{ + return find_line (current_line + 1) == nullptr; +} + +/* Open NAME as standard input. */ + +static void +set_input_file (char const *name) +{ + if (! STREQ (name, "-") && fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (name)); +} + +/* Write all lines from the beginning of the buffer up to, but + not including, line LAST_LINE, to the current output file. + If IGNORE is true, do not output lines selected here. + ARGNUM is the index in ARGV of the current pattern. */ + +static void +write_to_file (intmax_t last_line, bool ignore, int argnum) +{ + struct cstring *line; + intmax_t first_line; /* First available input line. */ + intmax_t lines; /* Number of lines to output. */ + intmax_t i; + + first_line = get_first_line_in_buffer (); + + if (first_line > last_line) + { + error (0, 0, _("%s: line number out of range"), + quote (global_argv[argnum])); + cleanup_fatal (); + } + + lines = last_line - first_line; + + for (i = 0; i < lines; i++) + { + line = remove_line (); + if (line == nullptr) + { + error (0, 0, _("%s: line number out of range"), + quote (global_argv[argnum])); + cleanup_fatal (); + } + if (!ignore) + save_line_to_file (line); + } +} + +/* Output any lines left after all regexps have been processed. */ + +static void +dump_rest_of_file (void) +{ + struct cstring *line; + + while ((line = remove_line ()) != nullptr) + save_line_to_file (line); +} + +/* Handle an attempt to read beyond EOF under the control of record P, + on iteration REPETITION if nonzero. */ + +static void +handle_line_error (const struct control *p, intmax_t repetition) +{ + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + + fprintf (stderr, _("%s: %s: line number out of range"), + program_name, quote (imaxtostr (p->lines_required, buf))); + if (repetition) + fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf)); + else + fprintf (stderr, "\n"); + + cleanup_fatal (); +} + +/* Determine the line number that marks the end of this file, + then get those lines and save them to the output file. + P is the control record. + REPETITION is the repetition number. */ + +static void +process_line_count (const struct control *p, intmax_t repetition) +{ + intmax_t linenum; + intmax_t last_line_to_save = p->lines_required * (repetition + 1); + + create_output_file (); + + /* Ensure that the line number specified is not 1 greater than + the number of lines in the file. + When suppressing matched lines, check before the loop. */ + if (no_more_lines () && suppress_matched) + handle_line_error (p, repetition); + + linenum = get_first_line_in_buffer (); + while (linenum++ < last_line_to_save) + { + struct cstring *line = remove_line (); + if (line == nullptr) + handle_line_error (p, repetition); + save_line_to_file (line); + } + + close_output_file (); + + if (suppress_matched) + remove_line (); + + /* Ensure that the line number specified is not 1 greater than + the number of lines in the file. */ + if (no_more_lines () && !suppress_matched) + handle_line_error (p, repetition); +} + +static void +regexp_error (struct control *p, intmax_t repetition, bool ignore) +{ + fprintf (stderr, _("%s: %s: match not found"), + program_name, quote (global_argv[p->argnum])); + + if (repetition) + { + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + fprintf (stderr, _(" on repetition %s\n"), imaxtostr (repetition, buf)); + } + else + fprintf (stderr, "\n"); + + if (!ignore) + { + dump_rest_of_file (); + close_output_file (); + } + cleanup_fatal (); +} + +/* Read the input until a line matches the regexp in P, outputting + it unless P->IGNORE is true. + REPETITION is this repeat-count; 0 means the first time. */ + +static void +process_regexp (struct control *p, intmax_t repetition) +{ + struct cstring *line; /* From input file. */ + idx_t line_len; /* To make "$" in regexps work. */ + intmax_t break_line; /* First line number of next file. */ + bool ignore = p->ignore; /* If true, skip this section. */ + regoff_t ret; + + if (!ignore) + create_output_file (); + + /* If there is no offset for the regular expression, or + it is positive, then it is not necessary to buffer the lines. */ + + if (p->offset >= 0) + { + while (true) + { + line = find_line (++current_line); + if (line == nullptr) + { + if (p->repeat_forever) + { + if (!ignore) + { + dump_rest_of_file (); + close_output_file (); + } + exit (EXIT_SUCCESS); + } + else + regexp_error (p, repetition, ignore); + } + line_len = line->len; + if (line->str[line_len - 1] == '\n') + line_len--; + ret = re_search (&p->re_compiled, line->str, line_len, + 0, line_len, nullptr); + if (ret == -2) + { + error (0, 0, _("error in regular expression search")); + cleanup_fatal (); + } + if (ret == -1) + { + line = remove_line (); + if (!ignore) + save_line_to_file (line); + } + else + break; + } + } + else + { + /* Buffer the lines. */ + while (true) + { + line = find_line (++current_line); + if (line == nullptr) + { + if (p->repeat_forever) + { + if (!ignore) + { + dump_rest_of_file (); + close_output_file (); + } + exit (EXIT_SUCCESS); + } + else + regexp_error (p, repetition, ignore); + } + line_len = line->len; + if (line->str[line_len - 1] == '\n') + line_len--; + ret = re_search (&p->re_compiled, line->str, line_len, + 0, line_len, nullptr); + if (ret == -2) + { + error (0, 0, _("error in regular expression search")); + cleanup_fatal (); + } + if (ret != -1) + break; + } + } + + /* Account for any offset from this regexp. */ + break_line = current_line + p->offset; + + write_to_file (break_line, ignore, p->argnum); + + if (!ignore) + close_output_file (); + + if (p->offset > 0) + current_line = break_line; + + if (suppress_matched) + remove_line (); +} + +/* Split the input file according to the control records we have built. */ + +static void +split_file (void) +{ + for (idx_t i = 0; i < control_used; i++) + { + intmax_t j; + if (controls[i].regexpr) + { + for (j = 0; (controls[i].repeat_forever + || j <= controls[i].repeat); j++) + process_regexp (&controls[i], j); + } + else + { + for (j = 0; (controls[i].repeat_forever + || j <= controls[i].repeat); j++) + process_line_count (&controls[i], j); + } + } + + create_output_file (); + dump_rest_of_file (); + close_output_file (); +} + +/* Return the name of output file number NUM. + + This function is called from a signal handler, so it should invoke + only reentrant functions that are async-signal-safe. POSIX does + not guarantee this for the functions called below, but we don't + know of any hosts where this implementation isn't safe. */ + +static char * +make_filename (int num) +{ + strcpy (filename_space, prefix); + if (suffix) + sprintf (filename_space + strlen (prefix), suffix, num); + else + sprintf (filename_space + strlen (prefix), "%0*d", digits, num); + return filename_space; +} + +/* Create the next output file. */ + +static void +create_output_file (void) +{ + int nfiles = files_created; + bool fopen_ok; + int fopen_errno; + + output_filename = make_filename (nfiles); + + if (nfiles == INT_MAX) + { + fopen_ok = false; + fopen_errno = EOVERFLOW; + } + else + { + /* Create the output file in a critical section, to avoid races. */ + sigset_t oldset; + sigprocmask (SIG_BLOCK, &caught_signals, &oldset); + output_stream = fopen (output_filename, "w"); + fopen_ok = (output_stream != nullptr); + fopen_errno = errno; + files_created = nfiles + fopen_ok; + sigprocmask (SIG_SETMASK, &oldset, nullptr); + } + + if (! fopen_ok) + { + error (0, fopen_errno, "%s", quotef (output_filename)); + cleanup_fatal (); + } + bytes_written = 0; +} + +/* If requested, delete all the files we have created. This function + must be called only from critical sections. */ + +static void +delete_all_files (bool in_signal_handler) +{ + if (! remove_files) + return; + + for (int i = files_created; 0 <= --i; ) + { + char const *name = make_filename (i); + if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler) + error (0, errno, "%s", quotef (name)); + } + + files_created = 0; +} + +/* Close the current output file and print the count + of characters in this file. */ + +static void +close_output_file (void) +{ + if (output_stream) + { + if (ferror (output_stream)) + { + error (0, 0, _("write error for %s"), quoteaf (output_filename)); + output_stream = nullptr; + cleanup_fatal (); + } + if (fclose (output_stream) != 0) + { + error (0, errno, "%s", quotef (output_filename)); + output_stream = nullptr; + cleanup_fatal (); + } + if (bytes_written == 0 && elide_empty_files) + { + sigset_t oldset; + bool unlink_ok; + int unlink_errno; + + /* Remove the output file in a critical section, to avoid races. */ + sigprocmask (SIG_BLOCK, &caught_signals, &oldset); + unlink_ok = (unlink (output_filename) == 0); + unlink_errno = errno; + files_created--; + sigprocmask (SIG_SETMASK, &oldset, nullptr); + + if (! unlink_ok && unlink_errno != ENOENT) + error (0, unlink_errno, "%s", quotef (output_filename)); + } + else + { + if (!suppress_count) + { + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + fprintf (stdout, "%s\n", imaxtostr (bytes_written, buf)); + } + } + output_stream = nullptr; + } +} + +/* Save line LINE to the output file and + increment the character count for the current file. */ + +static void +save_line_to_file (const struct cstring *line) +{ + idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream); + if (l != line->len) + { + error (0, errno, _("write error for %s"), quoteaf (output_filename)); + output_stream = nullptr; + cleanup_fatal (); + } + bytes_written += line->len; +} + +/* Return a new, initialized control record. */ + +static struct control * +new_control_record (void) +{ + static idx_t control_allocated = 0; /* Total space allocated. */ + struct control *p; + + if (control_used == control_allocated) + controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls); + p = &controls[control_used++]; + p->regexpr = false; + p->repeat = 0; + p->repeat_forever = false; + p->lines_required = 0; + p->offset = 0; + return p; +} + +/* Check if there is a numeric offset after a regular expression. + STR is the entire command line argument. + P is the control record for this regular expression. + NUM is the numeric part of STR. */ + +static void +check_for_offset (struct control *p, char const *str, char const *num) +{ + if (xstrtoimax (num, nullptr, 10, &p->offset, "") != LONGINT_OK) + error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), + quote (str)); +} + +/* Given that the first character of command line arg STR is '{', + make sure that the rest of the string is a valid repeat count + and store its value in P. + ARGNUM is the ARGV index of STR. */ + +static void +parse_repeat_count (int argnum, struct control *p, char *str) +{ + char *end; + + end = str + strlen (str) - 1; + if (*end != '}') + error (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"), + quote (str)); + *end = '\0'; + + if (str + 1 == end - 1 && *(str + 1) == '*') + p->repeat_forever = true; + else + { + uintmax_t val; + if (xstrtoumax (str + 1, nullptr, 10, &val, "") != LONGINT_OK + || INTMAX_MAX < val) + { + error (EXIT_FAILURE, 0, + _("%s}: integer required between '{' and '}'"), + quote (global_argv[argnum])); + } + p->repeat = val; + } + + *end = '}'; +} + +/* Extract the regular expression from STR and check for a numeric offset. + STR should start with the regexp delimiter character. + Return a new control record for the regular expression. + ARGNUM is the ARGV index of STR. + Unless IGNORE is true, mark these lines for output. */ + +static struct control * +extract_regexp (int argnum, bool ignore, char const *str) +{ + idx_t len; /* Number of bytes in this regexp. */ + char delim = *str; + char const *closing_delim; + struct control *p; + char const *err; + + closing_delim = strrchr (str + 1, delim); + if (closing_delim == nullptr) + error (EXIT_FAILURE, 0, + _("%s: closing delimiter '%c' missing"), str, delim); + + len = closing_delim - str - 1; + p = new_control_record (); + p->argnum = argnum; + p->ignore = ignore; + + p->regexpr = true; + p->re_compiled.buffer = nullptr; + p->re_compiled.allocated = 0; + p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1); + p->re_compiled.translate = nullptr; + re_syntax_options = + RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES; + err = re_compile_pattern (str + 1, len, &p->re_compiled); + if (err) + { + error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err); + cleanup_fatal (); + } + + if (closing_delim[1]) + check_for_offset (p, str, closing_delim + 1); + + return p; +} + +/* Extract the break patterns from args START through ARGC - 1 of ARGV. + After each pattern, check if the next argument is a repeat count. */ + +static void +parse_patterns (int argc, int start, char **argv) +{ + struct control *p; /* New control record created. */ + static intmax_t last_val = 0; + + for (int i = start; i < argc; i++) + { + if (*argv[i] == '/' || *argv[i] == '%') + { + p = extract_regexp (i, *argv[i] == '%', argv[i]); + } + else + { + p = new_control_record (); + p->argnum = i; + + uintmax_t val; + if (xstrtoumax (argv[i], nullptr, 10, &val, "") != LONGINT_OK + || INTMAX_MAX < val) + error (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i])); + if (val == 0) + error (EXIT_FAILURE, 0, + _("%s: line number must be greater than zero"), argv[i]); + if (val < last_val) + { + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + error (EXIT_FAILURE, 0, + _("line number %s is smaller than preceding line number," + " %s"), + quote (argv[i]), imaxtostr (last_val, buf)); + } + + if (val == last_val) + error (0, 0, + _("warning: line number %s is the same as preceding line number"), + quote (argv[i])); + + last_val = val; + + p->lines_required = val; + } + + if (i + 1 < argc && *argv[i + 1] == '{') + { + /* We have a repeat count. */ + i++; + parse_repeat_count (i, p, argv[i]); + } + } +} + + + +/* Names for the printf format flags ' and #. These can be ORed together. */ +enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 }; + +/* Scan the printf format flags in FORMAT, storing info about the + flags into *FLAGS_PTR. Return the number of flags found. */ +static idx_t +get_format_flags (char const *format, int *flags_ptr) +{ + int flags = 0; + + for (idx_t count = 0; ; count++) + { + switch (format[count]) + { + case '-': + case '0': + break; + + case '\'': + flags |= FLAG_THOUSANDS; + break; + + case '#': + flags |= FLAG_ALTERNATIVE; + break; + + default: + *flags_ptr = flags; + return count; + } + } +} + +/* Check that the printf format conversion specifier *FORMAT is valid + and compatible with FLAGS. Change it to 'd' if it is 'u', + since the format will be used with a signed value. */ +static void +check_format_conv_type (char *format, int flags) +{ + unsigned char ch = *format; + int compatible_flags = FLAG_THOUSANDS; + + switch (ch) + { + case 'd': + case 'i': + break; + + case 'u': + *format = 'd'; + break; + + case 'o': + case 'x': + case 'X': + compatible_flags = FLAG_ALTERNATIVE; + break; + + case 0: + error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix")); + + default: + if (isprint (ch)) + error (EXIT_FAILURE, 0, + _("invalid conversion specifier in suffix: %c"), ch); + else + error (EXIT_FAILURE, 0, + _("invalid conversion specifier in suffix: \\%.3o"), ch); + } + + if (flags & ~ compatible_flags) + error (EXIT_FAILURE, 0, + _("invalid flags in conversion specification: %%%c%c"), + (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch); +} + +/* Return the maximum number of bytes that can be generated by + applying FORMAT to an int value. If the format is + invalid, diagnose the problem and exit. */ +static idx_t +max_out (char *format) +{ + bool percent = false; + + for (char *f = format; *f; f++) + if (*f == '%' && *++f != '%') + { + if (percent) + error (EXIT_FAILURE, 0, + _("too many %% conversion specifications in suffix")); + percent = true; + int flags; + f += get_format_flags (f, &flags); + while (ISDIGIT (*f)) + f++; + if (*f == '.') + while (ISDIGIT (*++f)) + continue; + check_format_conv_type (f, flags); + } + + if (! percent) + error (EXIT_FAILURE, 0, + _("missing %% conversion specification in suffix")); + + int maxlen = snprintf (nullptr, 0, format, INT_MAX); + if (maxlen < 0) + xalloc_die (); + return maxlen; +} + +int +main (int argc, char **argv) +{ + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + global_argv = argv; + controls = nullptr; + control_used = 0; + suppress_count = false; + remove_files = true; + suppress_matched = false; + prefix = DEFAULT_PREFIX; + + while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, nullptr)) + != -1) + switch (optc) + { + case 'f': + prefix = optarg; + break; + + case 'b': + suffix = optarg; + break; + + case 'k': + remove_files = false; + break; + + case 'n': + digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "", + _("invalid number"), 0); + break; + + case 's': + case 'q': + suppress_count = true; + break; + + case 'z': + elide_empty_files = true; + break; + + case SUPPRESS_MATCHED_OPTION: + suppress_matched = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + + if (argc - optind < 2) + { + if (argc <= optind) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + idx_t prefix_len = strlen (prefix); + idx_t max_digit_string_len + = (suffix + ? max_out (suffix) + : MAX (INT_STRLEN_BOUND (int), digits)); + idx_t filename_size; + if (ckd_add (&filename_size, prefix_len, max_digit_string_len + 1)) + xalloc_die (); + filename_space = ximalloc (filename_size); + + set_input_file (argv[optind++]); + + parse_patterns (argc, optind, argv); + + { + int i; + static int const sig[] = + { + /* The usual suspects. */ + SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, +#ifdef SIGPOLL + SIGPOLL, +#endif +#ifdef SIGPROF + SIGPROF, +#endif +#ifdef SIGVTALRM + SIGVTALRM, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + enum { nsigs = ARRAY_CARDINALITY (sig) }; + + struct sigaction act; + + sigemptyset (&caught_signals); + for (i = 0; i < nsigs; i++) + { + sigaction (sig[i], nullptr, &act); + if (act.sa_handler != SIG_IGN) + sigaddset (&caught_signals, sig[i]); + } + + act.sa_handler = interrupt_handler; + act.sa_mask = caught_signals; + act.sa_flags = 0; + + for (i = 0; i < nsigs; i++) + if (sigismember (&caught_signals, sig[i])) + sigaction (sig[i], &act, nullptr); + } + + split_file (); + + if (close (STDIN_FILENO) != 0) + { + error (0, errno, _("read error")); + cleanup_fatal (); + } + + return EXIT_SUCCESS; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... FILE PATTERN...\n\ +"), + program_name); + fputs (_("\ +Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\ +and output byte counts of each piece to standard output.\n\ +"), stdout); + fputs (_("\ +\n\ +Read standard input if FILE is -\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\ + -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\ + -k, --keep-files do not remove output files on errors\n\ +"), stdout); + fputs (_("\ + --suppress-matched suppress the lines matching PATTERN\n\ +"), stdout); + fputs (_("\ + -n, --digits=DIGITS use specified number of digits instead of 2\n\ + -s, --quiet, --silent do not print counts of output file sizes\n\ + -z, --elide-empty-files suppress empty output files\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Each PATTERN may be:\n\ + INTEGER copy up to but not including specified line number\n\ + /REGEXP/[OFFSET] copy up to but not including a matching line\n\ + %REGEXP%[OFFSET] skip to, but not including a matching line\n\ + {INTEGER} repeat the previous pattern specified number of times\n\ + {*} repeat the previous pattern as many times as possible\n\ +\n\ +A line OFFSET is an integer optionally preceded by '+' or '-'\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} diff --git a/src/cu-progs.mk b/src/cu-progs.mk new file mode 100644 index 0000000..b92e219 --- /dev/null +++ b/src/cu-progs.mk @@ -0,0 +1,114 @@ +## Automatically generated by gen-lists-of-programs.sh. DO NOT EDIT BY HAND! +no_install__progs = +no_install__progs += src/arch +no_install__progs += src/coreutils +no_install__progs += src/hostname +build_if_possible__progs = +build_if_possible__progs += src/chroot +build_if_possible__progs += src/df +build_if_possible__progs += src/hostid +build_if_possible__progs += src/libstdbuf.so +build_if_possible__progs += src/nice +build_if_possible__progs += src/pinky +build_if_possible__progs += src/stdbuf +build_if_possible__progs += src/stty +build_if_possible__progs += src/timeout +build_if_possible__progs += src/users +build_if_possible__progs += src/who +default__progs = +default__progs += src/[ +default__progs += src/b2sum +default__progs += src/base64 +default__progs += src/base32 +default__progs += src/basenc +default__progs += src/basename +default__progs += src/cat +default__progs += src/chcon +default__progs += src/chgrp +default__progs += src/chmod +default__progs += src/chown +default__progs += src/cksum +default__progs += src/comm +default__progs += src/cp +default__progs += src/csplit +default__progs += src/cut +default__progs += src/date +default__progs += src/dd +default__progs += src/dir +default__progs += src/dircolors +default__progs += src/dirname +default__progs += src/du +default__progs += src/echo +default__progs += src/env +default__progs += src/expand +default__progs += src/expr +default__progs += src/factor +default__progs += src/false +default__progs += src/fmt +default__progs += src/fold +default__progs += src/ginstall +default__progs += src/groups +default__progs += src/head +default__progs += src/id +default__progs += src/join +default__progs += src/kill +default__progs += src/link +default__progs += src/ln +default__progs += src/logname +default__progs += src/ls +default__progs += src/md5sum +default__progs += src/mkdir +default__progs += src/mkfifo +default__progs += src/mknod +default__progs += src/mktemp +default__progs += src/mv +default__progs += src/nl +default__progs += src/nproc +default__progs += src/nohup +default__progs += src/numfmt +default__progs += src/od +default__progs += src/paste +default__progs += src/pathchk +default__progs += src/pr +default__progs += src/printenv +default__progs += src/printf +default__progs += src/ptx +default__progs += src/pwd +default__progs += src/readlink +default__progs += src/realpath +default__progs += src/rm +default__progs += src/rmdir +default__progs += src/runcon +default__progs += src/seq +default__progs += src/sha1sum +default__progs += src/sha224sum +default__progs += src/sha256sum +default__progs += src/sha384sum +default__progs += src/sha512sum +default__progs += src/shred +default__progs += src/shuf +default__progs += src/sleep +default__progs += src/sort +default__progs += src/split +default__progs += src/stat +default__progs += src/sum +default__progs += src/sync +default__progs += src/tac +default__progs += src/tail +default__progs += src/tee +default__progs += src/test +default__progs += src/touch +default__progs += src/tr +default__progs += src/true +default__progs += src/truncate +default__progs += src/tsort +default__progs += src/tty +default__progs += src/uname +default__progs += src/unexpand +default__progs += src/uniq +default__progs += src/unlink +default__progs += src/uptime +default__progs += src/vdir +default__progs += src/wc +default__progs += src/whoami +default__progs += src/yes diff --git a/src/cut.c b/src/cut.c new file mode 100644 index 0000000..b4edbab --- /dev/null +++ b/src/cut.c @@ -0,0 +1,601 @@ +/* cut - remove parts of lines of files + Copyright (C) 1997-2023 Free Software Foundation, Inc. + Copyright (C) 1984 David M. Ihnat + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David Ihnat. */ + +/* POSIX changes, bug fixes, long-named options, and cleanup + by David MacKenzie . + + Rewrite cut_fields and cut_bytes -- Jim Meyering. */ + +#include + +#include +#include +#include +#include "system.h" + +#include "assure.h" +#include "fadvise.h" +#include "getndelim2.h" + +#include "set-fields.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "cut" + +#define AUTHORS \ + proper_name ("David M. Ihnat"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +#define FATAL_ERROR(Message) \ + do \ + { \ + error (0, 0, (Message)); \ + usage (EXIT_FAILURE); \ + } \ + while (0) + + +/* Pointer inside RP. When checking if a byte or field is selected + by a finite range, we check if it is between CURRENT_RP.LO + and CURRENT_RP.HI. If the byte or field index is greater than + CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */ +static struct field_range_pair *current_rp; + +/* This buffer is used to support the semantics of the -s option + (or lack of same) when the specified field list includes (does + not include) the first field. In both of those cases, the entire + first field must be read into this buffer to determine whether it + is followed by a delimiter or a newline before any of it may be + output. Otherwise, cut_fields can do the job without using this + buffer. */ +static char *field_1_buffer; + +/* The number of bytes allocated for FIELD_1_BUFFER. */ +static size_t field_1_bufsize; + +/* If true, do not output lines containing no delimiter characters. + Otherwise, all such lines are printed. This option is valid only + with field mode. */ +static bool suppress_non_delimited; + +/* If true, print all bytes, characters, or fields _except_ + those that were specified. */ +static bool complement; + +/* The delimiter character for field mode. */ +static unsigned char delim; + +/* The delimiter for each line/record. */ +static unsigned char line_delim = '\n'; + +/* The length of output_delimiter_string. */ +static size_t output_delimiter_length; + +/* The output field separator string. Defaults to the 1-character + string consisting of the input delimiter. */ +static char *output_delimiter_string; + +/* The output delimiter string contents, if the default. */ +static char output_delimiter_default[1]; + +/* True if we have ever read standard input. */ +static bool have_read_stdin; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + OUTPUT_DELIMITER_OPTION = CHAR_MAX + 1, + COMPLEMENT_OPTION +}; + +static struct option const longopts[] = +{ + {"bytes", required_argument, nullptr, 'b'}, + {"characters", required_argument, nullptr, 'c'}, + {"fields", required_argument, nullptr, 'f'}, + {"delimiter", required_argument, nullptr, 'd'}, + {"only-delimited", no_argument, nullptr, 's'}, + {"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION}, + {"complement", no_argument, nullptr, COMPLEMENT_OPTION}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s OPTION... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Print selected parts of lines from each FILE to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --bytes=LIST select only these bytes\n\ + -c, --characters=LIST select only these characters\n\ + -d, --delimiter=DELIM use DELIM instead of TAB for field delimiter\n\ +"), stdout); + fputs (_("\ + -f, --fields=LIST select only these fields; also print any line\n\ + that contains no delimiter character, unless\n\ + the -s option is specified\n\ + -n (ignored)\n\ +"), stdout); + fputs (_("\ + --complement complement the set of selected bytes, characters\n\ + or fields\n\ +"), stdout); + fputs (_("\ + -s, --only-delimited do not print lines not containing delimiters\n\ + --output-delimiter=STRING use STRING as the output delimiter\n\ + the default is to use the input delimiter\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Use one, and only one of -b, -c or -f. Each LIST is made up of one\n\ +range, or many ranges separated by commas. Selected input is written\n\ +in the same order that it is read, and is written exactly once.\n\ +"), stdout); + fputs (_("\ +Each range is one of:\n\ +\n\ + N N'th byte, character or field, counted from 1\n\ + N- from N'th byte, character or field, to end of line\n\ + N-M from N'th to M'th (included) byte, character or field\n\ + -M from first to M'th (included) byte, character or field\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + + +/* Increment *ITEM_IDX (i.e., a field or byte index), + and if required CURRENT_RP. */ + +static inline void +next_item (uintmax_t *item_idx) +{ + (*item_idx)++; + if ((*item_idx) > current_rp->hi) + current_rp++; +} + +/* Return nonzero if the K'th field or byte is printable. */ + +static inline bool +print_kth (uintmax_t k) +{ + return current_rp->lo <= k; +} + +/* Return nonzero if K'th byte is the beginning of a range. */ + +static inline bool +is_range_start_index (uintmax_t k) +{ + return k == current_rp->lo; +} + +/* Read from stream STREAM, printing to standard output any selected bytes. */ + +static void +cut_bytes (FILE *stream) +{ + uintmax_t byte_idx; /* Number of bytes in the line so far. */ + /* Whether to begin printing delimiters between ranges for the current line. + Set after we've begun printing data corresponding to the first range. */ + bool print_delimiter; + + byte_idx = 0; + print_delimiter = false; + current_rp = frp; + while (true) + { + int c; /* Each character from the file. */ + + c = getc (stream); + + if (c == line_delim) + { + if (putchar (c) < 0) + write_error (); + byte_idx = 0; + print_delimiter = false; + current_rp = frp; + } + else if (c == EOF) + { + if (byte_idx > 0) + { + if (putchar (line_delim) < 0) + write_error (); + } + break; + } + else + { + next_item (&byte_idx); + if (print_kth (byte_idx)) + { + if (output_delimiter_string != output_delimiter_default) + { + if (print_delimiter && is_range_start_index (byte_idx)) + { + if (fwrite (output_delimiter_string, sizeof (char), + output_delimiter_length, stdout) + != output_delimiter_length) + write_error (); + } + print_delimiter = true; + } + + if (putchar (c) < 0) + write_error (); + } + } + } +} + +/* Read from stream STREAM, printing to standard output any selected fields. */ + +static void +cut_fields (FILE *stream) +{ + int c; /* Each character from the file. */ + uintmax_t field_idx = 1; + bool found_any_selected_field = false; + bool buffer_first_field; + + current_rp = frp; + + c = getc (stream); + if (c == EOF) + return; + + ungetc (c, stream); + c = 0; + + /* To support the semantics of the -s flag, we may have to buffer + all of the first field to determine whether it is 'delimited.' + But that is unnecessary if all non-delimited lines must be printed + and the first field has been selected, or if non-delimited lines + must be suppressed and the first field has *not* been selected. + That is because a non-delimited line has exactly one field. */ + buffer_first_field = (suppress_non_delimited ^ !print_kth (1)); + + while (true) + { + if (field_idx == 1 && buffer_first_field) + { + ssize_t len; + size_t n_bytes; + + len = getndelim2 (&field_1_buffer, &field_1_bufsize, 0, + GETNLINE_NO_LIMIT, delim, line_delim, stream); + if (len < 0) + { + free (field_1_buffer); + field_1_buffer = nullptr; + if (ferror (stream) || feof (stream)) + break; + xalloc_die (); + } + + n_bytes = len; + affirm (n_bytes != 0); + + c = 0; + + /* If the first field extends to the end of line (it is not + delimited) and we are printing all non-delimited lines, + print this one. */ + if (to_uchar (field_1_buffer[n_bytes - 1]) != delim) + { + if (suppress_non_delimited) + { + /* Empty. */ + } + else + { + if (fwrite (field_1_buffer, sizeof (char), n_bytes, stdout) + != n_bytes) + write_error (); + /* Make sure the output line is newline terminated. */ + if (field_1_buffer[n_bytes - 1] != line_delim) + { + if (putchar (line_delim) < 0) + write_error (); + } + c = line_delim; + } + continue; + } + + if (print_kth (1)) + { + /* Print the field, but not the trailing delimiter. */ + if (fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout) + != n_bytes - 1) + write_error (); + + /* With -d$'\n' don't treat the last '\n' as a delimiter. */ + if (delim == line_delim) + { + int last_c = getc (stream); + if (last_c != EOF) + { + ungetc (last_c, stream); + found_any_selected_field = true; + } + } + else + { + found_any_selected_field = true; + } + } + next_item (&field_idx); + } + + int prev_c = c; + + if (print_kth (field_idx)) + { + if (found_any_selected_field) + { + if (fwrite (output_delimiter_string, sizeof (char), + output_delimiter_length, stdout) + != output_delimiter_length) + write_error (); + } + found_any_selected_field = true; + + while ((c = getc (stream)) != delim && c != line_delim && c != EOF) + { + if (putchar (c) < 0) + write_error (); + prev_c = c; + } + } + else + { + while ((c = getc (stream)) != delim && c != line_delim && c != EOF) + prev_c = c; + } + + /* With -d$'\n' don't treat the last '\n' as a delimiter. */ + if (delim == line_delim && c == delim) + { + int last_c = getc (stream); + if (last_c != EOF) + ungetc (last_c, stream); + else + c = last_c; + } + + if (c == delim) + next_item (&field_idx); + else if (c == line_delim || c == EOF) + { + if (found_any_selected_field + || !(suppress_non_delimited && field_idx == 1)) + { + /* Make sure the output line is newline terminated. */ + if (c == line_delim || prev_c != line_delim + || delim == line_delim) + { + if (putchar (line_delim) < 0) + write_error (); + } + } + if (c == EOF) + break; + + /* Start processing the next input line. */ + field_idx = 1; + current_rp = frp; + found_any_selected_field = false; + } + } +} + +/* Process file FILE to standard output, using CUT_STREAM. + Return true if successful. */ + +static bool +cut_file (char const *file, void (*cut_stream) (FILE *)) +{ + FILE *stream; + + if (STREQ (file, "-")) + { + have_read_stdin = true; + stream = stdin; + assume (stream); /* Pacify GCC bug#109613. */ + } + else + { + stream = fopen (file, "r"); + if (stream == nullptr) + { + error (0, errno, "%s", quotef (file)); + return false; + } + } + + fadvise (stream, FADVISE_SEQUENTIAL); + + cut_stream (stream); + + int err = errno; + if (!ferror (stream)) + err = 0; + if (STREQ (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) == EOF) + err = errno; + if (err) + { + error (0, err, "%s", quotef (file)); + return false; + } + return true; +} + +int +main (int argc, char **argv) +{ + int optc; + bool ok; + bool delim_specified = false; + bool byte_mode = false; + char *spec_list_string = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + /* By default, all non-delimited lines are printed. */ + suppress_non_delimited = false; + + delim = '\0'; + have_read_stdin = false; + + while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, nullptr)) + != -1) + { + switch (optc) + { + case 'b': + case 'c': + /* Build the byte list. */ + byte_mode = true; + FALLTHROUGH; + case 'f': + /* Build the field list. */ + if (spec_list_string) + FATAL_ERROR (_("only one list may be specified")); + spec_list_string = optarg; + break; + + case 'd': + /* New delimiter. */ + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ + if (optarg[0] != '\0' && optarg[1] != '\0') + FATAL_ERROR (_("the delimiter must be a single character")); + delim = optarg[0]; + delim_specified = true; + break; + + case OUTPUT_DELIMITER_OPTION: + /* Interpret --output-delimiter='' to mean + 'use the NUL byte as the delimiter.' */ + output_delimiter_length = (optarg[0] == '\0' + ? 1 : strlen (optarg)); + output_delimiter_string = optarg; + break; + + case 'n': + break; + + case 's': + suppress_non_delimited = true; + break; + + case 'z': + line_delim = '\0'; + break; + + case COMPLEMENT_OPTION: + complement = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (!spec_list_string) + FATAL_ERROR (_("you must specify a list of bytes, characters, or fields")); + + if (byte_mode) + { + if (delim_specified) + FATAL_ERROR (_("an input delimiter may be specified only\ + when operating on fields")); + + if (suppress_non_delimited) + FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\ +\tonly when operating on fields")); + } + + set_fields (spec_list_string, + ((byte_mode ? SETFLD_ERRMSG_USE_POS : 0) + | (complement ? SETFLD_COMPLEMENT : 0))); + + if (!delim_specified) + delim = '\t'; + + if (output_delimiter_string == nullptr) + { + output_delimiter_default[0] = delim; + output_delimiter_string = output_delimiter_default; + output_delimiter_length = 1; + } + + void (*cut_stream) (FILE *) = byte_mode ? cut_bytes : cut_fields; + if (optind == argc) + ok = cut_file ("-", cut_stream); + else + for (ok = true; optind < argc; optind++) + ok &= cut_file (argv[optind], cut_stream); + + + if (have_read_stdin && fclose (stdin) == EOF) + { + error (0, errno, "-"); + ok = false; + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/date.c b/src/date.c new file mode 100644 index 0000000..9b66d39 --- /dev/null +++ b/src/date.c @@ -0,0 +1,679 @@ +/* date - print or set the system date and time + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + David MacKenzie */ + +#include +#include +#include +#include +#if HAVE_LANGINFO_CODESET +# include +#endif + +#include "system.h" +#include "argmatch.h" +#include "parse-datetime.h" +#include "posixtm.h" +#include "quote.h" +#include "stat-time.h" +#include "fprintftime.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "date" + +#define AUTHORS proper_name ("David MacKenzie") + +static bool show_date (char const *, struct timespec, timezone_t); + +enum Time_spec +{ + /* Display only the date. */ + TIME_SPEC_DATE, + /* Display date, hours, minutes, and seconds. */ + TIME_SPEC_SECONDS, + /* Similar, but display nanoseconds. */ + TIME_SPEC_NS, + + /* Put these last, since they aren't valid for --rfc-3339. */ + + /* Display date and hour. */ + TIME_SPEC_HOURS, + /* Display date, hours, and minutes. */ + TIME_SPEC_MINUTES +}; + +static char const *const time_spec_string[] = +{ + /* Put "hours" and "minutes" first, since they aren't valid for + --rfc-3339. */ + "hours", "minutes", + "date", "seconds", "ns", nullptr +}; +static enum Time_spec const time_spec[] = +{ + TIME_SPEC_HOURS, TIME_SPEC_MINUTES, + TIME_SPEC_DATE, TIME_SPEC_SECONDS, TIME_SPEC_NS +}; +ARGMATCH_VERIFY (time_spec_string, time_spec); + +/* A format suitable for Internet RFCs 5322, 2822, and 822. */ +static char const rfc_email_format[] = "%a, %d %b %Y %H:%M:%S %z"; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEBUG_DATE_PARSING_OPTION = CHAR_MAX + 1, + RESOLUTION_OPTION, + RFC_3339_OPTION +}; + +static char const short_options[] = "d:f:I::r:Rs:u"; + +static struct option const long_options[] = +{ + {"date", required_argument, nullptr, 'd'}, + {"debug", no_argument, nullptr, DEBUG_DATE_PARSING_OPTION}, + {"file", required_argument, nullptr, 'f'}, + {"iso-8601", optional_argument, nullptr, 'I'}, + {"reference", required_argument, nullptr, 'r'}, + {"resolution", no_argument, nullptr, RESOLUTION_OPTION}, + {"rfc-email", no_argument, nullptr, 'R'}, + {"rfc-822", no_argument, nullptr, 'R'}, + {"rfc-2822", no_argument, nullptr, 'R'}, + {"rfc-3339", required_argument, nullptr, RFC_3339_OPTION}, + {"set", required_argument, nullptr, 's'}, + {"uct", no_argument, nullptr, 'u'}, + {"utc", no_argument, nullptr, 'u'}, + {"universal", no_argument, nullptr, 'u'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* flags for parse_datetime2 */ +static unsigned int parse_datetime_flags; + +#if LOCALTIME_CACHE +# define TZSET tzset () +#else +# define TZSET /* empty */ +#endif + +#ifdef _DATE_FMT +# define DATE_FMT_LANGINFO() nl_langinfo (_DATE_FMT) +#else +# define DATE_FMT_LANGINFO() "" +#endif + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [+FORMAT]\n\ + or: %s [-u|--utc|--universal] [MMDDhhmm[[CC]YY][.ss]]\n\ +"), + program_name, program_name); + fputs (_("\ +Display date and time in the given FORMAT.\n\ +With -s, or with [MMDDhhmm[[CC]YY][.ss]], set the date and time.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -d, --date=STRING display time described by STRING, not 'now'\n\ +"), stdout); + fputs (_("\ + --debug annotate the parsed date,\n\ + and warn about questionable usage to stderr\n\ +"), stdout); + fputs (_("\ + -f, --file=DATEFILE like --date; once for each line of DATEFILE\n\ +"), stdout); + fputs (_("\ + -I[FMT], --iso-8601[=FMT] output date/time in ISO 8601 format.\n\ + FMT='date' for date only (the default),\n\ + 'hours', 'minutes', 'seconds', or 'ns'\n\ + for date and time to the indicated precision.\n\ + Example: 2006-08-14T02:34:56-06:00\n\ +"), stdout); + fputs (_("\ + --resolution output the available resolution of timestamps\n\ + Example: 0.000000001\n\ +"), stdout); + fputs (_("\ + -R, --rfc-email output date and time in RFC 5322 format.\n\ + Example: Mon, 14 Aug 2006 02:34:56 -0600\n\ +"), stdout); + fputs (_("\ + --rfc-3339=FMT output date/time in RFC 3339 format.\n\ + FMT='date', 'seconds', or 'ns'\n\ + for date and time to the indicated precision.\n\ + Example: 2006-08-14 02:34:56-06:00\n\ +"), stdout); + fputs (_("\ + -r, --reference=FILE display the last modification time of FILE\n\ +"), stdout); + fputs (_("\ + -s, --set=STRING set time described by STRING\n\ + -u, --utc, --universal print or set Coordinated Universal Time (UTC)\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +All options that specify the date to display are mutually exclusive.\n\ +I.e.: --date, --file, --reference, --resolution.\n\ +"), stdout); + fputs (_("\ +\n\ +FORMAT controls the output. Interpreted sequences are:\n\ +\n\ + %% a literal %\n\ + %a locale's abbreviated weekday name (e.g., Sun)\n\ +"), stdout); + fputs (_("\ + %A locale's full weekday name (e.g., Sunday)\n\ + %b locale's abbreviated month name (e.g., Jan)\n\ + %B locale's full month name (e.g., January)\n\ + %c locale's date and time (e.g., Thu Mar 3 23:05:25 2005)\n\ +"), stdout); + fputs (_("\ + %C century; like %Y, except omit last two digits (e.g., 20)\n\ + %d day of month (e.g., 01)\n\ + %D date; same as %m/%d/%y\n\ + %e day of month, space padded; same as %_d\n\ +"), stdout); + fputs (_("\ + %F full date; like %+4Y-%m-%d\n\ + %g last two digits of year of ISO week number (see %G)\n\ + %G year of ISO week number (see %V); normally useful only with %V\n\ +"), stdout); + fputs (_("\ + %h same as %b\n\ + %H hour (00..23)\n\ + %I hour (01..12)\n\ + %j day of year (001..366)\n\ +"), stdout); + fputs (_("\ + %k hour, space padded ( 0..23); same as %_H\n\ + %l hour, space padded ( 1..12); same as %_I\n\ + %m month (01..12)\n\ + %M minute (00..59)\n\ +"), stdout); + fputs (_("\ + %n a newline\n\ + %N nanoseconds (000000000..999999999)\n\ + %p locale's equivalent of either AM or PM; blank if not known\n\ + %P like %p, but lower case\n\ + %q quarter of year (1..4)\n\ + %r locale's 12-hour clock time (e.g., 11:11:04 PM)\n\ + %R 24-hour hour and minute; same as %H:%M\n\ + %s seconds since the Epoch (1970-01-01 00:00 UTC)\n\ +"), stdout); + fputs (_("\ + %S second (00..60)\n\ + %t a tab\n\ + %T time; same as %H:%M:%S\n\ + %u day of week (1..7); 1 is Monday\n\ +"), stdout); + fputs (_("\ + %U week number of year, with Sunday as first day of week (00..53)\n\ + %V ISO week number, with Monday as first day of week (01..53)\n\ + %w day of week (0..6); 0 is Sunday\n\ + %W week number of year, with Monday as first day of week (00..53)\n\ +"), stdout); + fputs (_("\ + %x locale's date representation (e.g., 12/31/99)\n\ + %X locale's time representation (e.g., 23:13:48)\n\ + %y last two digits of year (00..99)\n\ + %Y year\n\ +"), stdout); + fputs (_("\ + %z +hhmm numeric time zone (e.g., -0400)\n\ + %:z +hh:mm numeric time zone (e.g., -04:00)\n\ + %::z +hh:mm:ss numeric time zone (e.g., -04:00:00)\n\ + %:::z numeric time zone with : to necessary precision (e.g., -04, +05:30)\n\ + %Z alphabetic time zone abbreviation (e.g., EDT)\n\ +\n\ +By default, date pads numeric fields with zeroes.\n\ +"), stdout); + fputs (_("\ +The following optional flags may follow '%':\n\ +\n\ + - (hyphen) do not pad the field\n\ + _ (underscore) pad with spaces\n\ + 0 (zero) pad with zeros\n\ + + pad with zeros, and put '+' before future years with >4 digits\n\ + ^ use upper case if possible\n\ + # use opposite case if possible\n\ +"), stdout); + fputs (_("\ +\n\ +After any flags comes an optional field width, as a decimal number;\n\ +then an optional modifier, which is either\n\ +E to use the locale's alternate representations if available, or\n\ +O to use the locale's alternate numeric symbols if available.\n\ +"), stdout); + fputs (_("\ +\n\ +Examples:\n\ +Convert seconds since the Epoch (1970-01-01 UTC) to a date\n\ + $ date --date='@2147483647'\n\ +\n\ +Show the time on the west coast of the US (use tzselect(1) to find TZ)\n\ + $ TZ='America/Los_Angeles' date\n\ +\n\ +Show the local time for 9AM next Friday on the west coast of the US\n\ + $ date --date='TZ=\"America/Los_Angeles\" 09:00 next Fri'\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Yield the number of decimal digits needed to output a time with the + nanosecond resolution RES, without losing information. */ + +static int +res_width (long int res) +{ + int i = 9; + for (long long int r = 1; (r *= 10) <= res; ) + i--; + return i; +} + +/* Return a newly allocated copy of FORMAT with each "%-N" adjusted to + be "%9N", "%6N", or whatever other resolution is appropriate for + the current platform. If no "%-N" appears, return nullptr. */ + +static char * +adjust_resolution (char const *format) +{ + char *copy = nullptr; + + for (char const *f = format; *f; f++) + if (f[0] == '%') + { + if (f[1] == '-' && f[2] == 'N') + { + if (!copy) + copy = xstrdup (format); + copy[f + 1 - format] = '0' + res_width (gettime_res ()); + f += 2; + } + else + f += f[1] == '%'; + } + + return copy; +} + +/* Parse each line in INPUT_FILENAME as with --date and display each + resulting time and date. If the file cannot be opened, tell why + then exit. Issue a diagnostic for any lines that cannot be parsed. + Return true if successful. */ + +static bool +batch_convert (char const *input_filename, char const *format, + timezone_t tz, char const *tzstring) +{ + bool ok; + FILE *in_stream; + char *line; + size_t buflen; + struct timespec when; + + if (STREQ (input_filename, "-")) + { + input_filename = _("standard input"); + in_stream = stdin; + } + else + { + in_stream = fopen (input_filename, "r"); + if (in_stream == nullptr) + error (EXIT_FAILURE, errno, "%s", quotef (input_filename)); + } + + line = nullptr; + buflen = 0; + ok = true; + while (true) + { + ssize_t line_length = getline (&line, &buflen, in_stream); + if (line_length < 0) + { + if (ferror (in_stream)) + error (EXIT_FAILURE, errno, _("%s: read error"), + quotef (input_filename)); + break; + } + + if (! parse_datetime2 (&when, line, nullptr, + parse_datetime_flags, tz, tzstring)) + { + if (line[line_length - 1] == '\n') + line[line_length - 1] = '\0'; + error (0, 0, _("invalid date %s"), quote (line)); + ok = false; + } + else + { + ok &= show_date (format, when, tz); + } + } + + if (fclose (in_stream) == EOF) + error (EXIT_FAILURE, errno, "%s", quotef (input_filename)); + + free (line); + + return ok; +} + +int +main (int argc, char **argv) +{ + int optc; + char const *datestr = nullptr; + char const *set_datestr = nullptr; + struct timespec when; + bool set_date = false; + char const *format = nullptr; + bool get_resolution = false; + char *batch_file = nullptr; + char *reference = nullptr; + struct stat refstats; + bool ok; + bool discarded_datestr = false; + bool discarded_set_datestr = false; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, short_options, long_options, nullptr)) + != -1) + { + char const *new_format = nullptr; + + switch (optc) + { + case 'd': + if (datestr) + discarded_datestr = true; + datestr = optarg; + break; + case DEBUG_DATE_PARSING_OPTION: + parse_datetime_flags |= PARSE_DATETIME_DEBUG; + break; + case 'f': + batch_file = optarg; + break; + case RESOLUTION_OPTION: + get_resolution = true; + break; + case RFC_3339_OPTION: + { + static char const rfc_3339_format[][32] = + { + "%Y-%m-%d", + "%Y-%m-%d %H:%M:%S%:z", + "%Y-%m-%d %H:%M:%S.%N%:z" + }; + enum Time_spec i = + XARGMATCH ("--rfc-3339", optarg, + time_spec_string + 2, time_spec + 2); + new_format = rfc_3339_format[i]; + break; + } + case 'I': + { + static char const iso_8601_format[][32] = + { + "%Y-%m-%d", + "%Y-%m-%dT%H:%M:%S%:z", + "%Y-%m-%dT%H:%M:%S,%N%:z", + "%Y-%m-%dT%H%:z", + "%Y-%m-%dT%H:%M%:z" + }; + enum Time_spec i = + (optarg + ? XARGMATCH ("--iso-8601", optarg, time_spec_string, time_spec) + : TIME_SPEC_DATE); + new_format = iso_8601_format[i]; + break; + } + case 'r': + reference = optarg; + break; + case 'R': + new_format = rfc_email_format; + break; + case 's': + if (set_datestr) + discarded_set_datestr = true; + set_datestr = optarg; + set_date = true; + break; + case 'u': + /* POSIX says that 'date -u' is equivalent to setting the TZ + environment variable, so this option should do nothing other + than setting TZ. */ + if (putenv (bad_cast ("TZ=UTC0")) != 0) + xalloc_die (); + TZSET; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + + if (new_format) + { + if (format) + error (EXIT_FAILURE, 0, _("multiple output formats specified")); + format = new_format; + } + } + + int option_specified_date = (!!datestr + !!batch_file + !!reference + + get_resolution); + + if (option_specified_date > 1) + { + error (0, 0, + _("the options to specify dates for printing are mutually exclusive")); + usage (EXIT_FAILURE); + } + + if (set_date && option_specified_date) + { + error (0, 0, + _("the options to print and set the time may not be used together")); + usage (EXIT_FAILURE); + } + + if (discarded_datestr && (parse_datetime_flags & PARSE_DATETIME_DEBUG)) + error (0, 0, _("only using last of multiple -d options")); + + if (discarded_set_datestr && (parse_datetime_flags & PARSE_DATETIME_DEBUG)) + error (0, 0, _("only using last of multiple -s options")); + + if (optind < argc) + { + if (optind + 1 < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + if (argv[optind][0] == '+') + { + if (format) + error (EXIT_FAILURE, 0, _("multiple output formats specified")); + format = argv[optind++] + 1; + } + else if (set_date || option_specified_date) + { + error (0, 0, + _("the argument %s lacks a leading '+';\n" + "when using an option to specify date(s), any non-option\n" + "argument must be a format string beginning with '+'"), + quote (argv[optind])); + usage (EXIT_FAILURE); + } + } + + if (!format) + { + if (get_resolution) + format = "%s.%N"; + else + { + format = DATE_FMT_LANGINFO (); + + /* Do not wrap the following literal format string with _(...). + For example, suppose LC_ALL is unset, LC_TIME=POSIX, + and LANG="ko_KR". In that case, POSIX says that LC_TIME + determines the format and contents of date and time strings + written by date, which means "date" must generate output + using the POSIX locale; but adding _() would cause "date" + to use a Korean translation of the format. */ + if (! *format) + format = "%a %b %e %H:%M:%S %Z %Y"; + } + } + + char *format_copy = adjust_resolution (format); + char const *format_res = format_copy ? format_copy : format; + char const *tzstring = getenv ("TZ"); + timezone_t tz = tzalloc (tzstring); + + if (batch_file != nullptr) + ok = batch_convert (batch_file, format_res, tz, tzstring); + else + { + bool valid_date = true; + ok = true; + + if (!option_specified_date && !set_date) + { + if (optind < argc) + { + /* Prepare to set system clock to the specified date/time + given in the POSIX-format. */ + set_date = true; + datestr = argv[optind]; + valid_date = posixtime (&when.tv_sec, + datestr, + (PDS_TRAILING_YEAR + | PDS_CENTURY | PDS_SECONDS)); + when.tv_nsec = 0; /* FIXME: posixtime should set this. */ + } + else + { + /* Prepare to print the current date/time. */ + gettime (&when); + } + } + else + { + /* (option_specified_date || set_date) */ + if (reference != nullptr) + { + if (stat (reference, &refstats) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (reference)); + when = get_stat_mtime (&refstats); + } + else if (get_resolution) + { + long int res = gettime_res (); + when.tv_sec = res / TIMESPEC_HZ; + when.tv_nsec = res % TIMESPEC_HZ; + } + else + { + if (set_datestr) + datestr = set_datestr; + valid_date = parse_datetime2 (&when, datestr, nullptr, + parse_datetime_flags, + tz, tzstring); + } + } + + if (! valid_date) + error (EXIT_FAILURE, 0, _("invalid date %s"), quote (datestr)); + + if (set_date) + { + /* Set the system clock to the specified date, then regardless of + the success of that operation, format and print that date. */ + if (settime (&when) != 0) + { + error (0, errno, _("cannot set date")); + ok = false; + } + } + + ok &= show_date (format_res, when, tz); + } + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* Display the date and/or time in WHEN according to the format specified + in FORMAT, followed by a newline. Return true if successful. */ + +static bool +show_date (char const *format, struct timespec when, timezone_t tz) +{ + struct tm tm; + + if (parse_datetime_flags & PARSE_DATETIME_DEBUG) + error (0, 0, _("output format: %s"), quote (format)); + + if (localtime_rz (tz, &when.tv_sec, &tm)) + { + if (format == rfc_email_format) + setlocale (LC_TIME, "C"); + fprintftime (stdout, format, &tm, tz, when.tv_nsec); + if (format == rfc_email_format) + setlocale (LC_TIME, ""); + fputc ('\n', stdout); + return true; + } + else + { + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + error (0, 0, _("time %s is out of range"), + quote (timetostr (when.tv_sec, buf))); + return false; + } +} diff --git a/src/dcgen b/src/dcgen new file mode 100755 index 0000000..141d2c6 --- /dev/null +++ b/src/dcgen @@ -0,0 +1,55 @@ +#!/usr/bin/perl -w +# dcgen -- convert dircolors.hin to dircolors.h. + +# Copyright (C) 1996-2023 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# written by Jim Meyering + +require 5.002; +use strict; +(my $ME = $0) =~ s|.*/||; + +# A global destructor to close standard output with error checking. +sub END +{ + defined fileno STDOUT + or return; + close STDOUT + and return; + warn "$ME: closing standard output: $!\n"; + $? ||= 1; +} + +my @line; +while (<>) + { + chomp; + s/[[:blank:]]+/ /g; + $_ + and push @line, $_; + } + +my $indent = ' '; + +print "static char const G_line[] =\n{\n"; +foreach (@line) + { + s/./'$&',/g; + s/'\\'/'\\\\'/g; + s/'''/'\\''/g; + print "$indent${_}0,\n"; + } +print "};\n"; diff --git a/src/dd.c b/src/dd.c new file mode 100644 index 0000000..b50b841 --- /dev/null +++ b/src/dd.c @@ -0,0 +1,2567 @@ +/* dd -- convert a file while copying it. + Copyright (C) 1985-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Rubin, David MacKenzie, and Stuart Kemp. */ + +#include + +#include +#include +#include + +#include "system.h" +#include "alignalloc.h" +#include "close-stream.h" +#include "fd-reopen.h" +#include "gethrxtime.h" +#include "human.h" +#include "ioblksize.h" +#include "long-options.h" +#include "quote.h" +#include "verror.h" +#include "xstrtol.h" +#include "xtime.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "dd" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Stuart Kemp") + +/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is + present. */ +#ifndef SA_NOCLDSTOP +# define SA_NOCLDSTOP 0 +# define sigprocmask(How, Set, Oset) /* empty */ +# define sigset_t int +# if ! HAVE_SIGINTERRUPT +# define siginterrupt(sig, flag) /* empty */ +# endif +#endif + +/* NonStop circa 2011 lacks SA_RESETHAND; see Bug#9076. */ +#ifndef SA_RESETHAND +# define SA_RESETHAND 0 +#endif + +#ifndef SIGINFO +# define SIGINFO SIGUSR1 +#endif + +/* This may belong in GNULIB's fcntl module instead. + Define O_CIO to 0 if it is not supported by this OS. */ +#ifndef O_CIO +# define O_CIO 0 +#endif + +/* On AIX 5.1 and AIX 5.2, O_NOCACHE is defined via + and would interfere with our use of that name, below. */ +#undef O_NOCACHE + +#if ! HAVE_FDATASYNC +# define fdatasync(fd) (errno = ENOSYS, -1) +#endif + +#define output_char(c) \ + do \ + { \ + obuf[oc++] = (c); \ + if (oc >= output_blocksize) \ + write_output (); \ + } \ + while (0) + +/* Default input and output blocksize. */ +#define DEFAULT_BLOCKSIZE 512 + +/* Conversions bit masks. */ +enum + { + C_ASCII = 01, + + C_EBCDIC = 02, + C_IBM = 04, + C_BLOCK = 010, + C_UNBLOCK = 020, + C_LCASE = 040, + C_UCASE = 0100, + C_SWAB = 0200, + C_NOERROR = 0400, + C_NOTRUNC = 01000, + C_SYNC = 02000, + + /* Use separate input and output buffers, and combine partial + input blocks. */ + C_TWOBUFS = 04000, + + C_NOCREAT = 010000, + C_EXCL = 020000, + C_FDATASYNC = 040000, + C_FSYNC = 0100000, + + C_SPARSE = 0200000 + }; + +/* Status levels. */ +enum + { + STATUS_NONE = 1, + STATUS_NOXFER = 2, + STATUS_DEFAULT = 3, + STATUS_PROGRESS = 4 + }; + +/* The name of the input file, or nullptr for the standard input. */ +static char const *input_file = nullptr; + +/* The name of the output file, or nullptr for the standard output. */ +static char const *output_file = nullptr; + +/* The page size on this host. */ +static idx_t page_size; + +/* The number of bytes in which atomic reads are done. */ +static idx_t input_blocksize = 0; + +/* The number of bytes in which atomic writes are done. */ +static idx_t output_blocksize = 0; + +/* Conversion buffer size, in bytes. 0 prevents conversions. */ +static idx_t conversion_blocksize = 0; + +/* Skip this many records of 'input_blocksize' bytes before input. */ +static intmax_t skip_records = 0; + +/* Skip this many bytes before input in addition of 'skip_records' + records. */ +static idx_t skip_bytes = 0; + +/* Skip this many records of 'output_blocksize' bytes before output. */ +static intmax_t seek_records = 0; + +/* Skip this many bytes in addition to 'seek_records' records before + output. */ +static intmax_t seek_bytes = 0; + +/* Whether the final output was done with a seek (rather than a write). */ +static bool final_op_was_seek; + +/* Copy only this many records. The default is effectively infinity. */ +static intmax_t max_records = INTMAX_MAX; + +/* Copy this many bytes in addition to 'max_records' records. */ +static idx_t max_bytes = 0; + +/* Bit vector of conversions to apply. */ +static int conversions_mask = 0; + +/* Open flags for the input and output files. */ +static int input_flags = 0; +static int output_flags = 0; + +/* Status flags for what is printed to stderr. */ +static int status_level = STATUS_DEFAULT; + +/* If nonzero, filter characters through the translation table. */ +static bool translation_needed = false; + +/* Number of partial blocks written. */ +static intmax_t w_partial = 0; + +/* Number of full blocks written. */ +static intmax_t w_full = 0; + +/* Number of partial blocks read. */ +static intmax_t r_partial = 0; + +/* Number of full blocks read. */ +static intmax_t r_full = 0; + +/* Number of bytes written. */ +static intmax_t w_bytes = 0; + +/* Last-reported number of bytes written, or negative if never reported. */ +static intmax_t reported_w_bytes = -1; + +/* Time that dd started. */ +static xtime_t start_time; + +/* Next time to report periodic progress. */ +static xtime_t next_time; + +/* If positive, the number of bytes output in the current progress line. */ +static int progress_len; + +/* True if input is seekable. */ +static bool input_seekable; + +/* Error number corresponding to initial attempt to lseek input. + If ESPIPE, do not issue any more diagnostics about it. */ +static int input_seek_errno; + +/* File offset of the input, in bytes, or -1 if it overflowed. */ +static off_t input_offset; + +/* True if a partial read should be diagnosed. */ +static bool warn_partial_read; + +/* Records truncated by conv=block. */ +static intmax_t r_truncate = 0; + +/* Output representation of newline and space characters. + They change if we're converting to EBCDIC. */ +static char newline_character = '\n'; +static char space_character = ' '; + +/* I/O buffers. */ +static char *ibuf; +static char *obuf; + +/* Current index into 'obuf'. */ +static idx_t oc = 0; + +/* Index into current line, for 'conv=block' and 'conv=unblock'. */ +static idx_t col = 0; + +/* The set of signals that are caught. */ +static sigset_t caught_signals; + +/* If nonzero, the value of the pending fatal signal. */ +static sig_atomic_t volatile interrupt_signal; + +/* A count of the number of pending info signals that have been received. */ +static sig_atomic_t volatile info_signal_count; + +/* Whether to discard cache for input or output. */ +static bool i_nocache, o_nocache; + +/* Whether to instruct the kernel to discard the complete file. */ +static bool i_nocache_eof, o_nocache_eof; + +/* Function used for read (to handle iflag=fullblock parameter). */ +static ssize_t (*iread_fnc) (int fd, char *buf, idx_t size); + +/* A longest symbol in the struct symbol_values tables below. */ +#define LONGEST_SYMBOL "count_bytes" + +/* A symbol and the corresponding integer value. */ +struct symbol_value +{ + char symbol[sizeof LONGEST_SYMBOL]; + int value; +}; + +/* Conversion symbols, for conv="...". */ +static struct symbol_value const conversions[] = +{ + {"ascii", C_ASCII | C_UNBLOCK | C_TWOBUFS}, /* EBCDIC to ASCII. */ + {"ebcdic", C_EBCDIC | C_BLOCK | C_TWOBUFS}, /* ASCII to EBCDIC. */ + {"ibm", C_IBM | C_BLOCK | C_TWOBUFS}, /* Different ASCII to EBCDIC. */ + {"block", C_BLOCK | C_TWOBUFS}, /* Variable to fixed length records. */ + {"unblock", C_UNBLOCK | C_TWOBUFS}, /* Fixed to variable length records. */ + {"lcase", C_LCASE | C_TWOBUFS}, /* Translate upper to lower case. */ + {"ucase", C_UCASE | C_TWOBUFS}, /* Translate lower to upper case. */ + {"sparse", C_SPARSE}, /* Try to sparsely write output. */ + {"swab", C_SWAB | C_TWOBUFS}, /* Swap bytes of input. */ + {"noerror", C_NOERROR}, /* Ignore i/o errors. */ + {"nocreat", C_NOCREAT}, /* Do not create output file. */ + {"excl", C_EXCL}, /* Fail if the output file already exists. */ + {"notrunc", C_NOTRUNC}, /* Do not truncate output file. */ + {"sync", C_SYNC}, /* Pad input records to ibs with NULs. */ + {"fdatasync", C_FDATASYNC}, /* Synchronize output data before finishing. */ + {"fsync", C_FSYNC}, /* Also synchronize output metadata. */ + {"", 0} +}; + +#define FFS_MASK(x) ((x) ^ ((x) & ((x) - 1))) +enum + { + /* Compute a value that's bitwise disjoint from the union + of all O_ values. */ + v = ~(0 + | O_APPEND + | O_BINARY + | O_CIO + | O_DIRECT + | O_DIRECTORY + | O_DSYNC + | O_NOATIME + | O_NOCTTY + | O_NOFOLLOW + | O_NOLINKS + | O_NONBLOCK + | O_SYNC + | O_TEXT + ), + + /* Use its lowest bits for private flags. */ + O_FULLBLOCK = FFS_MASK (v), + v2 = v ^ O_FULLBLOCK, + + O_NOCACHE = FFS_MASK (v2), + v3 = v2 ^ O_NOCACHE, + + O_COUNT_BYTES = FFS_MASK (v3), + v4 = v3 ^ O_COUNT_BYTES, + + O_SKIP_BYTES = FFS_MASK (v4), + v5 = v4 ^ O_SKIP_BYTES, + + O_SEEK_BYTES = FFS_MASK (v5) + }; + +/* Ensure that we got something. */ +static_assert (O_FULLBLOCK != 0); +static_assert (O_NOCACHE != 0); +static_assert (O_COUNT_BYTES != 0); +static_assert (O_SKIP_BYTES != 0); +static_assert (O_SEEK_BYTES != 0); + +#define MULTIPLE_BITS_SET(i) (((i) & ((i) - 1)) != 0) + +/* Ensure that this is a single-bit value. */ +static_assert ( ! MULTIPLE_BITS_SET (O_FULLBLOCK)); +static_assert ( ! MULTIPLE_BITS_SET (O_NOCACHE)); +static_assert ( ! MULTIPLE_BITS_SET (O_COUNT_BYTES)); +static_assert ( ! MULTIPLE_BITS_SET (O_SKIP_BYTES)); +static_assert ( ! MULTIPLE_BITS_SET (O_SEEK_BYTES)); + +/* Flags, for iflag="..." and oflag="...". */ +static struct symbol_value const flags[] = +{ + {"append", O_APPEND}, + {"binary", O_BINARY}, + {"cio", O_CIO}, + {"direct", O_DIRECT}, + {"directory", O_DIRECTORY}, + {"dsync", O_DSYNC}, + {"noatime", O_NOATIME}, + {"nocache", O_NOCACHE}, /* Discard cache. */ + {"noctty", O_NOCTTY}, + {"nofollow", HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0}, + {"nolinks", O_NOLINKS}, + {"nonblock", O_NONBLOCK}, + {"sync", O_SYNC}, + {"text", O_TEXT}, + {"fullblock", O_FULLBLOCK}, /* Accumulate full blocks from input. */ + {"count_bytes", O_COUNT_BYTES}, + {"skip_bytes", O_SKIP_BYTES}, + {"seek_bytes", O_SEEK_BYTES}, + {"", 0} +}; + +/* Status, for status="...". */ +static struct symbol_value const statuses[] = +{ + {"none", STATUS_NONE}, + {"noxfer", STATUS_NOXFER}, + {"progress", STATUS_PROGRESS}, + {"", 0} +}; + +/* Translation table formed by applying successive transformations. */ +static unsigned char trans_table[256]; + +/* Standard translation tables, taken from POSIX 1003.1-2013. + Beware of imitations; there are lots of ASCII<->EBCDIC tables + floating around the net, perhaps valid for some applications but + not correct here. */ + +static char const ascii_to_ebcdic[] = +{ + '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057', + '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046', + '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037', + '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175', + '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157', + '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326', + '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346', + '\347', '\350', '\351', '\255', '\340', '\275', '\232', '\155', + '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226', + '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246', + '\247', '\250', '\251', '\300', '\117', '\320', '\137', '\007', + '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027', + '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033', + '\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010', + '\070', '\071', '\072', '\073', '\004', '\024', '\076', '\341', + '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110', + '\111', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165', + '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215', + '\216', '\217', '\220', '\152', '\233', '\234', '\235', '\236', + '\237', '\240', '\252', '\253', '\254', '\112', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\241', '\276', '\277', + '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333', + '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355', + '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static char const ascii_to_ibm[] = +{ + '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057', + '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046', + '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037', + '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175', + '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141', + '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', + '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157', + '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307', + '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326', + '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346', + '\347', '\350', '\351', '\255', '\340', '\275', '\137', '\155', + '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207', + '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226', + '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246', + '\247', '\250', '\251', '\300', '\117', '\320', '\241', '\007', + '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027', + '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033', + '\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010', + '\070', '\071', '\072', '\073', '\004', '\024', '\076', '\341', + '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110', + '\111', '\121', '\122', '\123', '\124', '\125', '\126', '\127', + '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165', + '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215', + '\216', '\217', '\220', '\232', '\233', '\234', '\235', '\236', + '\237', '\240', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', + '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333', + '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355', + '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +static char const ebcdic_to_ascii[] = +{ + '\000', '\001', '\002', '\003', '\234', '\011', '\206', '\177', + '\227', '\215', '\216', '\013', '\014', '\015', '\016', '\017', + '\020', '\021', '\022', '\023', '\235', '\205', '\010', '\207', + '\030', '\031', '\222', '\217', '\034', '\035', '\036', '\037', + '\200', '\201', '\202', '\203', '\204', '\012', '\027', '\033', + '\210', '\211', '\212', '\213', '\214', '\005', '\006', '\007', + '\220', '\221', '\026', '\223', '\224', '\225', '\226', '\004', + '\230', '\231', '\232', '\233', '\024', '\025', '\236', '\032', + '\040', '\240', '\241', '\242', '\243', '\244', '\245', '\246', + '\247', '\250', '\325', '\056', '\074', '\050', '\053', '\174', + '\046', '\251', '\252', '\253', '\254', '\255', '\256', '\257', + '\260', '\261', '\041', '\044', '\052', '\051', '\073', '\176', + '\055', '\057', '\262', '\263', '\264', '\265', '\266', '\267', + '\270', '\271', '\313', '\054', '\045', '\137', '\076', '\077', + '\272', '\273', '\274', '\275', '\276', '\277', '\300', '\301', + '\302', '\140', '\072', '\043', '\100', '\047', '\075', '\042', + '\303', '\141', '\142', '\143', '\144', '\145', '\146', '\147', + '\150', '\151', '\304', '\305', '\306', '\307', '\310', '\311', + '\312', '\152', '\153', '\154', '\155', '\156', '\157', '\160', + '\161', '\162', '\136', '\314', '\315', '\316', '\317', '\320', + '\321', '\345', '\163', '\164', '\165', '\166', '\167', '\170', + '\171', '\172', '\322', '\323', '\324', '\133', '\326', '\327', + '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', + '\340', '\341', '\342', '\343', '\344', '\135', '\346', '\347', + '\173', '\101', '\102', '\103', '\104', '\105', '\106', '\107', + '\110', '\111', '\350', '\351', '\352', '\353', '\354', '\355', + '\175', '\112', '\113', '\114', '\115', '\116', '\117', '\120', + '\121', '\122', '\356', '\357', '\360', '\361', '\362', '\363', + '\134', '\237', '\123', '\124', '\125', '\126', '\127', '\130', + '\131', '\132', '\364', '\365', '\366', '\367', '\370', '\371', + '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', + '\070', '\071', '\372', '\373', '\374', '\375', '\376', '\377' +}; + +/* True if we need to close the standard output *stream*. */ +static bool close_stdout_required = true; + +/* The only reason to close the standard output *stream* is if + parse_long_options fails (as it does for --help or --version). + In any other case, dd uses only the STDOUT_FILENO file descriptor, + and the "cleanup" function calls "close (STDOUT_FILENO)". + Closing the file descriptor and then letting the usual atexit-run + close_stdout function call "fclose (stdout)" would result in a + harmless failure of the close syscall (with errno EBADF). + This function serves solely to avoid the unnecessary close_stdout + call, once parse_long_options has succeeded. + Meanwhile, we guarantee that the standard error stream is flushed, + by inlining the last half of close_stdout as needed. */ +static void +maybe_close_stdout (void) +{ + if (close_stdout_required) + close_stdout (); + else if (close_stream (stderr) != 0) + _exit (EXIT_FAILURE); +} + +/* Like the 'error' function but handle any pending newline, + and do not exit. */ + +ATTRIBUTE_FORMAT ((__printf__, 2, 3)) +static void +diagnose (int errnum, char const *fmt, ...) +{ + if (0 < progress_len) + { + fputc ('\n', stderr); + progress_len = 0; + } + + va_list ap; + va_start (ap, fmt); + verror (0, errnum, fmt, ap); + va_end (ap); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPERAND]...\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + fputs (_("\ +Copy a file, converting and formatting according to the operands.\n\ +\n\ + bs=BYTES read and write up to BYTES bytes at a time (default: 512);\n\ + overrides ibs and obs\n\ + cbs=BYTES convert BYTES bytes at a time\n\ + conv=CONVS convert the file as per the comma separated symbol list\n\ + count=N copy only N input blocks\n\ + ibs=BYTES read up to BYTES bytes at a time (default: 512)\n\ +"), stdout); + fputs (_("\ + if=FILE read from FILE instead of stdin\n\ + iflag=FLAGS read as per the comma separated symbol list\n\ + obs=BYTES write BYTES bytes at a time (default: 512)\n\ + of=FILE write to FILE instead of stdout\n\ + oflag=FLAGS write as per the comma separated symbol list\n\ + seek=N (or oseek=N) skip N obs-sized output blocks\n\ + skip=N (or iseek=N) skip N ibs-sized input blocks\n\ + status=LEVEL The LEVEL of information to print to stderr;\n\ + 'none' suppresses everything but error messages,\n\ + 'noxfer' suppresses the final transfer statistics,\n\ + 'progress' shows periodic transfer statistics\n\ +"), stdout); + fputs (_("\ +\n\ +N and BYTES may be followed by the following multiplicative suffixes:\n\ +c=1, w=2, b=512, kB=1000, K=1024, MB=1000*1000, M=1024*1024, xM=M,\n\ +GB=1000*1000*1000, G=1024*1024*1024, and so on for T, P, E, Z, Y, R, Q.\n\ +Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\ +If N ends in 'B', it counts bytes not blocks.\n\ +\n\ +Each CONV symbol may be:\n\ +\n\ +"), stdout); + fputs (_("\ + ascii from EBCDIC to ASCII\n\ + ebcdic from ASCII to EBCDIC\n\ + ibm from ASCII to alternate EBCDIC\n\ + block pad newline-terminated records with spaces to cbs-size\n\ + unblock replace trailing spaces in cbs-size records with newline\n\ + lcase change upper case to lower case\n\ + ucase change lower case to upper case\n\ + sparse try to seek rather than write all-NUL output blocks\n\ + swab swap every pair of input bytes\n\ + sync pad every input block with NULs to ibs-size; when used\n\ + with block or unblock, pad with spaces rather than NULs\n\ +"), stdout); + fputs (_("\ + excl fail if the output file already exists\n\ + nocreat do not create the output file\n\ + notrunc do not truncate the output file\n\ + noerror continue after read errors\n\ + fdatasync physically write output file data before finishing\n\ + fsync likewise, but also write metadata\n\ +"), stdout); + fputs (_("\ +\n\ +Each FLAG symbol may be:\n\ +\n\ + append append mode (makes sense only for output; conv=notrunc suggested)\n\ +"), stdout); + if (O_CIO) + fputs (_(" cio use concurrent I/O for data\n"), stdout); + if (O_DIRECT) + fputs (_(" direct use direct I/O for data\n"), stdout); + if (O_DIRECTORY) + fputs (_(" directory fail unless a directory\n"), stdout); + if (O_DSYNC) + fputs (_(" dsync use synchronized I/O for data\n"), stdout); + if (O_SYNC) + fputs (_(" sync likewise, but also for metadata\n"), stdout); + fputs (_(" fullblock accumulate full blocks of input (iflag only)\n"), + stdout); + if (O_NONBLOCK) + fputs (_(" nonblock use non-blocking I/O\n"), stdout); + if (O_NOATIME) + fputs (_(" noatime do not update access time\n"), stdout); +#if HAVE_POSIX_FADVISE + if (O_NOCACHE) + fputs (_(" nocache Request to drop cache. See also oflag=sync\n"), + stdout); +#endif + if (O_NOCTTY) + fputs (_(" noctty do not assign controlling terminal from file\n"), + stdout); + if (HAVE_WORKING_O_NOFOLLOW) + fputs (_(" nofollow do not follow symlinks\n"), stdout); + if (O_NOLINKS) + fputs (_(" nolinks fail if multiply-linked\n"), stdout); + if (O_BINARY) + fputs (_(" binary use binary I/O for data\n"), stdout); + if (O_TEXT) + fputs (_(" text use text I/O for data\n"), stdout); + + { + printf (_("\ +\n\ +Sending a %s signal to a running 'dd' process makes it\n\ +print I/O statistics to standard error and then resume copying.\n\ +\n\ +Options are:\n\ +\n\ +"), SIGINFO == SIGUSR1 ? "USR1" : "INFO"); + } + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Common options to use when displaying sizes and rates. */ + +enum { human_opts = (human_autoscale | human_round_to_nearest + | human_space_before_unit | human_SI | human_B) }; + +/* Ensure input buffer IBUF is allocated. */ + +static void +alloc_ibuf (void) +{ + if (ibuf) + return; + + bool extra_byte_for_swab = !!(conversions_mask & C_SWAB); + ibuf = alignalloc (page_size, input_blocksize + extra_byte_for_swab); + if (!ibuf) + { + char hbuf[LONGEST_HUMAN_READABLE + 1]; + error (EXIT_FAILURE, 0, + _("memory exhausted by input buffer of size %td bytes (%s)"), + input_blocksize, + human_readable (input_blocksize, hbuf, + human_opts | human_base_1024, 1, 1)); + } +} + +/* Ensure output buffer OBUF is allocated/initialized. */ + +static void +alloc_obuf (void) +{ + if (obuf) + return; + + if (conversions_mask & C_TWOBUFS) + { + obuf = alignalloc (page_size, output_blocksize); + if (!obuf) + { + char hbuf[LONGEST_HUMAN_READABLE + 1]; + error (EXIT_FAILURE, 0, + _("memory exhausted by output buffer of size %td" + " bytes (%s)"), + output_blocksize, + human_readable (output_blocksize, hbuf, + human_opts | human_base_1024, 1, 1)); + } + } + else + { + alloc_ibuf (); + obuf = ibuf; + } +} + +static void +translate_charset (char const *new_trans) +{ + for (int i = 0; i < 256; i++) + trans_table[i] = new_trans[trans_table[i]]; + translation_needed = true; +} + +/* Return true if I has more than one bit set. I must be nonnegative. */ + +static inline bool +multiple_bits_set (int i) +{ + return MULTIPLE_BITS_SET (i); +} + +static bool +abbreviation_lacks_prefix (char const *message) +{ + return message[strlen (message) - 2] == ' '; +} + +/* Print transfer statistics. */ + +static void +print_xfer_stats (xtime_t progress_time) +{ + xtime_t now = progress_time ? progress_time : gethrxtime (); + static char const slash_s[] = "/s"; + char hbuf[3][LONGEST_HUMAN_READABLE + sizeof slash_s]; + double delta_s; + char const *bytes_per_second; + char const *si = human_readable (w_bytes, hbuf[0], human_opts, 1, 1); + char const *iec = human_readable (w_bytes, hbuf[1], + human_opts | human_base_1024, 1, 1); + + /* Use integer arithmetic to compute the transfer rate, + since that makes it easy to use SI abbreviations. */ + char *bpsbuf = hbuf[2]; + int bpsbufsize = sizeof hbuf[2]; + if (start_time < now) + { + double XTIME_PRECISIONe0 = XTIME_PRECISION; + xtime_t delta_xtime = now - start_time; + delta_s = delta_xtime / XTIME_PRECISIONe0; + bytes_per_second = human_readable (w_bytes, bpsbuf, human_opts, + XTIME_PRECISION, delta_xtime); + strcat (bytes_per_second - bpsbuf + bpsbuf, slash_s); + } + else + { + delta_s = 0; + snprintf (bpsbuf, bpsbufsize, "%s B/s", _("Infinity")); + bytes_per_second = bpsbuf; + } + + if (progress_time) + fputc ('\r', stderr); + + /* Use full seconds when printing progress, since the progress + report is output once per second and there is little point + displaying any subsecond jitter. Use default precision with %g + otherwise, as this provides more-useful output then. With long + transfers %g can generate a number with an exponent; that is OK. */ + char delta_s_buf[24]; + snprintf (delta_s_buf, sizeof delta_s_buf, + progress_time ? "%.0f s" : "%g s", delta_s); + + int stats_len + = (abbreviation_lacks_prefix (si) + ? fprintf (stderr, + ngettext ("%"PRIdMAX" byte copied, %s, %s", + "%"PRIdMAX" bytes copied, %s, %s", + select_plural (w_bytes)), + w_bytes, delta_s_buf, bytes_per_second) + : abbreviation_lacks_prefix (iec) + ? fprintf (stderr, + _("%"PRIdMAX" bytes (%s) copied, %s, %s"), + w_bytes, si, delta_s_buf, bytes_per_second) + : fprintf (stderr, + _("%"PRIdMAX" bytes (%s, %s) copied, %s, %s"), + w_bytes, si, iec, delta_s_buf, bytes_per_second)); + + if (progress_time) + { + /* Erase any trailing junk on the output line by outputting + spaces. In theory this could glitch the display because the + formatted translation of a line describing a larger file + could consume fewer screen columns than the strlen difference + from the previously formatted translation. In practice this + does not seem to be a problem. */ + if (0 <= stats_len && stats_len < progress_len) + fprintf (stderr, "%*s", progress_len - stats_len, ""); + progress_len = stats_len; + } + else + fputc ('\n', stderr); + + reported_w_bytes = w_bytes; +} + +static void +print_stats (void) +{ + if (status_level == STATUS_NONE) + return; + + if (0 < progress_len) + { + fputc ('\n', stderr); + progress_len = 0; + } + + fprintf (stderr, + _("%"PRIdMAX"+%"PRIdMAX" records in\n" + "%"PRIdMAX"+%"PRIdMAX" records out\n"), + r_full, r_partial, w_full, w_partial); + + if (r_truncate != 0) + fprintf (stderr, + ngettext ("%"PRIdMAX" truncated record\n", + "%"PRIdMAX" truncated records\n", + select_plural (r_truncate)), + r_truncate); + + if (status_level == STATUS_NOXFER) + return; + + print_xfer_stats (0); +} + +/* An ordinary signal was received; arrange for the program to exit. */ + +static void +interrupt_handler (int sig) +{ + if (! SA_RESETHAND) + signal (sig, SIG_DFL); + interrupt_signal = sig; +} + +/* An info signal was received; arrange for the program to print status. */ + +static void +siginfo_handler (int sig) +{ + if (! SA_NOCLDSTOP) + signal (sig, siginfo_handler); + info_signal_count++; +} + +/* Install the signal handlers. */ + +static void +install_signal_handlers (void) +{ + bool catch_siginfo = ! (SIGINFO == SIGUSR1 && getenv ("POSIXLY_CORRECT")); + +#if SA_NOCLDSTOP + + struct sigaction act; + sigemptyset (&caught_signals); + if (catch_siginfo) + sigaddset (&caught_signals, SIGINFO); + sigaction (SIGINT, nullptr, &act); + if (act.sa_handler != SIG_IGN) + sigaddset (&caught_signals, SIGINT); + act.sa_mask = caught_signals; + + if (sigismember (&caught_signals, SIGINFO)) + { + act.sa_handler = siginfo_handler; + /* Note we don't use SA_RESTART here and instead + handle EINTR explicitly in iftruncate etc. + to avoid blocking on uncommitted read/write calls. */ + act.sa_flags = 0; + sigaction (SIGINFO, &act, nullptr); + } + + if (sigismember (&caught_signals, SIGINT)) + { + act.sa_handler = interrupt_handler; + act.sa_flags = SA_NODEFER | SA_RESETHAND; + sigaction (SIGINT, &act, nullptr); + } + +#else + + if (catch_siginfo) + { + signal (SIGINFO, siginfo_handler); + siginterrupt (SIGINFO, 1); + } + if (signal (SIGINT, SIG_IGN) != SIG_IGN) + { + signal (SIGINT, interrupt_handler); + siginterrupt (SIGINT, 1); + } +#endif +} + +/* Close FD. Return 0 if successful, -1 (setting errno) otherwise. + If close fails with errno == EINTR, POSIX says the file descriptor + is in an unspecified state, so keep trying to close FD but do not + consider EBADF to be an error. Do not process signals. This all + differs somewhat from functions like ifdatasync and ifsync. */ +static int +iclose (int fd) +{ + if (close (fd) != 0) + do + if (errno != EINTR) + return -1; + while (close (fd) != 0 && errno != EBADF); + + return 0; +} + +static int synchronize_output (void); + +static void +cleanup (void) +{ + if (!interrupt_signal) + { + int sync_status = synchronize_output (); + if (sync_status) + exit (sync_status); + } + + if (iclose (STDIN_FILENO) != 0) + error (EXIT_FAILURE, errno, _("closing input file %s"), + quoteaf (input_file)); + + /* Don't remove this call to close, even though close_stdout + closes standard output. This close is necessary when cleanup + is called as a consequence of signal handling. */ + if (iclose (STDOUT_FILENO) != 0) + error (EXIT_FAILURE, errno, + _("closing output file %s"), quoteaf (output_file)); +} + +/* Process any pending signals. If signals are caught, this function + should be called periodically. Ideally there should never be an + unbounded amount of time when signals are not being processed. */ + +static void +process_signals (void) +{ + while (interrupt_signal || info_signal_count) + { + int interrupt; + int infos; + sigset_t oldset; + + sigprocmask (SIG_BLOCK, &caught_signals, &oldset); + + /* Reload interrupt_signal and info_signal_count, in case a new + signal was handled before sigprocmask took effect. */ + interrupt = interrupt_signal; + infos = info_signal_count; + + if (infos) + info_signal_count = infos - 1; + + sigprocmask (SIG_SETMASK, &oldset, nullptr); + + if (interrupt) + cleanup (); + print_stats (); + if (interrupt) + raise (interrupt); + } +} + +static void +finish_up (void) +{ + /* Process signals first, so that cleanup is called at most once. */ + process_signals (); + cleanup (); + print_stats (); +} + +static void +quit (int code) +{ + finish_up (); + exit (code); +} + +/* Return LEN rounded down to a multiple of IO_BUFSIZE + (to minimize calls to the expensive posix_fadvise (,POSIX_FADV_DONTNEED), + while storing the remainder internally per FD. + Pass LEN == 0 to get the current remainder. */ + +static off_t +cache_round (int fd, off_t len) +{ + static off_t i_pending, o_pending; + off_t *pending = (fd == STDIN_FILENO ? &i_pending : &o_pending); + + if (len) + { + intmax_t c_pending; + if (ckd_add (&c_pending, *pending, len)) + c_pending = INTMAX_MAX; + *pending = c_pending % IO_BUFSIZE; + if (c_pending > *pending) + len = c_pending - *pending; + else + len = 0; + } + else + len = *pending; + + return len; +} + +/* Discard the cache from the current offset of either + STDIN_FILENO or STDOUT_FILENO. + Return true on success. */ + +static bool +invalidate_cache (int fd, off_t len) +{ + int adv_ret = -1; + off_t offset; + bool nocache_eof = (fd == STDIN_FILENO ? i_nocache_eof : o_nocache_eof); + + /* Minimize syscalls. */ + off_t clen = cache_round (fd, len); + if (len && !clen) + return true; /* Don't advise this time. */ + else if (! len && ! clen && ! nocache_eof) + return true; + off_t pending = len ? cache_round (fd, 0) : 0; + + if (fd == STDIN_FILENO) + { + if (input_seekable) + offset = input_offset; + else + { + offset = -1; + errno = ESPIPE; + } + } + else + { + static off_t output_offset = -2; + + if (output_offset != -1) + { + if (output_offset < 0) + output_offset = lseek (fd, 0, SEEK_CUR); + else if (len) + output_offset += clen + pending; + } + + offset = output_offset; + } + + if (0 <= offset) + { + if (! len && clen && nocache_eof) + { + pending = clen; + clen = 0; + } + + /* Note we're being careful here to only invalidate what + we've read, so as not to dump any read ahead cache. + Note also the kernel is conservative and only invalidates + full pages in the specified range. */ +#if HAVE_POSIX_FADVISE + offset = offset - clen - pending; + /* ensure full page specified when invalidating to eof. */ + if (clen == 0) + offset -= offset % page_size; + adv_ret = posix_fadvise (fd, offset, clen, POSIX_FADV_DONTNEED); +#else + errno = ENOTSUP; +#endif + } + + return adv_ret != -1 ? true : false; +} + +/* Read from FD into the buffer BUF of size SIZE, processing any + signals that arrive before bytes are read. Return the number of + bytes read if successful, -1 (setting errno) on failure. */ + +static ssize_t +iread (int fd, char *buf, idx_t size) +{ + ssize_t nread; + static ssize_t prev_nread; + + do + { + process_signals (); + nread = read (fd, buf, size); + /* Ignore final read error with iflag=direct as that + returns EINVAL due to the non aligned file offset. */ + if (nread == -1 && errno == EINVAL + && 0 < prev_nread && prev_nread < size + && (input_flags & O_DIRECT)) + { + errno = 0; + nread = 0; + } + } + while (nread < 0 && errno == EINTR); + + /* Short read may be due to received signal. */ + if (0 < nread && nread < size) + process_signals (); + + if (0 < nread && warn_partial_read) + { + if (0 < prev_nread && prev_nread < size) + { + idx_t prev = prev_nread; + if (status_level != STATUS_NONE) + diagnose (0, ngettext (("warning: partial read (%td byte); " + "suggest iflag=fullblock"), + ("warning: partial read (%td bytes); " + "suggest iflag=fullblock"), + select_plural (prev)), + prev); + warn_partial_read = false; + } + } + + prev_nread = nread; + return nread; +} + +/* Wrapper around iread function to accumulate full blocks. */ +static ssize_t +iread_fullblock (int fd, char *buf, idx_t size) +{ + ssize_t nread = 0; + + while (0 < size) + { + ssize_t ncurr = iread (fd, buf, size); + if (ncurr < 0) + return ncurr; + if (ncurr == 0) + break; + nread += ncurr; + buf += ncurr; + size -= ncurr; + } + + return nread; +} + +/* Write to FD the buffer BUF of size SIZE, processing any signals + that arrive. Return the number of bytes written, setting errno if + this is less than SIZE. Keep trying if there are partial + writes. */ + +static idx_t +iwrite (int fd, char const *buf, idx_t size) +{ + idx_t total_written = 0; + + if ((output_flags & O_DIRECT) && size < output_blocksize) + { + int old_flags = fcntl (STDOUT_FILENO, F_GETFL); + if (fcntl (STDOUT_FILENO, F_SETFL, old_flags & ~O_DIRECT) != 0 + && status_level != STATUS_NONE) + diagnose (errno, _("failed to turn off O_DIRECT: %s"), + quotef (output_file)); + + /* Since we have just turned off O_DIRECT for the final write, + we try to preserve some of its semantics. */ + + /* Call invalidate_cache to setup the appropriate offsets + for subsequent calls. */ + o_nocache_eof = true; + invalidate_cache (STDOUT_FILENO, 0); + + /* Attempt to ensure that that final block is committed + to stable storage as quickly as possible. */ + conversions_mask |= C_FSYNC; + + /* After the subsequent fsync we'll call invalidate_cache + to attempt to clear all data from the page cache. */ + } + + while (total_written < size) + { + ssize_t nwritten = 0; + process_signals (); + + /* Perform a seek for a NUL block if sparse output is enabled. */ + final_op_was_seek = false; + if ((conversions_mask & C_SPARSE) && is_nul (buf, size)) + { + if (lseek (fd, size, SEEK_CUR) < 0) + { + conversions_mask &= ~C_SPARSE; + /* Don't warn about the advisory sparse request. */ + } + else + { + final_op_was_seek = true; + nwritten = size; + } + } + + if (!nwritten) + nwritten = write (fd, buf + total_written, size - total_written); + + if (nwritten < 0) + { + if (errno != EINTR) + break; + } + else if (nwritten == 0) + { + /* Some buggy drivers return 0 when one tries to write beyond + a device's end. (Example: Linux kernel 1.2.13 on /dev/fd0.) + Set errno to ENOSPC so they get a sensible diagnostic. */ + errno = ENOSPC; + break; + } + else + total_written += nwritten; + } + + if (o_nocache && total_written) + invalidate_cache (fd, total_written); + + return total_written; +} + +/* Write, then empty, the output buffer 'obuf'. */ + +static void +write_output (void) +{ + idx_t nwritten = iwrite (STDOUT_FILENO, obuf, output_blocksize); + w_bytes += nwritten; + if (nwritten != output_blocksize) + { + diagnose (errno, _("writing to %s"), quoteaf (output_file)); + if (nwritten != 0) + w_partial++; + quit (EXIT_FAILURE); + } + else + w_full++; + oc = 0; +} + +/* Restart on EINTR from fdatasync. */ + +static int +ifdatasync (int fd) +{ + int ret; + + do + { + process_signals (); + ret = fdatasync (fd); + } + while (ret < 0 && errno == EINTR); + + return ret; +} + +/* Restart on EINTR from fd_reopen. */ + +static int +ifd_reopen (int desired_fd, char const *file, int flag, mode_t mode) +{ + int ret; + + do + { + process_signals (); + ret = fd_reopen (desired_fd, file, flag, mode); + } + while (ret < 0 && errno == EINTR); + + return ret; +} + +/* Restart on EINTR from fstat. */ + +static int +ifstat (int fd, struct stat *st) +{ + int ret; + + do + { + process_signals (); + ret = fstat (fd, st); + } + while (ret < 0 && errno == EINTR); + + return ret; +} + +/* Restart on EINTR from fsync. */ + +static int +ifsync (int fd) +{ + int ret; + + do + { + process_signals (); + ret = fsync (fd); + } + while (ret < 0 && errno == EINTR); + + return ret; +} + +/* Restart on EINTR from ftruncate. */ + +static int +iftruncate (int fd, off_t length) +{ + int ret; + + do + { + process_signals (); + ret = ftruncate (fd, length); + } + while (ret < 0 && errno == EINTR); + + return ret; +} + +/* Return true if STR is of the form "PATTERN" or "PATTERNDELIM...". */ + +ATTRIBUTE_PURE +static bool +operand_matches (char const *str, char const *pattern, char delim) +{ + while (*pattern) + if (*str++ != *pattern++) + return false; + return !*str || *str == delim; +} + +/* Interpret one "conv=..." or similar operand STR according to the + symbols in TABLE, returning the flags specified. If the operand + cannot be parsed, use ERROR_MSGID to generate a diagnostic. */ + +static int +parse_symbols (char const *str, struct symbol_value const *table, + bool exclusive, char const *error_msgid) +{ + int value = 0; + + while (true) + { + char const *strcomma = strchr (str, ','); + struct symbol_value const *entry; + + for (entry = table; + ! (operand_matches (str, entry->symbol, ',') && entry->value); + entry++) + { + if (! entry->symbol[0]) + { + idx_t slen = strcomma ? strcomma - str : strlen (str); + diagnose (0, "%s: %s", _(error_msgid), + quotearg_n_style_mem (0, locale_quoting_style, + str, slen)); + usage (EXIT_FAILURE); + } + } + + if (exclusive) + value = entry->value; + else + value |= entry->value; + if (!strcomma) + break; + str = strcomma + 1; + } + + return value; +} + +/* Return the value of STR, interpreted as a non-negative decimal integer, + optionally multiplied by various values. + Set *INVALID to an appropriate error value and return INTMAX_MAX if + it is an overflow, an indeterminate value if some other error occurred. */ + +static intmax_t +parse_integer (char const *str, strtol_error *invalid) +{ + /* Call xstrtoumax, not xstrtoimax, since we don't want to + allow strings like " -0". Initialize N to an indeterminate value; + calling code should not rely on this function returning 0 + when *INVALID represents a non-overflow error. */ + int indeterminate = 0; + uintmax_t n = indeterminate; + char *suffix; + static char const suffixes[] = "bcEGkKMPQRTwYZ0"; + strtol_error e = xstrtoumax (str, &suffix, 10, &n, suffixes); + intmax_t result; + + if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR + && *suffix == 'B' && str < suffix && suffix[-1] != 'B') + { + suffix++; + if (!*suffix) + e &= ~LONGINT_INVALID_SUFFIX_CHAR; + } + + if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR + && *suffix == 'x') + { + strtol_error f = LONGINT_OK; + intmax_t o = parse_integer (suffix + 1, &f); + if ((f & ~LONGINT_OVERFLOW) != LONGINT_OK) + { + e = f; + result = indeterminate; + } + else if (ckd_mul (&result, n, o) + || (result != 0 && ((e | f) & LONGINT_OVERFLOW))) + { + e = LONGINT_OVERFLOW; + result = INTMAX_MAX; + } + else + { + if (result == 0 && STRPREFIX (str, "0x")) + diagnose (0, _("warning: %s is a zero multiplier; " + "use %s if that is intended"), + quote_n (0, "0x"), quote_n (1, "00x")); + e = LONGINT_OK; + } + } + else if (n <= INTMAX_MAX) + result = n; + else + { + e = LONGINT_OVERFLOW; + result = INTMAX_MAX; + } + + *invalid = e; + return result; +} + +/* OPERAND is of the form "X=...". Return true if X is NAME. */ + +ATTRIBUTE_PURE +static bool +operand_is (char const *operand, char const *name) +{ + return operand_matches (operand, name, '='); +} + +static void +scanargs (int argc, char *const *argv) +{ + idx_t blocksize = 0; + intmax_t count = INTMAX_MAX; + intmax_t skip = 0; + intmax_t seek = 0; + bool count_B = false, skip_B = false, seek_B = false; + + for (int i = optind; i < argc; i++) + { + char const *name = argv[i]; + char const *val = strchr (name, '='); + + if (val == nullptr) + { + diagnose (0, _("unrecognized operand %s"), quoteaf (name)); + usage (EXIT_FAILURE); + } + val++; + + if (operand_is (name, "if")) + input_file = val; + else if (operand_is (name, "of")) + output_file = val; + else if (operand_is (name, "conv")) + conversions_mask |= parse_symbols (val, conversions, false, + N_("invalid conversion")); + else if (operand_is (name, "iflag")) + input_flags |= parse_symbols (val, flags, false, + N_("invalid input flag")); + else if (operand_is (name, "oflag")) + output_flags |= parse_symbols (val, flags, false, + N_("invalid output flag")); + else if (operand_is (name, "status")) + status_level = parse_symbols (val, statuses, true, + N_("invalid status level")); + else + { + strtol_error invalid = LONGINT_OK; + intmax_t n = parse_integer (val, &invalid); + bool has_B = !!strchr (val, 'B'); + intmax_t n_min = 0; + intmax_t n_max = INTMAX_MAX; + idx_t *converted_idx = nullptr; + + /* Maximum blocksize. Keep it smaller than IDX_MAX, so that + it fits into blocksize vars even if 1 is added for conv=swab. + Do not exceed SSIZE_MAX, for the benefit of system calls + like "read". And do not exceed OFF_T_MAX, for the + benefit of the large-offset seek code. */ + idx_t max_blocksize = MIN (IDX_MAX - 1, MIN (SSIZE_MAX, OFF_T_MAX)); + + if (operand_is (name, "ibs")) + { + n_min = 1; + n_max = max_blocksize; + converted_idx = &input_blocksize; + } + else if (operand_is (name, "obs")) + { + n_min = 1; + n_max = max_blocksize; + converted_idx = &output_blocksize; + } + else if (operand_is (name, "bs")) + { + n_min = 1; + n_max = max_blocksize; + converted_idx = &blocksize; + } + else if (operand_is (name, "cbs")) + { + n_min = 1; + n_max = MIN (SIZE_MAX, IDX_MAX); + converted_idx = &conversion_blocksize; + } + else if (operand_is (name, "skip") || operand_is (name, "iseek")) + { + skip = n; + skip_B = has_B; + } + else if (operand_is (name + (*name == 'o'), "seek")) + { + seek = n; + seek_B = has_B; + } + else if (operand_is (name, "count")) + { + count = n; + count_B = has_B; + } + else + { + diagnose (0, _("unrecognized operand %s"), quoteaf (name)); + usage (EXIT_FAILURE); + } + + if (n < n_min) + invalid = LONGINT_INVALID; + else if (n_max < n) + invalid = LONGINT_OVERFLOW; + + if (invalid != LONGINT_OK) + error (EXIT_FAILURE, invalid == LONGINT_OVERFLOW ? EOVERFLOW : 0, + "%s: %s", _("invalid number"), quoteaf (val)); + else if (converted_idx) + *converted_idx = n; + } + } + + if (blocksize) + input_blocksize = output_blocksize = blocksize; + else + { + /* POSIX says dd aggregates partial reads into + output_blocksize if bs= is not specified. */ + conversions_mask |= C_TWOBUFS; + } + + if (input_blocksize == 0) + input_blocksize = DEFAULT_BLOCKSIZE; + if (output_blocksize == 0) + output_blocksize = DEFAULT_BLOCKSIZE; + if (conversion_blocksize == 0) + conversions_mask &= ~(C_BLOCK | C_UNBLOCK); + + if (input_flags & (O_DSYNC | O_SYNC)) + input_flags |= O_RSYNC; + + if (output_flags & O_FULLBLOCK) + { + diagnose (0, "%s: %s", _("invalid output flag"), quote ("fullblock")); + usage (EXIT_FAILURE); + } + + if (skip_B) + input_flags |= O_SKIP_BYTES; + if (input_flags & O_SKIP_BYTES && skip != 0) + { + skip_records = skip / input_blocksize; + skip_bytes = skip % input_blocksize; + } + else if (skip != 0) + skip_records = skip; + + if (count_B) + input_flags |= O_COUNT_BYTES; + if (input_flags & O_COUNT_BYTES && count != INTMAX_MAX) + { + max_records = count / input_blocksize; + max_bytes = count % input_blocksize; + } + else if (count != INTMAX_MAX) + max_records = count; + + if (seek_B) + output_flags |= O_SEEK_BYTES; + if (output_flags & O_SEEK_BYTES && seek != 0) + { + seek_records = seek / output_blocksize; + seek_bytes = seek % output_blocksize; + } + else if (seek != 0) + seek_records = seek; + + /* Warn about partial reads if bs=SIZE is given and iflag=fullblock + is not, and if counting or skipping bytes or using direct I/O. + This helps to avoid confusion with miscounts, and to avoid issues + with direct I/O on GNU/Linux. */ + warn_partial_read = + (! (conversions_mask & C_TWOBUFS) && ! (input_flags & O_FULLBLOCK) + && (skip_records + || (0 < max_records && max_records < INTMAX_MAX) + || (input_flags | output_flags) & O_DIRECT)); + + iread_fnc = ((input_flags & O_FULLBLOCK) + ? iread_fullblock + : iread); + input_flags &= ~O_FULLBLOCK; + + if (multiple_bits_set (conversions_mask & (C_ASCII | C_EBCDIC | C_IBM))) + error (EXIT_FAILURE, 0, _("cannot combine any two of {ascii,ebcdic,ibm}")); + if (multiple_bits_set (conversions_mask & (C_BLOCK | C_UNBLOCK))) + error (EXIT_FAILURE, 0, _("cannot combine block and unblock")); + if (multiple_bits_set (conversions_mask & (C_LCASE | C_UCASE))) + error (EXIT_FAILURE, 0, _("cannot combine lcase and ucase")); + if (multiple_bits_set (conversions_mask & (C_EXCL | C_NOCREAT))) + error (EXIT_FAILURE, 0, _("cannot combine excl and nocreat")); + if (multiple_bits_set (input_flags & (O_DIRECT | O_NOCACHE)) + || multiple_bits_set (output_flags & (O_DIRECT | O_NOCACHE))) + error (EXIT_FAILURE, 0, _("cannot combine direct and nocache")); + + if (input_flags & O_NOCACHE) + { + i_nocache = true; + i_nocache_eof = (max_records == 0 && max_bytes == 0); + input_flags &= ~O_NOCACHE; + } + if (output_flags & O_NOCACHE) + { + o_nocache = true; + o_nocache_eof = (max_records == 0 && max_bytes == 0); + output_flags &= ~O_NOCACHE; + } +} + +/* Fix up translation table. */ + +static void +apply_translations (void) +{ + int i; + + if (conversions_mask & C_ASCII) + translate_charset (ebcdic_to_ascii); + + if (conversions_mask & C_UCASE) + { + for (i = 0; i < 256; i++) + trans_table[i] = toupper (trans_table[i]); + translation_needed = true; + } + else if (conversions_mask & C_LCASE) + { + for (i = 0; i < 256; i++) + trans_table[i] = tolower (trans_table[i]); + translation_needed = true; + } + + if (conversions_mask & C_EBCDIC) + { + translate_charset (ascii_to_ebcdic); + newline_character = ascii_to_ebcdic['\n']; + space_character = ascii_to_ebcdic[' ']; + } + else if (conversions_mask & C_IBM) + { + translate_charset (ascii_to_ibm); + newline_character = ascii_to_ibm['\n']; + space_character = ascii_to_ibm[' ']; + } +} + +/* Apply the character-set translations specified by the user + to the NREAD bytes in BUF. */ + +static void +translate_buffer (char *buf, idx_t nread) +{ + idx_t i; + char *cp; + for (i = nread, cp = buf; i; i--, cp++) + *cp = trans_table[to_uchar (*cp)]; +} + +/* Swap *NREAD bytes in BUF, which should have room for an extra byte + after the end because the swapping is not in-place. If *SAVED_BYTE + is nonnegative, also swap that initial byte from the previous call. + Save the last byte into into *SAVED_BYTE if needed to make the + resulting *NREAD even, and set *SAVED_BYTE to -1 otherwise. + Return the buffer's adjusted start, either BUF or BUF + 1. */ + +static char * +swab_buffer (char *buf, idx_t *nread, int *saved_byte) +{ + if (*nread == 0) + return buf; + + /* Update *SAVED_BYTE, and set PREV_SAVED to its old value. */ + int prev_saved = *saved_byte; + if ((prev_saved < 0) == (*nread & 1)) + { + unsigned char c = buf[--*nread]; + *saved_byte = c; + } + else + *saved_byte = -1; + + /* Do the byte-swapping by moving every other byte two + positions toward the end, working from the end of the buffer + toward the beginning. This way we move only half the data. */ + for (idx_t i = *nread; 1 < i; i -= 2) + buf[i] = buf[i - 2]; + + if (prev_saved < 0) + return buf + 1; + + buf[1] = prev_saved; + ++*nread; + return buf; +} + +/* Add OFFSET to the input offset, setting the overflow flag if + necessary. */ + +static void +advance_input_offset (intmax_t offset) +{ + if (0 <= input_offset && ckd_add (&input_offset, input_offset, offset)) + input_offset = -1; +} + +/* Throw away RECORDS blocks of BLOCKSIZE bytes plus BYTES bytes on + file descriptor FDESC, which is open with read permission for FILE. + Store up to BLOCKSIZE bytes of the data at a time in IBUF or OBUF, if + necessary. RECORDS or BYTES must be nonzero. If FDESC is + STDIN_FILENO, advance the input offset. Return the number of + records remaining, i.e., that were not skipped because EOF was + reached. If FDESC is STDOUT_FILENO, on return, BYTES is the + remaining bytes in addition to the remaining records. */ + +static intmax_t +skip (int fdesc, char const *file, intmax_t records, idx_t blocksize, + idx_t *bytes) +{ + /* Try lseek and if an error indicates it was an inappropriate operation -- + or if the file offset is not representable as an off_t -- + fall back on using read. */ + + errno = 0; + off_t offset; + if (! ckd_mul (&offset, records, blocksize) + && ! ckd_add (&offset, offset, *bytes) + && 0 <= lseek (fdesc, offset, SEEK_CUR)) + { + if (fdesc == STDIN_FILENO) + { + struct stat st; + if (ifstat (STDIN_FILENO, &st) != 0) + error (EXIT_FAILURE, errno, _("cannot fstat %s"), quoteaf (file)); + if (usable_st_size (&st) && 0 <= input_offset + && st.st_size - input_offset < offset) + { + /* When skipping past EOF, return the number of _full_ blocks + * that are not skipped, and set offset to EOF, so the caller + * can determine the requested skip was not satisfied. */ + records = ( offset - st.st_size ) / blocksize; + offset = st.st_size - input_offset; + } + else + records = 0; + advance_input_offset (offset); + } + else + { + records = 0; + *bytes = 0; + } + return records; + } + else + { + int lseek_errno = errno; + + /* The seek request may have failed above if it was too big + (> device size, > max file size, etc.) + Or it may not have been done at all (> OFF_T_MAX). + Therefore try to seek to the end of the file, + to avoid redundant reading. */ + if (lseek (fdesc, 0, SEEK_END) >= 0) + { + /* File is seekable, and we're at the end of it, and + size <= OFF_T_MAX. So there's no point using read to advance. */ + + if (!lseek_errno) + { + /* The original seek was not attempted as offset > OFF_T_MAX. + We should error for write as can't get to the desired + location, even if OFF_T_MAX < max file size. + For read we're not going to read any data anyway, + so we should error for consistency. + It would be nice to not error for /dev/{zero,null} + for any offset, but that's not a significant issue. */ + lseek_errno = EOVERFLOW; + } + + diagnose (lseek_errno, + gettext (fdesc == STDIN_FILENO + ? N_("%s: cannot skip") + : N_("%s: cannot seek")), + quotef (file)); + /* If the file has a specific size and we've asked + to skip/seek beyond the max allowable, then quit. */ + quit (EXIT_FAILURE); + } + /* else file_size && offset > OFF_T_MAX or file ! seekable */ + + char *buf; + if (fdesc == STDIN_FILENO) + { + alloc_ibuf (); + buf = ibuf; + } + else + { + alloc_obuf (); + buf = obuf; + } + + do + { + ssize_t nread = iread_fnc (fdesc, buf, records ? blocksize : *bytes); + if (nread < 0) + { + if (fdesc == STDIN_FILENO) + { + diagnose (errno, _("error reading %s"), quoteaf (file)); + if (conversions_mask & C_NOERROR) + print_stats (); + } + else + diagnose (lseek_errno, _("%s: cannot seek"), quotef (file)); + quit (EXIT_FAILURE); + } + else if (nread == 0) + break; + else if (fdesc == STDIN_FILENO) + advance_input_offset (nread); + + if (records != 0) + records--; + else + *bytes = 0; + } + while (records || *bytes); + + return records; + } +} + +/* Advance the input by NBYTES if possible, after a read error. + The input file offset may or may not have advanced after the failed + read; adjust it to point just after the bad record regardless. + Return true if successful, or if the input is already known to not + be seekable. */ + +static bool +advance_input_after_read_error (idx_t nbytes) +{ + if (! input_seekable) + { + if (input_seek_errno == ESPIPE) + return true; + errno = input_seek_errno; + } + else + { + off_t offset; + advance_input_offset (nbytes); + if (input_offset < 0) + { + diagnose (0, _("offset overflow while reading file %s"), + quoteaf (input_file)); + return false; + } + offset = lseek (STDIN_FILENO, 0, SEEK_CUR); + if (0 <= offset) + { + off_t diff; + if (offset == input_offset) + return true; + diff = input_offset - offset; + if (! (0 <= diff && diff <= nbytes) && status_level != STATUS_NONE) + diagnose (0, _("warning: invalid file offset after failed read")); + if (0 <= lseek (STDIN_FILENO, diff, SEEK_CUR)) + return true; + if (errno == 0) + diagnose (0, _("cannot work around kernel bug after all")); + } + } + + diagnose (errno, _("%s: cannot seek"), quotef (input_file)); + return false; +} + +/* Copy NREAD bytes of BUF, with no conversions. */ + +static void +copy_simple (char const *buf, idx_t nread) +{ + char const *start = buf; /* First uncopied char in BUF. */ + + do + { + idx_t nfree = MIN (nread, output_blocksize - oc); + + memcpy (obuf + oc, start, nfree); + + nread -= nfree; /* Update the number of bytes left to copy. */ + start += nfree; + oc += nfree; + if (oc >= output_blocksize) + write_output (); + } + while (nread != 0); +} + +/* Copy NREAD bytes of BUF, doing conv=block + (pad newline-terminated records to 'conversion_blocksize', + replacing the newline with trailing spaces). */ + +static void +copy_with_block (char const *buf, idx_t nread) +{ + for (idx_t i = nread; i; i--, buf++) + { + if (*buf == newline_character) + { + if (col < conversion_blocksize) + { + idx_t j; + for (j = col; j < conversion_blocksize; j++) + output_char (space_character); + } + col = 0; + } + else + { + if (col == conversion_blocksize) + r_truncate++; + else if (col < conversion_blocksize) + output_char (*buf); + col++; + } + } +} + +/* Copy NREAD bytes of BUF, doing conv=unblock + (replace trailing spaces in 'conversion_blocksize'-sized records + with a newline). */ + +static void +copy_with_unblock (char const *buf, idx_t nread) +{ + static idx_t pending_spaces = 0; + + for (idx_t i = 0; i < nread; i++) + { + char c = buf[i]; + + if (col++ >= conversion_blocksize) + { + col = pending_spaces = 0; /* Wipe out any pending spaces. */ + i--; /* Push the char back; get it later. */ + output_char (newline_character); + } + else if (c == space_character) + pending_spaces++; + else + { + /* 'c' is the character after a run of spaces that were not + at the end of the conversion buffer. Output them. */ + while (pending_spaces) + { + output_char (space_character); + --pending_spaces; + } + output_char (c); + } + } +} + +/* Set the file descriptor flags for FD that correspond to the nonzero bits + in ADD_FLAGS. The file's name is NAME. */ + +static void +set_fd_flags (int fd, int add_flags, char const *name) +{ + /* Ignore file creation flags that are no-ops on file descriptors. */ + add_flags &= ~ (O_NOCTTY | O_NOFOLLOW); + + if (add_flags) + { + int old_flags = fcntl (fd, F_GETFL); + int new_flags = old_flags | add_flags; + bool ok = true; + if (old_flags < 0) + ok = false; + else if (old_flags != new_flags) + { + if (new_flags & (O_DIRECTORY | O_NOLINKS)) + { + /* NEW_FLAGS contains at least one file creation flag that + requires some checking of the open file descriptor. */ + struct stat st; + if (ifstat (fd, &st) != 0) + ok = false; + else if ((new_flags & O_DIRECTORY) && ! S_ISDIR (st.st_mode)) + { + errno = ENOTDIR; + ok = false; + } + else if ((new_flags & O_NOLINKS) && 1 < st.st_nlink) + { + errno = EMLINK; + ok = false; + } + new_flags &= ~ (O_DIRECTORY | O_NOLINKS); + } + + if (ok && old_flags != new_flags + && fcntl (fd, F_SETFL, new_flags) == -1) + ok = false; + } + + if (!ok) + error (EXIT_FAILURE, errno, _("setting flags for %s"), quoteaf (name)); + } +} + +/* The main loop. */ + +static int +dd_copy (void) +{ + char *bufstart; /* Input buffer. */ + ssize_t nread; /* Bytes read in the current block. */ + + /* If nonzero, then the previously read block was partial and + PARTREAD was its size. */ + idx_t partread = 0; + + int exit_status = EXIT_SUCCESS; + idx_t n_bytes_read; + + if (skip_records != 0 || skip_bytes != 0) + { + intmax_t us_bytes; + bool us_bytes_overflow = + (ckd_mul (&us_bytes, skip_records, input_blocksize) + || ckd_add (&us_bytes, skip_bytes, us_bytes)); + off_t input_offset0 = input_offset; + intmax_t us_blocks = skip (STDIN_FILENO, input_file, + skip_records, input_blocksize, &skip_bytes); + + /* POSIX doesn't say what to do when dd detects it has been + asked to skip past EOF, so I assume it's non-fatal. + There are 3 reasons why there might be unskipped blocks/bytes: + 1. file is too small + 2. pipe has not enough data + 3. partial reads */ + if ((us_blocks + || (0 <= input_offset + && (us_bytes_overflow + || us_bytes != input_offset - input_offset0))) + && status_level != STATUS_NONE) + { + diagnose (0, _("%s: cannot skip to specified offset"), + quotef (input_file)); + } + } + + if (seek_records != 0 || seek_bytes != 0) + { + idx_t bytes = seek_bytes; + intmax_t write_records = skip (STDOUT_FILENO, output_file, + seek_records, output_blocksize, &bytes); + + if (write_records != 0 || bytes != 0) + { + memset (obuf, 0, write_records ? output_blocksize : bytes); + + do + { + idx_t size = write_records ? output_blocksize : bytes; + if (iwrite (STDOUT_FILENO, obuf, size) != size) + { + diagnose (errno, _("writing to %s"), quoteaf (output_file)); + quit (EXIT_FAILURE); + } + + if (write_records != 0) + write_records--; + else + bytes = 0; + } + while (write_records || bytes); + } + } + + if (max_records == 0 && max_bytes == 0) + return exit_status; + + alloc_ibuf (); + alloc_obuf (); + int saved_byte = -1; + + while (true) + { + if (status_level == STATUS_PROGRESS) + { + xtime_t progress_time = gethrxtime (); + if (next_time <= progress_time) + { + print_xfer_stats (progress_time); + next_time += XTIME_PRECISION; + } + } + + if (r_partial + r_full >= max_records + !!max_bytes) + break; + + /* Zero the buffer before reading, so that if we get a read error, + whatever data we are able to read is followed by zeros. + This minimizes data loss. */ + if ((conversions_mask & C_SYNC) && (conversions_mask & C_NOERROR)) + memset (ibuf, + (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0', + input_blocksize); + + if (r_partial + r_full >= max_records) + nread = iread_fnc (STDIN_FILENO, ibuf, max_bytes); + else + nread = iread_fnc (STDIN_FILENO, ibuf, input_blocksize); + + if (nread > 0) + { + advance_input_offset (nread); + if (i_nocache) + invalidate_cache (STDIN_FILENO, nread); + } + else if (nread == 0) + { + i_nocache_eof |= i_nocache; + o_nocache_eof |= o_nocache && ! (conversions_mask & C_NOTRUNC); + break; /* EOF. */ + } + else + { + if (!(conversions_mask & C_NOERROR) || status_level != STATUS_NONE) + diagnose (errno, _("error reading %s"), quoteaf (input_file)); + + if (conversions_mask & C_NOERROR) + { + print_stats (); + idx_t bad_portion = input_blocksize - partread; + + /* We already know this data is not cached, + but call this so that correct offsets are maintained. */ + invalidate_cache (STDIN_FILENO, bad_portion); + + /* Seek past the bad block if possible. */ + if (!advance_input_after_read_error (bad_portion)) + { + exit_status = EXIT_FAILURE; + + /* Suppress duplicate diagnostics. */ + input_seekable = false; + input_seek_errno = ESPIPE; + } + if ((conversions_mask & C_SYNC) && !partread) + /* Replace the missing input with null bytes and + proceed normally. */ + nread = 0; + else + continue; + } + else + { + /* Write any partial block. */ + exit_status = EXIT_FAILURE; + break; + } + } + + n_bytes_read = nread; + + if (n_bytes_read < input_blocksize) + { + r_partial++; + partread = n_bytes_read; + if (conversions_mask & C_SYNC) + { + if (!(conversions_mask & C_NOERROR)) + /* If C_NOERROR, we zeroed the block before reading. */ + memset (ibuf + n_bytes_read, + (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0', + input_blocksize - n_bytes_read); + n_bytes_read = input_blocksize; + } + } + else + { + r_full++; + partread = 0; + } + + if (ibuf == obuf) /* If not C_TWOBUFS. */ + { + idx_t nwritten = iwrite (STDOUT_FILENO, obuf, n_bytes_read); + w_bytes += nwritten; + if (nwritten != n_bytes_read) + { + diagnose (errno, _("error writing %s"), quoteaf (output_file)); + return EXIT_FAILURE; + } + else if (n_bytes_read == input_blocksize) + w_full++; + else + w_partial++; + continue; + } + + /* Do any translations on the whole buffer at once. */ + + if (translation_needed) + translate_buffer (ibuf, n_bytes_read); + + if (conversions_mask & C_SWAB) + bufstart = swab_buffer (ibuf, &n_bytes_read, &saved_byte); + else + bufstart = ibuf; + + if (conversions_mask & C_BLOCK) + copy_with_block (bufstart, n_bytes_read); + else if (conversions_mask & C_UNBLOCK) + copy_with_unblock (bufstart, n_bytes_read); + else + copy_simple (bufstart, n_bytes_read); + } + + /* If we have a char left as a result of conv=swab, output it. */ + if (0 <= saved_byte) + { + char saved_char = saved_byte; + if (conversions_mask & C_BLOCK) + copy_with_block (&saved_char, 1); + else if (conversions_mask & C_UNBLOCK) + copy_with_unblock (&saved_char, 1); + else + output_char (saved_char); + } + + if ((conversions_mask & C_BLOCK) && col > 0) + { + /* If the final input line didn't end with a '\n', pad + the output block to 'conversion_blocksize' chars. */ + for (idx_t i = col; i < conversion_blocksize; i++) + output_char (space_character); + } + + if (col && (conversions_mask & C_UNBLOCK)) + { + /* If there was any output, add a final '\n'. */ + output_char (newline_character); + } + + /* Write out the last block. */ + if (oc != 0) + { + idx_t nwritten = iwrite (STDOUT_FILENO, obuf, oc); + w_bytes += nwritten; + if (nwritten != 0) + w_partial++; + if (nwritten != oc) + { + diagnose (errno, _("error writing %s"), quoteaf (output_file)); + return EXIT_FAILURE; + } + } + + /* If the last write was converted to a seek, then for a regular file + or shared memory object, ftruncate to extend the size. */ + if (final_op_was_seek) + { + struct stat stdout_stat; + if (ifstat (STDOUT_FILENO, &stdout_stat) != 0) + { + diagnose (errno, _("cannot fstat %s"), quoteaf (output_file)); + return EXIT_FAILURE; + } + if (S_ISREG (stdout_stat.st_mode) || S_TYPEISSHM (&stdout_stat)) + { + off_t output_offset = lseek (STDOUT_FILENO, 0, SEEK_CUR); + if (0 <= output_offset && stdout_stat.st_size < output_offset) + { + if (iftruncate (STDOUT_FILENO, output_offset) != 0) + { + diagnose (errno, _("failed to truncate to %" PRIdMAX " bytes" + " in output file %s"), + (intmax_t) output_offset, quoteaf (output_file)); + return EXIT_FAILURE; + } + } + } + } + + /* fdatasync/fsync can take a long time, so issue a final progress + indication now if progress has been made since the previous indication. */ + if (conversions_mask & (C_FDATASYNC | C_FSYNC) + && status_level == STATUS_PROGRESS + && 0 <= reported_w_bytes && reported_w_bytes < w_bytes) + print_xfer_stats (0); + + return exit_status; +} + +/* Synchronize output according to conversions_mask. + Do this even if w_bytes is zero, as fsync and fdatasync + flush out write requests from other processes too. + Clear bits in conversions_mask so that synchronization is done only once. + Return zero if successful, an exit status otherwise. */ + +static int +synchronize_output (void) +{ + int exit_status = 0; + int mask = conversions_mask; + conversions_mask &= ~ (C_FDATASYNC | C_FSYNC); + + if ((mask & C_FDATASYNC) && ifdatasync (STDOUT_FILENO) != 0) + { + if (errno != ENOSYS && errno != EINVAL) + { + diagnose (errno, _("fdatasync failed for %s"), quoteaf (output_file)); + exit_status = EXIT_FAILURE; + } + mask |= C_FSYNC; + } + + if ((mask & C_FSYNC) && ifsync (STDOUT_FILENO) != 0) + { + diagnose (errno, _("fsync failed for %s"), quoteaf (output_file)); + return EXIT_FAILURE; + } + + return exit_status; +} + +int +main (int argc, char **argv) +{ + int i; + int exit_status; + off_t offset; + + install_signal_handlers (); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* Arrange to close stdout if parse_long_options exits. */ + atexit (maybe_close_stdout); + + page_size = getpagesize (); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE, Version, + true, usage, AUTHORS, + (char const *) nullptr); + close_stdout_required = false; + + /* Initialize translation table to identity translation. */ + for (i = 0; i < 256; i++) + trans_table[i] = i; + + /* Decode arguments. */ + scanargs (argc, argv); + + apply_translations (); + + if (input_file == nullptr) + { + input_file = _("standard input"); + set_fd_flags (STDIN_FILENO, input_flags, input_file); + } + else + { + if (ifd_reopen (STDIN_FILENO, input_file, O_RDONLY | input_flags, 0) < 0) + error (EXIT_FAILURE, errno, _("failed to open %s"), + quoteaf (input_file)); + } + + offset = lseek (STDIN_FILENO, 0, SEEK_CUR); + input_seekable = (0 <= offset); + input_offset = MAX (0, offset); + input_seek_errno = errno; + + if (output_file == nullptr) + { + output_file = _("standard output"); + set_fd_flags (STDOUT_FILENO, output_flags, output_file); + } + else + { + mode_t perms = MODE_RW_UGO; + int opts + = (output_flags + | (conversions_mask & C_NOCREAT ? 0 : O_CREAT) + | (conversions_mask & C_EXCL ? O_EXCL : 0) + | (seek_records || (conversions_mask & C_NOTRUNC) ? 0 : O_TRUNC)); + + off_t size; + if ((ckd_mul (&size, seek_records, output_blocksize) + || ckd_add (&size, seek_bytes, size)) + && !(conversions_mask & C_NOTRUNC)) + error (EXIT_FAILURE, 0, + _("offset too large: " + "cannot truncate to a length of seek=%"PRIdMAX"" + " (%td-byte) blocks"), + seek_records, output_blocksize); + + /* Open the output file with *read* access only if we might + need to read to satisfy a 'seek=' request. If we can't read + the file, go ahead with write-only access; it might work. */ + if ((! seek_records + || ifd_reopen (STDOUT_FILENO, output_file, O_RDWR | opts, perms) < 0) + && (ifd_reopen (STDOUT_FILENO, output_file, O_WRONLY | opts, perms) + < 0)) + error (EXIT_FAILURE, errno, _("failed to open %s"), + quoteaf (output_file)); + + if (seek_records != 0 && !(conversions_mask & C_NOTRUNC)) + { + if (iftruncate (STDOUT_FILENO, size) != 0) + { + /* Complain only when ftruncate fails on a regular file, a + directory, or a shared memory object, as POSIX 1003.1-2004 + specifies ftruncate's behavior only for these file types. + For example, do not complain when Linux kernel 2.4 ftruncate + fails on /dev/fd0. */ + int ftruncate_errno = errno; + struct stat stdout_stat; + if (ifstat (STDOUT_FILENO, &stdout_stat) != 0) + { + diagnose (errno, _("cannot fstat %s"), quoteaf (output_file)); + exit_status = EXIT_FAILURE; + } + else if (S_ISREG (stdout_stat.st_mode) + || S_ISDIR (stdout_stat.st_mode) + || S_TYPEISSHM (&stdout_stat)) + { + intmax_t isize = size; + diagnose (ftruncate_errno, + _("failed to truncate to %"PRIdMAX" bytes" + " in output file %s"), + isize, quoteaf (output_file)); + exit_status = EXIT_FAILURE; + } + } + } + } + + start_time = gethrxtime (); + next_time = start_time + XTIME_PRECISION; + + exit_status = dd_copy (); + + int sync_status = synchronize_output (); + if (sync_status) + exit_status = sync_status; + + if (max_records == 0 && max_bytes == 0) + { + /* Special case to invalidate cache to end of file. */ + if (i_nocache && !invalidate_cache (STDIN_FILENO, 0)) + { + diagnose (errno, _("failed to discard cache for: %s"), + quotef (input_file)); + exit_status = EXIT_FAILURE; + } + if (o_nocache && !invalidate_cache (STDOUT_FILENO, 0)) + { + diagnose (errno, _("failed to discard cache for: %s"), + quotef (output_file)); + exit_status = EXIT_FAILURE; + } + } + else + { + /* Invalidate any pending region or to EOF if appropriate. */ + if (i_nocache || i_nocache_eof) + invalidate_cache (STDIN_FILENO, 0); + if (o_nocache || o_nocache_eof) + invalidate_cache (STDOUT_FILENO, 0); + } + + finish_up (); + main_exit (exit_status); +} diff --git a/src/df.c b/src/df.c new file mode 100644 index 0000000..31b5184 --- /dev/null +++ b/src/df.c @@ -0,0 +1,1853 @@ +/* df - summarize free file system space + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie . + --human-readable option added by lm@sgi.com. + --si and large file support added by eggert@twinsun.com. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "canonicalize.h" +#include "fsusage.h" +#include "human.h" +#include "mbsalign.h" +#include "mbswidth.h" +#include "mountlist.h" +#include "quote.h" +#include "find-mount-point.h" +#include "hash.h" +#include "xstrtol-error.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "df" + +#define AUTHORS \ + proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Paul Eggert") + +struct devlist +{ + dev_t dev_num; + struct mount_entry *me; + struct devlist *next; + struct devlist *seen_last; /* valid for hashed devlist entries only */ +}; + +/* Filled with device numbers of examined file systems to avoid + duplicates in output. */ +static Hash_table *devlist_table; + +/* If true, show even file systems with zero size or + uninteresting types. */ +static bool show_all_fs; + +/* If true, show only local file systems. */ +static bool show_local_fs; + +/* If true, output data for each file system corresponding to a + command line argument -- even if it's a dummy (automounter) entry. */ +static bool show_listed_fs; + +/* Human-readable options for output. */ +static int human_output_opts; + +/* The units to use when printing sizes. */ +static uintmax_t output_block_size; + +/* True if a file system has been processed for output. */ +static bool file_systems_processed; + +/* If true, invoke the 'sync' system call before getting any usage data. + Using this option can make df very slow, especially with many or very + busy file systems. This may make a difference on some systems -- + SunOS 4.1.3, for one. It is *not* necessary on GNU/Linux. */ +static bool require_sync; + +/* Desired exit status. */ +static int exit_status; + +/* A file system type to display. */ + +struct fs_type_list +{ + char *fs_name; + struct fs_type_list *fs_next; +}; + +/* Linked list of file system types to display. + If 'fs_select_list' is null, list all types. + This table is generated dynamically from command-line options, + rather than hardcoding into the program what it thinks are the + valid file system types; let the user specify any file system type + they want to, and if there are any file systems of that type, they + will be shown. + + Some file system types: + 4.2 4.3 ufs nfs swap ignore io vm efs dbg */ + +static struct fs_type_list *fs_select_list; + +/* Linked list of file system types to omit. + If the list is empty, don't exclude any types. */ + +static struct fs_type_list *fs_exclude_list; + +/* Linked list of mounted file systems. */ +static struct mount_entry *mount_list; + +/* If true, print file system type as well. */ +static bool print_type; + +/* If true, print a grand total at the end. */ +static bool print_grand_total; + +/* Grand total data. */ +static struct fs_usage grand_fsu; + +/* Display modes. */ +static enum +{ + DEFAULT_MODE, + INODES_MODE, + HUMAN_MODE, + POSIX_MODE, + OUTPUT_MODE +} header_mode = DEFAULT_MODE; + +/* Displayable fields. */ +typedef enum +{ + SOURCE_FIELD, /* file system */ + FSTYPE_FIELD, /* FS type */ + SIZE_FIELD, /* FS size */ + USED_FIELD, /* FS size used */ + AVAIL_FIELD, /* FS size available */ + PCENT_FIELD, /* percent used */ + ITOTAL_FIELD, /* inode total */ + IUSED_FIELD, /* inodes used */ + IAVAIL_FIELD, /* inodes available */ + IPCENT_FIELD, /* inodes used in percent */ + TARGET_FIELD, /* mount point */ + FILE_FIELD, /* specified file name */ + INVALID_FIELD /* validation marker */ +} display_field_t; + +/* Flag if a field contains a block, an inode or another value. */ +typedef enum +{ + BLOCK_FLD, /* Block values field */ + INODE_FLD, /* Inode values field */ + OTHER_FLD /* Neutral field, e.g. target */ +} field_type_t; + +/* Attributes of a display field. */ +struct field_data_t +{ + display_field_t field; + char const *arg; + field_type_t field_type; + char const *caption;/* nullptr means use default header of this field. */ + size_t width; /* Auto adjusted (up) widths used to align columns. */ + mbs_align_t align; /* Alignment for this field. */ + bool used; +}; + +/* Header strings, minimum width and alignment for the above fields. */ +static struct field_data_t field_data[] = { + [SOURCE_FIELD] = { SOURCE_FIELD, + "source", OTHER_FLD, N_("Filesystem"), 14, MBS_ALIGN_LEFT, false }, + + [FSTYPE_FIELD] = { FSTYPE_FIELD, + "fstype", OTHER_FLD, N_("Type"), 4, MBS_ALIGN_LEFT, false }, + + [SIZE_FIELD] = { SIZE_FIELD, + "size", BLOCK_FLD, N_("blocks"), 5, MBS_ALIGN_RIGHT, false }, + + [USED_FIELD] = { USED_FIELD, + "used", BLOCK_FLD, N_("Used"), 5, MBS_ALIGN_RIGHT, false }, + + [AVAIL_FIELD] = { AVAIL_FIELD, + "avail", BLOCK_FLD, N_("Available"), 5, MBS_ALIGN_RIGHT, false }, + + [PCENT_FIELD] = { PCENT_FIELD, + "pcent", BLOCK_FLD, N_("Use%"), 4, MBS_ALIGN_RIGHT, false }, + + [ITOTAL_FIELD] = { ITOTAL_FIELD, + "itotal", INODE_FLD, N_("Inodes"), 5, MBS_ALIGN_RIGHT, false }, + + [IUSED_FIELD] = { IUSED_FIELD, + "iused", INODE_FLD, N_("IUsed"), 5, MBS_ALIGN_RIGHT, false }, + + [IAVAIL_FIELD] = { IAVAIL_FIELD, + "iavail", INODE_FLD, N_("IFree"), 5, MBS_ALIGN_RIGHT, false }, + + [IPCENT_FIELD] = { IPCENT_FIELD, + "ipcent", INODE_FLD, N_("IUse%"), 4, MBS_ALIGN_RIGHT, false }, + + [TARGET_FIELD] = { TARGET_FIELD, + "target", OTHER_FLD, N_("Mounted on"), 0, MBS_ALIGN_LEFT, false }, + + [FILE_FIELD] = { FILE_FIELD, + "file", OTHER_FLD, N_("File"), 0, MBS_ALIGN_LEFT, false } +}; + +static char const *all_args_string = + "source,fstype,itotal,iused,iavail,ipcent,size," + "used,avail,pcent,file,target"; + +/* Storage for the definition of output columns. */ +static struct field_data_t **columns; + +/* The current number of output columns. */ +static size_t ncolumns; + +/* Field values. */ +struct field_values_t +{ + uintmax_t input_units; + uintmax_t output_units; + uintmax_t total; + uintmax_t available; + bool negate_available; + uintmax_t available_to_root; + uintmax_t used; + bool negate_used; +}; + +/* Storage for pointers for each string (cell of table). */ +static char ***table; + +/* The current number of processed rows (including header). */ +static size_t nrows; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + NO_SYNC_OPTION = CHAR_MAX + 1, + SYNC_OPTION, + TOTAL_OPTION, + OUTPUT_OPTION +}; + +static struct option const long_options[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"block-size", required_argument, nullptr, 'B'}, + {"inodes", no_argument, nullptr, 'i'}, + {"human-readable", no_argument, nullptr, 'h'}, + {"si", no_argument, nullptr, 'H'}, + {"local", no_argument, nullptr, 'l'}, + {"output", optional_argument, nullptr, OUTPUT_OPTION}, + {"portability", no_argument, nullptr, 'P'}, + {"print-type", no_argument, nullptr, 'T'}, + {"sync", no_argument, nullptr, SYNC_OPTION}, + {"no-sync", no_argument, nullptr, NO_SYNC_OPTION}, + {"total", no_argument, nullptr, TOTAL_OPTION}, + {"type", required_argument, nullptr, 't'}, + {"exclude-type", required_argument, nullptr, 'x'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Stat FILE and put the results into *ST. Return 0 if successful, an + error number otherwise. Try to open FILE before statting, to + trigger automounts. */ + +static int +automount_stat_err (char const *file, struct stat *st) +{ + int fd = open (file, O_RDONLY | O_NOCTTY | O_NONBLOCK); + if (fd < 0) + { + if (errno == ENOENT || errno == ENOTDIR) + return errno; + return stat (file, st) == 0 ? 0 : errno; + } + else + { + int err = fstat (fd, st) == 0 ? 0 : errno; + close (fd); + return err; + } +} + +/* Replace problematic chars with '?'. + Since only control characters are currently considered, + this should work in all encodings. */ + +static void +replace_control_chars (char *cell) +{ + char *p = cell; + while (*p) + { + if (c_iscntrl (to_uchar (*p))) + *p = '?'; + p++; + } +} + +/* Replace problematic chars with '?'. */ + +static void +replace_invalid_chars (char *cell) +{ + char *srcend = cell + strlen (cell); + char *dst = cell; + mbstate_t mbstate = { 0, }; + size_t n; + + for (char *src = cell; src != srcend; src += n) + { + wchar_t wc; + size_t srcbytes = srcend - src; + n = mbrtowc (&wc, src, srcbytes, &mbstate); + bool ok = n <= srcbytes; + + if (ok) + ok = !iswcntrl (wc); + else + n = 1; + + if (ok) + { + memmove (dst, src, n); + dst += n; + } + else + { + *dst++ = '?'; + memset (&mbstate, 0, sizeof mbstate); + } + } + + *dst = '\0'; +} + +static void +replace_problematic_chars (char *cell) +{ + static int tty_out = -1; + if (tty_out < 0) + tty_out = isatty (STDOUT_FILENO); + + (tty_out ? replace_invalid_chars : replace_control_chars) (cell) ; +} + + +/* Dynamically allocate a row of pointers in TABLE, which + can then be accessed with standard 2D array notation. */ + +static void +alloc_table_row (void) +{ + nrows++; + table = xnrealloc (table, nrows, sizeof (char **)); + table[nrows - 1] = xnmalloc (ncolumns, sizeof (char *)); +} + +/* Output each cell in the table, accounting for the + alignment and max width of each column. */ + +static void +print_table (void) +{ + size_t row; + + for (row = 0; row < nrows; row++) + { + size_t col; + for (col = 0; col < ncolumns; col++) + { + char *cell = table[row][col]; + + /* Note the SOURCE_FIELD used to be displayed on it's own line + if (!posix_format && mbswidth (cell) > 20), but that + functionality was probably more problematic than helpful, + hence changed in commit v8.10-40-g99679ff. */ + if (col != 0) + putchar (' '); + + int flags = 0; + if (col == ncolumns - 1) /* The last one. */ + flags = MBA_NO_RIGHT_PAD; + + size_t width = columns[col]->width; + cell = ambsalign (cell, &width, columns[col]->align, flags); + /* When ambsalign fails, output unaligned data. */ + fputs (cell ? cell : table[row][col], stdout); + free (cell); + } + putchar ('\n'); + } +} + +/* Dynamically allocate a struct field_t in COLUMNS, which + can then be accessed with standard array notation. */ + +static void +alloc_field (int f, char const *c) +{ + ncolumns++; + columns = xnrealloc (columns, ncolumns, sizeof (struct field_data_t *)); + columns[ncolumns - 1] = &field_data[f]; + if (c != nullptr) + columns[ncolumns - 1]->caption = c; + + affirm (!field_data[f].used); + + /* Mark field as used. */ + field_data[f].used = true; +} + + +/* Given a string, ARG, containing a comma-separated list of arguments + to the --output option, add the appropriate fields to columns. */ +static void +decode_output_arg (char const *arg) +{ + char *arg_writable = xstrdup (arg); + char *s = arg_writable; + do + { + /* find next comma */ + char *comma = strchr (s, ','); + + /* If we found a comma, put a NUL in its place and advance. */ + if (comma) + *comma++ = 0; + + /* process S. */ + display_field_t field = INVALID_FIELD; + for (idx_t i = 0; i < ARRAY_CARDINALITY (field_data); i++) + { + if (STREQ (field_data[i].arg, s)) + { + field = i; + break; + } + } + if (field == INVALID_FIELD) + { + error (0, 0, _("option --output: field %s unknown"), quote (s)); + usage (EXIT_FAILURE); + } + + if (field_data[field].used) + { + /* Prevent the fields from being used more than once. */ + error (0, 0, _("option --output: field %s used more than once"), + quote (field_data[field].arg)); + usage (EXIT_FAILURE); + } + + switch (field) + { + case SOURCE_FIELD: + case FSTYPE_FIELD: + case USED_FIELD: + case PCENT_FIELD: + case ITOTAL_FIELD: + case IUSED_FIELD: + case IAVAIL_FIELD: + case IPCENT_FIELD: + case TARGET_FIELD: + case FILE_FIELD: + alloc_field (field, nullptr); + break; + + case SIZE_FIELD: + alloc_field (field, N_("Size")); + break; + + case AVAIL_FIELD: + alloc_field (field, N_("Avail")); + break; + + default: + affirm (!"invalid field"); + } + s = comma; + } + while (s); + + free (arg_writable); +} + +/* Get the appropriate columns for the mode. */ +static void +get_field_list (void) +{ + switch (header_mode) + { + case DEFAULT_MODE: + alloc_field (SOURCE_FIELD, nullptr); + if (print_type) + alloc_field (FSTYPE_FIELD, nullptr); + alloc_field (SIZE_FIELD, nullptr); + alloc_field (USED_FIELD, nullptr); + alloc_field (AVAIL_FIELD, nullptr); + alloc_field (PCENT_FIELD, nullptr); + alloc_field (TARGET_FIELD, nullptr); + break; + + case HUMAN_MODE: + alloc_field (SOURCE_FIELD, nullptr); + if (print_type) + alloc_field (FSTYPE_FIELD, nullptr); + + alloc_field (SIZE_FIELD, N_("Size")); + alloc_field (USED_FIELD, nullptr); + alloc_field (AVAIL_FIELD, N_("Avail")); + alloc_field (PCENT_FIELD, nullptr); + alloc_field (TARGET_FIELD, nullptr); + break; + + case INODES_MODE: + alloc_field (SOURCE_FIELD, nullptr); + if (print_type) + alloc_field (FSTYPE_FIELD, nullptr); + alloc_field (ITOTAL_FIELD, nullptr); + alloc_field (IUSED_FIELD, nullptr); + alloc_field (IAVAIL_FIELD, nullptr); + alloc_field (IPCENT_FIELD, nullptr); + alloc_field (TARGET_FIELD, nullptr); + break; + + case POSIX_MODE: + alloc_field (SOURCE_FIELD, nullptr); + if (print_type) + alloc_field (FSTYPE_FIELD, nullptr); + alloc_field (SIZE_FIELD, nullptr); + alloc_field (USED_FIELD, nullptr); + alloc_field (AVAIL_FIELD, nullptr); + alloc_field (PCENT_FIELD, N_("Capacity")); + alloc_field (TARGET_FIELD, nullptr); + break; + + case OUTPUT_MODE: + if (!ncolumns) + { + /* Add all fields if --output was given without a field list. */ + decode_output_arg (all_args_string); + } + break; + + default: + unreachable (); + } +} + +/* Obtain the appropriate header entries. */ + +static void +get_header (void) +{ + size_t col; + + alloc_table_row (); + + for (col = 0; col < ncolumns; col++) + { + char *cell = nullptr; + char const *header = _(columns[col]->caption); + + if (columns[col]->field == SIZE_FIELD + && (header_mode == DEFAULT_MODE + || (header_mode == OUTPUT_MODE + && !(human_output_opts & human_autoscale)))) + { + char buf[LONGEST_HUMAN_READABLE + 1]; + + int opts = (human_suppress_point_zero + | human_autoscale | human_SI + | (human_output_opts + & (human_group_digits | human_base_1024 | human_B))); + + /* Prefer the base that makes the human-readable value more exact, + if there is a difference. */ + + uintmax_t q1000 = output_block_size; + uintmax_t q1024 = output_block_size; + bool divisible_by_1000; + bool divisible_by_1024; + + do + { + divisible_by_1000 = q1000 % 1000 == 0; q1000 /= 1000; + divisible_by_1024 = q1024 % 1024 == 0; q1024 /= 1024; + } + while (divisible_by_1000 & divisible_by_1024); + + if (divisible_by_1000 < divisible_by_1024) + opts |= human_base_1024; + if (divisible_by_1024 < divisible_by_1000) + opts &= ~human_base_1024; + if (! (opts & human_base_1024)) + opts |= human_B; + + char *num = human_readable (output_block_size, buf, opts, 1, 1); + + /* Reset the header back to the default in OUTPUT_MODE. */ + header = _("blocks"); + + /* TRANSLATORS: this is the "1K-blocks" header in "df" output. */ + if (asprintf (&cell, _("%s-%s"), num, header) == -1) + cell = nullptr; + } + else if (header_mode == POSIX_MODE && columns[col]->field == SIZE_FIELD) + { + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + char *num = umaxtostr (output_block_size, buf); + + /* TRANSLATORS: this is the "1024-blocks" header in "df -P". */ + if (asprintf (&cell, _("%s-%s"), num, header) == -1) + cell = nullptr; + } + else + cell = strdup (header); + + if (!cell) + xalloc_die (); + + replace_problematic_chars (cell); + + table[nrows - 1][col] = cell; + + size_t cell_width = mbswidth (cell, 0); + columns[col]->width = MAX (columns[col]->width, cell_width); + } +} + +/* Is FSTYPE a type of file system that should be listed? */ + +ATTRIBUTE_PURE +static bool +selected_fstype (char const *fstype) +{ + const struct fs_type_list *fsp; + + if (fs_select_list == nullptr || fstype == nullptr) + return true; + for (fsp = fs_select_list; fsp; fsp = fsp->fs_next) + if (STREQ (fstype, fsp->fs_name)) + return true; + return false; +} + +/* Is FSTYPE a type of file system that should be omitted? */ + +ATTRIBUTE_PURE +static bool +excluded_fstype (char const *fstype) +{ + const struct fs_type_list *fsp; + + if (fs_exclude_list == nullptr || fstype == nullptr) + return false; + for (fsp = fs_exclude_list; fsp; fsp = fsp->fs_next) + if (STREQ (fstype, fsp->fs_name)) + return true; + return false; +} + +static size_t +devlist_hash (void const *x, size_t table_size) +{ + struct devlist const *p = x; + return (uintmax_t) p->dev_num % table_size; +} + +static bool +devlist_compare (void const *x, void const *y) +{ + struct devlist const *a = x; + struct devlist const *b = y; + return a->dev_num == b->dev_num; +} + +static struct devlist * +devlist_for_dev (dev_t dev) +{ + if (devlist_table == nullptr) + return nullptr; + struct devlist dev_entry; + dev_entry.dev_num = dev; + + struct devlist *found = hash_lookup (devlist_table, &dev_entry); + if (found == nullptr) + return nullptr; + + /* Return the last devlist entry we have seen with this dev_num */ + return found->seen_last; +} + +/* Filter mount list by skipping duplicate entries. + In the case of duplicates - based on the device number - the mount entry + with a '/' in its me_devname (i.e., not pseudo name like tmpfs) wins. + If both have a real devname (e.g. bind mounts), then that with the shorter + me_mountdir wins. With DEVICES_ONLY == true (set with df -a), only update + the global devlist_table, rather than filtering the global mount_list. */ + +static void +filter_mount_list (bool devices_only) +{ + struct mount_entry *me; + + /* Temporary list to keep entries ordered. */ + struct devlist *device_list = nullptr; + int mount_list_size = 0; + + for (me = mount_list; me; me = me->me_next) + mount_list_size++; + + devlist_table = hash_initialize (mount_list_size, nullptr, + devlist_hash, devlist_compare, nullptr); + if (devlist_table == nullptr) + xalloc_die (); + + /* Sort all 'wanted' entries into the list device_list. */ + for (me = mount_list; me;) + { + struct stat buf; + struct mount_entry *discard_me = nullptr; + + /* Avoid stating remote file systems as that may hang. + On Linux we probably have me_dev populated from /proc/self/mountinfo, + however we still stat() in case another device was mounted later. */ + if ((me->me_remote && show_local_fs) + || (me->me_dummy && !show_all_fs && !show_listed_fs) + || (!selected_fstype (me->me_type) || excluded_fstype (me->me_type)) + || -1 == stat (me->me_mountdir, &buf)) + { + /* If remote, and showing just local, or FS type is excluded, + add ME for filtering later. + If stat failed; add ME to be able to complain about it later. */ + buf.st_dev = me->me_dev; + } + else + { + /* If we've already seen this device... */ + struct devlist *seen_dev = devlist_for_dev (buf.st_dev); + + if (seen_dev) + { + bool target_nearer_root = strlen (seen_dev->me->me_mountdir) + > strlen (me->me_mountdir); + /* With bind mounts, prefer items nearer the root of the source */ + bool source_below_root = seen_dev->me->me_mntroot != nullptr + && me->me_mntroot != nullptr + && (strlen (seen_dev->me->me_mntroot) + < strlen (me->me_mntroot)); + if (! print_grand_total + && me->me_remote && seen_dev->me->me_remote + && ! STREQ (seen_dev->me->me_devname, me->me_devname)) + { + /* Don't discard remote entries with different locations, + as these are more likely to be explicitly mounted. + However avoid this when producing a total to give + a more accurate value in that case. */ + } + else if ((strchr (me->me_devname, '/') + /* let "real" devices with '/' in the name win. */ + && ! strchr (seen_dev->me->me_devname, '/')) + /* let points towards the root of the device win. */ + || (target_nearer_root && ! source_below_root) + /* let an entry overmounted on a new device win... */ + || (! STREQ (seen_dev->me->me_devname, me->me_devname) + /* ... but only when matching an existing mnt point, + to avoid problematic replacement when given + inaccurate mount lists, seen with some chroot + environments for example. */ + && STREQ (me->me_mountdir, + seen_dev->me->me_mountdir))) + { + /* Discard mount entry for existing device. */ + discard_me = seen_dev->me; + seen_dev->me = me; + } + else + { + /* Discard mount entry currently being processed. */ + discard_me = me; + } + + } + } + + if (discard_me) + { + me = me->me_next; + if (! devices_only) + free_mount_entry (discard_me); + } + else + { + /* Add the device number to the device_table. */ + struct devlist *devlist = xmalloc (sizeof *devlist); + devlist->me = me; + devlist->dev_num = buf.st_dev; + devlist->next = device_list; + device_list = devlist; + + struct devlist *hash_entry = hash_insert (devlist_table, devlist); + if (hash_entry == nullptr) + xalloc_die (); + /* Ensure lookups use this latest devlist. */ + hash_entry->seen_last = devlist; + + me = me->me_next; + } + } + + /* Finally rebuild the mount_list from the devlist. */ + if (! devices_only) { + mount_list = nullptr; + while (device_list) + { + /* Add the mount entry. */ + me = device_list->me; + me->me_next = mount_list; + mount_list = me; + struct devlist *next = device_list->next; + free (device_list); + device_list = next; + } + + hash_free (devlist_table); + devlist_table = nullptr; + } +} + + +/* Search a mount entry list for device id DEV. + Return the corresponding mount entry if found or nullptr if not. */ + +ATTRIBUTE_PURE +static struct mount_entry const * +me_for_dev (dev_t dev) +{ + struct devlist *dl = devlist_for_dev (dev); + if (dl) + return dl->me; + + return nullptr; +} + +/* Return true if N is a known integer value. On many file systems, + UINTMAX_MAX represents an unknown value; on AIX, UINTMAX_MAX - 1 + represents unknown. Use a rule that works on AIX file systems, and + that almost-always works on other types. */ +static bool +known_value (uintmax_t n) +{ + return n < UINTMAX_MAX - 1; +} + +/* Like human_readable (N, BUF, human_output_opts, INPUT_UNITS, OUTPUT_UNITS), + except: + + - If NEGATIVE, then N represents a negative number, + expressed in two's complement. + - Otherwise, return "-" if N is unknown. */ + +static char const * +df_readable (bool negative, uintmax_t n, char *buf, + uintmax_t input_units, uintmax_t output_units) +{ + if (! known_value (n) && !negative) + return "-"; + else + { + char *p = human_readable (negative ? -n : n, buf + negative, + human_output_opts, input_units, output_units); + if (negative) + *--p = '-'; + return p; + } +} + +/* Add integral value while using uintmax_t for value part and separate + negation flag. It adds value of SRC and SRC_NEG to DEST and DEST_NEG. + The result will be in DEST and DEST_NEG. See df_readable to understand + how the negation flag is used. */ +static void +add_uint_with_neg_flag (uintmax_t *dest, bool *dest_neg, + uintmax_t src, bool src_neg) +{ + if (*dest_neg == src_neg) + { + *dest += src; + return; + } + + if (*dest_neg) + *dest = -*dest; + + if (src_neg) + src = -src; + + if (src < *dest) + *dest -= src; + else + { + *dest = src - *dest; + *dest_neg = src_neg; + } + + if (*dest_neg) + *dest = -*dest; +} + +/* Return true if S ends in a string that may be a 36-byte UUID, + i.e., of the form HHHHHHHH-HHHH-HHHH-HHHH-HHHHHHHHHHHH, where + each H is an upper or lower case hexadecimal digit. */ +ATTRIBUTE_PURE +static bool +has_uuid_suffix (char const *s) +{ + size_t len = strlen (s); + return (36 < len + && strspn (s + len - 36, "-0123456789abcdefABCDEF") == 36); +} + +/* Obtain the block values BV and inode values IV + from the file system usage FSU. */ +static void +get_field_values (struct field_values_t *bv, + struct field_values_t *iv, + const struct fs_usage *fsu) +{ + /* Inode values. */ + iv->input_units = iv->output_units = 1; + iv->total = fsu->fsu_files; + iv->available = iv->available_to_root = fsu->fsu_ffree; + iv->negate_available = false; + + iv->used = UINTMAX_MAX; + iv->negate_used = false; + if (known_value (iv->total) && known_value (iv->available_to_root)) + { + iv->used = iv->total - iv->available_to_root; + iv->negate_used = (iv->total < iv->available_to_root); + } + + /* Block values. */ + bv->input_units = fsu->fsu_blocksize; + bv->output_units = output_block_size; + bv->total = fsu->fsu_blocks; + bv->available = fsu->fsu_bavail; + bv->available_to_root = fsu->fsu_bfree; + bv->negate_available = (fsu->fsu_bavail_top_bit_set + && known_value (fsu->fsu_bavail)); + + bv->used = UINTMAX_MAX; + bv->negate_used = false; + if (known_value (bv->total) && known_value (bv->available_to_root)) + { + bv->used = bv->total - bv->available_to_root; + bv->negate_used = (bv->total < bv->available_to_root); + } +} + +/* Add block and inode values to grand total. */ +static void +add_to_grand_total (struct field_values_t *bv, struct field_values_t *iv) +{ + if (known_value (iv->total)) + grand_fsu.fsu_files += iv->total; + if (known_value (iv->available)) + grand_fsu.fsu_ffree += iv->available; + + if (known_value (bv->total)) + grand_fsu.fsu_blocks += bv->input_units * bv->total; + if (known_value (bv->available_to_root)) + grand_fsu.fsu_bfree += bv->input_units * bv->available_to_root; + if (known_value (bv->available)) + add_uint_with_neg_flag (&grand_fsu.fsu_bavail, + &grand_fsu.fsu_bavail_top_bit_set, + bv->input_units * bv->available, + bv->negate_available); +} + +/* Obtain a space listing for the device with absolute file name DEVICE. + If MOUNT_POINT is non-null, it is the name of the root of the + file system on DEVICE. + If STAT_FILE is non-null, it is the name of a file within the file + system that the user originally asked for; this provides better + diagnostics, and sometimes it provides better results on networked + file systems that give different free-space results depending on + where in the file system you probe. + If FSTYPE is non-null, it is the type of the file system on DEVICE. + If MOUNT_POINT is non-null, then DEVICE may be null -- certain systems may + not be able to produce statistics in this case. + ME_DUMMY and ME_REMOTE are the mount entry flags. + Caller must set PROCESS_ALL to true when iterating over all entries, as + when df is invoked with no non-option argument. See below for details. */ + +static void +get_dev (char const *device, char const *mount_point, char const *file, + char const *stat_file, char const *fstype, + bool me_dummy, bool me_remote, + const struct fs_usage *force_fsu, + bool process_all) +{ + if (me_remote && show_local_fs) + return; + + if (me_dummy && !show_all_fs && !show_listed_fs) + return; + + if (!selected_fstype (fstype) || excluded_fstype (fstype)) + return; + + /* Ignore relative MOUNT_POINTs, which are present for example + in /proc/mounts on Linux with network namespaces. */ + if (!force_fsu && mount_point && ! IS_ABSOLUTE_FILE_NAME (mount_point)) + return; + + /* If MOUNT_POINT is null, then the file system is not mounted, and this + program reports on the file system that the special file is on. + It would be better to report on the unmounted file system, + but statfs doesn't do that on most systems. */ + if (!stat_file) + stat_file = mount_point ? mount_point : device; + + struct fs_usage fsu; + if (force_fsu) + fsu = *force_fsu; + else if (get_fs_usage (stat_file, device, &fsu)) + { + /* If we can't access a system provided entry due + to it not being present (now), or due to permissions, + just output placeholder values rather than failing. */ + if (process_all && (errno == EACCES || errno == ENOENT)) + { + if (! show_all_fs) + return; + + fstype = "-"; + fsu.fsu_bavail_top_bit_set = false; + fsu.fsu_blocksize = fsu.fsu_blocks = fsu.fsu_bfree = + fsu.fsu_bavail = fsu.fsu_files = fsu.fsu_ffree = UINTMAX_MAX; + } + else + { + error (0, errno, "%s", quotef (stat_file)); + exit_status = EXIT_FAILURE; + return; + } + } + else if (process_all && show_all_fs) + { + /* Ensure we don't output incorrect stats for over-mounted directories. + Discard stats when the device name doesn't match. Though don't + discard when used and current mount entries are both remote due + to the possibility of aliased host names or exports. */ + struct stat sb; + if (stat (stat_file, &sb) == 0) + { + struct mount_entry const * dev_me = me_for_dev (sb.st_dev); + if (dev_me && ! STREQ (dev_me->me_devname, device) + && (! dev_me->me_remote || ! me_remote)) + { + fstype = "-"; + fsu.fsu_bavail_top_bit_set = false; + fsu.fsu_blocksize = fsu.fsu_blocks = fsu.fsu_bfree = + fsu.fsu_bavail = fsu.fsu_files = fsu.fsu_ffree = UINTMAX_MAX; + } + } + } + + if (fsu.fsu_blocks == 0 && !show_all_fs && !show_listed_fs) + return; + + if (! force_fsu) + file_systems_processed = true; + + alloc_table_row (); + + if (! device) + device = "-"; /* unknown */ + + if (! file) + file = "-"; /* unspecified */ + + char *dev_name = xstrdup (device); + char *resolved_dev; + + /* On some systems, dev_name is a long-named symlink like + /dev/disk/by-uuid/828fc648-9f30-43d8-a0b1-f7196a2edb66 pointing to a + much shorter and more useful name like /dev/sda1. It may also look + like /dev/mapper/luks-828fc648-9f30-43d8-a0b1-f7196a2edb66 and point to + /dev/dm-0. When process_all is true and dev_name is a symlink whose + name ends with a UUID use the resolved name instead. */ + if (process_all + && has_uuid_suffix (dev_name) + && (resolved_dev = canonicalize_filename_mode (dev_name, CAN_EXISTING))) + { + free (dev_name); + dev_name = resolved_dev; + } + + if (! fstype) + fstype = "-"; /* unknown */ + + struct field_values_t block_values; + struct field_values_t inode_values; + get_field_values (&block_values, &inode_values, &fsu); + + /* Add to grand total unless processing grand total line. */ + if (print_grand_total && ! force_fsu) + add_to_grand_total (&block_values, &inode_values); + + size_t col; + for (col = 0; col < ncolumns; col++) + { + char buf[LONGEST_HUMAN_READABLE + 2]; + char *cell; + + struct field_values_t *v; + switch (columns[col]->field_type) + { + case BLOCK_FLD: + v = &block_values; + break; + case INODE_FLD: + v = &inode_values; + break; + case OTHER_FLD: + v = nullptr; + break; + default: + affirm (!"bad field_type"); + } + + switch (columns[col]->field) + { + case SOURCE_FIELD: + cell = xstrdup (dev_name); + break; + + case FSTYPE_FIELD: + cell = xstrdup (fstype); + break; + + case SIZE_FIELD: + case ITOTAL_FIELD: + cell = xstrdup (df_readable (false, v->total, buf, + v->input_units, v->output_units)); + break; + + case USED_FIELD: + case IUSED_FIELD: + cell = xstrdup (df_readable (v->negate_used, v->used, buf, + v->input_units, v->output_units)); + break; + + case AVAIL_FIELD: + case IAVAIL_FIELD: + cell = xstrdup (df_readable (v->negate_available, v->available, buf, + v->input_units, v->output_units)); + break; + + case PCENT_FIELD: + case IPCENT_FIELD: + { + double pct = -1; + if (! known_value (v->used) || ! known_value (v->available)) + ; + else if (!v->negate_used + && v->used <= TYPE_MAXIMUM (uintmax_t) / 100 + && v->used + v->available != 0 + && (v->used + v->available < v->used) + == v->negate_available) + { + uintmax_t u100 = v->used * 100; + uintmax_t nonroot_total = v->used + v->available; + pct = u100 / nonroot_total + (u100 % nonroot_total != 0); + } + else + { + /* The calculation cannot be done easily with integer + arithmetic. Fall back on floating point. This can suffer + from minor rounding errors, but doing it exactly requires + multiple precision arithmetic, and it's not worth the + aggravation. */ + double u = v->negate_used ? - (double) - v->used : v->used; + double a = v->negate_available + ? - (double) - v->available : v->available; + double nonroot_total = u + a; + if (nonroot_total) + { + long int lipct = pct = u * 100 / nonroot_total; + double ipct = lipct; + + /* Like 'pct = ceil (dpct);', but avoid ceil so that + the math library needn't be linked. */ + if (ipct - 1 < pct && pct <= ipct + 1) + pct = ipct + (ipct < pct); + } + } + + if (0 <= pct) + { + if (asprintf (&cell, "%.0f%%", pct) == -1) + cell = nullptr; + } + else + cell = strdup ("-"); + + if (!cell) + xalloc_die (); + + break; + } + + case FILE_FIELD: + cell = xstrdup (file); + break; + + case TARGET_FIELD: +#ifdef HIDE_AUTOMOUNT_PREFIX + /* Don't print the first directory name in MOUNT_POINT if it's an + artifact of an automounter. This is a bit too aggressive to be + the default. */ + if (STRNCMP_LIT (mount_point, "/auto/") == 0) + mount_point += 5; + else if (STRNCMP_LIT (mount_point, "/tmp_mnt/") == 0) + mount_point += 8; +#endif + cell = xstrdup (mount_point); + break; + + default: + affirm (!"unhandled field"); + } + + affirm (cell); + + replace_problematic_chars (cell); + size_t cell_width = mbswidth (cell, 0); + columns[col]->width = MAX (columns[col]->width, cell_width); + table[nrows - 1][col] = cell; + } + free (dev_name); +} + +/* Scan the mount list returning the _last_ device found for MOUNT. + nullptr is returned if MOUNT not found. The result is malloced. */ +static char * +last_device_for_mount (char const *mount) +{ + struct mount_entry const *me; + struct mount_entry const *le = nullptr; + + for (me = mount_list; me; me = me->me_next) + { + if (STREQ (me->me_mountdir, mount)) + le = me; + } + + if (le) + { + char *devname = le->me_devname; + char *canon_dev = canonicalize_file_name (devname); + if (canon_dev && IS_ABSOLUTE_FILE_NAME (canon_dev)) + return canon_dev; + free (canon_dev); + return xstrdup (le->me_devname); + } + else + return nullptr; +} + +/* If DEVICE corresponds to a mount point, show its usage + and return true. Otherwise, return false. */ +static bool +get_device (char const *device) +{ + struct mount_entry const *me; + struct mount_entry const *best_match = nullptr; + bool best_match_accessible = false; + bool eclipsed_device = false; + char const *file = device; + + char *resolved = canonicalize_file_name (device); + if (resolved && IS_ABSOLUTE_FILE_NAME (resolved)) + device = resolved; + + size_t best_match_len = SIZE_MAX; + for (me = mount_list; me; me = me->me_next) + { + /* TODO: Should cache canon_dev in the mount_entry struct. */ + char *devname = me->me_devname; + char *canon_dev = canonicalize_file_name (me->me_devname); + if (canon_dev && IS_ABSOLUTE_FILE_NAME (canon_dev)) + devname = canon_dev; + + if (STREQ (device, devname)) + { + char *last_device = last_device_for_mount (me->me_mountdir); + eclipsed_device = last_device && ! STREQ (last_device, devname); + size_t len = strlen (me->me_mountdir); + + if (! eclipsed_device + && (! best_match_accessible || len < best_match_len)) + { + struct stat device_stats; + bool this_match_accessible = false; + + if (stat (me->me_mountdir, &device_stats) == 0) + best_match_accessible = this_match_accessible = true; + + if (this_match_accessible + || (! best_match_accessible && len < best_match_len)) + { + best_match = me; + if (len == 1) /* Traditional root. */ + { + free (last_device); + free (canon_dev); + break; + } + else + best_match_len = len; + } + } + + free (last_device); + } + + free (canon_dev); + } + + free (resolved); + + if (best_match) + { + get_dev (best_match->me_devname, best_match->me_mountdir, file, nullptr, + best_match->me_type, best_match->me_dummy, + best_match->me_remote, nullptr, false); + return true; + } + else if (eclipsed_device) + { + error (0, 0, _("cannot access %s: over-mounted by another device"), + quoteaf (file)); + exit_status = EXIT_FAILURE; + return true; + } + + return false; +} + +/* Figure out which device file or directory POINT is mounted on + and show its device usage. + STATP must be the result of 'stat (POINT, STATP)'. */ +static void +get_point (char const *point, const struct stat *statp) +{ + struct stat device_stats; + struct mount_entry *me; + struct mount_entry const *best_match = nullptr; + + /* Calculate the real absolute file name for POINT, and use that to find + the mount point. This avoids statting unavailable mount points, + which can hang df. */ + char *resolved = canonicalize_file_name (point); + if (resolved && resolved[0] == '/') + { + size_t resolved_len = strlen (resolved); + size_t best_match_len = 0; + + for (me = mount_list; me; me = me->me_next) + { + if (!STREQ (me->me_type, "lofs") + && (!best_match || best_match->me_dummy || !me->me_dummy)) + { + size_t len = strlen (me->me_mountdir); + if (best_match_len <= len && len <= resolved_len + && (len == 1 /* root file system */ + || ((len == resolved_len || resolved[len] == '/') + && STREQ_LEN (me->me_mountdir, resolved, len)))) + { + best_match = me; + best_match_len = len; + } + } + } + } + free (resolved); + if (best_match + && (stat (best_match->me_mountdir, &device_stats) != 0 + || device_stats.st_dev != statp->st_dev)) + best_match = nullptr; + + if (! best_match) + for (me = mount_list; me; me = me->me_next) + { + if (me->me_dev == (dev_t) -1) + { + if (stat (me->me_mountdir, &device_stats) == 0) + me->me_dev = device_stats.st_dev; + else + { + /* Report only I/O errors. Other errors might be + caused by shadowed mount points, which means POINT + can't possibly be on this file system. */ + if (errno == EIO) + { + error (0, errno, "%s", quotef (me->me_mountdir)); + exit_status = EXIT_FAILURE; + } + + /* So we won't try and fail repeatedly. */ + me->me_dev = (dev_t) -2; + } + } + + if (statp->st_dev == me->me_dev + && !STREQ (me->me_type, "lofs") + && (!best_match || best_match->me_dummy || !me->me_dummy)) + { + /* Skip bogus mtab entries. */ + if (stat (me->me_mountdir, &device_stats) != 0 + || device_stats.st_dev != me->me_dev) + me->me_dev = (dev_t) -2; + else + best_match = me; + } + } + + if (best_match) + get_dev (best_match->me_devname, best_match->me_mountdir, point, point, + best_match->me_type, best_match->me_dummy, best_match->me_remote, + nullptr, false); + else + { + /* We couldn't find the mount entry corresponding to POINT. Go ahead and + print as much info as we can; methods that require the device to be + present will fail at a later point. */ + + /* Find the actual mount point. */ + char *mp = find_mount_point (point, statp); + if (mp) + { + get_dev (nullptr, mp, point, nullptr, nullptr, + false, false, nullptr, false); + free (mp); + } + } +} + +/* Determine what kind of node NAME is and show the device usage + for it. STATP is the results of 'stat' on NAME. */ + +static void +get_entry (char const *name, struct stat const *statp) +{ + if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode)) + && get_device (name)) + return; + + get_point (name, statp); +} + +/* Show all mounted file systems, except perhaps those that are of + an unselected type or are empty. */ + +static void +get_all_entries (void) +{ + struct mount_entry *me; + + filter_mount_list (show_all_fs); + + for (me = mount_list; me; me = me->me_next) + get_dev (me->me_devname, me->me_mountdir, nullptr, nullptr, me->me_type, + me->me_dummy, me->me_remote, nullptr, true); +} + +/* Add FSTYPE to the list of file system types to display. */ + +static void +add_fs_type (char const *fstype) +{ + struct fs_type_list *fsp; + + fsp = xmalloc (sizeof *fsp); + fsp->fs_name = (char *) fstype; + fsp->fs_next = fs_select_list; + fs_select_list = fsp; +} + +/* Add FSTYPE to the list of file system types to be omitted. */ + +static void +add_excluded_fs_type (char const *fstype) +{ + struct fs_type_list *fsp; + + fsp = xmalloc (sizeof *fsp); + fsp->fs_name = (char *) fstype; + fsp->fs_next = fs_exclude_list; + fs_exclude_list = fsp; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name); + fputs (_("\ +Show information about the file system on which each FILE resides,\n\ +or all file systems by default.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + /* TRANSLATORS: The thousands and decimal separators are best + adjusted to an appropriate default for your locale. */ + fputs (_("\ + -a, --all include pseudo, duplicate, inaccessible file systems\n\ + -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ + '-BM' prints sizes in units of 1,048,576 bytes;\n\ + see SIZE format below\n\ + -h, --human-readable print sizes in powers of 1024 (e.g., 1023M)\n\ + -H, --si print sizes in powers of 1000 (e.g., 1.1G)\n\ +"), stdout); + fputs (_("\ + -i, --inodes list inode information instead of block usage\n\ + -k like --block-size=1K\n\ + -l, --local limit listing to local file systems\n\ + --no-sync do not invoke sync before getting usage info (default)\ +\n\ +"), stdout); + fputs (_("\ + --output[=FIELD_LIST] use the output format defined by FIELD_LIST,\n\ + or print all fields if FIELD_LIST is omitted.\n\ + -P, --portability use the POSIX output format\n\ + --sync invoke sync before getting usage info\n\ +"), stdout); + fputs (_("\ + --total elide all entries insignificant to available space,\n\ + and produce a grand total\n\ +"), stdout); + fputs (_("\ + -t, --type=TYPE limit listing to file systems of type TYPE\n\ + -T, --print-type print file system type\n\ + -x, --exclude-type=TYPE limit listing to file systems not of type TYPE\n\ + -v (ignored)\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_blocksize_note ("DF"); + emit_size_note (); + fputs (_("\n\ +FIELD_LIST is a comma-separated list of columns to be included. Valid\n\ +field names are: 'source', 'fstype', 'itotal', 'iused', 'iavail', 'ipcent',\n\ +'size', 'used', 'avail', 'pcent', 'file' and 'target' (see info page).\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + struct stat *stats = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + fs_select_list = nullptr; + fs_exclude_list = nullptr; + show_all_fs = false; + show_listed_fs = false; + human_output_opts = -1; + print_type = false; + file_systems_processed = false; + exit_status = EXIT_SUCCESS; + print_grand_total = false; + grand_fsu.fsu_blocksize = 1; + + /* If true, use the POSIX output format. */ + bool posix_format = false; + + char const *msg_mut_excl = _("options %s and %s are mutually exclusive"); + + while (true) + { + int oi = -1; + int c = getopt_long (argc, argv, "aB:iF:hHklmPTt:vx:", long_options, + &oi); + if (c == -1) + break; + + switch (c) + { + case 'a': + show_all_fs = true; + break; + case 'B': + { + enum strtol_error e = human_options (optarg, &human_output_opts, + &output_block_size); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + } + break; + case 'i': + if (header_mode == OUTPUT_MODE) + { + error (0, 0, msg_mut_excl, "-i", "--output"); + usage (EXIT_FAILURE); + } + header_mode = INODES_MODE; + break; + case 'h': + human_output_opts = human_autoscale | human_SI | human_base_1024; + output_block_size = 1; + break; + case 'H': + human_output_opts = human_autoscale | human_SI; + output_block_size = 1; + break; + case 'k': + human_output_opts = 0; + output_block_size = 1024; + break; + case 'l': + show_local_fs = true; + break; + case 'm': /* obsolescent, exists for BSD compatibility */ + human_output_opts = 0; + output_block_size = 1024 * 1024; + break; + case 'T': + if (header_mode == OUTPUT_MODE) + { + error (0, 0, msg_mut_excl, "-T", "--output"); + usage (EXIT_FAILURE); + } + print_type = true; + break; + case 'P': + if (header_mode == OUTPUT_MODE) + { + error (0, 0, msg_mut_excl, "-P", "--output"); + usage (EXIT_FAILURE); + } + posix_format = true; + break; + case SYNC_OPTION: + require_sync = true; + break; + case NO_SYNC_OPTION: + require_sync = false; + break; + + case 'F': + /* Accept -F as a synonym for -t for compatibility with Solaris. */ + case 't': + add_fs_type (optarg); + break; + + case 'v': /* For SysV compatibility. */ + /* ignore */ + break; + case 'x': + add_excluded_fs_type (optarg); + break; + + case OUTPUT_OPTION: + if (header_mode == INODES_MODE) + { + error (0, 0, msg_mut_excl, "-i", "--output"); + usage (EXIT_FAILURE); + } + if (posix_format && header_mode == DEFAULT_MODE) + { + error (0, 0, msg_mut_excl, "-P", "--output"); + usage (EXIT_FAILURE); + } + if (print_type) + { + error (0, 0, msg_mut_excl, "-T", "--output"); + usage (EXIT_FAILURE); + } + header_mode = OUTPUT_MODE; + if (optarg) + decode_output_arg (optarg); + break; + + case TOTAL_OPTION: + print_grand_total = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (human_output_opts == -1) + { + if (posix_format) + { + human_output_opts = 0; + output_block_size = (getenv ("POSIXLY_CORRECT") ? 512 : 1024); + } + else + human_options (getenv ("DF_BLOCK_SIZE"), + &human_output_opts, &output_block_size); + } + + if (header_mode == INODES_MODE || header_mode == OUTPUT_MODE) + ; + else if (human_output_opts & human_autoscale) + header_mode = HUMAN_MODE; + else if (posix_format) + header_mode = POSIX_MODE; + + /* Fail if the same file system type was both selected and excluded. */ + { + bool match = false; + struct fs_type_list *fs_incl; + for (fs_incl = fs_select_list; fs_incl; fs_incl = fs_incl->fs_next) + { + struct fs_type_list *fs_excl; + for (fs_excl = fs_exclude_list; fs_excl; fs_excl = fs_excl->fs_next) + { + if (STREQ (fs_incl->fs_name, fs_excl->fs_name)) + { + error (0, 0, + _("file system type %s both selected and excluded"), + quote (fs_incl->fs_name)); + match = true; + break; + } + } + } + if (match) + return EXIT_FAILURE; + } + + if (optind < argc) + { + /* stat each of the given entries to make sure any corresponding + partition is automounted. This must be done before reading the + file system table. */ + stats = xnmalloc (argc - optind, sizeof *stats); + for (int i = optind; i < argc; ++i) + { + int err = automount_stat_err (argv[i], &stats[i - optind]); + if (err != 0) + { + error (0, err, "%s", quotef (argv[i])); + exit_status = EXIT_FAILURE; + argv[i] = nullptr; + } + } + } + + mount_list = + read_file_system_list ((fs_select_list != nullptr + || fs_exclude_list != nullptr + || print_type + || field_data[FSTYPE_FIELD].used + || show_local_fs)); + + if (mount_list == nullptr) + { + /* Couldn't read the table of mounted file systems. + Fail if df was invoked with no file name arguments, + or when either of -a, -l, -t or -x is used with file name + arguments. Otherwise, merely give a warning and proceed. */ + int status = 0; + if ( ! (optind < argc) + || (show_all_fs + || show_local_fs + || fs_select_list != nullptr + || fs_exclude_list != nullptr)) + { + status = EXIT_FAILURE; + } + char const *warning = (status == 0 ? _("Warning: ") : ""); + error (status, errno, "%s%s", warning, + _("cannot read table of mounted file systems")); + } + + if (require_sync) + sync (); + + get_field_list (); + get_header (); + + if (stats) + { + /* Display explicitly requested empty file systems. */ + show_listed_fs = true; + + for (int i = optind; i < argc; ++i) + if (argv[i]) + get_entry (argv[i], &stats[i - optind]); + } + else + get_all_entries (); + + if (file_systems_processed) + { + if (print_grand_total) + get_dev ("total", + (field_data[SOURCE_FIELD].used ? "-" : "total"), + nullptr, nullptr, nullptr, false, false, &grand_fsu, false); + + print_table (); + } + else + { + /* Print the "no FS processed" diagnostic only if there was no preceding + diagnostic, e.g., if all have been excluded. */ + if (exit_status == EXIT_SUCCESS) + error (EXIT_FAILURE, 0, _("no file systems processed")); + } + + main_exit (exit_status); +} diff --git a/src/digest.c b/src/digest.c new file mode 100644 index 0000000..3d239da --- /dev/null +++ b/src/digest.c @@ -0,0 +1,1628 @@ +/* Compute checksums of files or strings. + Copyright (C) 1995-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Ulrich Drepper . */ + +#include + +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "quote.h" +#include "xdectoint.h" +#include "xstrtol.h" + +#if HASH_ALGO_SUM || HASH_ALGO_CKSUM +# include "sum.h" +#endif +#if HASH_ALGO_CKSUM +# include "cksum.h" +# include "base64.h" +#endif +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM +# include "blake2/b2sum.h" +#endif +#if HASH_ALGO_MD5 || HASH_ALGO_CKSUM +# include "md5.h" +#endif +#if HASH_ALGO_SHA1 || HASH_ALGO_CKSUM +# include "sha1.h" +#endif +#if HASH_ALGO_SHA256 || HASH_ALGO_SHA224 || HASH_ALGO_CKSUM +# include "sha256.h" +#endif +#if HASH_ALGO_SHA512 || HASH_ALGO_SHA384 || HASH_ALGO_CKSUM +# include "sha512.h" +#endif +#if HASH_ALGO_CKSUM +# include "sm3.h" +#endif +#include "fadvise.h" +#include "stdio--.h" +#include "xbinary-io.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#if HASH_ALGO_SUM +# define PROGRAM_NAME "sum" +# define DIGEST_TYPE_STRING "BSD" +# define DIGEST_STREAM sumfns[sum_algorithm] +# define DIGEST_OUT sum_output_fns[sum_algorithm] +# define DIGEST_BITS 16 +# define DIGEST_ALIGN 4 +#elif HASH_ALGO_CKSUM +# define MAX_DIGEST_BITS 512 +# define MAX_DIGEST_ALIGN 8 +# define PROGRAM_NAME "cksum" +# define DIGEST_TYPE_STRING algorithm_tags[cksum_algorithm] +# define DIGEST_STREAM cksumfns[cksum_algorithm] +# define DIGEST_OUT cksum_output_fns[cksum_algorithm] +# define DIGEST_BITS MAX_DIGEST_BITS +# define DIGEST_ALIGN MAX_DIGEST_ALIGN +#elif HASH_ALGO_MD5 +# define PROGRAM_NAME "md5sum" +# define DIGEST_TYPE_STRING "MD5" +# define DIGEST_STREAM md5_stream +# define DIGEST_BITS 128 +# define DIGEST_REFERENCE "RFC 1321" +# define DIGEST_ALIGN 4 +#elif HASH_ALGO_BLAKE2 +# define PROGRAM_NAME "b2sum" +# define DIGEST_TYPE_STRING "BLAKE2b" +# define DIGEST_STREAM blake2b_stream +# define DIGEST_BITS 512 +# define DIGEST_REFERENCE "RFC 7693" +# define DIGEST_ALIGN 8 +#elif HASH_ALGO_SHA1 +# define PROGRAM_NAME "sha1sum" +# define DIGEST_TYPE_STRING "SHA1" +# define DIGEST_STREAM sha1_stream +# define DIGEST_BITS 160 +# define DIGEST_REFERENCE "FIPS-180-1" +# define DIGEST_ALIGN 4 +#elif HASH_ALGO_SHA256 +# define PROGRAM_NAME "sha256sum" +# define DIGEST_TYPE_STRING "SHA256" +# define DIGEST_STREAM sha256_stream +# define DIGEST_BITS 256 +# define DIGEST_REFERENCE "FIPS-180-2" +# define DIGEST_ALIGN 4 +#elif HASH_ALGO_SHA224 +# define PROGRAM_NAME "sha224sum" +# define DIGEST_TYPE_STRING "SHA224" +# define DIGEST_STREAM sha224_stream +# define DIGEST_BITS 224 +# define DIGEST_REFERENCE "RFC 3874" +# define DIGEST_ALIGN 4 +#elif HASH_ALGO_SHA512 +# define PROGRAM_NAME "sha512sum" +# define DIGEST_TYPE_STRING "SHA512" +# define DIGEST_STREAM sha512_stream +# define DIGEST_BITS 512 +# define DIGEST_REFERENCE "FIPS-180-2" +# define DIGEST_ALIGN 8 +#elif HASH_ALGO_SHA384 +# define PROGRAM_NAME "sha384sum" +# define DIGEST_TYPE_STRING "SHA384" +# define DIGEST_STREAM sha384_stream +# define DIGEST_BITS 384 +# define DIGEST_REFERENCE "FIPS-180-2" +# define DIGEST_ALIGN 8 +#else +# error "Can't decide which hash algorithm to compile." +#endif +#if !HASH_ALGO_SUM && !HASH_ALGO_CKSUM +# define DIGEST_OUT output_file +#endif + +#if HASH_ALGO_SUM +# define AUTHORS \ + proper_name ("Kayvan Aghaiepour"), \ + proper_name ("David MacKenzie") +#elif HASH_ALGO_CKSUM +# define AUTHORS \ + proper_name_lite ("Padraig Brady", "P\303\241draig Brady"), \ + proper_name ("Q. Frank Xia") +#elif HASH_ALGO_BLAKE2 +# define AUTHORS \ + proper_name_lite ("Padraig Brady", "P\303\241draig Brady"), \ + proper_name ("Samuel Neves") +#else +# define AUTHORS \ + proper_name ("Ulrich Drepper"), \ + proper_name ("Scott Miller"), \ + proper_name ("David Madore") +#endif +#if !HASH_ALGO_BLAKE2 && !HASH_ALGO_CKSUM +# define DIGEST_HEX_BYTES (DIGEST_BITS / 4) +#endif +#define DIGEST_BIN_BYTES (DIGEST_BITS / 8) + +/* The minimum length of a valid digest line. This length does + not include any newline character at the end of a line. */ +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM +# define MIN_DIGEST_LINE_LENGTH 3 /* With -l 8. */ +#else +# define MIN_DIGEST_LINE_LENGTH \ + (DIGEST_HEX_BYTES /* length of hexadecimal message digest */ \ + + 1 /* blank */ \ + + 1 /* minimum filename length */ ) +#endif + +#if !HASH_ALGO_SUM +static void +output_file (char const *file, int binary_file, void const *digest, + bool raw, bool tagged, unsigned char delim, bool args, + uintmax_t length); +#endif + +/* True if any of the files read were the standard input. */ +static bool have_read_stdin; + +/* The minimum length of a valid checksum line for the selected algorithm. */ +static size_t min_digest_line_length; + +/* Set to the length of a digest hex string for the selected algorithm. */ +static size_t digest_hex_bytes; + +/* With --check, don't generate any output. + The exit code indicates success or failure. */ +static bool status_only = false; + +/* With --check, print a message to standard error warning about each + improperly formatted checksum line. */ +static bool warn = false; + +/* With --check, ignore missing files. */ +static bool ignore_missing = false; + +/* With --check, suppress the "OK" printed for each verified file. */ +static bool quiet = false; + +/* With --check, exit with a non-zero return code if any line is + improperly formatted. */ +static bool strict = false; + +/* Whether a BSD reversed format checksum is detected. */ +static int bsd_reversed = -1; + +/* line delimiter. */ +static unsigned char digest_delim = '\n'; + +#if HASH_ALGO_CKSUM +/* If true, print base64-encoded digests, not hex. */ +static bool base64_digest = false; +#endif + +/* If true, print binary digests, not hex. */ +static bool raw_digest = false; + +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM +# define BLAKE2B_MAX_LEN BLAKE2B_OUTBYTES +static uintmax_t digest_length; +#endif /* HASH_ALGO_BLAKE2 */ + +typedef void (*digest_output_fn)(char const *, int, void const *, bool, + bool, unsigned char, bool, uintmax_t); +#if HASH_ALGO_SUM +enum Algorithm +{ + bsd, + sysv, +}; + +static enum Algorithm sum_algorithm; +static sumfn sumfns[]= +{ + bsd_sum_stream, + sysv_sum_stream, +}; +static digest_output_fn sum_output_fns[]= +{ + output_bsd, + output_sysv, +}; +#endif + +#if HASH_ALGO_CKSUM +static int +md5_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return md5_stream (stream, resstream); +} +static int +sha1_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return sha1_stream (stream, resstream); +} +static int +sha224_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return sha224_stream (stream, resstream); +} +static int +sha256_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return sha256_stream (stream, resstream); +} +static int +sha384_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return sha384_stream (stream, resstream); +} +static int +sha512_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return sha512_stream (stream, resstream); +} +static int +blake2b_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return blake2b_stream (stream, resstream, *length); +} +static int +sm3_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + return sm3_stream (stream, resstream); +} + +enum Algorithm +{ + bsd, + sysv, + crc, + md5, + sha1, + sha224, + sha256, + sha384, + sha512, + blake2b, + sm3, +}; + +static char const *const algorithm_args[] = +{ + "bsd", "sysv", "crc", "md5", "sha1", "sha224", + "sha256", "sha384", "sha512", "blake2b", "sm3", nullptr +}; +static enum Algorithm const algorithm_types[] = +{ + bsd, sysv, crc, md5, sha1, sha224, + sha256, sha384, sha512, blake2b, sm3, +}; +ARGMATCH_VERIFY (algorithm_args, algorithm_types); + +static char const *const algorithm_tags[] = +{ + "BSD", "SYSV", "CRC", "MD5", "SHA1", "SHA224", + "SHA256", "SHA384", "SHA512", "BLAKE2b", "SM3", nullptr +}; +static int const algorithm_bits[] = +{ + 16, 16, 32, 128, 160, 224, + 256, 384, 512, 512, 256, 0 +}; + +static_assert (ARRAY_CARDINALITY (algorithm_bits) + == ARRAY_CARDINALITY (algorithm_args)); + +static bool algorithm_specified = false; +static enum Algorithm cksum_algorithm = crc; +static sumfn cksumfns[]= +{ + bsd_sum_stream, + sysv_sum_stream, + crc_sum_stream, + md5_sum_stream, + sha1_sum_stream, + sha224_sum_stream, + sha256_sum_stream, + sha384_sum_stream, + sha512_sum_stream, + blake2b_sum_stream, + sm3_sum_stream, +}; +static digest_output_fn cksum_output_fns[]= +{ + output_bsd, + output_sysv, + output_crc, + output_file, + output_file, + output_file, + output_file, + output_file, + output_file, + output_file, + output_file, +}; +bool cksum_debug; +#endif + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ + +enum +{ + IGNORE_MISSING_OPTION = CHAR_MAX + 1, + STATUS_OPTION, + QUIET_OPTION, + STRICT_OPTION, + TAG_OPTION, + UNTAG_OPTION, + DEBUG_PROGRAM_OPTION, + RAW_OPTION, + BASE64_OPTION, +}; + +static struct option const long_options[] = +{ +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM + { "length", required_argument, nullptr, 'l'}, +#endif + +#if !HASH_ALGO_SUM + { "check", no_argument, nullptr, 'c' }, + { "ignore-missing", no_argument, nullptr, IGNORE_MISSING_OPTION}, + { "quiet", no_argument, nullptr, QUIET_OPTION }, + { "status", no_argument, nullptr, STATUS_OPTION }, + { "warn", no_argument, nullptr, 'w' }, + { "strict", no_argument, nullptr, STRICT_OPTION }, + { "tag", no_argument, nullptr, TAG_OPTION }, + { "zero", no_argument, nullptr, 'z' }, + +# if HASH_ALGO_CKSUM + { "algorithm", required_argument, nullptr, 'a'}, + { "base64", no_argument, nullptr, BASE64_OPTION }, + { "debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION}, + { "raw", no_argument, nullptr, RAW_OPTION}, + { "untagged", no_argument, nullptr, UNTAG_OPTION }, +# endif + { "binary", no_argument, nullptr, 'b' }, + { "text", no_argument, nullptr, 't' }, + +#else + {"sysv", no_argument, nullptr, 's'}, +#endif + + { GETOPT_HELP_OPTION_DECL }, + { GETOPT_VERSION_OPTION_DECL }, + { nullptr, 0, nullptr, 0 } +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), program_name); +#if HASH_ALGO_CKSUM + fputs (_("\ +Print or verify checksums.\n\ +By default use the 32 bit CRC algorithm.\n\ +"), stdout); +#else + printf (_("\ +Print or check %s (%d-bit) checksums.\n\ +"), + DIGEST_TYPE_STRING, + DIGEST_BITS); +#endif + + emit_stdin_note (); +#if HASH_ALGO_SUM + fputs (_("\ +\n\ + -r use BSD sum algorithm (the default), use 1K blocks\n\ + -s, --sysv use System V sum algorithm, use 512 bytes blocks\n\ +"), stdout); +#endif +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM + emit_mandatory_arg_note (); +#endif +#if HASH_ALGO_CKSUM + fputs (_("\ + -a, --algorithm=TYPE select the digest type to use. See DIGEST below.\ +\n\ +"), stdout); + fputs (_("\ + --base64 emit base64-encoded digests, not hexadecimal\ +\n\ +"), stdout); +#endif +#if !HASH_ALGO_SUM +# if !HASH_ALGO_CKSUM + if (O_BINARY) + fputs (_("\ + -b, --binary read in binary mode (default unless reading tty stdin)\ +\n\ +"), stdout); + else + fputs (_("\ + -b, --binary read in binary mode\n\ +"), stdout); +# endif + fputs (_("\ + -c, --check read checksums from the FILEs and check them\n\ +"), stdout); +# if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM + fputs (_("\ + -l, --length=BITS digest length in bits; must not exceed the max for\n\ + the blake2 algorithm and must be a multiple of 8\n\ +"), stdout); +# endif +# if HASH_ALGO_CKSUM + fputs (_("\ + --raw emit a raw binary digest, not hexadecimal\ +\n\ +"), stdout); + fputs (_("\ + --tag create a BSD-style checksum (the default)\n\ +"), stdout); + fputs (_("\ + --untagged create a reversed style checksum, without digest type\n\ +"), stdout); +# else + fputs (_("\ + --tag create a BSD-style checksum\n\ +"), stdout); +# endif +# if !HASH_ALGO_CKSUM + if (O_BINARY) + fputs (_("\ + -t, --text read in text mode (default if reading tty stdin)\n\ +"), stdout); + else + fputs (_("\ + -t, --text read in text mode (default)\n\ +"), stdout); +# endif + fputs (_("\ + -z, --zero end each output line with NUL, not newline,\n\ + and disable file name escaping\n\ +"), stdout); + fputs (_("\ +\n\ +The following five options are useful only when verifying checksums:\n\ + --ignore-missing don't fail or report status for missing files\n\ + --quiet don't print OK for each successfully verified file\n\ + --status don't output anything, status code shows success\n\ + --strict exit non-zero for improperly formatted checksum lines\n\ + -w, --warn warn about improperly formatted checksum lines\n\ +\n\ +"), stdout); +#endif +#if HASH_ALGO_CKSUM + fputs (_("\ + --debug indicate which implementation used\n\ +"), stdout); +#endif + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); +#if HASH_ALGO_CKSUM + fputs (_("\ +\n\ +DIGEST determines the digest algorithm and default output format:\n\ + sysv (equivalent to sum -s)\n\ + bsd (equivalent to sum -r)\n\ + crc (equivalent to cksum)\n\ + md5 (equivalent to md5sum)\n\ + sha1 (equivalent to sha1sum)\n\ + sha224 (equivalent to sha224sum)\n\ + sha256 (equivalent to sha256sum)\n\ + sha384 (equivalent to sha384sum)\n\ + sha512 (equivalent to sha512sum)\n\ + blake2b (equivalent to b2sum)\n\ + sm3 (only available through cksum)\n\ +\n"), stdout); +#endif +#if !HASH_ALGO_SUM && !HASH_ALGO_CKSUM + printf (_("\ +\n\ +The sums are computed as described in %s.\n"), DIGEST_REFERENCE); + fputs (_("\ +When checking, the input should be a former output of this program.\n\ +The default mode is to print a line with: checksum, a space,\n\ +a character indicating input mode ('*' for binary, ' ' for text\n\ +or where binary is insignificant), and name for each FILE.\n\ +\n\ +Note: There is no difference between binary mode and text mode on GNU systems.\ +\n"), stdout); +#endif +#if HASH_ALGO_CKSUM + fputs (_("\ +When checking, the input should be a former output of this program,\n\ +or equivalent standalone program.\ +\n"), stdout); +#endif + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +/* Given a string S, return TRUE if it contains problematic characters + that need escaping. Note we escape '\' itself to provide some forward + compat to introduce escaping of other characters. */ + +ATTRIBUTE_PURE +static bool +problematic_chars (char const *s) +{ + size_t length = strcspn (s, "\\\n\r"); + return s[length] != '\0'; +} + +#define ISWHITE(c) ((c) == ' ' || (c) == '\t') + +/* Given a file name, S of length S_LEN, that is not NUL-terminated, + modify it in place, performing the equivalent of this sed substitution: + 's/\\n/\n/g;s/\\r/\r/g;s/\\\\/\\/g' i.e., replacing each "\\n" string + with a newline, each "\\r" string with a carriage return, + and each "\\\\" with a single backslash, NUL-terminate it and return S. + If S is not a valid escaped file name, i.e., if it ends with an odd number + of backslashes or if it contains a backslash followed by anything other + than "n" or another backslash, return nullptr. */ + +static char * +filename_unescape (char *s, size_t s_len) +{ + char *dst = s; + + for (size_t i = 0; i < s_len; i++) + { + switch (s[i]) + { + case '\\': + if (i == s_len - 1) + { + /* File name ends with an unescaped backslash: invalid. */ + return nullptr; + } + ++i; + switch (s[i]) + { + case 'n': + *dst++ = '\n'; + break; + case 'r': + *dst++ = '\r'; + break; + case '\\': + *dst++ = '\\'; + break; + default: + /* Only '\', 'n' or 'r' may follow a backslash. */ + return nullptr; + } + break; + + case '\0': + /* The file name may not contain a NUL. */ + return nullptr; + + default: + *dst++ = s[i]; + break; + } + } + if (dst < s + s_len) + *dst = '\0'; + + return s; +} + +/* Return true if S is a LEN-byte NUL-terminated string of hex or base64 + digits and has the expected length. Otherwise, return false. */ +ATTRIBUTE_PURE +static bool +valid_digits (unsigned char const *s, size_t len) +{ +#if HASH_ALGO_CKSUM + if (len == BASE64_LENGTH (digest_length / 8)) + { + size_t i; + for (i = 0; i < len - digest_length % 3; i++) + { + if (!isbase64 (*s)) + return false; + ++s; + } + for ( ; i < len; i++) + { + if (*s != '=') + return false; + ++s; + } + } + else +#endif + if (len == digest_hex_bytes) + { + for (idx_t i = 0; i < digest_hex_bytes; i++) + { + if (!isxdigit (*s)) + return false; + ++s; + } + } + else + return false; + + return *s == '\0'; +} + +/* Split the checksum string S (of length S_LEN) from a BSD 'md5' or + 'sha1' command into two parts: a hexadecimal digest, and the file + name. S is modified. Set *D_LEN to the length of the digest string. + Return true if successful. */ + +static bool +bsd_split_3 (char *s, size_t s_len, + unsigned char **digest, size_t *d_len, + char **file_name, bool escaped_filename) +{ + if (s_len == 0) + return false; + + /* Find end of filename. */ + size_t i = s_len - 1; + while (i && s[i] != ')') + i--; + + if (s[i] != ')') + return false; + + *file_name = s; + + if (escaped_filename && filename_unescape (s, i) == nullptr) + return false; + + s[i++] = '\0'; + + while (ISWHITE (s[i])) + i++; + + if (s[i] != '=') + return false; + + i++; + + while (ISWHITE (s[i])) + i++; + + *digest = (unsigned char *) &s[i]; + + *d_len = s_len - i; + return valid_digits (*digest, *d_len); +} + +#if HASH_ALGO_CKSUM +/* Return the corresponding Algorithm for the string S, + or -1 for no match. */ + +static ptrdiff_t +algorithm_from_tag (char *s) +{ + /* Limit check size to this length for perf reasons. */ + static size_t max_tag_len; + if (! max_tag_len) + { + char const * const * tag = algorithm_tags; + while (*tag) + { + size_t tag_len = strlen (*tag++); + max_tag_len = MAX (tag_len, max_tag_len); + } + } + + size_t i = 0; + + /* Find end of tag */ + while (i <= max_tag_len && s[i] && ! ISWHITE (s[i]) + && s[i] != '-' && s[i] != '(') + ++i; + + if (i > max_tag_len) + return -1; + + /* Terminate tag, and lookup. */ + char sep = s[i]; + s[i] = '\0'; + ptrdiff_t algo = argmatch_exact (s, algorithm_tags); + s[i] = sep; + + return algo; +} +#endif + +/* Split the string S (of length S_LEN) into three parts: + a hexadecimal digest, binary flag, and the file name. + S is modified. Set *D_LEN to the length of the digest string. + Return true if successful. */ + +static bool +split_3 (char *s, size_t s_len, + unsigned char **digest, size_t *d_len, int *binary, char **file_name) +{ + bool escaped_filename = false; + size_t algo_name_len; + + size_t i = 0; + while (ISWHITE (s[i])) + ++i; + + if (s[i] == '\\') + { + ++i; + escaped_filename = true; + } + + /* Check for BSD-style checksum line. */ + +#if HASH_ALGO_CKSUM + if (! algorithm_specified) + { + ptrdiff_t algo_tag = algorithm_from_tag (s + i); + if (algo_tag >= 0) + { + if (algo_tag <= crc) + return false; /* We don't support checking these older formats. */ + cksum_algorithm = algo_tag; + } + else + return false; /* We only support tagged format without -a. */ + } +#endif + + algo_name_len = strlen (DIGEST_TYPE_STRING); + if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len)) + { + i += algo_name_len; +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM + /* Terminate and match algorithm name. */ + char const *algo_name = &s[i - algo_name_len]; + bool length_specified = s[i] == '-'; + bool openssl_format = s[i] == '('; /* and no length_specified */ + s[i++] = '\0'; + if (!STREQ (algo_name, DIGEST_TYPE_STRING)) + return false; + if (openssl_format) + s[--i] = '('; + +# if HASH_ALGO_BLAKE2 + digest_length = BLAKE2B_MAX_LEN * 8; +# else + digest_length = algorithm_bits[cksum_algorithm]; +# endif + if (length_specified) + { + uintmax_t length; + char *siend; + if (! (xstrtoumax (s + i, &siend, 0, &length, nullptr) == LONGINT_OK + && 0 < length && length <= digest_length + && length % 8 == 0)) + return false; + + i = siend - s; + digest_length = length; + } + digest_hex_bytes = digest_length / 4; +#endif + if (s[i] == ' ') + ++i; + if (s[i] == '(') + { + ++i; + *binary = 0; + return bsd_split_3 (s + i, s_len - i, + digest, d_len, file_name, escaped_filename); + } + return false; + } + + /* Ignore this line if it is too short. + Each line must have at least 'min_digest_line_length - 1' (or one more, if + the first is a backslash) more characters to contain correct message digest + information. */ + if (s_len - i < min_digest_line_length + (s[i] == '\\')) + return false; + + *digest = (unsigned char *) &s[i]; + +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM + /* Auto determine length. */ +# if HASH_ALGO_CKSUM + if (cksum_algorithm == blake2b) { +# endif + unsigned char const *hp = *digest; + digest_hex_bytes = 0; + while (isxdigit (*hp++)) + digest_hex_bytes++; + if (digest_hex_bytes < 2 || digest_hex_bytes % 2 + || BLAKE2B_MAX_LEN * 2 < digest_hex_bytes) + return false; + digest_length = digest_hex_bytes * 4; +# if HASH_ALGO_CKSUM + } +# endif +#endif + + /* This field must be the hexadecimal or base64 representation + of the message digest. */ + while (s[i] && !ISWHITE (s[i])) + i++; + + /* The digest must be followed by at least one whitespace character. */ + if (i == s_len) + return false; + + *d_len = &s[i] - (char *) *digest; + s[i++] = '\0'; + + if (! valid_digits (*digest, *d_len)) + return false; + + /* If "bsd reversed" format detected. */ + if ((s_len - i == 1) || (s[i] != ' ' && s[i] != '*')) + { + /* Don't allow mixing bsd and standard formats, + to minimize security issues with attackers + renaming files with leading spaces. + This assumes that with bsd format checksums + that the first file name does not have + a leading ' ' or '*'. */ + if (bsd_reversed == 0) + return false; + bsd_reversed = 1; + } + else if (bsd_reversed != 1) + { + bsd_reversed = 0; + *binary = (s[i++] == '*'); + } + + /* All characters between the type indicator and end of line are + significant -- that includes leading and trailing white space. */ + *file_name = &s[i]; + + if (escaped_filename) + return filename_unescape (&s[i], s_len - i) != nullptr; + + return true; +} + +/* If ESCAPE is true, then translate each: + NEWLINE byte to the string, "\\n", + CARRIAGE RETURN byte to the string, "\\r", + and each backslash to "\\\\". */ +static void +print_filename (char const *file, bool escape) +{ + if (! escape) + { + fputs (file, stdout); + return; + } + + while (*file) + { + switch (*file) + { + case '\n': + fputs ("\\n", stdout); + break; + + case '\r': + fputs ("\\r", stdout); + break; + + case '\\': + fputs ("\\\\", stdout); + break; + + default: + putchar (*file); + break; + } + file++; + } +} + +/* An interface to the function, DIGEST_STREAM. + Operate on FILENAME (it may be "-"). + + *BINARY indicates whether the file is binary. BINARY < 0 means it + depends on whether binary mode makes any difference and the file is + a terminal; in that case, clear *BINARY if the file was treated as + text because it was a terminal. + + Put the checksum in *BIN_RESULT, which must be properly aligned. + Put true in *MISSING if the file can't be opened due to ENOENT. + Return true if successful. */ + +static bool +digest_file (char const *filename, int *binary, unsigned char *bin_result, + bool *missing, MAYBE_UNUSED uintmax_t *length) +{ + FILE *fp; + int err; + bool is_stdin = STREQ (filename, "-"); + + *missing = false; + + if (is_stdin) + { + have_read_stdin = true; + fp = stdin; + if (O_BINARY && *binary) + { + if (*binary < 0) + *binary = ! isatty (STDIN_FILENO); + if (*binary) + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + } + else + { + fp = fopen (filename, (O_BINARY && *binary ? "rb" : "r")); + if (fp == nullptr) + { + if (ignore_missing && errno == ENOENT) + { + *missing = true; + return true; + } + error (0, errno, "%s", quotef (filename)); + return false; + } + } + + fadvise (fp, FADVISE_SEQUENTIAL); + +#if HASH_ALGO_CKSUM + if (cksum_algorithm == blake2b) + *length = digest_length / 8; + err = DIGEST_STREAM (fp, bin_result, length); +#elif HASH_ALGO_SUM + err = DIGEST_STREAM (fp, bin_result, length); +#elif HASH_ALGO_BLAKE2 + err = DIGEST_STREAM (fp, bin_result, digest_length / 8); +#else + err = DIGEST_STREAM (fp, bin_result); +#endif + err = err ? errno : 0; + if (is_stdin) + clearerr (fp); + else if (fclose (fp) != 0 && !err) + err = errno; + + if (err) + { + error (0, err, "%s", quotef (filename)); + return false; + } + + return true; +} + +#if !HASH_ALGO_SUM +static void +output_file (char const *file, int binary_file, void const *digest, + bool raw, bool tagged, unsigned char delim, MAYBE_UNUSED bool args, + MAYBE_UNUSED uintmax_t length) +{ +# if HASH_ALGO_CKSUM + if (raw) + { + fwrite (digest, 1, digest_length / 8, stdout); + return; + } +# endif + + unsigned char const *bin_buffer = digest; + + /* Output a leading backslash if the file name contains problematic chars. */ + bool needs_escape = delim == '\n' && problematic_chars (file); + + if (needs_escape) + putchar ('\\'); + + if (tagged) + { + fputs (DIGEST_TYPE_STRING, stdout); +# if HASH_ALGO_BLAKE2 + if (digest_length < BLAKE2B_MAX_LEN * 8) + printf ("-%"PRIuMAX, digest_length); +# elif HASH_ALGO_CKSUM + if (cksum_algorithm == blake2b) + { + if (digest_length < BLAKE2B_MAX_LEN * 8) + printf ("-%"PRIuMAX, digest_length); + } +# endif + fputs (" (", stdout); + print_filename (file, needs_escape); + fputs (") = ", stdout); + } + +# if HASH_ALGO_CKSUM + if (base64_digest) + { + char b64[BASE64_LENGTH (DIGEST_BIN_BYTES) + 1]; + base64_encode ((char const *) bin_buffer, digest_length / 8, + b64, sizeof b64); + fputs (b64, stdout); + } + else +# endif + { + for (size_t i = 0; i < (digest_hex_bytes / 2); ++i) + printf ("%02x", bin_buffer[i]); + } + + if (!tagged) + { + putchar (' '); + putchar (binary_file ? '*' : ' '); + print_filename (file, needs_escape); + } + + putchar (delim); +} +#endif + +#if HASH_ALGO_CKSUM +/* Return true if B64_DIGEST is the same as the base64 digest of the + DIGEST_LENGTH/8 bytes at BIN_BUFFER. */ +static bool +b64_equal (unsigned char const *b64_digest, unsigned char const *bin_buffer) +{ + size_t b64_n_bytes = BASE64_LENGTH (digest_length / 8); + char b64[BASE64_LENGTH (DIGEST_BIN_BYTES) + 1]; + base64_encode ((char const *) bin_buffer, digest_length / 8, b64, sizeof b64); + return memcmp (b64_digest, b64, b64_n_bytes + 1) == 0; +} +#endif + +/* Return true if HEX_DIGEST is the same as the hex-encoded digest of the + DIGEST_LENGTH/8 bytes at BIN_BUFFER. */ +static bool +hex_equal (unsigned char const *hex_digest, unsigned char const *bin_buffer) +{ + static const char bin2hex[] = { '0', '1', '2', '3', + '4', '5', '6', '7', + '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f' }; + size_t digest_bin_bytes = digest_hex_bytes / 2; + + /* Compare generated binary number with text representation + in check file. Ignore case of hex digits. */ + size_t cnt; + for (cnt = 0; cnt < digest_bin_bytes; ++cnt) + { + if (tolower (hex_digest[2 * cnt]) + != bin2hex[bin_buffer[cnt] >> 4] + || (tolower (hex_digest[2 * cnt + 1]) + != (bin2hex[bin_buffer[cnt] & 0xf]))) + break; + } + return cnt == digest_bin_bytes; +} + +static bool +digest_check (char const *checkfile_name) +{ + FILE *checkfile_stream; + uintmax_t n_misformatted_lines = 0; + uintmax_t n_mismatched_checksums = 0; + uintmax_t n_open_or_read_failures = 0; + bool properly_formatted_lines = false; + bool matched_checksums = false; + unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN]; + /* Make sure bin_buffer is properly aligned. */ + unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN); + uintmax_t line_number; + char *line; + size_t line_chars_allocated; + bool is_stdin = STREQ (checkfile_name, "-"); + + if (is_stdin) + { + have_read_stdin = true; + checkfile_name = _("standard input"); + checkfile_stream = stdin; + } + else + { + checkfile_stream = fopen (checkfile_name, "r"); + if (checkfile_stream == nullptr) + { + error (0, errno, "%s", quotef (checkfile_name)); + return false; + } + } + + line_number = 0; + line = nullptr; + line_chars_allocated = 0; + do + { + char *filename; + int binary; + unsigned char *digest; + ssize_t line_length; + + ++line_number; + if (line_number == 0) + error (EXIT_FAILURE, 0, _("%s: too many checksum lines"), + quotef (checkfile_name)); + + line_length = getline (&line, &line_chars_allocated, checkfile_stream); + if (line_length <= 0) + break; + + /* Ignore comment lines, which begin with a '#' character. */ + if (line[0] == '#') + continue; + + /* Remove any trailing newline. */ + line_length -= line[line_length - 1] == '\n'; + /* Remove any trailing carriage return. */ + line_length -= line[line_length - (0 < line_length)] == '\r'; + + /* Ignore empty lines. */ + if (line_length == 0) + continue; + + line[line_length] = '\0'; + + size_t d_len; + if (! (split_3 (line, line_length, &digest, &d_len, &binary, &filename) + && ! (is_stdin && STREQ (filename, "-")))) + { + ++n_misformatted_lines; + + if (warn) + { + error (0, 0, + _("%s: %" PRIuMAX + ": improperly formatted %s checksum line"), + quotef (checkfile_name), line_number, + DIGEST_TYPE_STRING); + } + } + else + { + bool ok; + bool missing; + bool needs_escape = ! status_only && problematic_chars (filename); + + properly_formatted_lines = true; + + uintmax_t length; + ok = digest_file (filename, &binary, bin_buffer, &missing, &length); + + if (!ok) + { + ++n_open_or_read_failures; + if (!status_only) + { + if (needs_escape) + putchar ('\\'); + print_filename (filename, needs_escape); + printf (": %s\n", _("FAILED open or read")); + } + } + else if (ignore_missing && missing) + { + /* Ignore missing files with --ignore-missing. */ + ; + } + else + { + bool match = false; +#if HASH_ALGO_CKSUM + if (d_len < digest_hex_bytes) + match = b64_equal (digest, bin_buffer); + else +#endif + if (d_len == digest_hex_bytes) + match = hex_equal (digest, bin_buffer); + + if (match) + matched_checksums = true; + else + ++n_mismatched_checksums; + + if (!status_only) + { + if (! match || ! quiet) + { + if (needs_escape) + putchar ('\\'); + print_filename (filename, needs_escape); + } + + if (! match) + printf (": %s\n", _("FAILED")); + else if (!quiet) + printf (": %s\n", _("OK")); + } + } + } + } + while (!feof (checkfile_stream) && !ferror (checkfile_stream)); + + free (line); + + int err = ferror (checkfile_stream) ? 0 : -1; + if (is_stdin) + clearerr (checkfile_stream); + else if (fclose (checkfile_stream) != 0 && err < 0) + err = errno; + + if (0 <= err) + { + error (0, err, err ? "%s" : _("%s: read error"), + quotef (checkfile_name)); + return false; + } + + if (! properly_formatted_lines) + { + /* Warn if no tests are found. */ + error (0, 0, _("%s: no properly formatted checksum lines found"), + quotef (checkfile_name)); + } + else + { + if (!status_only) + { + if (n_misformatted_lines != 0) + error (0, 0, + (ngettext + ("WARNING: %" PRIuMAX " line is improperly formatted", + "WARNING: %" PRIuMAX " lines are improperly formatted", + select_plural (n_misformatted_lines))), + n_misformatted_lines); + + if (n_open_or_read_failures != 0) + error (0, 0, + (ngettext + ("WARNING: %" PRIuMAX " listed file could not be read", + "WARNING: %" PRIuMAX " listed files could not be read", + select_plural (n_open_or_read_failures))), + n_open_or_read_failures); + + if (n_mismatched_checksums != 0) + error (0, 0, + (ngettext + ("WARNING: %" PRIuMAX " computed checksum did NOT match", + "WARNING: %" PRIuMAX " computed checksums did NOT match", + select_plural (n_mismatched_checksums))), + n_mismatched_checksums); + + if (ignore_missing && ! matched_checksums) + error (0, 0, _("%s: no file was verified"), + quotef (checkfile_name)); + } + } + + return (properly_formatted_lines + && matched_checksums + && n_mismatched_checksums == 0 + && n_open_or_read_failures == 0 + && (!strict || n_misformatted_lines == 0)); +} + +int +main (int argc, char **argv) +{ + unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN]; + /* Make sure bin_buffer is properly aligned. */ + unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN); + bool do_check = false; + int opt; + bool ok = true; + int binary = -1; +#if HASH_ALGO_CKSUM + bool prefix_tag = true; +#else + bool prefix_tag = false; +#endif + + /* Setting values of global variables. */ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + /* Line buffer stdout to ensure lines are written atomically and immediately + so that processes running in parallel do not intersperse their output. */ + setvbuf (stdout, nullptr, _IOLBF, 0); + +#if HASH_ALGO_SUM + char const *short_opts = "rs"; +#elif HASH_ALGO_CKSUM + char const *short_opts = "a:l:bctwz"; + char const *digest_length_str = ""; +#elif HASH_ALGO_BLAKE2 + char const *short_opts = "l:bctwz"; + char const *digest_length_str = ""; +#else + char const *short_opts = "bctwz"; +#endif + + while ((opt = getopt_long (argc, argv, short_opts, long_options, nullptr)) + != -1) + switch (opt) + { +#if HASH_ALGO_CKSUM + case 'a': + cksum_algorithm = XARGMATCH_EXACT ("--algorithm", optarg, + algorithm_args, algorithm_types); + algorithm_specified = true; + break; + + case DEBUG_PROGRAM_OPTION: + cksum_debug = true; + break; +#endif +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM + case 'l': + digest_length = xdectoumax (optarg, 0, UINTMAX_MAX, "", + _("invalid length"), 0); + digest_length_str = optarg; + if (digest_length % 8 != 0) + { + error (0, 0, _("invalid length: %s"), quote (digest_length_str)); + error (EXIT_FAILURE, 0, _("length is not a multiple of 8")); + } + break; +#endif +#if !HASH_ALGO_SUM + case 'c': + do_check = true; + break; + case STATUS_OPTION: + status_only = true; + warn = false; + quiet = false; + break; + case 'b': + binary = 1; + break; + case 't': + binary = 0; + break; + case 'w': + status_only = false; + warn = true; + quiet = false; + break; + case IGNORE_MISSING_OPTION: + ignore_missing = true; + break; + case QUIET_OPTION: + status_only = false; + warn = false; + quiet = true; + break; + case STRICT_OPTION: + strict = true; + break; +# if HASH_ALGO_CKSUM + case BASE64_OPTION: + base64_digest = true; + break; + case RAW_OPTION: + raw_digest = true; + break; + case UNTAG_OPTION: + prefix_tag = false; + break; +# endif + case TAG_OPTION: + prefix_tag = true; + binary = 1; + break; + case 'z': + digest_delim = '\0'; + break; +#endif +#if HASH_ALGO_SUM + case 'r': /* For SysV compatibility. */ + sum_algorithm = bsd; + break; + + case 's': + sum_algorithm = sysv; + break; +#endif + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + + min_digest_line_length = MIN_DIGEST_LINE_LENGTH; +#if HASH_ALGO_BLAKE2 || HASH_ALGO_CKSUM +# if HASH_ALGO_CKSUM + if (digest_length && cksum_algorithm != blake2b) + error (EXIT_FAILURE, 0, + _("--length is only supported with --algorithm=blake2b")); +# endif + if (digest_length > BLAKE2B_MAX_LEN * 8) + { + error (0, 0, _("invalid length: %s"), quote (digest_length_str)); + error (EXIT_FAILURE, 0, + _("maximum digest length for %s is %d bits"), + quote (DIGEST_TYPE_STRING), + BLAKE2B_MAX_LEN * 8); + } + if (digest_length == 0) + { +# if HASH_ALGO_BLAKE2 + digest_length = BLAKE2B_MAX_LEN * 8; +# else + digest_length = algorithm_bits[cksum_algorithm]; +# endif + } + digest_hex_bytes = digest_length / 4; +#else + digest_hex_bytes = DIGEST_HEX_BYTES; +#endif + +#if HASH_ALGO_CKSUM + switch (cksum_algorithm) + { + case bsd: + case sysv: + case crc: + if (do_check && algorithm_specified) + error (EXIT_FAILURE, 0, + _("--check is not supported with --algorithm={bsd,sysv,crc}")); + break; + default: + break; + } + + if (base64_digest && raw_digest) + { + error (0, 0, _("--base64 and --raw are mutually exclusive")); + usage (EXIT_FAILURE); + } +#endif + + if (prefix_tag && !binary) + { + /* This could be supported in a backwards compatible way + by prefixing the output line with a space in text mode. + However that's invasive enough that it was agreed to + not support this mode with --tag, as --text use cases + are adequately supported by the default output format. */ +#if !HASH_ALGO_CKSUM + error (0, 0, _("--tag does not support --text mode")); +#else + error (0, 0, _("--text mode is only supported with --untagged")); +#endif + usage (EXIT_FAILURE); + } + + if (digest_delim != '\n' && do_check) + { + error (0, 0, _("the --zero option is not supported when " + "verifying checksums")); + usage (EXIT_FAILURE); + } +#if !HASH_ALGO_CKSUM + if (prefix_tag && do_check) + { + error (0, 0, _("the --tag option is meaningless when " + "verifying checksums")); + usage (EXIT_FAILURE); + } +#endif + + if (0 <= binary && do_check) + { + error (0, 0, _("the --binary and --text options are meaningless when " + "verifying checksums")); + usage (EXIT_FAILURE); + } + + if (ignore_missing && !do_check) + { + error (0, 0, + _("the --ignore-missing option is meaningful only when " + "verifying checksums")); + usage (EXIT_FAILURE); + } + + if (status_only && !do_check) + { + error (0, 0, + _("the --status option is meaningful only when verifying checksums")); + usage (EXIT_FAILURE); + } + + if (warn && !do_check) + { + error (0, 0, + _("the --warn option is meaningful only when verifying checksums")); + usage (EXIT_FAILURE); + } + + if (quiet && !do_check) + { + error (0, 0, + _("the --quiet option is meaningful only when verifying checksums")); + usage (EXIT_FAILURE); + } + + if (strict & !do_check) + { + error (0, 0, + _("the --strict option is meaningful only when verifying checksums")); + usage (EXIT_FAILURE); + } + + if (!O_BINARY && binary < 0) + binary = 0; + + char **operand_lim = argv + argc; + if (optind == argc) + *operand_lim++ = bad_cast ("-"); + else if (1 < argc - optind && raw_digest) + error (EXIT_FAILURE, 0, + _("the --raw option is not supported with multiple files")); + + for (char **operandp = argv + optind; operandp < operand_lim; operandp++) + { + char *file = *operandp; + if (do_check) + ok &= digest_check (file); + else + { + int binary_file = binary; + bool missing; + uintmax_t length; + + if (! digest_file (file, &binary_file, bin_buffer, &missing, &length)) + ok = false; + else + { + DIGEST_OUT (file, binary_file, bin_buffer, raw_digest, prefix_tag, + digest_delim, optind != argc, length); + } + } + } + + if (have_read_stdin && fclose (stdin) == EOF) + error (EXIT_FAILURE, errno, _("standard input")); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/dircolors.c b/src/dircolors.c new file mode 100644 index 0000000..8a86efb --- /dev/null +++ b/src/dircolors.c @@ -0,0 +1,543 @@ +/* dircolors - output commands to set the LS_COLOR environment variable + Copyright (C) 1996-2023 Free Software Foundation, Inc. + Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000 H. Peter Anvin + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include +#include +#include + +#include "system.h" +#include "dircolors.h" +#include "c-strcase.h" +#include "obstack.h" +#include "quote.h" +#include "stdio--.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "dircolors" + +#define AUTHORS proper_name ("H. Peter Anvin") + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +enum Shell_syntax +{ + SHELL_SYNTAX_BOURNE, + SHELL_SYNTAX_C, + SHELL_SYNTAX_UNKNOWN +}; + +#define APPEND_CHAR(C) obstack_1grow (&lsc_obstack, C) + +/* Accumulate in this obstack the value for the LS_COLORS environment + variable. */ +static struct obstack lsc_obstack; + +static char const *const slack_codes[] = +{ + "NORMAL", "NORM", "FILE", "RESET", "DIR", "LNK", "LINK", + "SYMLINK", "ORPHAN", "MISSING", "FIFO", "PIPE", "SOCK", "BLK", "BLOCK", + "CHR", "CHAR", "DOOR", "EXEC", "LEFT", "LEFTCODE", "RIGHT", "RIGHTCODE", + "END", "ENDCODE", "SUID", "SETUID", "SGID", "SETGID", "STICKY", + "OTHER_WRITABLE", "OWR", "STICKY_OTHER_WRITABLE", "OWT", "CAPABILITY", + "MULTIHARDLINK", "CLRTOEOL", nullptr +}; + +static char const *const ls_codes[] = +{ + "no", "no", "fi", "rs", "di", "ln", "ln", "ln", "or", "mi", "pi", "pi", + "so", "bd", "bd", "cd", "cd", "do", "ex", "lc", "lc", "rc", "rc", "ec", "ec", + "su", "su", "sg", "sg", "st", "ow", "ow", "tw", "tw", "ca", "mh", "cl", + nullptr +}; +static_assert (ARRAY_CARDINALITY (slack_codes) == ARRAY_CARDINALITY (ls_codes)); + +/* Whether to output escaped ls color codes for display. */ +static bool print_ls_colors; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + PRINT_LS_COLORS_OPTION = CHAR_MAX + 1, +}; + +static struct option const long_options[] = + { + {"bourne-shell", no_argument, nullptr, 'b'}, + {"sh", no_argument, nullptr, 'b'}, + {"csh", no_argument, nullptr, 'c'}, + {"c-shell", no_argument, nullptr, 'c'}, + {"print-database", no_argument, nullptr, 'p'}, + {"print-ls-colors", no_argument, nullptr, PRINT_LS_COLORS_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} + }; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]\n"), program_name); + fputs (_("\ +Output commands to set the LS_COLORS environment variable.\n\ +\n\ +Determine format of output:\n\ + -b, --sh, --bourne-shell output Bourne shell code to set LS_COLORS\n\ + -c, --csh, --c-shell output C shell code to set LS_COLORS\n\ + -p, --print-database output defaults\n\ + --print-ls-colors output fully escaped colors for display\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +If FILE is specified, read it to determine which colors to use for which\n\ +file types and extensions. Otherwise, a precompiled database is used.\n\ +For details on the format of these files, run 'dircolors --print-database'.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +/* If the SHELL environment variable is set to 'csh' or 'tcsh,' + assume C shell. Else Bourne shell. */ + +static enum Shell_syntax +guess_shell_syntax (void) +{ + char *shell; + + shell = getenv ("SHELL"); + if (shell == nullptr || *shell == '\0') + return SHELL_SYNTAX_UNKNOWN; + + shell = last_component (shell); + + if (STREQ (shell, "csh") || STREQ (shell, "tcsh")) + return SHELL_SYNTAX_C; + + return SHELL_SYNTAX_BOURNE; +} + +static void +parse_line (char const *line, char **keyword, char **arg) +{ + char const *p; + char const *keyword_start; + char const *arg_start; + + *keyword = nullptr; + *arg = nullptr; + + for (p = line; isspace (to_uchar (*p)); ++p) + continue; + + /* Ignore blank lines and shell-style comments. */ + if (*p == '\0' || *p == '#') + return; + + keyword_start = p; + + while (!isspace (to_uchar (*p)) && *p != '\0') + { + ++p; + } + + *keyword = ximemdup0 (keyword_start, p - keyword_start); + if (*p == '\0') + return; + + do + { + ++p; + } + while (isspace (to_uchar (*p))); + + if (*p == '\0' || *p == '#') + return; + + arg_start = p; + + while (*p != '\0' && *p != '#') + ++p; + + for (--p; isspace (to_uchar (*p)); --p) + continue; + ++p; + + *arg = ximemdup0 (arg_start, p - arg_start); +} + +/* Accumulate STR to LS_COLORS data. + If outputting shell syntax, then escape appropriately. */ + +static void +append_quoted (char const *str) +{ + bool need_backslash = true; + + while (*str != '\0') + { + if (! print_ls_colors) + switch (*str) + { + case '\'': + APPEND_CHAR ('\''); + APPEND_CHAR ('\\'); + APPEND_CHAR ('\''); + need_backslash = true; + break; + + case '\\': + case '^': + need_backslash = !need_backslash; + break; + + case ':': + case '=': + if (need_backslash) + APPEND_CHAR ('\\'); + FALLTHROUGH; + + default: + need_backslash = true; + break; + } + + APPEND_CHAR (*str); + ++str; + } +} + +/* Accumulate entry to LS_COLORS data. + Use shell syntax unless PRINT_LS_COLORS is set. */ + +static void +append_entry (char prefix, char const *item, char const *arg) +{ + if (print_ls_colors) + { + append_quoted ("\x1B["); + append_quoted (arg); + APPEND_CHAR ('m'); + } + if (prefix) + APPEND_CHAR (prefix); + append_quoted (item); + APPEND_CHAR (print_ls_colors ? '\t' : '='); + append_quoted (arg); + if (print_ls_colors) + append_quoted ("\x1B[0m"); + APPEND_CHAR (print_ls_colors ? '\n' : ':'); +} + +/* Read the file open on FP (with name FILENAME). First, look for a + 'TERM name' directive where name matches the current terminal type. + Once found, translate and accumulate the associated directives onto + the global obstack LSC_OBSTACK. Give a diagnostic + upon failure (unrecognized keyword is the only way to fail here). + Return true if successful. */ + +static bool +dc_parse_stream (FILE *fp, char const *filename) +{ + idx_t line_number = 0; + char const *next_G_line = G_line; + char *input_line = nullptr; + size_t input_line_size = 0; + char const *line; + char const *term; + char const *colorterm; + bool ok = true; + + /* State for the parser. */ + enum { ST_TERMNO, ST_TERMYES, ST_TERMSURE, ST_GLOBAL } state = ST_GLOBAL; + + /* Get terminal type */ + term = getenv ("TERM"); + if (term == nullptr || *term == '\0') + term = "none"; + + /* Also match $COLORTERM. */ + colorterm = getenv ("COLORTERM"); + if (colorterm == nullptr) + colorterm = ""; /* Doesn't match default "?*" */ + + while (true) + { + char *keywd, *arg; + bool unrecognized; + + ++line_number; + + if (fp) + { + if (getline (&input_line, &input_line_size, fp) <= 0) + { + if (ferror (fp)) + { + error (0, errno, _("%s: read error"), quotef (filename)); + ok = false; + } + free (input_line); + break; + } + line = input_line; + } + else + { + if (next_G_line == G_line + sizeof G_line) + break; + line = next_G_line; + next_G_line += strlen (next_G_line) + 1; + } + + parse_line (line, &keywd, &arg); + + if (keywd == nullptr) + continue; + + if (arg == nullptr) + { + error (0, 0, _("%s:%td: invalid line; missing second token"), + quotef (filename), line_number); + ok = false; + free (keywd); + continue; + } + + unrecognized = false; + if (c_strcasecmp (keywd, "TERM") == 0) + { + if (state != ST_TERMSURE) + state = fnmatch (arg, term, 0) == 0 ? ST_TERMSURE : ST_TERMNO; + } + else if (c_strcasecmp (keywd, "COLORTERM") == 0) + { + if (state != ST_TERMSURE) + state = fnmatch (arg, colorterm, 0) == 0 ? ST_TERMSURE : ST_TERMNO; + } + else + { + if (state == ST_TERMSURE) + state = ST_TERMYES; /* Another {COLOR,}TERM can cancel. */ + + if (state != ST_TERMNO) + { + if (keywd[0] == '.') + append_entry ('*', keywd, arg); + else if (keywd[0] == '*') + append_entry (0, keywd, arg); + else if (c_strcasecmp (keywd, "OPTIONS") == 0 + || c_strcasecmp (keywd, "COLOR") == 0 + || c_strcasecmp (keywd, "EIGHTBIT") == 0) + { + /* Ignore. */ + } + else + { + int i; + + for (i = 0; slack_codes[i] != nullptr; ++i) + if (c_strcasecmp (keywd, slack_codes[i]) == 0) + break; + + if (slack_codes[i] != nullptr) + append_entry (0, ls_codes[i], arg); + else + unrecognized = true; + } + } + else + unrecognized = true; + } + + if (unrecognized && (state == ST_TERMSURE || state == ST_TERMYES)) + { + error (0, 0, _("%s:%td: unrecognized keyword %s"), + (filename ? quotef (filename) : _("")), + line_number, keywd); + ok = false; + } + + free (keywd); + free (arg); + } + + return ok; +} + +static bool +dc_parse_file (char const *filename) +{ + bool ok; + + if (! STREQ (filename, "-") && freopen (filename, "r", stdin) == nullptr) + { + error (0, errno, "%s", quotef (filename)); + return false; + } + + ok = dc_parse_stream (stdin, filename); + + if (fclose (stdin) != 0) + { + error (0, errno, "%s", quotef (filename)); + return false; + } + + return ok; +} + +int +main (int argc, char **argv) +{ + bool ok = true; + int optc; + enum Shell_syntax syntax = SHELL_SYNTAX_UNKNOWN; + bool print_database = false; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "bcp", long_options, nullptr)) != -1) + switch (optc) + { + case 'b': /* Bourne shell syntax. */ + syntax = SHELL_SYNTAX_BOURNE; + break; + + case 'c': /* C shell syntax. */ + syntax = SHELL_SYNTAX_C; + break; + + case 'p': + print_database = true; + break; + + case PRINT_LS_COLORS_OPTION: + print_ls_colors = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + + argc -= optind; + argv += optind; + + /* It doesn't make sense to use --print with either of + --bourne or --c-shell. */ + if ((print_database | print_ls_colors) && syntax != SHELL_SYNTAX_UNKNOWN) + { + error (0, 0, + _("the options to output non shell syntax,\n" + "and to select a shell syntax are mutually exclusive")); + usage (EXIT_FAILURE); + } + + if (print_database && print_ls_colors) + { + error (0, 0, + _("options --print-database and --print-ls-colors " + "are mutually exclusive")); + usage (EXIT_FAILURE); + } + + if ((!print_database) < argc) + { + error (0, 0, _("extra operand %s"), + quote (argv[!print_database])); + if (print_database) + fprintf (stderr, "%s\n", + _("file operands cannot be combined with " + "--print-database (-p)")); + usage (EXIT_FAILURE); + } + + if (print_database) + { + char const *p = G_line; + while (p - G_line < sizeof G_line) + { + puts (p); + p += strlen (p) + 1; + } + } + else + { + /* If shell syntax was not explicitly specified, try to guess it. */ + if (syntax == SHELL_SYNTAX_UNKNOWN && ! print_ls_colors) + { + syntax = guess_shell_syntax (); + if (syntax == SHELL_SYNTAX_UNKNOWN) + error (EXIT_FAILURE, 0, + _("no SHELL environment variable," + " and no shell type option given")); + } + + obstack_init (&lsc_obstack); + if (argc == 0) + ok = dc_parse_stream (nullptr, nullptr); + else + ok = dc_parse_file (argv[0]); + + if (ok) + { + size_t len = obstack_object_size (&lsc_obstack); + char *s = obstack_finish (&lsc_obstack); + char const *prefix; + char const *suffix; + + if (syntax == SHELL_SYNTAX_BOURNE) + { + prefix = "LS_COLORS='"; + suffix = "';\nexport LS_COLORS\n"; + } + else + { + prefix = "setenv LS_COLORS '"; + suffix = "'\n"; + } + if (! print_ls_colors) + fputs (prefix, stdout); + fwrite (s, 1, len, stdout); + if (! print_ls_colors) + fputs (suffix, stdout); + } + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/dircolors.h b/src/dircolors.h new file mode 100644 index 0000000..029d590 --- /dev/null +++ b/src/dircolors.h @@ -0,0 +1,239 @@ +static char const G_line[] = +{ + '#',' ','C','o','n','f','i','g','u','r','a','t','i','o','n',' ','f','i','l','e',' ','f','o','r',' ','d','i','r','c','o','l','o','r','s',',',' ','a',' ','u','t','i','l','i','t','y',' ','t','o',' ','h','e','l','p',' ','y','o','u',' ','s','e','t',' ','t','h','e',0, + '#',' ','L','S','_','C','O','L','O','R','S',' ','e','n','v','i','r','o','n','m','e','n','t',' ','v','a','r','i','a','b','l','e',' ','u','s','e','d',' ','b','y',' ','G','N','U',' ','l','s',' ','w','i','t','h',' ','t','h','e',' ','-','-','c','o','l','o','r',' ','o','p','t','i','o','n','.',0, + '#',' ','C','o','p','y','r','i','g','h','t',' ','(','C',')',' ','1','9','9','6','-','2','0','2','3',' ','F','r','e','e',' ','S','o','f','t','w','a','r','e',' ','F','o','u','n','d','a','t','i','o','n',',',' ','I','n','c','.',0, + '#',' ','C','o','p','y','i','n','g',' ','a','n','d',' ','d','i','s','t','r','i','b','u','t','i','o','n',' ','o','f',' ','t','h','i','s',' ','f','i','l','e',',',' ','w','i','t','h',' ','o','r',' ','w','i','t','h','o','u','t',' ','m','o','d','i','f','i','c','a','t','i','o','n',',',0, + '#',' ','a','r','e',' ','p','e','r','m','i','t','t','e','d',' ','p','r','o','v','i','d','e','d',' ','t','h','e',' ','c','o','p','y','r','i','g','h','t',' ','n','o','t','i','c','e',' ','a','n','d',' ','t','h','i','s',' ','n','o','t','i','c','e',' ','a','r','e',' ','p','r','e','s','e','r','v','e','d','.',0, + '#',0, + '#',' ','T','h','e',' ','k','e','y','w','o','r','d','s',' ','C','O','L','O','R',',',' ','O','P','T','I','O','N','S',',',' ','a','n','d',' ','E','I','G','H','T','B','I','T',' ','(','h','o','n','o','r','e','d',' ','b','y',' ','t','h','e',0, + '#',' ','s','l','a','c','k','w','a','r','e',' ','v','e','r','s','i','o','n',' ','o','f',' ','d','i','r','c','o','l','o','r','s',')',' ','a','r','e',' ','r','e','c','o','g','n','i','z','e','d',' ','b','u','t',' ','i','g','n','o','r','e','d','.',0, + '#',' ','G','l','o','b','a','l',' ','c','o','n','f','i','g',' ','o','p','t','i','o','n','s',' ','c','a','n',' ','b','e',' ','s','p','e','c','i','f','i','e','d',' ','b','e','f','o','r','e',' ','T','E','R','M',' ','o','r',' ','C','O','L','O','R','T','E','R','M',' ','e','n','t','r','i','e','s',0, + '#',' ','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=',0, + '#',' ','T','e','r','m','i','n','a','l',' ','f','i','l','t','e','r','s',0, + '#',' ','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=',0, + '#',' ','B','e','l','o','w',' ','a','r','e',' ','T','E','R','M',' ','o','r',' ','C','O','L','O','R','T','E','R','M',' ','e','n','t','r','i','e','s',',',' ','w','h','i','c','h',' ','c','a','n',' ','b','e',' ','g','l','o','b',' ','p','a','t','t','e','r','n','s',',',' ','w','h','i','c','h',0, + '#',' ','r','e','s','t','r','i','c','t',' ','f','o','l','l','o','w','i','n','g',' ','c','o','n','f','i','g',' ','t','o',' ','s','y','s','t','e','m','s',' ','w','i','t','h',' ','m','a','t','c','h','i','n','g',' ','e','n','v','i','r','o','n','m','e','n','t',' ','v','a','r','i','a','b','l','e','s','.',0, + 'C','O','L','O','R','T','E','R','M',' ','?','*',0, + 'T','E','R','M',' ','E','t','e','r','m',0, + 'T','E','R','M',' ','a','n','s','i',0, + 'T','E','R','M',' ','*','c','o','l','o','r','*',0, + 'T','E','R','M',' ','c','o','n','[','0','-','9',']','*','x','[','0','-','9',']','*',0, + 'T','E','R','M',' ','c','o','n','s','2','5',0, + 'T','E','R','M',' ','c','o','n','s','o','l','e',0, + 'T','E','R','M',' ','c','y','g','w','i','n',0, + 'T','E','R','M',' ','*','d','i','r','e','c','t','*',0, + 'T','E','R','M',' ','d','t','t','e','r','m',0, + 'T','E','R','M',' ','g','n','o','m','e',0, + 'T','E','R','M',' ','h','u','r','d',0, + 'T','E','R','M',' ','j','f','b','t','e','r','m',0, + 'T','E','R','M',' ','k','o','n','s','o','l','e',0, + 'T','E','R','M',' ','k','t','e','r','m',0, + 'T','E','R','M',' ','l','i','n','u','x',0, + 'T','E','R','M',' ','l','i','n','u','x','-','c',0, + 'T','E','R','M',' ','m','l','t','e','r','m',0, + 'T','E','R','M',' ','p','u','t','t','y',0, + 'T','E','R','M',' ','r','x','v','t','*',0, + 'T','E','R','M',' ','s','c','r','e','e','n','*',0, + 'T','E','R','M',' ','s','t',0, + 'T','E','R','M',' ','t','e','r','m','i','n','a','t','o','r',0, + 'T','E','R','M',' ','t','m','u','x','*',0, + 'T','E','R','M',' ','v','t','1','0','0',0, + 'T','E','R','M',' ','x','t','e','r','m','*',0, + '#',' ','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=',0, + '#',' ','B','a','s','i','c',' ','f','i','l','e',' ','a','t','t','r','i','b','u','t','e','s',0, + '#',' ','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=',0, + '#',' ','B','e','l','o','w',' ','a','r','e',' ','t','h','e',' ','c','o','l','o','r',' ','i','n','i','t',' ','s','t','r','i','n','g','s',' ','f','o','r',' ','t','h','e',' ','b','a','s','i','c',' ','f','i','l','e',' ','t','y','p','e','s','.',0, + '#',' ','O','n','e',' ','c','a','n',' ','u','s','e',' ','c','o','d','e','s',' ','f','o','r',' ','2','5','6',' ','o','r',' ','m','o','r','e',' ','c','o','l','o','r','s',' ','s','u','p','p','o','r','t','e','d',' ','b','y',' ','m','o','d','e','r','n',' ','t','e','r','m','i','n','a','l','s','.',0, + '#',' ','T','h','e',' ','d','e','f','a','u','l','t',' ','c','o','l','o','r',' ','c','o','d','e','s',' ','u','s','e',' ','t','h','e',' ','c','a','p','a','b','i','l','i','t','i','e','s',' ','o','f',' ','a','n',' ','8',' ','c','o','l','o','r',' ','t','e','r','m','i','n','a','l',0, + '#',' ','w','i','t','h',' ','s','o','m','e',' ','a','d','d','i','t','i','o','n','a','l',' ','a','t','t','r','i','b','u','t','e','s',' ','a','s',' ','p','e','r',' ','t','h','e',' ','f','o','l','l','o','w','i','n','g',' ','c','o','d','e','s',':',0, + '#',' ','A','t','t','r','i','b','u','t','e',' ','c','o','d','e','s',':',0, + '#',' ','0','0','=','n','o','n','e',' ','0','1','=','b','o','l','d',' ','0','4','=','u','n','d','e','r','s','c','o','r','e',' ','0','5','=','b','l','i','n','k',' ','0','7','=','r','e','v','e','r','s','e',' ','0','8','=','c','o','n','c','e','a','l','e','d',0, + '#',' ','T','e','x','t',' ','c','o','l','o','r',' ','c','o','d','e','s',':',0, + '#',' ','3','0','=','b','l','a','c','k',' ','3','1','=','r','e','d',' ','3','2','=','g','r','e','e','n',' ','3','3','=','y','e','l','l','o','w',' ','3','4','=','b','l','u','e',' ','3','5','=','m','a','g','e','n','t','a',' ','3','6','=','c','y','a','n',' ','3','7','=','w','h','i','t','e',0, + '#',' ','B','a','c','k','g','r','o','u','n','d',' ','c','o','l','o','r',' ','c','o','d','e','s',':',0, + '#',' ','4','0','=','b','l','a','c','k',' ','4','1','=','r','e','d',' ','4','2','=','g','r','e','e','n',' ','4','3','=','y','e','l','l','o','w',' ','4','4','=','b','l','u','e',' ','4','5','=','m','a','g','e','n','t','a',' ','4','6','=','c','y','a','n',' ','4','7','=','w','h','i','t','e',0, + '#','N','O','R','M','A','L',' ','0','0',' ','#',' ','n','o',' ','c','o','l','o','r',' ','c','o','d','e',' ','a','t',' ','a','l','l',0, + '#','F','I','L','E',' ','0','0',' ','#',' ','r','e','g','u','l','a','r',' ','f','i','l','e',':',' ','u','s','e',' ','n','o',' ','c','o','l','o','r',' ','a','t',' ','a','l','l',0, + 'R','E','S','E','T',' ','0',' ','#',' ','r','e','s','e','t',' ','t','o',' ','"','n','o','r','m','a','l','"',' ','c','o','l','o','r',0, + 'D','I','R',' ','0','1',';','3','4',' ','#',' ','d','i','r','e','c','t','o','r','y',0, + 'L','I','N','K',' ','0','1',';','3','6',' ','#',' ','s','y','m','b','o','l','i','c',' ','l','i','n','k','.',' ','(','I','f',' ','y','o','u',' ','s','e','t',' ','t','h','i','s',' ','t','o',' ','\'','t','a','r','g','e','t','\'',' ','i','n','s','t','e','a','d',' ','o','f',' ','a',0, + ' ','#',' ','n','u','m','e','r','i','c','a','l',' ','v','a','l','u','e',',',' ','t','h','e',' ','c','o','l','o','r',' ','i','s',' ','a','s',' ','f','o','r',' ','t','h','e',' ','f','i','l','e',' ','p','o','i','n','t','e','d',' ','t','o','.',')',0, + 'M','U','L','T','I','H','A','R','D','L','I','N','K',' ','0','0',' ','#',' ','r','e','g','u','l','a','r',' ','f','i','l','e',' ','w','i','t','h',' ','m','o','r','e',' ','t','h','a','n',' ','o','n','e',' ','l','i','n','k',0, + 'F','I','F','O',' ','4','0',';','3','3',' ','#',' ','p','i','p','e',0, + 'S','O','C','K',' ','0','1',';','3','5',' ','#',' ','s','o','c','k','e','t',0, + 'D','O','O','R',' ','0','1',';','3','5',' ','#',' ','d','o','o','r',0, + 'B','L','K',' ','4','0',';','3','3',';','0','1',' ','#',' ','b','l','o','c','k',' ','d','e','v','i','c','e',' ','d','r','i','v','e','r',0, + 'C','H','R',' ','4','0',';','3','3',';','0','1',' ','#',' ','c','h','a','r','a','c','t','e','r',' ','d','e','v','i','c','e',' ','d','r','i','v','e','r',0, + 'O','R','P','H','A','N',' ','4','0',';','3','1',';','0','1',' ','#',' ','s','y','m','l','i','n','k',' ','t','o',' ','n','o','n','e','x','i','s','t','e','n','t',' ','f','i','l','e',',',' ','o','r',' ','n','o','n','-','s','t','a','t','\'','a','b','l','e',' ','f','i','l','e',' ','.','.','.',0, + 'M','I','S','S','I','N','G',' ','0','0',' ','#',' ','.','.','.',' ','a','n','d',' ','t','h','e',' ','f','i','l','e','s',' ','t','h','e','y',' ','p','o','i','n','t',' ','t','o',0, + 'S','E','T','U','I','D',' ','3','7',';','4','1',' ','#',' ','f','i','l','e',' ','t','h','a','t',' ','i','s',' ','s','e','t','u','i','d',' ','(','u','+','s',')',0, + 'S','E','T','G','I','D',' ','3','0',';','4','3',' ','#',' ','f','i','l','e',' ','t','h','a','t',' ','i','s',' ','s','e','t','g','i','d',' ','(','g','+','s',')',0, + 'C','A','P','A','B','I','L','I','T','Y',' ','0','0',' ','#',' ','f','i','l','e',' ','w','i','t','h',' ','c','a','p','a','b','i','l','i','t','y',' ','(','v','e','r','y',' ','e','x','p','e','n','s','i','v','e',' ','t','o',' ','l','o','o','k','u','p',')',0, + 'S','T','I','C','K','Y','_','O','T','H','E','R','_','W','R','I','T','A','B','L','E',' ','3','0',';','4','2',' ','#',' ','d','i','r',' ','t','h','a','t',' ','i','s',' ','s','t','i','c','k','y',' ','a','n','d',' ','o','t','h','e','r','-','w','r','i','t','a','b','l','e',' ','(','+','t',',','o','+','w',')',0, + 'O','T','H','E','R','_','W','R','I','T','A','B','L','E',' ','3','4',';','4','2',' ','#',' ','d','i','r',' ','t','h','a','t',' ','i','s',' ','o','t','h','e','r','-','w','r','i','t','a','b','l','e',' ','(','o','+','w',')',' ','a','n','d',' ','n','o','t',' ','s','t','i','c','k','y',0, + 'S','T','I','C','K','Y',' ','3','7',';','4','4',' ','#',' ','d','i','r',' ','w','i','t','h',' ','t','h','e',' ','s','t','i','c','k','y',' ','b','i','t',' ','s','e','t',' ','(','+','t',')',' ','a','n','d',' ','n','o','t',' ','o','t','h','e','r','-','w','r','i','t','a','b','l','e',0, + '#',' ','T','h','i','s',' ','i','s',' ','f','o','r',' ','f','i','l','e','s',' ','w','i','t','h',' ','e','x','e','c','u','t','e',' ','p','e','r','m','i','s','s','i','o','n',':',0, + 'E','X','E','C',' ','0','1',';','3','2',0, + '#',' ','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=',0, + '#',' ','F','i','l','e',' ','e','x','t','e','n','s','i','o','n',' ','a','t','t','r','i','b','u','t','e','s',0, + '#',' ','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=','=',0, + '#',' ','L','i','s','t',' ','a','n','y',' ','f','i','l','e',' ','e','x','t','e','n','s','i','o','n','s',' ','l','i','k','e',' ','\'','.','g','z','\'',' ','o','r',' ','\'','.','t','a','r','\'',' ','t','h','a','t',' ','y','o','u',' ','w','o','u','l','d',' ','l','i','k','e',' ','l','s',0, + '#',' ','t','o',' ','c','o','l','o','r',' ','b','e','l','o','w','.',' ','P','u','t',' ','t','h','e',' ','s','u','f','f','i','x',',',' ','a',' ','s','p','a','c','e',',',' ','a','n','d',' ','t','h','e',' ','c','o','l','o','r',' ','i','n','i','t',' ','s','t','r','i','n','g','.',0, + '#',' ','(','a','n','d',' ','a','n','y',' ','c','o','m','m','e','n','t','s',' ','y','o','u',' ','w','a','n','t',' ','t','o',' ','a','d','d',' ','a','f','t','e','r',' ','a',' ','\'','#','\'',')','.',0, + '#',' ','S','u','f','f','i','x','e','s',' ','a','r','e',' ','m','a','t','c','h','e','d',' ','c','a','s','e',' ','i','n','s','e','n','s','i','t','i','v','e','l','y',',',' ','b','u','t',' ','i','f',' ','y','o','u',' ','d','e','f','i','n','e',' ','d','i','f','f','e','r','e','n','t',0, + '#',' ','i','n','i','t',' ','s','t','r','i','n','g','s',' ','f','o','r',' ','s','e','p','a','r','a','t','e',' ','c','a','s','e','s',',',' ','t','h','o','s','e',' ','w','i','l','l',' ','b','e',' ','h','o','n','o','r','e','d','.',0, + '#',0, + '#',' ','I','f',' ','y','o','u',' ','u','s','e',' ','D','O','S','-','s','t','y','l','e',' ','s','u','f','f','i','x','e','s',',',' ','y','o','u',' ','m','a','y',' ','w','a','n','t',' ','t','o',' ','u','n','c','o','m','m','e','n','t',' ','t','h','e',' ','f','o','l','l','o','w','i','n','g',':',0, + '#','.','c','m','d',' ','0','1',';','3','2',' ','#',' ','e','x','e','c','u','t','a','b','l','e','s',' ','(','b','r','i','g','h','t',' ','g','r','e','e','n',')',0, + '#','.','e','x','e',' ','0','1',';','3','2',0, + '#','.','c','o','m',' ','0','1',';','3','2',0, + '#','.','b','t','m',' ','0','1',';','3','2',0, + '#','.','b','a','t',' ','0','1',';','3','2',0, + '#',' ','O','r',' ','i','f',' ','y','o','u',' ','w','a','n','t',' ','t','o',' ','c','o','l','o','r',' ','s','c','r','i','p','t','s',' ','e','v','e','n',' ','i','f',' ','t','h','e','y',' ','d','o',' ','n','o','t',' ','h','a','v','e',' ','t','h','e',0, + '#',' ','e','x','e','c','u','t','a','b','l','e',' ','b','i','t',' ','a','c','t','u','a','l','l','y',' ','s','e','t','.',0, + '#','.','s','h',' ','0','1',';','3','2',0, + '#','.','c','s','h',' ','0','1',';','3','2',0, + '#',' ','a','r','c','h','i','v','e','s',' ','o','r',' ','c','o','m','p','r','e','s','s','e','d',' ','(','b','r','i','g','h','t',' ','r','e','d',')',0, + '.','t','a','r',' ','0','1',';','3','1',0, + '.','t','g','z',' ','0','1',';','3','1',0, + '.','a','r','c',' ','0','1',';','3','1',0, + '.','a','r','j',' ','0','1',';','3','1',0, + '.','t','a','z',' ','0','1',';','3','1',0, + '.','l','h','a',' ','0','1',';','3','1',0, + '.','l','z','4',' ','0','1',';','3','1',0, + '.','l','z','h',' ','0','1',';','3','1',0, + '.','l','z','m','a',' ','0','1',';','3','1',0, + '.','t','l','z',' ','0','1',';','3','1',0, + '.','t','x','z',' ','0','1',';','3','1',0, + '.','t','z','o',' ','0','1',';','3','1',0, + '.','t','7','z',' ','0','1',';','3','1',0, + '.','z','i','p',' ','0','1',';','3','1',0, + '.','z',' ','0','1',';','3','1',0, + '.','d','z',' ','0','1',';','3','1',0, + '.','g','z',' ','0','1',';','3','1',0, + '.','l','r','z',' ','0','1',';','3','1',0, + '.','l','z',' ','0','1',';','3','1',0, + '.','l','z','o',' ','0','1',';','3','1',0, + '.','x','z',' ','0','1',';','3','1',0, + '.','z','s','t',' ','0','1',';','3','1',0, + '.','t','z','s','t',' ','0','1',';','3','1',0, + '.','b','z','2',' ','0','1',';','3','1',0, + '.','b','z',' ','0','1',';','3','1',0, + '.','t','b','z',' ','0','1',';','3','1',0, + '.','t','b','z','2',' ','0','1',';','3','1',0, + '.','t','z',' ','0','1',';','3','1',0, + '.','d','e','b',' ','0','1',';','3','1',0, + '.','r','p','m',' ','0','1',';','3','1',0, + '.','j','a','r',' ','0','1',';','3','1',0, + '.','w','a','r',' ','0','1',';','3','1',0, + '.','e','a','r',' ','0','1',';','3','1',0, + '.','s','a','r',' ','0','1',';','3','1',0, + '.','r','a','r',' ','0','1',';','3','1',0, + '.','a','l','z',' ','0','1',';','3','1',0, + '.','a','c','e',' ','0','1',';','3','1',0, + '.','z','o','o',' ','0','1',';','3','1',0, + '.','c','p','i','o',' ','0','1',';','3','1',0, + '.','7','z',' ','0','1',';','3','1',0, + '.','r','z',' ','0','1',';','3','1',0, + '.','c','a','b',' ','0','1',';','3','1',0, + '.','w','i','m',' ','0','1',';','3','1',0, + '.','s','w','m',' ','0','1',';','3','1',0, + '.','d','w','m',' ','0','1',';','3','1',0, + '.','e','s','d',' ','0','1',';','3','1',0, + '#',' ','i','m','a','g','e',' ','f','o','r','m','a','t','s',0, + '.','a','v','i','f',' ','0','1',';','3','5',0, + '.','j','p','g',' ','0','1',';','3','5',0, + '.','j','p','e','g',' ','0','1',';','3','5',0, + '.','m','j','p','g',' ','0','1',';','3','5',0, + '.','m','j','p','e','g',' ','0','1',';','3','5',0, + '.','g','i','f',' ','0','1',';','3','5',0, + '.','b','m','p',' ','0','1',';','3','5',0, + '.','p','b','m',' ','0','1',';','3','5',0, + '.','p','g','m',' ','0','1',';','3','5',0, + '.','p','p','m',' ','0','1',';','3','5',0, + '.','t','g','a',' ','0','1',';','3','5',0, + '.','x','b','m',' ','0','1',';','3','5',0, + '.','x','p','m',' ','0','1',';','3','5',0, + '.','t','i','f',' ','0','1',';','3','5',0, + '.','t','i','f','f',' ','0','1',';','3','5',0, + '.','p','n','g',' ','0','1',';','3','5',0, + '.','s','v','g',' ','0','1',';','3','5',0, + '.','s','v','g','z',' ','0','1',';','3','5',0, + '.','m','n','g',' ','0','1',';','3','5',0, + '.','p','c','x',' ','0','1',';','3','5',0, + '.','m','o','v',' ','0','1',';','3','5',0, + '.','m','p','g',' ','0','1',';','3','5',0, + '.','m','p','e','g',' ','0','1',';','3','5',0, + '.','m','2','v',' ','0','1',';','3','5',0, + '.','m','k','v',' ','0','1',';','3','5',0, + '.','w','e','b','m',' ','0','1',';','3','5',0, + '.','w','e','b','p',' ','0','1',';','3','5',0, + '.','o','g','m',' ','0','1',';','3','5',0, + '.','m','p','4',' ','0','1',';','3','5',0, + '.','m','4','v',' ','0','1',';','3','5',0, + '.','m','p','4','v',' ','0','1',';','3','5',0, + '.','v','o','b',' ','0','1',';','3','5',0, + '.','q','t',' ','0','1',';','3','5',0, + '.','n','u','v',' ','0','1',';','3','5',0, + '.','w','m','v',' ','0','1',';','3','5',0, + '.','a','s','f',' ','0','1',';','3','5',0, + '.','r','m',' ','0','1',';','3','5',0, + '.','r','m','v','b',' ','0','1',';','3','5',0, + '.','f','l','c',' ','0','1',';','3','5',0, + '.','a','v','i',' ','0','1',';','3','5',0, + '.','f','l','i',' ','0','1',';','3','5',0, + '.','f','l','v',' ','0','1',';','3','5',0, + '.','g','l',' ','0','1',';','3','5',0, + '.','d','l',' ','0','1',';','3','5',0, + '.','x','c','f',' ','0','1',';','3','5',0, + '.','x','w','d',' ','0','1',';','3','5',0, + '.','y','u','v',' ','0','1',';','3','5',0, + '.','c','g','m',' ','0','1',';','3','5',0, + '.','e','m','f',' ','0','1',';','3','5',0, + '#',' ','h','t','t','p','s',':','/','/','w','i','k','i','.','x','i','p','h','.','o','r','g','/','M','I','M','E','_','T','y','p','e','s','_','a','n','d','_','F','i','l','e','_','E','x','t','e','n','s','i','o','n','s',0, + '.','o','g','v',' ','0','1',';','3','5',0, + '.','o','g','x',' ','0','1',';','3','5',0, + '#',' ','a','u','d','i','o',' ','f','o','r','m','a','t','s',0, + '.','a','a','c',' ','0','0',';','3','6',0, + '.','a','u',' ','0','0',';','3','6',0, + '.','f','l','a','c',' ','0','0',';','3','6',0, + '.','m','4','a',' ','0','0',';','3','6',0, + '.','m','i','d',' ','0','0',';','3','6',0, + '.','m','i','d','i',' ','0','0',';','3','6',0, + '.','m','k','a',' ','0','0',';','3','6',0, + '.','m','p','3',' ','0','0',';','3','6',0, + '.','m','p','c',' ','0','0',';','3','6',0, + '.','o','g','g',' ','0','0',';','3','6',0, + '.','r','a',' ','0','0',';','3','6',0, + '.','w','a','v',' ','0','0',';','3','6',0, + '#',' ','h','t','t','p','s',':','/','/','w','i','k','i','.','x','i','p','h','.','o','r','g','/','M','I','M','E','_','T','y','p','e','s','_','a','n','d','_','F','i','l','e','_','E','x','t','e','n','s','i','o','n','s',0, + '.','o','g','a',' ','0','0',';','3','6',0, + '.','o','p','u','s',' ','0','0',';','3','6',0, + '.','s','p','x',' ','0','0',';','3','6',0, + '.','x','s','p','f',' ','0','0',';','3','6',0, + '#',' ','b','a','c','k','u','p',' ','f','i','l','e','s',0, + '*','~',' ','0','0',';','9','0',0, + '*','#',' ','0','0',';','9','0',0, + '.','b','a','k',' ','0','0',';','9','0',0, + '.','c','r','d','o','w','n','l','o','a','d',' ','0','0',';','9','0',0, + '.','d','p','k','g','-','d','i','s','t',' ','0','0',';','9','0',0, + '.','d','p','k','g','-','n','e','w',' ','0','0',';','9','0',0, + '.','d','p','k','g','-','o','l','d',' ','0','0',';','9','0',0, + '.','d','p','k','g','-','t','m','p',' ','0','0',';','9','0',0, + '.','o','l','d',' ','0','0',';','9','0',0, + '.','o','r','i','g',' ','0','0',';','9','0',0, + '.','p','a','r','t',' ','0','0',';','9','0',0, + '.','r','e','j',' ','0','0',';','9','0',0, + '.','r','p','m','n','e','w',' ','0','0',';','9','0',0, + '.','r','p','m','o','r','i','g',' ','0','0',';','9','0',0, + '.','r','p','m','s','a','v','e',' ','0','0',';','9','0',0, + '.','s','w','p',' ','0','0',';','9','0',0, + '.','t','m','p',' ','0','0',';','9','0',0, + '.','u','c','f','-','d','i','s','t',' ','0','0',';','9','0',0, + '.','u','c','f','-','n','e','w',' ','0','0',';','9','0',0, + '.','u','c','f','-','o','l','d',' ','0','0',';','9','0',0, + '#',0, + '#',' ','S','u','b','s','e','q','u','e','n','t',' ','T','E','R','M',' ','o','r',' ','C','O','L','O','R','T','E','R','M',' ','e','n','t','r','i','e','s',',',' ','c','a','n',' ','b','e',' ','u','s','e','d',' ','t','o',' ','a','d','d',' ','/',' ','o','v','e','r','r','i','d','e',0, + '#',' ','c','o','n','f','i','g',' ','s','p','e','c','i','f','i','c',' ','t','o',' ','t','h','o','s','e',' ','m','a','t','c','h','i','n','g',' ','e','n','v','i','r','o','n','m','e','n','t',' ','v','a','r','i','a','b','l','e','s','.',0, +}; diff --git a/src/dircolors.hin b/src/dircolors.hin new file mode 100644 index 0000000..353831c --- /dev/null +++ b/src/dircolors.hin @@ -0,0 +1,251 @@ +# Configuration file for dircolors, a utility to help you set the +# LS_COLORS environment variable used by GNU ls with the --color option. + +# Copyright (C) 1996-2023 Free Software Foundation, Inc. +# Copying and distribution of this file, with or without modification, +# are permitted provided the copyright notice and this notice are preserved. + +# +# The keywords COLOR, OPTIONS, and EIGHTBIT (honored by the +# slackware version of dircolors) are recognized but ignored. + +# Global config options can be specified before TERM or COLORTERM entries + +# =================================================================== +# Terminal filters +# =================================================================== +# Below are TERM or COLORTERM entries, which can be glob patterns, which +# restrict following config to systems with matching environment variables. +COLORTERM ?* +TERM Eterm +TERM ansi +TERM *color* +TERM con[0-9]*x[0-9]* +TERM cons25 +TERM console +TERM cygwin +TERM *direct* +TERM dtterm +TERM gnome +TERM hurd +TERM jfbterm +TERM konsole +TERM kterm +TERM linux +TERM linux-c +TERM mlterm +TERM putty +TERM rxvt* +TERM screen* +TERM st +TERM terminator +TERM tmux* +TERM vt100 +TERM xterm* + +# =================================================================== +# Basic file attributes +# =================================================================== +# Below are the color init strings for the basic file types. +# One can use codes for 256 or more colors supported by modern terminals. +# The default color codes use the capabilities of an 8 color terminal +# with some additional attributes as per the following codes: +# Attribute codes: +# 00=none 01=bold 04=underscore 05=blink 07=reverse 08=concealed +# Text color codes: +# 30=black 31=red 32=green 33=yellow 34=blue 35=magenta 36=cyan 37=white +# Background color codes: +# 40=black 41=red 42=green 43=yellow 44=blue 45=magenta 46=cyan 47=white +#NORMAL 00 # no color code at all +#FILE 00 # regular file: use no color at all +RESET 0 # reset to "normal" color +DIR 01;34 # directory +LINK 01;36 # symbolic link. (If you set this to 'target' instead of a + # numerical value, the color is as for the file pointed to.) +MULTIHARDLINK 00 # regular file with more than one link +FIFO 40;33 # pipe +SOCK 01;35 # socket +DOOR 01;35 # door +BLK 40;33;01 # block device driver +CHR 40;33;01 # character device driver +ORPHAN 40;31;01 # symlink to nonexistent file, or non-stat'able file ... +MISSING 00 # ... and the files they point to +SETUID 37;41 # file that is setuid (u+s) +SETGID 30;43 # file that is setgid (g+s) +CAPABILITY 00 # file with capability (very expensive to lookup) +STICKY_OTHER_WRITABLE 30;42 # dir that is sticky and other-writable (+t,o+w) +OTHER_WRITABLE 34;42 # dir that is other-writable (o+w) and not sticky +STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable + +# This is for files with execute permission: +EXEC 01;32 + +# =================================================================== +# File extension attributes +# =================================================================== +# List any file extensions like '.gz' or '.tar' that you would like ls +# to color below. Put the suffix, a space, and the color init string. +# (and any comments you want to add after a '#'). +# Suffixes are matched case insensitively, but if you define different +# init strings for separate cases, those will be honored. +# + +# If you use DOS-style suffixes, you may want to uncomment the following: +#.cmd 01;32 # executables (bright green) +#.exe 01;32 +#.com 01;32 +#.btm 01;32 +#.bat 01;32 +# Or if you want to color scripts even if they do not have the +# executable bit actually set. +#.sh 01;32 +#.csh 01;32 + +# archives or compressed (bright red) +.tar 01;31 +.tgz 01;31 +.arc 01;31 +.arj 01;31 +.taz 01;31 +.lha 01;31 +.lz4 01;31 +.lzh 01;31 +.lzma 01;31 +.tlz 01;31 +.txz 01;31 +.tzo 01;31 +.t7z 01;31 +.zip 01;31 +.z 01;31 +.dz 01;31 +.gz 01;31 +.lrz 01;31 +.lz 01;31 +.lzo 01;31 +.xz 01;31 +.zst 01;31 +.tzst 01;31 +.bz2 01;31 +.bz 01;31 +.tbz 01;31 +.tbz2 01;31 +.tz 01;31 +.deb 01;31 +.rpm 01;31 +.jar 01;31 +.war 01;31 +.ear 01;31 +.sar 01;31 +.rar 01;31 +.alz 01;31 +.ace 01;31 +.zoo 01;31 +.cpio 01;31 +.7z 01;31 +.rz 01;31 +.cab 01;31 +.wim 01;31 +.swm 01;31 +.dwm 01;31 +.esd 01;31 + +# image formats +.avif 01;35 +.jpg 01;35 +.jpeg 01;35 +.mjpg 01;35 +.mjpeg 01;35 +.gif 01;35 +.bmp 01;35 +.pbm 01;35 +.pgm 01;35 +.ppm 01;35 +.tga 01;35 +.xbm 01;35 +.xpm 01;35 +.tif 01;35 +.tiff 01;35 +.png 01;35 +.svg 01;35 +.svgz 01;35 +.mng 01;35 +.pcx 01;35 +.mov 01;35 +.mpg 01;35 +.mpeg 01;35 +.m2v 01;35 +.mkv 01;35 +.webm 01;35 +.webp 01;35 +.ogm 01;35 +.mp4 01;35 +.m4v 01;35 +.mp4v 01;35 +.vob 01;35 +.qt 01;35 +.nuv 01;35 +.wmv 01;35 +.asf 01;35 +.rm 01;35 +.rmvb 01;35 +.flc 01;35 +.avi 01;35 +.fli 01;35 +.flv 01;35 +.gl 01;35 +.dl 01;35 +.xcf 01;35 +.xwd 01;35 +.yuv 01;35 +.cgm 01;35 +.emf 01;35 + +# https://wiki.xiph.org/MIME_Types_and_File_Extensions +.ogv 01;35 +.ogx 01;35 + +# audio formats +.aac 00;36 +.au 00;36 +.flac 00;36 +.m4a 00;36 +.mid 00;36 +.midi 00;36 +.mka 00;36 +.mp3 00;36 +.mpc 00;36 +.ogg 00;36 +.ra 00;36 +.wav 00;36 + +# https://wiki.xiph.org/MIME_Types_and_File_Extensions +.oga 00;36 +.opus 00;36 +.spx 00;36 +.xspf 00;36 + +# backup files +*~ 00;90 +*# 00;90 +.bak 00;90 +.crdownload 00;90 +.dpkg-dist 00;90 +.dpkg-new 00;90 +.dpkg-old 00;90 +.dpkg-tmp 00;90 +.old 00;90 +.orig 00;90 +.part 00;90 +.rej 00;90 +.rpmnew 00;90 +.rpmorig 00;90 +.rpmsave 00;90 +.swp 00;90 +.tmp 00;90 +.ucf-dist 00;90 +.ucf-new 00;90 +.ucf-old 00;90 + +# +# Subsequent TERM or COLORTERM entries, can be used to add / override +# config specific to those matching environment variables. diff --git a/src/dirname.c b/src/dirname.c new file mode 100644 index 0000000..4ee6cd1 --- /dev/null +++ b/src/dirname.c @@ -0,0 +1,135 @@ +/* dirname -- strip suffix from file name + + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie and Jim Meyering. */ + +#include +#include +#include +#include + +#include "system.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "dirname" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +static struct option const longopts[] = +{ + {"zero", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION] NAME...\n\ +"), + program_name); + fputs (_("\ +Output each NAME with its last non-slash component and trailing slashes\n\ +removed; if NAME contains no /'s, output '.' (meaning the current directory).\n\ +\n\ +"), stdout); + fputs (_("\ + -z, --zero end each output line with NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\ +\n\ +Examples:\n\ + %s /usr/bin/ -> \"/usr\"\n\ + %s dir1/str dir2/str -> \"dir1\" followed by \"dir2\"\n\ + %s stdio.h -> \".\"\n\ +"), + program_name, program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + static char const dot = '.'; + bool use_nuls = false; + char const *result; + size_t len; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while (true) + { + int c = getopt_long (argc, argv, "z", longopts, nullptr); + + if (c == -1) + break; + + switch (c) + { + case 'z': + use_nuls = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (argc < optind + 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + for (; optind < argc; optind++) + { + result = argv[optind]; + len = dir_len (result); + + if (! len) + { + result = ˙ + len = 1; + } + + fwrite (result, 1, len, stdout); + putchar (use_nuls ? '\0' :'\n'); + } + + return EXIT_SUCCESS; +} diff --git a/src/du.c b/src/du.c new file mode 100644 index 0000000..9361651 --- /dev/null +++ b/src/du.c @@ -0,0 +1,1139 @@ +/* du -- summarize device usage + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Differences from the Unix du: + * Doesn't simply ignore the names of regular files given as arguments + when -a is given. + + By tege@sics.se, Torbjörn Granlund, + and djm@ai.mit.edu, David MacKenzie. + Variable blocks added by lm@sgi.com and eggert@twinsun.com. + Rewritten to use nftw, then to use fts by Jim Meyering. */ + +#include +#include +#include +#include "system.h" +#include "argmatch.h" +#include "argv-iter.h" +#include "assure.h" +#include "di-set.h" +#include "exclude.h" +#include "fprintftime.h" +#include "human.h" +#include "mountlist.h" +#include "quote.h" +#include "stat-size.h" +#include "stat-time.h" +#include "stdio--.h" +#include "xfts.h" +#include "xstrtol.h" +#include "xstrtol-error.h" + +extern bool fts_debug; + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "du" + +#define AUTHORS \ + proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Paul Eggert"), \ + proper_name ("Jim Meyering") + +#if DU_DEBUG +# define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts) +#else +# define FTS_CROSS_CHECK(Fts) +#endif + +/* A set of dev/ino pairs to help identify files and directories + whose sizes have already been counted. */ +static struct di_set *di_files; + +/* A set containing a dev/ino pair for each local mount point directory. */ +static struct di_set *di_mnt; + +/* Keep track of the preceding "level" (depth in hierarchy) + from one call of process_file to the next. */ +static size_t prev_level; + +/* Define a class for collecting directory information. */ +struct duinfo +{ + /* Size of files in directory. */ + uintmax_t size; + + /* Number of inodes in directory. */ + uintmax_t inodes; + + /* Latest timestamp found. If tmax.tv_sec == TYPE_MINIMUM (time_t) + && tmax.tv_nsec < 0, no timestamp has been found. */ + struct timespec tmax; +}; + +/* Initialize directory data. */ +static inline void +duinfo_init (struct duinfo *a) +{ + a->size = 0; + a->inodes = 0; + a->tmax.tv_sec = TYPE_MINIMUM (time_t); + a->tmax.tv_nsec = -1; +} + +/* Set directory data. */ +static inline void +duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax) +{ + a->size = size; + a->inodes = 1; + a->tmax = tmax; +} + +/* Accumulate directory data. */ +static inline void +duinfo_add (struct duinfo *a, struct duinfo const *b) +{ + uintmax_t sum = a->size + b->size; + a->size = a->size <= sum ? sum : UINTMAX_MAX; + a->inodes = a->inodes + b->inodes; + if (timespec_cmp (a->tmax, b->tmax) < 0) + a->tmax = b->tmax; +} + +/* A structure for per-directory level information. */ +struct dulevel +{ + /* Entries in this directory. */ + struct duinfo ent; + + /* Total for subdirectories. */ + struct duinfo subdir; +}; + +/* If true, display counts for all files, not just directories. */ +static bool opt_all = false; + +/* If true, rather than using the device usage of each file, + use the apparent size (stat.st_size if usable, 0 otherwise). */ +static bool apparent_size = false; + +/* If true, count each hard link of files with multiple links. */ +static bool opt_count_all = false; + +/* If true, hash all files to look for hard links. */ +static bool hash_all; + +/* If true, output the NUL byte instead of a newline at the end of each line. */ +static bool opt_nul_terminate_output = false; + +/* If true, print a grand total at the end. */ +static bool print_grand_total = false; + +/* If nonzero, do not add sizes of subdirectories. */ +static bool opt_separate_dirs = false; + +/* Show the total for each directory (and file if --all) that is at + most MAX_DEPTH levels down from the root of the hierarchy. The root + is at level 0, so 'du --max-depth=0' is equivalent to 'du -s'. */ +static idx_t max_depth = IDX_MAX; + +/* Only output entries with at least this SIZE if positive, + or at most if negative. See --threshold option. */ +static intmax_t opt_threshold = 0; + +/* Human-readable options for output. */ +static int human_output_opts; + +/* Output inodes count instead of blocks used. */ +static bool opt_inodes = false; + +/* If true, print most recently modified date, using the specified format. */ +static bool opt_time = false; + +/* Type of time to display. controlled by --time. */ + +enum time_type + { + time_mtime, /* default */ + time_ctime, + time_atime + }; + +static enum time_type time_type = time_mtime; + +/* User specified date / time style */ +static char const *time_style = nullptr; + +/* Format used to display date / time. Controlled by --time-style */ +static char const *time_format = nullptr; + +/* The local time zone rules, as per the TZ environment variable. */ +static timezone_t localtz; + +/* The units to use when printing sizes. */ +static uintmax_t output_block_size; + +/* File name patterns to exclude. */ +static struct exclude *exclude; + +/* Grand total size of all args, in bytes. Also latest modified date. */ +static struct duinfo tot_dui; + +#define IS_DIR_TYPE(Type) \ + ((Type) == FTS_DP \ + || (Type) == FTS_DNR) + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + APPARENT_SIZE_OPTION = CHAR_MAX + 1, + EXCLUDE_OPTION, + FILES0_FROM_OPTION, + HUMAN_SI_OPTION, + FTS_DEBUG, + TIME_OPTION, + TIME_STYLE_OPTION, + INODES_OPTION +}; + +static struct option const long_options[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"apparent-size", no_argument, nullptr, APPARENT_SIZE_OPTION}, + {"block-size", required_argument, nullptr, 'B'}, + {"bytes", no_argument, nullptr, 'b'}, + {"count-links", no_argument, nullptr, 'l'}, + /* {"-debug", no_argument, nullptr, FTS_DEBUG}, */ + {"dereference", no_argument, nullptr, 'L'}, + {"dereference-args", no_argument, nullptr, 'D'}, + {"exclude", required_argument, nullptr, EXCLUDE_OPTION}, + {"exclude-from", required_argument, nullptr, 'X'}, + {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION}, + {"human-readable", no_argument, nullptr, 'h'}, + {"inodes", no_argument, nullptr, INODES_OPTION}, + {"si", no_argument, nullptr, HUMAN_SI_OPTION}, + {"max-depth", required_argument, nullptr, 'd'}, + {"null", no_argument, nullptr, '0'}, + {"no-dereference", no_argument, nullptr, 'P'}, + {"one-file-system", no_argument, nullptr, 'x'}, + {"separate-dirs", no_argument, nullptr, 'S'}, + {"summarize", no_argument, nullptr, 's'}, + {"total", no_argument, nullptr, 'c'}, + {"threshold", required_argument, nullptr, 't'}, + {"time", optional_argument, nullptr, TIME_OPTION}, + {"time-style", required_argument, nullptr, TIME_STYLE_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static char const *const time_args[] = +{ + "atime", "access", "use", "ctime", "status", nullptr +}; +static enum time_type const time_types[] = +{ + time_atime, time_atime, time_atime, time_ctime, time_ctime +}; +ARGMATCH_VERIFY (time_args, time_types); + +/* 'full-iso' uses full ISO-style dates and times. 'long-iso' uses longer + ISO-style timestamps, though shorter than 'full-iso'. 'iso' uses shorter + ISO-style timestamps. */ +enum time_style + { + full_iso_time_style, /* --time-style=full-iso */ + long_iso_time_style, /* --time-style=long-iso */ + iso_time_style /* --time-style=iso */ + }; + +static char const *const time_style_args[] = +{ + "full-iso", "long-iso", "iso", nullptr +}; +static enum time_style const time_style_types[] = +{ + full_iso_time_style, long_iso_time_style, iso_time_style +}; +ARGMATCH_VERIFY (time_style_args, time_style_types); + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ + or: %s [OPTION]... --files0-from=F\n\ +"), program_name, program_name); + fputs (_("\ +Summarize device usage of the set of FILEs, recursively for directories.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -0, --null end each output line with NUL, not newline\n\ + -a, --all write counts for all files, not just directories\n\ + --apparent-size print apparent sizes rather than device usage; although\ +\n\ + the apparent size is usually smaller, it may be\n\ + larger due to holes in ('sparse') files, internal\n\ + fragmentation, indirect blocks, and the like\n\ +"), stdout); + fputs (_("\ + -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ + '-BM' prints sizes in units of 1,048,576 bytes;\n\ + see SIZE format below\n\ + -b, --bytes equivalent to '--apparent-size --block-size=1'\n\ + -c, --total produce a grand total\n\ + -D, --dereference-args dereference only symlinks that are listed on the\n\ + command line\n\ + -d, --max-depth=N print the total for a directory (or file, with --all)\n\ + only if it is N or fewer levels below the command\n\ + line argument; --max-depth=0 is the same as\n\ + --summarize\n\ +"), stdout); + fputs (_("\ + --files0-from=F summarize device usage of the\n\ + NUL-terminated file names specified in file F;\n\ + if F is -, then read names from standard input\n\ + -H equivalent to --dereference-args (-D)\n\ + -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\ +\n\ + --inodes list inode usage information instead of block usage\n\ +"), stdout); + fputs (_("\ + -k like --block-size=1K\n\ + -L, --dereference dereference all symbolic links\n\ + -l, --count-links count sizes many times if hard linked\n\ + -m like --block-size=1M\n\ +"), stdout); + fputs (_("\ + -P, --no-dereference don't follow any symbolic links (this is the default)\n\ + -S, --separate-dirs for directories do not include size of subdirectories\n\ + --si like -h, but use powers of 1000 not 1024\n\ + -s, --summarize display only a total for each argument\n\ +"), stdout); + fputs (_("\ + -t, --threshold=SIZE exclude entries smaller than SIZE if positive,\n\ + or entries greater than SIZE if negative\n\ + --time show time of the last modification of any file in the\n\ + directory, or any of its subdirectories\n\ + --time=WORD show time as WORD instead of modification time:\n\ + atime, access, use, ctime or status\n\ + --time-style=STYLE show times using STYLE, which can be:\n\ + full-iso, long-iso, iso, or +FORMAT;\n\ + FORMAT is interpreted like in 'date'\n\ +"), stdout); + fputs (_("\ + -X, --exclude-from=FILE exclude files that match any pattern in FILE\n\ + --exclude=PATTERN exclude files that match PATTERN\n\ + -x, --one-file-system skip directories on different file systems\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_blocksize_note ("DU"); + emit_size_note (); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Try to insert the INO/DEV pair into DI_SET. + Return true if the pair is successfully inserted, + false if the pair was already there. */ +static bool +hash_ins (struct di_set *di_set, ino_t ino, dev_t dev) +{ + int inserted = di_set_insert (di_set, dev, ino); + if (inserted < 0) + xalloc_die (); + return inserted; +} + +/* FIXME: this code is nearly identical to code in date.c */ +/* Display the date and time in WHEN according to the format specified + in FORMAT. */ + +static void +show_date (char const *format, struct timespec when, timezone_t tz) +{ + struct tm tm; + if (localtime_rz (tz, &when.tv_sec, &tm)) + fprintftime (stdout, format, &tm, tz, when.tv_nsec); + else + { + char buf[INT_BUFSIZE_BOUND (intmax_t)]; + char *when_str = timetostr (when.tv_sec, buf); + error (0, 0, _("time %s is out of range"), quote (when_str)); + fputs (when_str, stdout); + } +} + +/* Print N_BYTES. Convert it to a readable value before printing. */ + +static void +print_only_size (uintmax_t n_bytes) +{ + char buf[LONGEST_HUMAN_READABLE + 1]; + fputs ((n_bytes == UINTMAX_MAX + ? _("Infinity") + : human_readable (n_bytes, buf, human_output_opts, + 1, output_block_size)), + stdout); +} + +/* Print size (and optionally time) indicated by *PDUI, followed by STRING. */ + +static void +print_size (const struct duinfo *pdui, char const *string) +{ + print_only_size (opt_inodes + ? pdui->inodes + : pdui->size); + + if (opt_time) + { + putchar ('\t'); + show_date (time_format, pdui->tmax, localtz); + } + printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n'); + fflush (stdout); +} + +/* Fill the di_mnt set with local mount point dev/ino pairs. */ + +static void +fill_mount_table (void) +{ + struct mount_entry *mnt_ent = read_file_system_list (false); + while (mnt_ent) + { + struct mount_entry *mnt_free; + if (!mnt_ent->me_remote && !mnt_ent->me_dummy) + { + struct stat buf; + if (!stat (mnt_ent->me_mountdir, &buf)) + hash_ins (di_mnt, buf.st_ino, buf.st_dev); + else + { + /* Ignore stat failure. False positives are too common. + E.g., "Permission denied" on /run/user//gvfs. */ + } + } + + mnt_free = mnt_ent; + mnt_ent = mnt_ent->me_next; + free_mount_entry (mnt_free); + } +} + +/* This function checks whether any of the directories in the cycle that + fts detected is a mount point. */ + +static bool +mount_point_in_fts_cycle (FTSENT const *ent) +{ + FTSENT const *cycle_ent = ent->fts_cycle; + + if (!di_mnt) + { + /* Initialize the set of dev,inode pairs. */ + di_mnt = di_set_alloc (); + if (!di_mnt) + xalloc_die (); + + fill_mount_table (); + } + + while (ent && ent != cycle_ent) + { + if (di_set_lookup (di_mnt, ent->fts_statp->st_dev, + ent->fts_statp->st_ino) > 0) + { + return true; + } + ent = ent->fts_parent; + } + + return false; +} + +/* This function is called once for every file system object that fts + encounters. fts does a depth-first traversal. This function knows + that and accumulates per-directory totals based on changes in + the depth of the current entry. It returns true on success. */ + +static bool +process_file (FTS *fts, FTSENT *ent) +{ + bool ok = true; + struct duinfo dui; + struct duinfo dui_to_print; + size_t level; + static size_t n_alloc; + /* First element of the structure contains: + The sum of the sizes of all entries in the single directory + at the corresponding level. Although this does include the sizes + corresponding to each subdirectory, it does not include the size of + any file in a subdirectory. Also corresponding last modified date. + Second element of the structure contains: + The sum of the sizes of all entries in the hierarchy at or below the + directory at the specified level. */ + static struct dulevel *dulvl; + + char const *file = ent->fts_path; + const struct stat *sb = ent->fts_statp; + int info = ent->fts_info; + + if (info == FTS_DNR) + { + /* An error occurred, but the size is known, so count it. */ + error (0, ent->fts_errno, _("cannot read directory %s"), quoteaf (file)); + ok = false; + } + else if (info != FTS_DP) + { + bool excluded = excluded_file_name (exclude, file); + if (! excluded) + { + /* Make the stat buffer *SB valid, or fail noisily. */ + + if (info == FTS_NSOK) + { + fts_set (fts, ent, FTS_AGAIN); + MAYBE_UNUSED FTSENT const *e = fts_read (fts); + affirm (e == ent); + info = ent->fts_info; + } + + if (info == FTS_NS || info == FTS_SLNONE) + { + error (0, ent->fts_errno, _("cannot access %s"), quoteaf (file)); + return false; + } + + /* The --one-file-system (-x) option cannot exclude anything + specified on the command-line. By definition, it can exclude + a file or directory only when its device number is different + from that of its just-processed parent directory, and du does + not process the parent of a command-line argument. */ + if (fts->fts_options & FTS_XDEV + && FTS_ROOTLEVEL < ent->fts_level + && fts->fts_dev != sb->st_dev) + excluded = true; + } + + if (excluded + || (! opt_count_all + && (hash_all || (! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink)) + && ! hash_ins (di_files, sb->st_ino, sb->st_dev))) + { + /* If ignoring a directory in preorder, skip its children. + Ignore the next fts_read output too, as it's a postorder + visit to the same directory. */ + if (info == FTS_D) + { + fts_set (fts, ent, FTS_SKIP); + MAYBE_UNUSED FTSENT const *e = fts_read (fts); + affirm (e == ent); + } + + return true; + } + + switch (info) + { + case FTS_D: + return true; + + case FTS_ERR: + /* An error occurred, but the size is known, so count it. */ + error (0, ent->fts_errno, "%s", quotef (file)); + ok = false; + break; + + case FTS_DC: + /* If not following symlinks and not a (bind) mount point. */ + if (cycle_warning_required (fts, ent) + && ! mount_point_in_fts_cycle (ent)) + { + emit_cycle_warning (file); + return false; + } + return true; + } + } + + duinfo_set (&dui, + (apparent_size + ? (usable_st_size (sb) ? MAX (0, sb->st_size) : 0) + : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE), + (time_type == time_mtime ? get_stat_mtime (sb) + : time_type == time_atime ? get_stat_atime (sb) + : get_stat_ctime (sb))); + + level = ent->fts_level; + dui_to_print = dui; + + if (n_alloc == 0) + { + n_alloc = level + 10; + dulvl = xcalloc (n_alloc, sizeof *dulvl); + } + else + { + if (level == prev_level) + { + /* This is usually the most common case. Do nothing. */ + } + else if (level > prev_level) + { + /* Descending the hierarchy. + Clear the accumulators for *all* levels between prev_level + and the current one. The depth may change dramatically, + e.g., from 1 to 10. */ + + if (n_alloc <= level) + { + dulvl = xnrealloc (dulvl, level, 2 * sizeof *dulvl); + n_alloc = level * 2; + } + + for (size_t i = prev_level + 1; i <= level; i++) + { + duinfo_init (&dulvl[i].ent); + duinfo_init (&dulvl[i].subdir); + } + } + else /* level < prev_level */ + { + /* Ascending the hierarchy. + Process a directory only after all entries in that + directory have been processed. When the depth decreases, + propagate sums from the children (prev_level) to the parent. + Here, the current level is always one smaller than the + previous one. */ + affirm (level == prev_level - 1); + duinfo_add (&dui_to_print, &dulvl[prev_level].ent); + if (!opt_separate_dirs) + duinfo_add (&dui_to_print, &dulvl[prev_level].subdir); + duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].ent); + duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].subdir); + } + } + + prev_level = level; + + /* Let the size of a directory entry contribute to the total for the + containing directory, unless --separate-dirs (-S) is specified. */ + if (! (opt_separate_dirs && IS_DIR_TYPE (info))) + duinfo_add (&dulvl[level].ent, &dui); + + /* Even if this directory is unreadable or we can't chdir into it, + do let its size contribute to the total. */ + duinfo_add (&tot_dui, &dui); + + if ((IS_DIR_TYPE (info) && level <= max_depth) + || (opt_all && level <= max_depth) + || level == 0) + { + /* Print or elide this entry according to the --threshold option. */ + uintmax_t v = opt_inodes ? dui_to_print.inodes : dui_to_print.size; + if (opt_threshold < 0 + ? v <= -opt_threshold + : v >= opt_threshold) + print_size (&dui_to_print, file); + } + + return ok; +} + +/* Recursively print the sizes of the directories (and, if selected, files) + named in FILES, the last entry of which is null. + BIT_FLAGS controls how fts works. + Return true if successful. */ + +static bool +du_files (char **files, int bit_flags) +{ + bool ok = true; + + if (*files) + { + FTS *fts = xfts_open (files, bit_flags, nullptr); + + while (true) + { + FTSENT *ent; + + ent = fts_read (fts); + if (ent == nullptr) + { + if (errno != 0) + { + error (0, errno, _("fts_read failed: %s"), + quotef (fts->fts_path)); + ok = false; + } + + /* When exiting this loop early, be careful to reset the + global, prev_level, used in process_file. Otherwise, its + (level == prev_level - 1) assertion could fail. */ + prev_level = 0; + break; + } + FTS_CROSS_CHECK (fts); + + ok &= process_file (fts, ent); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + ok = false; + } + } + + return ok; +} + +int +main (int argc, char **argv) +{ + char *cwd_only[2]; + bool max_depth_specified = false; + bool ok = true; + char *files_from = nullptr; + + /* Bit flags that control how fts works. */ + int bit_flags = FTS_NOSTAT; + + /* Select one of the three FTS_ options that control if/when + to follow a symlink. */ + int symlink_deref_bits = FTS_PHYSICAL; + + /* If true, display only a total for each argument. */ + bool opt_summarize_only = false; + + cwd_only[0] = bad_cast ("."); + cwd_only[1] = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + exclude = new_exclude (); + + human_options (getenv ("DU_BLOCK_SIZE"), + &human_output_opts, &output_block_size); + + while (true) + { + int oi = -1; + int c = getopt_long (argc, argv, "0abd:chHklmst:xB:DLPSX:", + long_options, &oi); + if (c == -1) + break; + + switch (c) + { +#if DU_DEBUG + case FTS_DEBUG: + fts_debug = true; + break; +#endif + + case '0': + opt_nul_terminate_output = true; + break; + + case 'a': + opt_all = true; + break; + + case APPARENT_SIZE_OPTION: + apparent_size = true; + break; + + case 'b': + apparent_size = true; + human_output_opts = 0; + output_block_size = 1; + break; + + case 'c': + print_grand_total = true; + break; + + case 'h': + human_output_opts = human_autoscale | human_SI | human_base_1024; + output_block_size = 1; + break; + + case HUMAN_SI_OPTION: + human_output_opts = human_autoscale | human_SI; + output_block_size = 1; + break; + + case 'k': + human_output_opts = 0; + output_block_size = 1024; + break; + + case 'd': /* --max-depth=N */ + { + intmax_t tmp; + if (xstrtoimax (optarg, nullptr, 0, &tmp, "") == LONGINT_OK + && tmp <= IDX_MAX) + { + max_depth_specified = true; + max_depth = tmp; + } + else + { + error (0, 0, _("invalid maximum depth %s"), + quote (optarg)); + ok = false; + } + } + break; + + case 'm': + human_output_opts = 0; + output_block_size = 1024 * 1024; + break; + + case 'l': + opt_count_all = true; + break; + + case 's': + opt_summarize_only = true; + break; + + case 't': + { + enum strtol_error e; + e = xstrtoimax (optarg, nullptr, 0, &opt_threshold, + "kKmMGTPEZYRQ0"); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + if (opt_threshold == 0 && *optarg == '-') + { + /* Do not allow -0, as this wouldn't make sense anyway. */ + error (EXIT_FAILURE, 0, _("invalid --threshold argument '-0'")); + } + } + break; + + case 'x': + bit_flags |= FTS_XDEV; + break; + + case 'B': + { + enum strtol_error e = human_options (optarg, &human_output_opts, + &output_block_size); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + } + break; + + case 'H': /* NOTE: before 2008-12, -H was equivalent to --si. */ + case 'D': + symlink_deref_bits = FTS_COMFOLLOW | FTS_PHYSICAL; + break; + + case 'L': /* --dereference */ + symlink_deref_bits = FTS_LOGICAL; + break; + + case 'P': /* --no-dereference */ + symlink_deref_bits = FTS_PHYSICAL; + break; + + case 'S': + opt_separate_dirs = true; + break; + + case 'X': + if (add_exclude_file (add_exclude, exclude, optarg, + EXCLUDE_WILDCARDS, '\n')) + { + error (0, errno, "%s", quotef (optarg)); + ok = false; + } + break; + + case FILES0_FROM_OPTION: + files_from = optarg; + break; + + case EXCLUDE_OPTION: + add_exclude (exclude, optarg, EXCLUDE_WILDCARDS); + break; + + case INODES_OPTION: + opt_inodes = true; + break; + + case TIME_OPTION: + opt_time = true; + time_type = + (optarg + ? XARGMATCH ("--time", optarg, time_args, time_types) + : time_mtime); + localtz = tzalloc (getenv ("TZ")); + break; + + case TIME_STYLE_OPTION: + time_style = optarg; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + ok = false; + } + } + + if (!ok) + usage (EXIT_FAILURE); + + if (opt_all && opt_summarize_only) + { + error (0, 0, _("cannot both summarize and show all entries")); + usage (EXIT_FAILURE); + } + + if (opt_summarize_only && max_depth_specified && max_depth == 0) + { + error (0, 0, + _("warning: summarizing is the same as using --max-depth=0")); + } + + if (opt_summarize_only && max_depth_specified && max_depth != 0) + { + error (0, 0, _("warning: summarizing conflicts with --max-depth=%td"), + max_depth); + usage (EXIT_FAILURE); + } + + if (opt_summarize_only) + max_depth = 0; + + if (opt_inodes) + { + if (apparent_size) + { + error (0, 0, _("warning: options --apparent-size and -b are " + "ineffective with --inodes")); + } + output_block_size = 1; + } + + /* Process time style if printing last times. */ + if (opt_time) + { + if (! time_style) + { + time_style = getenv ("TIME_STYLE"); + + /* Ignore TIMESTYLE="locale", for compatibility with ls. */ + if (! time_style || STREQ (time_style, "locale")) + time_style = "long-iso"; + else if (*time_style == '+') + { + /* Ignore anything after a newline, for compatibility + with ls. */ + char *p = strchr (time_style, '\n'); + if (p) + *p = '\0'; + } + else + { + /* Ignore "posix-" prefix, for compatibility with ls. */ + static char const posix_prefix[] = "posix-"; + static const size_t prefix_len = sizeof posix_prefix - 1; + while (STREQ_LEN (time_style, posix_prefix, prefix_len)) + time_style += prefix_len; + } + } + + if (*time_style == '+') + time_format = time_style + 1; + else + { + switch (XARGMATCH ("time style", time_style, + time_style_args, time_style_types)) + { + case full_iso_time_style: + time_format = "%Y-%m-%d %H:%M:%S.%N %z"; + break; + + case long_iso_time_style: + time_format = "%Y-%m-%d %H:%M"; + break; + + case iso_time_style: + time_format = "%Y-%m-%d"; + break; + } + } + } + + struct argv_iterator *ai; + if (files_from) + { + /* When using --files0-from=F, you may not specify any files + on the command-line. */ + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (EXIT_FAILURE); + } + + if (! (STREQ (files_from, "-") || freopen (files_from, "r", stdin))) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (files_from)); + + ai = argv_iter_init_stream (stdin); + + /* It's not easy here to count the arguments, so assume the + worst. */ + hash_all = true; + } + else + { + char **files = (optind < argc ? argv + optind : cwd_only); + ai = argv_iter_init_argv (files); + + /* Hash all dev,ino pairs if there are multiple arguments, or if + following non-command-line symlinks, because in either case a + file with just one hard link might be seen more than once. */ + hash_all = (optind + 1 < argc || symlink_deref_bits == FTS_LOGICAL); + } + + if (!ai) + xalloc_die (); + + /* Initialize the set of dev,inode pairs. */ + di_files = di_set_alloc (); + if (!di_files) + xalloc_die (); + + /* If not hashing everything, process_file won't find cycles on its + own, so ask fts_read to check for them accurately. */ + if (opt_count_all || ! hash_all) + bit_flags |= FTS_TIGHT_CYCLE_CHECK; + + bit_flags |= symlink_deref_bits; + static char *temp_argv[] = { nullptr, nullptr }; + + while (true) + { + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotef (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + affirm (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | du --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (file_name)); + skip_file = true; + } + + /* Report and skip any empty file names before invoking fts. + This works around a glitch in fts, which fails immediately + (without looking at the other file names) when given an empty + file name. */ + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == nullptr) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + idx_t file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%td: %s", quotef (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + ok = false; + else + { + temp_argv[0] = file_name; + ok &= du_files (temp_argv, bit_flags); + } + } + argv_iter_done: + + argv_iter_free (ai); + di_set_free (di_files); + if (di_mnt) + di_set_free (di_mnt); + + if (files_from && (ferror (stdin) || fclose (stdin) != 0) && ok) + error (EXIT_FAILURE, 0, _("error reading %s"), quoteaf (files_from)); + + if (print_grand_total) + print_size (&tot_dui, _("total")); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/echo.c b/src/echo.c new file mode 100644 index 0000000..278778e --- /dev/null +++ b/src/echo.c @@ -0,0 +1,277 @@ +/* echo.c, derived from code echo.c in Bash. + Copyright (C) 1987-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include "system.h" +#include "assure.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "echo" + +#define AUTHORS \ + proper_name ("Brian Fox"), \ + proper_name ("Chet Ramey") + +/* If true, interpret backslash escapes by default. */ +#ifndef DEFAULT_ECHO_TO_XPG +enum { DEFAULT_ECHO_TO_XPG = false }; +#endif + +void +usage (int status) +{ + /* STATUS should always be EXIT_SUCCESS (unlike in most other + utilities which would call emit_try_help otherwise). */ + affirm (status == EXIT_SUCCESS); + + printf (_("\ +Usage: %s [SHORT-OPTION]... [STRING]...\n\ + or: %s LONG-OPTION\n\ +"), program_name, program_name); + fputs (_("\ +Echo the STRING(s) to standard output.\n\ +\n\ + -n do not output the trailing newline\n\ +"), stdout); + fputs (_(DEFAULT_ECHO_TO_XPG + ? N_("\ + -e enable interpretation of backslash escapes (default)\n\ + -E disable interpretation of backslash escapes\n") + : N_("\ + -e enable interpretation of backslash escapes\n\ + -E disable interpretation of backslash escapes (default)\n")), + stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +If -e is in effect, the following sequences are recognized:\n\ +\n\ +"), stdout); + fputs (_("\ + \\\\ backslash\n\ + \\a alert (BEL)\n\ + \\b backspace\n\ + \\c produce no further output\n\ + \\e escape\n\ + \\f form feed\n\ + \\n new line\n\ + \\r carriage return\n\ + \\t horizontal tab\n\ + \\v vertical tab\n\ +"), stdout); + fputs (_("\ + \\0NNN byte with octal value NNN (1 to 3 digits)\n\ + \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + fputs (_("\n\ +NOTE: printf(1) is a preferred alternative,\n\ +which does not have issues outputting option-like strings.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + exit (status); +} + +/* Convert C from hexadecimal character to integer. */ +static int +hextobin (unsigned char c) +{ + switch (c) + { + default: return c - '0'; + case 'a': case 'A': return 10; + case 'b': case 'B': return 11; + case 'c': case 'C': return 12; + case 'd': case 'D': return 13; + case 'e': case 'E': return 14; + case 'f': case 'F': return 15; + } +} + +/* Print the words in LIST to standard output. If the first word is + '-n', then don't print a trailing newline. We also support the + echo syntax from Version 9 unix systems. */ + +int +main (int argc, char **argv) +{ + bool display_return = true; + bool posixly_correct = !!getenv ("POSIXLY_CORRECT"); + bool allow_options = + (! posixly_correct + || (! DEFAULT_ECHO_TO_XPG && 1 < argc && STREQ (argv[1], "-n"))); + + /* System V machines already have a /bin/sh with a v9 behavior. + Use the identical behavior for these machines so that the + existing system shell scripts won't barf. */ + bool do_v9 = DEFAULT_ECHO_TO_XPG; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + /* We directly parse options, rather than use parse_long_options, in + order to avoid accepting abbreviations. */ + if (allow_options && argc == 2) + { + if (STREQ (argv[1], "--help")) + usage (EXIT_SUCCESS); + + if (STREQ (argv[1], "--version")) + { + version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS, + (char *) nullptr); + return EXIT_SUCCESS; + } + } + + --argc; + ++argv; + + if (allow_options) + while (argc > 0 && *argv[0] == '-') + { + char const *temp = argv[0] + 1; + size_t i; + + /* If it appears that we are handling options, then make sure that + all of the options specified are actually valid. Otherwise, the + string should just be echoed. */ + + for (i = 0; temp[i]; i++) + switch (temp[i]) + { + case 'e': case 'E': case 'n': + break; + default: + goto just_echo; + } + + if (i == 0) + goto just_echo; + + /* All of the options in TEMP are valid options to ECHO. + Handle them. */ + while (*temp) + switch (*temp++) + { + case 'e': + do_v9 = true; + break; + + case 'E': + do_v9 = false; + break; + + case 'n': + display_return = false; + break; + } + + argc--; + argv++; + } + +just_echo: + + if (do_v9 || posixly_correct) + { + while (argc > 0) + { + char const *s = argv[0]; + unsigned char c; + + while ((c = *s++)) + { + if (c == '\\' && *s) + { + switch (c = *s++) + { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'c': return EXIT_SUCCESS; + case 'e': c = '\x1B'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case 'x': + { + unsigned char ch = *s; + if (! isxdigit (ch)) + goto not_an_escape; + s++; + c = hextobin (ch); + ch = *s; + if (isxdigit (ch)) + { + s++; + c = c * 16 + hextobin (ch); + } + } + break; + case '0': + c = 0; + if (! ('0' <= *s && *s <= '7')) + break; + c = *s++; + FALLTHROUGH; + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c -= '0'; + if ('0' <= *s && *s <= '7') + c = c * 8 + (*s++ - '0'); + if ('0' <= *s && *s <= '7') + c = c * 8 + (*s++ - '0'); + break; + case '\\': break; + + not_an_escape: + default: putchar ('\\'); break; + } + } + putchar (c); + } + argc--; + argv++; + if (argc > 0) + putchar (' '); + } + } + else + { + while (argc > 0) + { + fputs (argv[0], stdout); + argc--; + argv++; + if (argc > 0) + putchar (' '); + } + } + + if (display_return) + putchar ('\n'); + return EXIT_SUCCESS; +} diff --git a/src/env.c b/src/env.c new file mode 100644 index 0000000..b008ea2 --- /dev/null +++ b/src/env.c @@ -0,0 +1,902 @@ +/* env - run a program in a modified environment + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Richard Mlynarik and David MacKenzie */ + +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "operand2sig.h" +#include "quote.h" +#include "sig2str.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "env" + +#define AUTHORS \ + proper_name ("Richard Mlynarik"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Assaf Gordon") + +/* Array of envvars to unset. */ +static char const **usvars; +static size_t usvars_alloc; +static idx_t usvars_used; + +/* Annotate the output with extra info to aid the user. */ +static bool dev_debug; + +/* Buffer and length of extracted envvars in -S strings. */ +static char *varname; +static idx_t vnlen; + +/* Possible actions on each signal. */ +enum SIGNAL_MODE { + UNCHANGED = 0, + DEFAULT, /* Set to default handler (SIG_DFL). */ + DEFAULT_NOERR, /* Ditto, but ignore sigaction(2) errors. */ + IGNORE, /* Set to ignore (SIG_IGN). */ + IGNORE_NOERR /* Ditto, but ignore sigaction(2) errors. */ +}; +static enum SIGNAL_MODE *signals; + +/* Set of signals to block. */ +static sigset_t block_signals; + +/* Set of signals to unblock. */ +static sigset_t unblock_signals; + +/* Whether signal mask adjustment requested. */ +static bool sig_mask_changed; + +/* Whether to list non default handling. */ +static bool report_signal_handling; + +/* The isspace characters in the C locale. */ +#define C_ISSPACE_CHARS " \t\n\v\f\r" + +static char const shortopts[] = "+C:iS:u:v0" C_ISSPACE_CHARS; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEFAULT_SIGNAL_OPTION = CHAR_MAX + 1, + IGNORE_SIGNAL_OPTION, + BLOCK_SIGNAL_OPTION, + LIST_SIGNAL_HANDLING_OPTION, +}; + +static struct option const longopts[] = +{ + {"ignore-environment", no_argument, nullptr, 'i'}, + {"null", no_argument, nullptr, '0'}, + {"unset", required_argument, nullptr, 'u'}, + {"chdir", required_argument, nullptr, 'C'}, + {"default-signal", optional_argument, nullptr, DEFAULT_SIGNAL_OPTION}, + {"ignore-signal", optional_argument, nullptr, IGNORE_SIGNAL_OPTION}, + {"block-signal", optional_argument, nullptr, BLOCK_SIGNAL_OPTION}, + {"list-signal-handling", no_argument, nullptr, LIST_SIGNAL_HANDLING_OPTION}, + {"debug", no_argument, nullptr, 'v'}, + {"split-string", required_argument, nullptr, 'S'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [-] [NAME=VALUE]... [COMMAND [ARG]...]\n"), + program_name); + fputs (_("\ +Set each NAME to VALUE in the environment and run COMMAND.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -i, --ignore-environment start with an empty environment\n\ + -0, --null end each output line with NUL, not newline\n\ + -u, --unset=NAME remove variable from the environment\n\ +"), stdout); + fputs (_("\ + -C, --chdir=DIR change working directory to DIR\n\ +"), stdout); + fputs (_("\ + -S, --split-string=S process and split S into separate arguments;\n\ + used to pass multiple arguments on shebang lines\n\ +"), stdout); + fputs (_("\ + --block-signal[=SIG] block delivery of SIG signal(s) to COMMAND\n\ +"), stdout); + fputs (_("\ + --default-signal[=SIG] reset handling of SIG signal(s) to the default\n\ +"), stdout); + fputs (_("\ + --ignore-signal[=SIG] set handling of SIG signal(s) to do nothing\n\ +"), stdout); + fputs (_("\ + --list-signal-handling list non default signal handling to stderr\n\ +"), stdout); + fputs (_("\ + -v, --debug print verbose information for each processing step\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +A mere - implies -i. If no COMMAND, print the resulting environment.\n\ +"), stdout); + fputs (_("\ +\n\ +SIG may be a signal name like 'PIPE', or a signal number like '13'.\n\ +Without SIG, all known signals are included. Multiple signals can be\n\ +comma-separated. An empty SIG argument is a no-op.\n\ +"), stdout); + emit_exec_status (PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +append_unset_var (char const *var) +{ + if (usvars_used == usvars_alloc) + usvars = x2nrealloc (usvars, &usvars_alloc, sizeof *usvars); + usvars[usvars_used++] = var; +} + +static void +unset_envvars (void) +{ + for (idx_t i = 0; i < usvars_used; ++i) + { + devmsg ("unset: %s\n", usvars[i]); + + if (unsetenv (usvars[i])) + error (EXIT_CANCELED, errno, _("cannot unset %s"), + quote (usvars[i])); + } +} + +/* Return a pointer to the end of a valid ${VARNAME} string, or nullptr. + 'str' should point to the '$' character. + First letter in VARNAME must be alpha or underscore, + rest of letters are alnum or underscore. + Any other character is an error. */ +ATTRIBUTE_PURE +static char const * +scan_varname (char const *str) +{ + if (str[1] == '{' && (c_isalpha (str[2]) || str[2] == '_')) + { + char const *end = str + 3; + while (c_isalnum (*end) || *end == '_') + ++end; + if (*end == '}') + return end; + } + + return nullptr; +} + +/* Return a pointer to a static buffer containing the VARNAME as + extracted from a '${VARNAME}' string. + The returned string will be NUL terminated. + The returned pointer should not be freed. + Return nullptr if not a valid ${VARNAME} syntax. */ +static char * +extract_varname (char const *str) +{ + idx_t i; + char const *p; + + p = scan_varname (str); + if (!p) + return nullptr; + + /* -2 and +2 (below) account for the '${' prefix. */ + i = p - str - 2; + + if (i >= vnlen) + { + vnlen = i + 1; + varname = xrealloc (varname, vnlen); + } + + memcpy (varname, str + 2, i); + varname[i] = 0; + + return varname; +} + +/* Temporary buffer used by --split-string processing. */ +struct splitbuf +{ + /* Buffer address, arg count, and half the number of elements in the buffer. + ARGC and ARGV are as in 'main', and ARGC + 1 <= HALF_ALLOC so + that the upper half of ARGV can be used for string contents. + This may waste up to half the space but keeps the code simple, + which is better for this rarely-used but security-sensitive code. + + ARGV[0] is not initialized; that is the caller's responsibility + after finalization. + + During assembly, ARGV[I] (where 0 < I < ARGC) contains the offset + of the Ith string (relative to ARGV + HALF_ALLOC), so that + reallocating ARGV does not change the validity of its contents. + The integer offset is cast to char * during assembly, and is + converted to a true char * pointer on finalization. + + During assembly, ARGV[ARGC] contains the offset of the first + unused string byte (relative to ARGV + HALF_ALLOC). */ + char **argv; + int argc; + idx_t half_alloc; + + /* The number of extra argv slots to keep room for. */ + int extra_argc; + + /* Whether processing should act as if the most recent character + seen was a separator. */ + bool sep; +}; + +/* Expand SS so that it has at least one more argv slot and at least + one more string byte. */ +static void +splitbuf_grow (struct splitbuf *ss) +{ + idx_t old_half_alloc = ss->half_alloc; + idx_t string_bytes = (intptr_t) ss->argv[ss->argc]; + ss->argv = xpalloc (ss->argv, &ss->half_alloc, 1, + MIN (INT_MAX, IDX_MAX), 2 * sizeof *ss->argv); + memmove (ss->argv + ss->half_alloc, ss->argv + old_half_alloc, string_bytes); +} + +/* In SS, append C to the last string. */ +static void +splitbuf_append_byte (struct splitbuf *ss, char c) +{ + idx_t string_bytes = (intptr_t) ss->argv[ss->argc]; + if (ss->half_alloc * sizeof *ss->argv <= string_bytes) + splitbuf_grow (ss); + ((char *) (ss->argv + ss->half_alloc))[string_bytes] = c; + ss->argv[ss->argc] = (char *) (intptr_t) (string_bytes + 1); +} + +/* If SS's most recent character was a separator, finish off its + previous argument and start a new one. */ +static void +check_start_new_arg (struct splitbuf *ss) +{ + if (ss->sep) + { + splitbuf_append_byte (ss, '\0'); + int argc = ss->argc; + if (ss->half_alloc <= argc + ss->extra_argc + 1) + splitbuf_grow (ss); + ss->argv[argc + 1] = ss->argv[argc]; + ss->argc = argc + 1; + ss->sep = false; + } +} + +/* All additions to SS have been made. Convert its offsets to pointers, + and return the resulting argument vector. */ +static char ** +splitbuf_finishup (struct splitbuf *ss) +{ + int argc = ss->argc; + char **argv = ss->argv; + char *stringbase = (char *) (ss->argv + ss->half_alloc); + for (int i = 1; i < argc; i++) + argv[i] = stringbase + (intptr_t) argv[i]; + return argv; +} + +/* Return a newly-allocated argv-like array, + by parsing and splitting the input 'str'. + + 'extra_argc' is the number of additional elements to allocate + in the array (on top of the number of args required to split 'str'). + + Store into *argc the number of arguments found (plus 1 for + the program name). + + Example: + int argc; + char **argv = build_argv ("A=B uname -k', 3, &argc); + Results in: + argc = 4 + argv[0] = [not initialized] + argv[1] = "A=B" + argv[2] = "uname" + argv[3] = "-k" + argv[4,5,6,7] = [allocated due to extra_argc + 1, but not initialized] + + To free allocated memory: + free (argv); + However, 'env' does not free since it's about to exec or exit anyway + and the complexity of keeping track of the storage that may have been + allocated via multiple calls to build_argv is not worth the hassle. */ +static char ** +build_argv (char const *str, int extra_argc, int *argc) +{ + bool dq = false, sq = false; + struct splitbuf ss; + ss.argv = xnmalloc (extra_argc + 2, 2 * sizeof *ss.argv); + ss.argc = 1; + ss.half_alloc = extra_argc + 2; + ss.extra_argc = extra_argc; + ss.sep = true; + ss.argv[ss.argc] = 0; + + /* In the following loop, + 'break' causes the character 'newc' to be added to *dest, + 'continue' skips the character. */ + while (*str) + { + char newc = *str; /* Default: add the next character. */ + + switch (*str) + { + case '\'': + if (dq) + break; + sq = !sq; + check_start_new_arg (&ss); + ++str; + continue; + + case '"': + if (sq) + break; + dq = !dq; + check_start_new_arg (&ss); + ++str; + continue; + + case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': + /* Start a new argument if outside quotes. */ + if (sq || dq) + break; + ss.sep = true; + str += strspn (str, C_ISSPACE_CHARS); + continue; + + case '#': + if (!ss.sep) + break; + goto eos; /* '#' as first char terminates the string. */ + + case '\\': + /* Backslash inside single-quotes is not special, except \\ + and \'. */ + if (sq && str[1] != '\\' && str[1] != '\'') + break; + + /* Skip the backslash and examine the next character. */ + newc = *++str; + switch (newc) + { + case '"': case '#': case '$': case '\'': case '\\': + /* Pass escaped character as-is. */ + break; + + case '_': + if (!dq) + { + ++str; /* '\_' outside double-quotes is arg separator. */ + ss.sep = true; + continue; + } + newc = ' '; /* '\_' inside double-quotes is space. */ + break; + + case 'c': + if (dq) + error (EXIT_CANCELED, 0, + _("'\\c' must not appear in double-quoted -S string")); + goto eos; /* '\c' terminates the string. */ + + case 'f': newc = '\f'; break; + case 'n': newc = '\n'; break; + case 'r': newc = '\r'; break; + case 't': newc = '\t'; break; + case 'v': newc = '\v'; break; + + case '\0': + error (EXIT_CANCELED, 0, + _("invalid backslash at end of string in -S")); + + default: + error (EXIT_CANCELED, 0, + _("invalid sequence '\\%c' in -S"), newc); + } + break; + + case '$': + /* ${VARNAME} are not expanded inside single-quotes. */ + if (sq) + break; + + /* Store the ${VARNAME} value. */ + { + char *n = extract_varname (str); + if (!n) + error (EXIT_CANCELED, 0, + _("only ${VARNAME} expansion is supported, error at: %s"), + str); + + char *v = getenv (n); + if (v) + { + check_start_new_arg (&ss); + devmsg ("expanding ${%s} into %s\n", n, quote (v)); + for (; *v; v++) + splitbuf_append_byte (&ss, *v); + } + else + devmsg ("replacing ${%s} with null string\n", n); + + str = strchr (str, '}') + 1; + continue; + } + } + + check_start_new_arg (&ss); + splitbuf_append_byte (&ss, newc); + ++str; + } + + if (dq || sq) + error (EXIT_CANCELED, 0, _("no terminating quote in -S string")); + + eos: + splitbuf_append_byte (&ss, '\0'); + *argc = ss.argc; + return splitbuf_finishup (&ss); +} + +/* Process an "-S" string and create the corresponding argv array. + Update the given argc/argv parameters with the new argv. + + Example: if executed as: + $ env -S"-i -C/tmp A=B" foo bar + The input argv is: + argv[0] = "env" + argv[1] = "-S-i -C/tmp A=B" + argv[2] = "foo" + argv[3] = "bar" + argv[4] = nullptr + This function will modify argv to be: + argv[0] = "env" + argv[1] = "-i" + argv[2] = "-C/tmp" + argv[3] = "A=B" + argv[4] = "foo" + argv[5] = "bar" + argv[6] = nullptr + argc will be updated from 4 to 6. + optind will be reset to 0 to force getopt_long to rescan all arguments. */ +static void +parse_split_string (char const *str, int *orig_optind, + int *orig_argc, char ***orig_argv) +{ + int extra_argc = *orig_argc - *orig_optind, newargc; + char **newargv = build_argv (str, extra_argc, &newargc); + + /* Restore argv[0] - the 'env' executable name. */ + *newargv = (*orig_argv)[0]; + + /* Print parsed arguments. */ + if (dev_debug && 1 < newargc) + { + devmsg ("split -S: %s\n", quote (str)); + devmsg (" into: %s\n", quote (newargv[1])); + for (int i = 2; i < newargc; i++) + devmsg (" & %s\n", quote (newargv[i])); + } + + /* Add remaining arguments and terminating null from the original + command line. */ + memcpy (newargv + newargc, *orig_argv + *orig_optind, + (extra_argc + 1) * sizeof *newargv); + + /* Set new values for original getopt variables. */ + *orig_argc = newargc + extra_argc; + *orig_argv = newargv; + *orig_optind = 0; /* Tell getopt to restart from first argument. */ +} + +static void +parse_signal_action_params (char const *optarg, bool set_default) +{ + char signame[SIG2STR_MAX]; + char *opt_sig; + char *optarg_writable; + + if (! optarg) + { + /* Without an argument, reset all signals. + Some signals cannot be set to ignore or default (e.g., SIGKILL, + SIGSTOP on most OSes, and SIGCONT on AIX.) - so ignore errors. */ + for (int i = 1 ; i <= SIGNUM_BOUND; i++) + if (sig2str (i, signame) == 0) + signals[i] = set_default ? DEFAULT_NOERR : IGNORE_NOERR; + return; + } + + optarg_writable = xstrdup (optarg); + + opt_sig = strtok (optarg_writable, ","); + while (opt_sig) + { + int signum = operand2sig (opt_sig, signame); + /* operand2sig accepts signal 0 (EXIT) - but we reject it. */ + if (signum == 0) + error (0, 0, _("%s: invalid signal"), quote (opt_sig)); + if (signum <= 0) + usage (exit_failure); + + signals[signum] = set_default ? DEFAULT : IGNORE; + + opt_sig = strtok (nullptr, ","); + } + + free (optarg_writable); +} + +static void +reset_signal_handlers (void) +{ + for (int i = 1; i <= SIGNUM_BOUND; i++) + { + struct sigaction act; + + if (signals[i] == UNCHANGED) + continue; + + bool ignore_errors = (signals[i] == DEFAULT_NOERR + || signals[i] == IGNORE_NOERR); + + bool set_to_default = (signals[i] == DEFAULT + || signals[i] == DEFAULT_NOERR); + + int sig_err = sigaction (i, nullptr, &act); + + if (sig_err && !ignore_errors) + error (EXIT_CANCELED, errno, + _("failed to get signal action for signal %d"), i); + + if (! sig_err) + { + act.sa_handler = set_to_default ? SIG_DFL : SIG_IGN; + sig_err = sigaction (i, &act, nullptr); + if (sig_err && !ignore_errors) + error (EXIT_CANCELED, errno, + _("failed to set signal action for signal %d"), i); + } + + if (dev_debug) + { + char signame[SIG2STR_MAX]; + sig2str (i, signame); + devmsg ("Reset signal %s (%d) to %s%s\n", + signame, i, + set_to_default ? "DEFAULT" : "IGNORE", + sig_err ? " (failure ignored)" : ""); + } + } +} + + +static void +parse_block_signal_params (char const *optarg, bool block) +{ + char signame[SIG2STR_MAX]; + char *opt_sig; + char *optarg_writable; + + if (! optarg) + { + /* Without an argument, reset all signals. */ + sigfillset (block ? &block_signals : &unblock_signals); + sigemptyset (block ? &unblock_signals : &block_signals); + } + else if (! sig_mask_changed) + { + /* Initialize the sets. */ + sigemptyset (&block_signals); + sigemptyset (&unblock_signals); + } + + sig_mask_changed = true; + + if (! optarg) + return; + + optarg_writable = xstrdup (optarg); + + opt_sig = strtok (optarg_writable, ","); + while (opt_sig) + { + int signum = operand2sig (opt_sig, signame); + /* operand2sig accepts signal 0 (EXIT) - but we reject it. */ + if (signum == 0) + error (0, 0, _("%s: invalid signal"), quote (opt_sig)); + if (signum <= 0) + usage (exit_failure); + + sigaddset (block ? &block_signals : &unblock_signals, signum); + sigdelset (block ? &unblock_signals : &block_signals, signum); + + opt_sig = strtok (nullptr, ","); + } + + free (optarg_writable); +} + +static void +set_signal_proc_mask (void) +{ + /* Get the existing signal mask */ + sigset_t set; + char const *debug_act; + + sigemptyset (&set); + + if (sigprocmask (0, nullptr, &set)) + error (EXIT_CANCELED, errno, _("failed to get signal process mask")); + + for (int i = 1; i <= SIGNUM_BOUND; i++) + { + if (sigismember (&block_signals, i)) + { + sigaddset (&set, i); + debug_act = "BLOCK"; + } + else if (sigismember (&unblock_signals, i)) + { + sigdelset (&set, i); + debug_act = "UNBLOCK"; + } + else + { + debug_act = nullptr; + } + + if (dev_debug && debug_act) + { + char signame[SIG2STR_MAX]; + sig2str (i, signame); + devmsg ("signal %s (%d) mask set to %s\n", + signame, i, debug_act); + } + } + + if (sigprocmask (SIG_SETMASK, &set, nullptr)) + error (EXIT_CANCELED, errno, _("failed to set signal process mask")); +} + +static void +list_signal_handling (void) +{ + sigset_t set; + char signame[SIG2STR_MAX]; + + sigemptyset (&set); + if (sigprocmask (0, nullptr, &set)) + error (EXIT_CANCELED, errno, _("failed to get signal process mask")); + + for (int i = 1; i <= SIGNUM_BOUND; i++) + { + struct sigaction act; + if (sigaction (i, nullptr, &act)) + continue; + + char const *ignored = act.sa_handler == SIG_IGN ? "IGNORE" : ""; + char const *blocked = sigismember (&set, i) ? "BLOCK" : ""; + char const *connect = *ignored && *blocked ? "," : ""; + + if (! *ignored && ! *blocked) + continue; + + sig2str (i, signame); + fprintf (stderr, "%-10s (%2d): %s%s%s\n", signame, i, + blocked, connect, ignored); + } +} + +static void +initialize_signals (void) +{ + signals = xmalloc ((sizeof *signals) * (SIGNUM_BOUND + 1)); + + for (int i = 0 ; i <= SIGNUM_BOUND; i++) + signals[i] = UNCHANGED; + + return; +} + +int +main (int argc, char **argv) +{ + int optc; + bool ignore_environment = false; + bool opt_nul_terminate_output = false; + char const *newdir = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXIT_CANCELED); + atexit (close_stdout); + + initialize_signals (); + + while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1) + { + switch (optc) + { + case 'i': + ignore_environment = true; + break; + case 'u': + append_unset_var (optarg); + break; + case 'v': + dev_debug = true; + break; + case '0': + opt_nul_terminate_output = true; + break; + case DEFAULT_SIGNAL_OPTION: + parse_signal_action_params (optarg, true); + parse_block_signal_params (optarg, false); + break; + case IGNORE_SIGNAL_OPTION: + parse_signal_action_params (optarg, false); + break; + case BLOCK_SIGNAL_OPTION: + parse_block_signal_params (optarg, true); + break; + case LIST_SIGNAL_HANDLING_OPTION: + report_signal_handling = true; + break; + case 'C': + newdir = optarg; + break; + case 'S': + parse_split_string (optarg, &optind, &argc, &argv); + break; + case ' ': case '\t': case '\n': case '\v': case '\f': case '\r': + /* These are undocumented options. Attempt to detect + incorrect shebang usage with extraneous space, e.g.: + #!/usr/bin/env -i command + In which case argv[1] == "-i command". */ + error (0, 0, _("invalid option -- '%c'"), optc); + error (0, 0, _("use -[v]S to pass options in shebang lines")); + usage (EXIT_CANCELED); + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_CANCELED); + } + } + + if (optind < argc && STREQ (argv[optind], "-")) + { + ignore_environment = true; + ++optind; + } + + if (ignore_environment) + { + devmsg ("cleaning environ\n"); + static char *dummy_environ[] = { nullptr }; + environ = dummy_environ; + } + else + unset_envvars (); + + char *eq; + while (optind < argc && (eq = strchr (argv[optind], '='))) + { + devmsg ("setenv: %s\n", argv[optind]); + + if (putenv (argv[optind])) + { + *eq = '\0'; + error (EXIT_CANCELED, errno, _("cannot set %s"), + quote (argv[optind])); + } + optind++; + } + + bool program_specified = optind < argc; + + if (opt_nul_terminate_output && program_specified) + { + error (0, 0, _("cannot specify --null (-0) with command")); + usage (EXIT_CANCELED); + } + + if (newdir && ! program_specified) + { + error (0, 0, _("must specify command with --chdir (-C)")); + usage (EXIT_CANCELED); + } + + if (! program_specified) + { + /* Print the environment and exit. */ + char *const *e = environ; + while (*e) + printf ("%s%c", *e++, opt_nul_terminate_output ? '\0' : '\n'); + return EXIT_SUCCESS; + } + + reset_signal_handlers (); + if (sig_mask_changed) + set_signal_proc_mask (); + + if (report_signal_handling) + list_signal_handling (); + + if (newdir) + { + devmsg ("chdir: %s\n", quoteaf (newdir)); + + if (chdir (newdir) != 0) + error (EXIT_CANCELED, errno, _("cannot change directory to %s"), + quoteaf (newdir)); + } + + if (dev_debug) + { + devmsg ("executing: %s\n", argv[optind]); + for (int i=optind; i. */ + +#include + +#include +#include +#include "system.h" +#include "fadvise.h" +#include "quote.h" + +#include "expand-common.h" + +/* If true, convert blanks even after nonblank characters have been + read on the line. */ +bool convert_entire_line = false; + +/* If nonzero, the size of all tab stops. If zero, use 'tab_list' instead. */ +static uintmax_t tab_size = 0; + +/* If nonzero, the size of all tab stops after the last specified. */ +static uintmax_t extend_size = 0; + +/* If nonzero, an increment for additional tab stops after the last specified.*/ +static uintmax_t increment_size = 0; + +/* The maximum distance between tab stops. */ +size_t max_column_width; + +/* Array of the explicit column numbers of the tab stops; + after 'tab_list' is exhausted, each additional tab is replaced + by a space. The first column is column 0. */ +static uintmax_t *tab_list = nullptr; + +/* The number of allocated entries in 'tab_list'. */ +static size_t n_tabs_allocated = 0; + +/* The index of the first invalid element of 'tab_list', + where the next element can be added. */ +static size_t first_free_tab = 0; + +/* Null-terminated array of input filenames. */ +static char **file_list = nullptr; + +/* Default for 'file_list' if no files are given on the command line. */ +static char *stdin_argv[] = +{ + (char *) "-", nullptr +}; + +/* True if we have ever read standard input. */ +static bool have_read_stdin = false; + +/* The desired exit status. */ +int exit_status = EXIT_SUCCESS; + + + +/* Add tab stop TABVAL to the end of 'tab_list'. */ +extern void +add_tab_stop (uintmax_t tabval) +{ + uintmax_t prev_column = first_free_tab ? tab_list[first_free_tab - 1] : 0; + uintmax_t column_width = prev_column <= tabval ? tabval - prev_column : 0; + + if (first_free_tab == n_tabs_allocated) + tab_list = X2NREALLOC (tab_list, &n_tabs_allocated); + tab_list[first_free_tab++] = tabval; + + if (max_column_width < column_width) + { + if (SIZE_MAX < column_width) + error (EXIT_FAILURE, 0, _("tabs are too far apart")); + max_column_width = column_width; + } +} + +static bool +set_extend_size (uintmax_t tabval) +{ + bool ok = true; + + if (extend_size) + { + error (0, 0, + _("'/' specifier only allowed" + " with the last value")); + ok = false; + } + extend_size = tabval; + + return ok; +} + +static bool +set_increment_size (uintmax_t tabval) +{ + bool ok = true; + + if (increment_size) + { + error (0,0, + _("'+' specifier only allowed" + " with the last value")); + ok = false; + } + increment_size = tabval; + + return ok; +} + +/* Add the comma or blank separated list of tab stops STOPS + to the list of tab stops. */ +extern void +parse_tab_stops (char const *stops) +{ + bool have_tabval = false; + uintmax_t tabval = 0; + bool extend_tabval = false; + bool increment_tabval = false; + char const *num_start = nullptr; + bool ok = true; + + for (; *stops; stops++) + { + if (*stops == ',' || isblank (to_uchar (*stops))) + { + if (have_tabval) + { + if (extend_tabval) + { + if (! set_extend_size (tabval)) + { + ok = false; + break; + } + } + else if (increment_tabval) + { + if (! set_increment_size (tabval)) + { + ok = false; + break; + } + } + else + add_tab_stop (tabval); + } + have_tabval = false; + } + else if (*stops == '/') + { + if (have_tabval) + { + error (0, 0, _("'/' specifier not at start of number: %s"), + quote (stops)); + ok = false; + } + extend_tabval = true; + increment_tabval = false; + } + else if (*stops == '+') + { + if (have_tabval) + { + error (0, 0, _("'+' specifier not at start of number: %s"), + quote (stops)); + ok = false; + } + increment_tabval = true; + extend_tabval = false; + } + else if (ISDIGIT (*stops)) + { + if (!have_tabval) + { + tabval = 0; + have_tabval = true; + num_start = stops; + } + + /* Detect overflow. */ + if (!DECIMAL_DIGIT_ACCUMULATE (tabval, *stops - '0', uintmax_t)) + { + size_t len = strspn (num_start, "0123456789"); + char *bad_num = ximemdup0 (num_start, len); + error (0, 0, _("tab stop is too large %s"), quote (bad_num)); + free (bad_num); + ok = false; + stops = num_start + len - 1; + } + } + else + { + error (0, 0, _("tab size contains invalid character(s): %s"), + quote (stops)); + ok = false; + break; + } + } + + if (ok && have_tabval) + { + if (extend_tabval) + ok &= set_extend_size (tabval); + else if (increment_tabval) + ok &= set_increment_size (tabval); + else + add_tab_stop (tabval); + } + + if (! ok) + exit (EXIT_FAILURE); +} + +/* Check that the list of tab stops TABS, with ENTRIES entries, + contains only nonzero, ascending values. */ + +static void +validate_tab_stops (uintmax_t const *tabs, size_t entries) +{ + uintmax_t prev_tab = 0; + + for (size_t i = 0; i < entries; i++) + { + if (tabs[i] == 0) + error (EXIT_FAILURE, 0, _("tab size cannot be 0")); + if (tabs[i] <= prev_tab) + error (EXIT_FAILURE, 0, _("tab sizes must be ascending")); + prev_tab = tabs[i]; + } + + if (increment_size && extend_size) + error (EXIT_FAILURE, 0, _("'/' specifier is mutually exclusive with '+'")); +} + +/* Called after all command-line options have been parsed, + and add_tab_stop/parse_tab_stops have been called. + Will validate the tab-stop values, + and set the final values to: + tab-stops = 8 (if no tab-stops given on command line) + tab-stops = N (if value N specified as the only value). + tab-stops = distinct values given on command line (if multiple values given). +*/ +extern void +finalize_tab_stops (void) +{ + validate_tab_stops (tab_list, first_free_tab); + + if (first_free_tab == 0) + tab_size = max_column_width = extend_size + ? extend_size : increment_size + ? increment_size : 8; + else if (first_free_tab == 1 && ! extend_size && ! increment_size) + tab_size = tab_list[0]; + else + tab_size = 0; +} + + +extern uintmax_t +get_next_tab_column (const uintmax_t column, size_t *tab_index, + bool *last_tab) +{ + *last_tab = false; + + /* single tab-size - return multiples of it */ + if (tab_size) + return column + (tab_size - column % tab_size); + + /* multiple tab-sizes - iterate them until the tab position is beyond + the current input column. */ + for ( ; *tab_index < first_free_tab ; (*tab_index)++ ) + { + uintmax_t tab = tab_list[*tab_index]; + if (column < tab) + return tab; + } + + /* relative last tab - return multiples of it */ + if (extend_size) + return column + (extend_size - column % extend_size); + + /* incremental last tab - add increment_size to the previous tab stop */ + if (increment_size) + { + uintmax_t end_tab = tab_list[first_free_tab - 1]; + + return column + (increment_size - ((column - end_tab) % increment_size)); + } + + *last_tab = true; + return 0; +} + + + + +/* Sets new file-list */ +extern void +set_file_list (char **list) +{ + have_read_stdin = false; + + if (!list) + file_list = stdin_argv; + else + file_list = list; +} + +/* Close the old stream pointer FP if it is non-null, + and return a new one opened to read the next input file. + Open a filename of '-' as the standard input. + Return nullptr if there are no more input files. */ + +extern FILE * +next_file (FILE *fp) +{ + static char *prev_file; + char *file; + + if (fp) + { + int err = errno; + if (!ferror (fp)) + err = 0; + if (STREQ (prev_file, "-")) + clearerr (fp); /* Also clear EOF. */ + else if (fclose (fp) != 0) + err = errno; + if (err) + { + error (0, err, "%s", quotef (prev_file)); + exit_status = EXIT_FAILURE; + } + } + + while ((file = *file_list++) != nullptr) + { + if (STREQ (file, "-")) + { + have_read_stdin = true; + fp = stdin; + } + else + fp = fopen (file, "r"); + if (fp) + { + prev_file = file; + fadvise (fp, FADVISE_SEQUENTIAL); + return fp; + } + error (0, errno, "%s", quotef (file)); + exit_status = EXIT_FAILURE; + } + return nullptr; +} + +/* */ +extern void +cleanup_file_list_stdin (void) +{ + if (have_read_stdin && fclose (stdin) != 0) + error (EXIT_FAILURE, errno, "-"); +} + + +extern void +emit_tab_list_info (void) +{ + /* suppress syntax check for emit_mandatory_arg_note() */ + fputs (_("\ + -t, --tabs=LIST use comma separated list of tab positions.\n\ +"), stdout); + fputs (_("\ + The last specified position can be prefixed with '/'\n\ + to specify a tab size to use after the last\n\ + explicitly specified tab stop. Also a prefix of '+'\n\ + can be used to align remaining tab stops relative to\n\ + the last specified tab stop instead of the first column\n\ +"), stdout); +} diff --git a/src/expand-common.h b/src/expand-common.h new file mode 100644 index 0000000..daed31e --- /dev/null +++ b/src/expand-common.h @@ -0,0 +1,68 @@ +/* expand-common - common functionality for expand/unexpand + + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* If true, convert blanks even after nonblank characters have been + read on the line. */ +extern bool convert_entire_line; + +/* The maximum distance between tab stops. */ +extern size_t max_column_width; + +/* The desired exit status. */ +extern int exit_status; + +/* Add tab stop TABVAL to the end of 'tab_list'. */ +extern void +add_tab_stop (uintmax_t tabval); + +/* Add the comma or blank separated list of tab stops STOPS + to the list of tab stops. */ +extern void +parse_tab_stops (char const *stops) _GL_ATTRIBUTE_NONNULL (); + +/* TODO: Document */ +extern uintmax_t +get_next_tab_column (const uintmax_t column, size_t *tab_index, + bool *last_tab) + _GL_ATTRIBUTE_NONNULL ((3)); + +/* Called after all command-line options have been parsed, + sets the final tab-stops values */ +extern void +finalize_tab_stops (void); + + + + +/* Sets new file-list */ +extern void +set_file_list (char **file_list); + +/* Close the old stream pointer FP if it is non-null, + and return a new one opened to read the next input file. + Open a filename of '-' as the standard input. + Return nullptr if there are no more input files. */ +extern FILE * +next_file (FILE *fp); + +/* */ +extern void +cleanup_file_list_stdin (void); + + +extern void +emit_tab_list_info (void); diff --git a/src/expand.c b/src/expand.c new file mode 100644 index 0000000..0e74d0c --- /dev/null +++ b/src/expand.c @@ -0,0 +1,235 @@ +/* expand - convert tabs to spaces + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* By default, convert all tabs to spaces. + Preserves backspace characters in the output; they decrement the + column count for tab calculations. + The default action is equivalent to -8. + + Options: + --tabs=tab1[,tab2[,...]] + -t tab1[,tab2[,...]] + -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 + columns apart instead of the default 8. Otherwise, + set the tabs at columns tab1, tab2, etc. (numbered from + 0); replace any tabs beyond the tab stops given with + single spaces. + --initial + -i Only convert initial tabs on each line to spaces. + + David MacKenzie */ + +#include + +#include +#include +#include +#include "system.h" +#include "expand-common.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "expand" + +#define AUTHORS proper_name ("David MacKenzie") + +static char const shortopts[] = "it:0::1::2::3::4::5::6::7::8::9::"; + +static struct option const longopts[] = +{ + {"tabs", required_argument, nullptr, 't'}, + {"initial", no_argument, nullptr, 'i'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Convert tabs in each FILE to spaces, writing to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -i, --initial do not convert tabs after non blanks\n\ + -t, --tabs=N have tabs N characters apart, not 8\n\ +"), stdout); + emit_tab_list_info (); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + + +/* Change tabs to spaces, writing to stdout. + Read each file in 'file_list', in order. */ + +static void +expand (void) +{ + /* Input stream. */ + FILE *fp = next_file (nullptr); + + if (!fp) + return; + + while (true) + { + /* Input character, or EOF. */ + int c; + + /* If true, perform translations. */ + bool convert = true; + + + /* The following variables have valid values only when CONVERT + is true: */ + + /* Column of next input character. */ + uintmax_t column = 0; + + /* Index in TAB_LIST of next tab stop to examine. */ + size_t tab_index = 0; + + + /* Convert a line of text. */ + + do + { + while ((c = getc (fp)) < 0 && (fp = next_file (fp))) + continue; + + if (convert) + { + if (c == '\t') + { + /* Column the next input tab stop is on. */ + uintmax_t next_tab_column; + bool last_tab; + + next_tab_column = get_next_tab_column (column, &tab_index, + &last_tab); + + if (last_tab) + next_tab_column = column + 1; + + if (next_tab_column < column) + error (EXIT_FAILURE, 0, _("input line is too long")); + + while (++column < next_tab_column) + if (putchar (' ') < 0) + write_error (); + + c = ' '; + } + else if (c == '\b') + { + /* Go back one column, and force recalculation of the + next tab stop. */ + column -= !!column; + tab_index -= !!tab_index; + } + else + { + column++; + if (!column) + error (EXIT_FAILURE, 0, _("input line is too long")); + } + + convert &= convert_entire_line || !! isblank (c); + } + + if (c < 0) + return; + + if (putchar (c) < 0) + write_error (); + } + while (c != '\n'); + } +} + +int +main (int argc, char **argv) +{ + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + convert_entire_line = true; + + while ((c = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1) + { + switch (c) + { + case 'i': + convert_entire_line = false; + break; + + case 't': + parse_tab_stops (optarg); + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (optarg) + parse_tab_stops (optarg - 1); + else + { + char tab_stop[2]; + tab_stop[0] = c; + tab_stop[1] = '\0'; + parse_tab_stops (tab_stop); + } + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + finalize_tab_stops (); + + set_file_list (optind < argc ? &argv[optind] : nullptr); + + expand (); + + cleanup_file_list_stdin (); + + return exit_status; +} diff --git a/src/expr.c b/src/expr.c new file mode 100644 index 0000000..eeb4e13 --- /dev/null +++ b/src/expr.c @@ -0,0 +1,1014 @@ +/* expr -- evaluate expressions. + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Author: Mike Parker. + Modified for arbitrary-precision calculation by James Youngman. + + This program evaluates expressions. Each token (operator, operand, + parenthesis) of the expression must be a separate argument. The + parser used is a reasonably general one, though any incarnation of + it is language-specific. It is especially nice for expressions. + + No parse tree is needed; a new node is evaluated immediately. + One function can handle multiple operators all of equal precedence, + provided they all associate ((x op x) op x). + + Define EVAL_TRACE to print an evaluation trace. */ + +#include +#include +#include +#include "system.h" + +#include +#include +#include "long-options.h" +#include "mbuiter.h" +#include "strnumcmp.h" +#include "xstrtol.h" + +/* Various parts of this code assume size_t fits into unsigned long + int, the widest unsigned type that GMP supports. */ +static_assert (SIZE_MAX <= ULONG_MAX); + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "expr" + +#define AUTHORS \ + proper_name ("Mike Parker"), \ + proper_name ("James Youngman"), \ + proper_name ("Paul Eggert") + +/* Exit statuses. */ +enum + { + /* Invalid expression: e.g., its form does not conform to the + grammar for expressions. Our grammar is an extension of the + POSIX grammar. */ + EXPR_INVALID = 2, + + /* An internal error occurred, e.g., arithmetic overflow, storage + exhaustion. */ + EXPR_FAILURE + }; + +/* The kinds of value we can have. */ +enum valtype +{ + integer, + string +}; +typedef enum valtype TYPE; + +/* A value is.... */ +struct valinfo +{ + TYPE type; /* Which kind. */ + union + { /* The value itself. */ + mpz_t i; + char *s; + } u; +}; +typedef struct valinfo VALUE; + +/* The arguments given to the program, minus the program name. */ +static char **args; + +static VALUE *eval (bool); +static bool nomoreargs (void); +static bool null (VALUE *v); +static void printv (VALUE *v); + + +/* + Find the first occurrence in the character string STRING of any character + in the character string ACCEPT. + + Copied from gnulib's mbscspn, with two differences: + 1. Returns 1-based position of first found character, or zero if not found. + 2. Returned value is the logical character index, NOT byte offset. + + Examples: + mbs_logical_cspn ('hello','a') => 0 + mbs_logical_cspn ('hello','h') => 1 + mbs_logical_cspn ('hello','oe') => 1 + mbs_logical_cspn ('hello','lo') => 3 + + In UTF-8 \xCE\xB1 is a single character (greek alpha): + mbs_logical_cspn ('\xCE\xB1bc','\xCE\xB1') => 1 + mbs_logical_cspn ('\xCE\xB1bc','c') => 3 */ +static size_t +mbs_logical_cspn (char const *s, char const *accept) +{ + size_t idx = 0; + + if (accept[0] == '\0') + return 0; + + /* General case. */ + if (MB_CUR_MAX > 1) + { + mbui_iterator_t iter; + + for (mbui_init (iter, s); mbui_avail (iter); mbui_advance (iter)) + { + ++idx; + if (mb_len (mbui_cur (iter)) == 1) + { + if (mbschr (accept, *mbui_cur_ptr (iter))) + return idx; + } + else + { + mbui_iterator_t aiter; + + for (mbui_init (aiter, accept); + mbui_avail (aiter); + mbui_advance (aiter)) + if (mb_equal (mbui_cur (aiter), mbui_cur (iter))) + return idx; + } + } + + /* not found */ + return 0; + } + else + { + /* single-byte locale, + convert returned byte offset to 1-based index or zero if not found. */ + size_t i = strcspn (s, accept); + return (s[i] ? i + 1 : 0); + } +} + +/* Extract the substring of S, from logical character + position POS and LEN characters. + first character position is 1. + POS and LEN refer to logical characters, not octets. + + Upon exit, sets v->s to the new string. + The new string might be empty if POS/LEN are invalid. */ +static char * +mbs_logical_substr (char const *s, size_t pos, size_t len) +{ + char *v, *vlim; + + size_t blen = strlen (s); /* byte length */ + size_t llen = (MB_CUR_MAX > 1) ? mbslen (s) : blen; /* logical length */ + + if (llen < pos || pos == 0 || len == 0 || len == SIZE_MAX) + return xstrdup (""); + + /* characters to copy */ + size_t vlen = MIN (len, llen - pos + 1); + + if (MB_CUR_MAX == 1) + { + /* Single-byte case */ + v = xmalloc (vlen + 1); + vlim = mempcpy (v, s + pos - 1, vlen); + } + else + { + /* Multibyte case */ + + /* FIXME: this is wasteful. Some memory can be saved by counting + how many bytes the matching characters occupy. */ + vlim = v = xmalloc (blen + 1); + + mbui_iterator_t iter; + size_t idx=1; + for (mbui_init (iter, s); + mbui_avail (iter) && vlen > 0; + mbui_advance (iter), ++idx) + { + /* Skip until we reach the starting position */ + if (idx < pos) + continue; + + /* Copy one character */ + --vlen; + vlim = mempcpy (vlim, mbui_cur_ptr (iter), mb_len (mbui_cur (iter))); + } + } + *vlim = '\0'; + return v; +} + +/* Return the number of logical characters (possibly multibyte) + that are in string S in the first OFS octets. + + Example in UTF-8: + "\xE2\x9D\xA7" is "U+2767 ROTATED FLORAL HEART BULLET". + In the string below, there are only two characters + up to the first 4 bytes (The U+2767 which occupies 3 bytes and 'x'): + mbs_count_to_offset ("\xE2\x9D\xA7xyz", 4) => 2 */ +static size_t +mbs_offset_to_chars (char const *s, size_t ofs) +{ + mbui_iterator_t iter; + size_t c = 0; + for (mbui_init (iter, s); mbui_avail (iter); mbui_advance (iter)) + { + ptrdiff_t d = mbui_cur_ptr (iter) - s; + if (d >= ofs) + break; + ++c; + } + return c; +} + + + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s EXPRESSION\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + putchar ('\n'); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Print the value of EXPRESSION to standard output. A blank line below\n\ +separates increasing precedence groups. EXPRESSION may be:\n\ +\n\ + ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n\ +\n\ + ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n\ +"), stdout); + fputs (_("\ +\n\ + ARG1 < ARG2 ARG1 is less than ARG2\n\ + ARG1 <= ARG2 ARG1 is less than or equal to ARG2\n\ + ARG1 = ARG2 ARG1 is equal to ARG2\n\ + ARG1 != ARG2 ARG1 is unequal to ARG2\n\ + ARG1 >= ARG2 ARG1 is greater than or equal to ARG2\n\ + ARG1 > ARG2 ARG1 is greater than ARG2\n\ +"), stdout); + fputs (_("\ +\n\ + ARG1 + ARG2 arithmetic sum of ARG1 and ARG2\n\ + ARG1 - ARG2 arithmetic difference of ARG1 and ARG2\n\ +"), stdout); + /* Tell xgettext that the "% A" below is not a printf-style + format string: xgettext:no-c-format */ + fputs (_("\ +\n\ + ARG1 * ARG2 arithmetic product of ARG1 and ARG2\n\ + ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2\n\ + ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2\n\ +"), stdout); + fputs (_("\ +\n\ + STRING : REGEXP anchored pattern match of REGEXP in STRING\n\ +\n\ + match STRING REGEXP same as STRING : REGEXP\n\ + substr STRING POS LENGTH substring of STRING, POS counted from 1\n\ + index STRING CHARS index in STRING where any CHARS is found, or 0\n\ + length STRING length of STRING\n\ +"), stdout); + fputs (_("\ + + TOKEN interpret TOKEN as a string, even if it is a\n\ + keyword like 'match' or an operator like '/'\n\ +\n\ + ( EXPRESSION ) value of EXPRESSION\n\ +"), stdout); + fputs (_("\ +\n\ +Beware that many operators need to be escaped or quoted for shells.\n\ +Comparisons are arithmetic if both ARGs are numbers, else lexicographical.\n\ +Pattern matches return the string matched between \\( and \\) or null; if\n\ +\\( and \\) are not used, they return the number of characters matched or 0.\n\ +"), stdout); + fputs (_("\ +\n\ +Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null\n\ +or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + + +int +main (int argc, char **argv) +{ + VALUE *v; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXPR_FAILURE); + atexit (close_stdout); + + parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE_NAME, VERSION, + usage, AUTHORS, (char const *) nullptr); + + /* The above handles --help and --version. + Since there is no other invocation of getopt, handle '--' here. */ + if (1 < argc && STREQ (argv[1], "--")) + { + --argc; + ++argv; + } + + if (argc <= 1) + { + error (0, 0, _("missing operand")); + usage (EXPR_INVALID); + } + + args = argv + 1; + + v = eval (true); + if (!nomoreargs ()) + error (EXPR_INVALID, 0, _("syntax error: unexpected argument %s"), + quotearg_n_style (0, locale_quoting_style, *args)); + + printv (v); + + main_exit (null (v)); +} + +/* Return a VALUE for I. */ + +static VALUE * +int_value (unsigned long int i) +{ + VALUE *v = xmalloc (sizeof *v); + v->type = integer; + mpz_init_set_ui (v->u.i, i); + return v; +} + +/* Return a VALUE for S. */ + +static VALUE * +str_value (char const *s) +{ + VALUE *v = xmalloc (sizeof *v); + v->type = string; + v->u.s = xstrdup (s); + return v; +} + +/* Free VALUE V, including structure components. */ + +static void +freev (VALUE *v) +{ + if (v->type == string) + free (v->u.s); + else + mpz_clear (v->u.i); + free (v); +} + +/* Print VALUE V. */ + +static void +printv (VALUE *v) +{ + switch (v->type) + { + case integer: + mpz_out_str (stdout, 10, v->u.i); + putchar ('\n'); + break; + case string: + puts (v->u.s); + break; + default: + unreachable (); + } +} + +/* Return true if V is a null-string or zero-number. */ + +ATTRIBUTE_PURE +static bool +null (VALUE *v) +{ + switch (v->type) + { + case integer: + return mpz_sgn (v->u.i) == 0; + case string: + { + char const *cp = v->u.s; + if (*cp == '\0') + return true; + + cp += (*cp == '-'); + + do + { + if (*cp != '0') + return false; + } + while (*++cp); + + return true; + } + default: + unreachable (); + } +} + +/* Return true if CP takes the form of an integer. */ + +ATTRIBUTE_PURE +static bool +looks_like_integer (char const *cp) +{ + cp += (*cp == '-'); + + do + if (! ISDIGIT (*cp)) + return false; + while (*++cp); + + return true; +} + +/* Coerce V to a string value (can't fail). */ + +static void +tostring (VALUE *v) +{ + switch (v->type) + { + case integer: + { + char *s = mpz_get_str (nullptr, 10, v->u.i); + mpz_clear (v->u.i); + v->u.s = s; + v->type = string; + } + break; + case string: + break; + default: + unreachable (); + } +} + +/* Coerce V to an integer value. Return true on success, false on failure. */ + +static bool +toarith (VALUE *v) +{ + switch (v->type) + { + case integer: + return true; + case string: + { + char *s = v->u.s; + + if (! looks_like_integer (s)) + return false; + if (mpz_init_set_str (v->u.i, s, 10) != 0) + error (EXPR_FAILURE, ERANGE, "%s", (s)); + free (s); + v->type = integer; + return true; + } + default: + unreachable (); + } +} + +/* Extract a size_t value from an integer value I. + If the value is negative, return SIZE_MAX. + If the value is too large, return SIZE_MAX - 1. */ +static size_t +getsize (mpz_t i) +{ + if (mpz_sgn (i) < 0) + return SIZE_MAX; + if (mpz_fits_ulong_p (i)) + { + unsigned long int ul = mpz_get_ui (i); + if (ul < SIZE_MAX) + return ul; + } + return SIZE_MAX - 1; +} + +/* Return true and advance if the next token matches STR exactly. + STR must not be null. */ + +static bool +nextarg (char const *str) +{ + if (*args == nullptr) + return false; + else + { + bool r = STREQ (*args, str); + args += r; + return r; + } +} + +/* Return true if there no more tokens. */ + +static bool +nomoreargs (void) +{ + return *args == 0; +} + +/* Report missing operand. + There is an implicit assumption that there was a previous argument, + and (args-1) is valid. */ +static void +require_more_args (void) +{ + if (nomoreargs ()) + error (EXPR_INVALID, 0, _("syntax error: missing argument after %s"), + quotearg_n_style (0, locale_quoting_style, *(args - 1))); +} + + +#ifdef EVAL_TRACE +/* Print evaluation trace and args remaining. */ + +static void +trace (fxn) + char *fxn; +{ + char **a; + + printf ("%s:", fxn); + for (a = args; *a; a++) + printf (" %s", *a); + putchar ('\n'); +} +#endif + +/* Do the : operator. + SV is the VALUE for the lhs (the string), + PV is the VALUE for the rhs (the pattern). */ + +static VALUE * +docolon (VALUE *sv, VALUE *pv) +{ + VALUE *v; + char const *errmsg; + struct re_pattern_buffer re_buffer; + char fastmap[UCHAR_MAX + 1]; + struct re_registers re_regs; + regoff_t matchlen; + + tostring (sv); + tostring (pv); + + re_regs.num_regs = 0; + re_regs.start = nullptr; + re_regs.end = nullptr; + + re_buffer.buffer = nullptr; + re_buffer.allocated = 0; + re_buffer.fastmap = fastmap; + re_buffer.translate = nullptr; + re_syntax_options = + RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES; + errmsg = re_compile_pattern (pv->u.s, strlen (pv->u.s), &re_buffer); + if (errmsg) + error (EXPR_INVALID, 0, "%s", (errmsg)); + re_buffer.newline_anchor = 0; + + matchlen = re_match (&re_buffer, sv->u.s, strlen (sv->u.s), 0, &re_regs); + if (0 <= matchlen) + { + /* Were \(...\) used? */ + if (re_buffer.re_nsub > 0) + { + if (re_regs.end[1] < 0) + v = str_value (""); + else + { + sv->u.s[re_regs.end[1]] = '\0'; + v = str_value (sv->u.s + re_regs.start[1]); + } + } + else + { + /* In multibyte locales, convert the matched offset (=number of bytes) + to the number of matched characters. */ + size_t i = (MB_CUR_MAX == 1 + ? matchlen + : mbs_offset_to_chars (sv->u.s, matchlen)); + v = int_value (i); + } + } + else if (matchlen == -1) + { + /* Match failed -- return the right kind of null. */ + if (re_buffer.re_nsub > 0) + v = str_value (""); + else + v = int_value (0); + } + else + error (EXPR_FAILURE, + matchlen == -2 ? errno : EOVERFLOW, + _("error in regular expression matcher")); + + if (0 < re_regs.num_regs) + { + free (re_regs.start); + free (re_regs.end); + } + re_buffer.fastmap = nullptr; + regfree (&re_buffer); + return v; +} + +/* Handle bare operands and ( expr ) syntax. */ + +static VALUE * +eval7 (bool evaluate) +{ + VALUE *v; + +#ifdef EVAL_TRACE + trace ("eval7"); +#endif + require_more_args (); + + if (nextarg ("(")) + { + v = eval (evaluate); + if (nomoreargs ()) + error (EXPR_INVALID, 0, _("syntax error: expecting ')' after %s"), + quotearg_n_style (0, locale_quoting_style, *(args - 1))); + if (!nextarg (")")) + error (EXPR_INVALID, 0, _("syntax error: expecting ')' instead of %s"), + quotearg_n_style (0, locale_quoting_style, *args)); + return v; + } + + if (nextarg (")")) + error (EXPR_INVALID, 0, _("syntax error: unexpected ')'")); + + return str_value (*args++); +} + +/* Handle match, substr, index, and length keywords, and quoting "+". */ + +static VALUE * +eval6 (bool evaluate) +{ + VALUE *l; + VALUE *r; + VALUE *v; + VALUE *i1; + VALUE *i2; + +#ifdef EVAL_TRACE + trace ("eval6"); +#endif + if (nextarg ("+")) + { + require_more_args (); + return str_value (*args++); + } + else if (nextarg ("length")) + { + r = eval6 (evaluate); + tostring (r); + v = int_value (mbslen (r->u.s)); + freev (r); + return v; + } + else if (nextarg ("match")) + { + l = eval6 (evaluate); + r = eval6 (evaluate); + if (evaluate) + { + v = docolon (l, r); + freev (l); + } + else + v = l; + freev (r); + return v; + } + else if (nextarg ("index")) + { + size_t pos; + + l = eval6 (evaluate); + r = eval6 (evaluate); + tostring (l); + tostring (r); + pos = mbs_logical_cspn (l->u.s, r->u.s); + v = int_value (pos); + freev (l); + freev (r); + return v; + } + else if (nextarg ("substr")) + { + l = eval6 (evaluate); + i1 = eval6 (evaluate); + i2 = eval6 (evaluate); + tostring (l); + + if (!toarith (i1) || !toarith (i2)) + v = str_value (""); + else + { + size_t pos = getsize (i1->u.i); + size_t len = getsize (i2->u.i); + + char *s = mbs_logical_substr (l->u.s, pos, len); + v = str_value (s); + free (s); + } + freev (l); + freev (i1); + freev (i2); + return v; + } + else + return eval7 (evaluate); +} + +/* Handle : operator (pattern matching). + Calls docolon to do the real work. */ + +static VALUE * +eval5 (bool evaluate) +{ + VALUE *l; + VALUE *r; + VALUE *v; + +#ifdef EVAL_TRACE + trace ("eval5"); +#endif + l = eval6 (evaluate); + while (true) + { + if (nextarg (":")) + { + r = eval6 (evaluate); + if (evaluate) + { + v = docolon (l, r); + freev (l); + l = v; + } + freev (r); + } + else + return l; + } +} + +/* Handle *, /, % operators. */ + +static VALUE * +eval4 (bool evaluate) +{ + VALUE *l; + VALUE *r; + enum { multiply, divide, mod } fxn; + +#ifdef EVAL_TRACE + trace ("eval4"); +#endif + l = eval5 (evaluate); + while (true) + { + if (nextarg ("*")) + fxn = multiply; + else if (nextarg ("/")) + fxn = divide; + else if (nextarg ("%")) + fxn = mod; + else + return l; + r = eval5 (evaluate); + if (evaluate) + { + if (!toarith (l) || !toarith (r)) + error (EXPR_INVALID, 0, _("non-integer argument")); + if (fxn != multiply && mpz_sgn (r->u.i) == 0) + error (EXPR_INVALID, 0, _("division by zero")); + ((fxn == multiply ? mpz_mul + : fxn == divide ? mpz_tdiv_q + : mpz_tdiv_r) + (l->u.i, l->u.i, r->u.i)); + } + freev (r); + } +} + +/* Handle +, - operators. */ + +static VALUE * +eval3 (bool evaluate) +{ + VALUE *l; + VALUE *r; + enum { plus, minus } fxn; + +#ifdef EVAL_TRACE + trace ("eval3"); +#endif + l = eval4 (evaluate); + while (true) + { + if (nextarg ("+")) + fxn = plus; + else if (nextarg ("-")) + fxn = minus; + else + return l; + r = eval4 (evaluate); + if (evaluate) + { + if (!toarith (l) || !toarith (r)) + error (EXPR_INVALID, 0, _("non-integer argument")); + (fxn == plus ? mpz_add : mpz_sub) (l->u.i, l->u.i, r->u.i); + } + freev (r); + } +} + +/* Handle comparisons. */ + +static VALUE * +eval2 (bool evaluate) +{ + VALUE *l; + +#ifdef EVAL_TRACE + trace ("eval2"); +#endif + l = eval3 (evaluate); + while (true) + { + VALUE *r; + enum + { + less_than, less_equal, equal, not_equal, greater_equal, greater_than + } fxn; + bool val = false; + + if (nextarg ("<")) + fxn = less_than; + else if (nextarg ("<=")) + fxn = less_equal; + else if (nextarg ("=") || nextarg ("==")) + fxn = equal; + else if (nextarg ("!=")) + fxn = not_equal; + else if (nextarg (">=")) + fxn = greater_equal; + else if (nextarg (">")) + fxn = greater_than; + else + return l; + r = eval3 (evaluate); + + if (evaluate) + { + int cmp; + tostring (l); + tostring (r); + + if (looks_like_integer (l->u.s) && looks_like_integer (r->u.s)) + cmp = strintcmp (l->u.s, r->u.s); + else + { + errno = 0; + cmp = strcoll (l->u.s, r->u.s); + + if (errno) + { + error (0, errno, _("string comparison failed")); + error (0, 0, _("set LC_ALL='C' to work around the problem")); + error (EXPR_INVALID, 0, + _("the strings compared were %s and %s"), + quotearg_n_style (0, locale_quoting_style, l->u.s), + quotearg_n_style (1, locale_quoting_style, r->u.s)); + } + } + + switch (fxn) + { + case less_than: val = (cmp < 0); break; + case less_equal: val = (cmp <= 0); break; + case equal: val = (cmp == 0); break; + case not_equal: val = (cmp != 0); break; + case greater_equal: val = (cmp >= 0); break; + case greater_than: val = (cmp > 0); break; + default: unreachable (); + } + } + + freev (l); + freev (r); + l = int_value (val); + } +} + +/* Handle &. */ + +static VALUE * +eval1 (bool evaluate) +{ + VALUE *l; + VALUE *r; + +#ifdef EVAL_TRACE + trace ("eval1"); +#endif + l = eval2 (evaluate); + while (true) + { + if (nextarg ("&")) + { + r = eval2 (evaluate && !null (l)); + if (null (l) || null (r)) + { + freev (l); + freev (r); + l = int_value (0); + } + else + freev (r); + } + else + return l; + } +} + +/* Handle |. */ + +static VALUE * +eval (bool evaluate) +{ + VALUE *l; + VALUE *r; + +#ifdef EVAL_TRACE + trace ("eval"); +#endif + l = eval1 (evaluate); + while (true) + { + if (nextarg ("|")) + { + r = eval1 (evaluate && null (l)); + if (null (l)) + { + freev (l); + l = r; + if (null (l)) + { + freev (l); + l = int_value (0); + } + } + else + freev (r); + } + else + return l; + } +} diff --git a/src/extract-magic b/src/extract-magic new file mode 100644 index 0000000..f8d3029 --- /dev/null +++ b/src/extract-magic @@ -0,0 +1,162 @@ +#!/usr/bin/perl -w +# Derive #define directives from specially formatted 'case ...:' statements. + +# Copyright (C) 2003-2023 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +use strict; + +use Getopt::Long; + +(my $VERSION = '$Revision: 1.5 $ ') =~ tr/[0-9].//cd; +(my $ME = $0) =~ s|.*/||; + +END +{ + # Nobody ever checks the status of print()s. That's okay, because + # if any do fail, we're guaranteed to get an indicator when we close() + # the filehandle. + # + # Close stdout now, and if there were no errors, return happy status. + # If stdout has already been closed by the script, though, do nothing. + defined fileno STDOUT + or return; + close STDOUT + and return; + + # Errors closing stdout. Indicate that, and hope stderr is OK. + warn "$ME: closing standard output: $!\n"; + + # Don't be so arrogant as to assume that we're the first END handler + # defined, and thus the last one invoked. There may be others yet + # to come. $? will be passed on to them, and to the final _exit(). + # + # If it isn't already an error, make it one (and if it _is_ an error, + # preserve the value: it might be important). + $? ||= 1; +} + +sub usage ($) +{ + my ($exit_code) = @_; + my $STREAM = ($exit_code == 0 ? *STDOUT : *STDERR); + if ($exit_code != 0) + { + print $STREAM "Try '$ME --help' for more information.\n"; + } + else + { + print $STREAM < sub { $emit_magic = 0 }, + help => sub { usage 0 }, + version => sub { print "$ME version $VERSION\n"; exit }, + ) or usage 1; + + my $fail = 0; + + @ARGV < 1 + and (warn "$ME: missing FILE arguments\n"), $fail = 1; + 1 < @ARGV + and (warn "$ME: too many arguments\n"), $fail = 1; + $fail + and usage 1; + + my $file = $ARGV[0]; + + open FH, $file + or die "$ME: can't open '$file' for reading: $!\n"; + + # For each line like this: + # case S_MAGIC_ROMFS: /* 0x7275 */ + # emit one like this: + # # define S_MAGIC_ROMFS 0x7275 + # Fail if there is a 'case S_MAGIC_.*' line without + # a properly formed comment. + + my $map_comment = <)) + { + $line =~ /^[ \t]+case S_MAGIC_/ + or next; + $line =~ + m!^[ \t]+case (S_MAGIC_\w+): /\* (0x[0-9A-Fa-f]+) (local|remote) \*/! + or (warn "$ME:$file:$.: malformed case S_MAGIC_... line"), + $fail = 1, next; + my $name = $1; + my $magic = $2; + my $local = $3 eq 'local' ? 1 : 0; + print $emit_magic + ? "# define $name $magic\n" + : " case $name: return $local;\n"; + } + + $emit_magic + and print <<\EOF; +#elif defined __GNU__ +# include +#endif +EOF + $emit_magic + or printf " default: return -1;\n }\n}\n"; + + close FH; + + exit $fail; +} diff --git a/src/factor.c b/src/factor.c new file mode 100644 index 0000000..c47250a --- /dev/null +++ b/src/factor.c @@ -0,0 +1,2662 @@ +/* factor -- print prime factors of n. + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Originally written by Paul Rubin . + Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering. + Arbitrary-precision code adapted by James Youngman from Torbjörn + Granlund's factorize.c, from GNU MP version 4.2.2. + In 2012, the core was rewritten by Torbjörn Granlund and Niels Möller. + Contains code from GNU MP. */ + +/* Efficiently factor numbers that fit in one or two words (word = uintmax_t), + or, with GMP, numbers of any size. + + Code organization: + + There are several variants of many functions, for handling one word, two + words, and GMP's mpz_t type. If the one-word variant is called foo, the + two-word variant will be foo2, and the one for mpz_t will be mp_foo. In + some cases, the plain function variants will handle both one-word and + two-word numbers, evidenced by function arguments. + + The factoring code for two words will fall into the code for one word when + progress allows that. + + Algorithm: + + (1) Perform trial division using a small primes table, but without hardware + division since the primes table store inverses modulo the word base. + (The GMP variant of this code doesn't make use of the precomputed + inverses, but instead relies on GMP for fast divisibility testing.) + (2) Check the nature of any non-factored part using Miller-Rabin for + detecting composites, and Lucas for detecting primes. + (3) Factor any remaining composite part using the Pollard-Brent rho + algorithm or if USE_SQUFOF is defined to 1, try that first. + Status of found factors are checked again using Miller-Rabin and Lucas. + + We prefer using Hensel norm in the divisions, not the more familiar + Euclidean norm, since the former leads to much faster code. In the + Pollard-Brent rho code and the prime testing code, we use Montgomery's + trick of multiplying all n-residues by the word base, allowing cheap Hensel + reductions mod n. + + The GMP code uses an algorithm that can be considerably slower; + for example, on a circa-2017 Intel Xeon Silver 4116, factoring + 2^{127}-3 takes about 50 ms with the two-word algorithm but would + take about 750 ms with the GMP code. + + Improvements: + + * Use modular inverses also for exact division in the Lucas code, and + elsewhere. A problem is to locate the inverses not from an index, but + from a prime. We might instead compute the inverse on-the-fly. + + * Tune trial division table size (not forgetting that this is a standalone + program where the table will be read from secondary storage for + each invocation). + + * Implement less naive powm, using k-ary exponentiation for k = 3 or + perhaps k = 4. + + * Try to speed trial division code for single uintmax_t numbers, i.e., the + code using DIVBLOCK. It currently runs at 2 cycles per prime (Intel SBR, + IBR), 3 cycles per prime (AMD Stars) and 5 cycles per prime (AMD BD) when + using gcc 4.6 and 4.7. Some software pipelining should help; 1, 2, and 4 + respectively cycles ought to be possible. + + * The redcify function could be vastly improved by using (plain Euclidean) + pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from + GMP's gmp-impl.h). The redcify2 function could be vastly improved using + similar methods. These functions currently dominate run time when using + the -w option. +*/ + +/* Whether to recursively factor to prove primality, + or run faster probabilistic tests. */ +#ifndef PROVE_PRIMALITY +# define PROVE_PRIMALITY 1 +#endif + +/* Faster for certain ranges but less general. */ +#ifndef USE_SQUFOF +# define USE_SQUFOF 0 +#endif + +/* Output SQUFOF statistics. */ +#ifndef STAT_SQUFOF +# define STAT_SQUFOF 0 +#endif + + +#include +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "full-write.h" +#include "quote.h" +#include "readtokens.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "factor" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name_lite ("Niels Moller", "Niels M\303\266ller") + +/* Token delimiters when reading from a file. */ +#define DELIM "\n\t " + +#ifndef USE_LONGLONG_H +/* With the way we use longlong.h, it's only safe to use + when UWtype = UHWtype, as there were various cases + (as can be seen in the history for longlong.h) where + for example, _LP64 was required to enable W_TYPE_SIZE==64 code, + to avoid compile time or run time issues. */ +# if LONG_MAX == INTMAX_MAX +# define USE_LONGLONG_H 1 +# endif +#endif + +#if USE_LONGLONG_H + +/* Make definitions for longlong.h to make it do what it can do for us */ + +/* bitcount for uintmax_t */ +# if UINTMAX_MAX == UINT32_MAX +# define W_TYPE_SIZE 32 +# elif UINTMAX_MAX == UINT64_MAX +# define W_TYPE_SIZE 64 +# elif UINTMAX_MAX == UINT128_MAX +# define W_TYPE_SIZE 128 +# endif + +# define UWtype uintmax_t +# define UHWtype unsigned long int +# undef UDWtype +# if HAVE_ATTRIBUTE_MODE +typedef unsigned int UQItype __attribute__ ((mode (QI))); +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef int DItype __attribute__ ((mode (DI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +# else +typedef unsigned char UQItype; +typedef long SItype; +typedef unsigned long int USItype; +# if HAVE_LONG_LONG_INT +typedef long long int DItype; +typedef unsigned long long int UDItype; +# else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */ +typedef long int DItype; +typedef unsigned long int UDItype; +# endif +# endif +# define LONGLONG_STANDALONE /* Don't require GMP's longlong.h mdep files */ +# define ASSERT(x) /* FIXME make longlong.h really standalone */ +# define __GMP_DECLSPEC /* FIXME make longlong.h really standalone */ +# define __clz_tab factor_clz_tab /* Rename to avoid glibc collision */ +# ifndef __GMP_GNUC_PREREQ +# define __GMP_GNUC_PREREQ(a,b) 1 +# endif + +/* These stub macros are only used in longlong.h in certain system compiler + combinations, so ensure usage to avoid -Wunused-macros warnings. */ +# if __GMP_GNUC_PREREQ (1,1) && defined __clz_tab +ASSERT (1) +__GMP_DECLSPEC +# endif + +# if _ARCH_PPC +# define HAVE_HOST_CPU_FAMILY_powerpc 1 +# endif +# include "longlong.h" +# ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB +const unsigned char factor_clz_tab[129] = +{ + 1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 9 +}; +# endif + +#else /* not USE_LONGLONG_H */ + +# define W_TYPE_SIZE (8 * sizeof (uintmax_t)) +# define __ll_B ((uintmax_t) 1 << (W_TYPE_SIZE / 2)) +# define __ll_lowpart(t) ((uintmax_t) (t) & (__ll_B - 1)) +# define __ll_highpart(t) ((uintmax_t) (t) >> (W_TYPE_SIZE / 2)) + +#endif + +#if !defined __clz_tab && !defined UHWtype +/* Without this seemingly useless conditional, gcc -Wunused-macros + warns that each of the two tested macros is unused on Fedora 18. + FIXME: this is just an ugly band-aid. Fix it properly. */ +#endif + +/* 2*3*5*7*11...*101 is 128 bits, and has 26 prime factors */ +#define MAX_NFACTS 26 + +enum +{ + DEV_DEBUG_OPTION = CHAR_MAX + 1 +}; + +static struct option const long_options[] = +{ + {"exponents", no_argument, nullptr, 'h'}, + {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* If true, use p^e output format. */ +static bool print_exponents; + +struct factors +{ + uintmax_t plarge[2]; /* Can have a single large factor */ + uintmax_t p[MAX_NFACTS]; + unsigned char e[MAX_NFACTS]; + unsigned char nfactors; +}; + +struct mp_factors +{ + mpz_t *p; + unsigned long int *e; + idx_t nfactors; +}; + +static void factor (uintmax_t, uintmax_t, struct factors *); + +#ifndef umul_ppmm +# define umul_ppmm(w1, w0, u, v) \ + do { \ + uintmax_t __x0, __x1, __x2, __x3; \ + unsigned long int __ul, __vl, __uh, __vh; \ + uintmax_t __u = (u), __v = (v); \ + \ + __ul = __ll_lowpart (__u); \ + __uh = __ll_highpart (__u); \ + __vl = __ll_lowpart (__v); \ + __vh = __ll_highpart (__v); \ + \ + __x0 = (uintmax_t) __ul * __vl; \ + __x1 = (uintmax_t) __ul * __vh; \ + __x2 = (uintmax_t) __uh * __vl; \ + __x3 = (uintmax_t) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* This can't give carry. */ \ + __x1 += __x2; /* But this indeed can. */ \ + if (__x1 < __x2) /* Did we get it? */ \ + __x3 += __ll_B; /* Yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = (__x1 << W_TYPE_SIZE / 2) + __ll_lowpart (__x0); \ + } while (0) +#endif + +#if !defined udiv_qrnnd || defined UDIV_NEEDS_NORMALIZATION +/* Define our own, not needing normalization. This function is + currently not performance critical, so keep it simple. Similar to + the mod macro below. */ +# undef udiv_qrnnd +# define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + uintmax_t __d1, __d0, __q, __r1, __r0; \ + \ + __d1 = (d); __d0 = 0; \ + __r1 = (n1); __r0 = (n0); \ + affirm (__r1 < __d1); \ + __q = 0; \ + for (int __i = W_TYPE_SIZE; __i > 0; __i--) \ + { \ + rsh2 (__d1, __d0, __d1, __d0, 1); \ + __q <<= 1; \ + if (ge2 (__r1, __r0, __d1, __d0)) \ + { \ + __q++; \ + sub_ddmmss (__r1, __r0, __r1, __r0, __d1, __d0); \ + } \ + } \ + (r) = __r0; \ + (q) = __q; \ + } while (0) +#endif + +#if !defined add_ssaaaa +# define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + uintmax_t _add_x; \ + _add_x = (al) + (bl); \ + (sh) = (ah) + (bh) + (_add_x < (al)); \ + (sl) = _add_x; \ + } while (0) +#endif + +#define rsh2(rh, rl, ah, al, cnt) \ + do { \ + (rl) = ((ah) << (W_TYPE_SIZE - (cnt))) | ((al) >> (cnt)); \ + (rh) = (ah) >> (cnt); \ + } while (0) + +#define lsh2(rh, rl, ah, al, cnt) \ + do { \ + (rh) = ((ah) << cnt) | ((al) >> (W_TYPE_SIZE - (cnt))); \ + (rl) = (al) << (cnt); \ + } while (0) + +#define ge2(ah, al, bh, bl) \ + ((ah) > (bh) || ((ah) == (bh) && (al) >= (bl))) + +#define gt2(ah, al, bh, bl) \ + ((ah) > (bh) || ((ah) == (bh) && (al) > (bl))) + +#ifndef sub_ddmmss +# define sub_ddmmss(rh, rl, ah, al, bh, bl) \ + do { \ + uintmax_t _cy; \ + _cy = (al) < (bl); \ + (rl) = (al) - (bl); \ + (rh) = (ah) - (bh) - _cy; \ + } while (0) +#endif + +#ifndef count_leading_zeros +# define count_leading_zeros(count, x) do { \ + uintmax_t __clz_x = (x); \ + int __clz_c; \ + for (__clz_c = 0; \ + (__clz_x & ((uintmax_t) 0xff << (W_TYPE_SIZE - 8))) == 0; \ + __clz_c += 8) \ + __clz_x <<= 8; \ + for (; (intmax_t)__clz_x >= 0; __clz_c++) \ + __clz_x <<= 1; \ + (count) = __clz_c; \ + } while (0) +#endif + +#ifndef count_trailing_zeros +# define count_trailing_zeros(count, x) do { \ + uintmax_t __ctz_x = (x); \ + int __ctz_c = 0; \ + while ((__ctz_x & 1) == 0) \ + { \ + __ctz_x >>= 1; \ + __ctz_c++; \ + } \ + (count) = __ctz_c; \ + } while (0) +#endif + +/* Requires that a < n and b <= n */ +#define submod(r,a,b,n) \ + do { \ + uintmax_t _t = - (uintmax_t) (a < b); \ + (r) = ((n) & _t) + (a) - (b); \ + } while (0) + +#define addmod(r,a,b,n) \ + submod ((r), (a), ((n) - (b)), (n)) + +/* Modular two-word addition and subtraction. For performance reasons, the + most significant bit of n1 must be clear. The destination variables must be + distinct from the mod operand. */ +#define addmod2(r1, r0, a1, a0, b1, b0, n1, n0) \ + do { \ + add_ssaaaa ((r1), (r0), (a1), (a0), (b1), (b0)); \ + if (ge2 ((r1), (r0), (n1), (n0))) \ + sub_ddmmss ((r1), (r0), (r1), (r0), (n1), (n0)); \ + } while (0) +#define submod2(r1, r0, a1, a0, b1, b0, n1, n0) \ + do { \ + sub_ddmmss ((r1), (r0), (a1), (a0), (b1), (b0)); \ + if ((intmax_t) (r1) < 0) \ + add_ssaaaa ((r1), (r0), (r1), (r0), (n1), (n0)); \ + } while (0) + +#define HIGHBIT_TO_MASK(x) \ + (((intmax_t)-1 >> 1) < 0 \ + ? (uintmax_t)((intmax_t)(x) >> (W_TYPE_SIZE - 1)) \ + : ((x) & ((uintmax_t) 1 << (W_TYPE_SIZE - 1)) \ + ? UINTMAX_MAX : (uintmax_t) 0)) + +/* Compute r = a mod d, where r = <*t1,retval>, a = , d = . + Requires that d1 != 0. */ +static uintmax_t +mod2 (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t d1, uintmax_t d0) +{ + int cntd, cnta; + + affirm (d1 != 0); + + if (a1 == 0) + { + *r1 = 0; + return a0; + } + + count_leading_zeros (cntd, d1); + count_leading_zeros (cnta, a1); + int cnt = cntd - cnta; + lsh2 (d1, d0, d1, d0, cnt); + for (int i = 0; i < cnt; i++) + { + if (ge2 (a1, a0, d1, d0)) + sub_ddmmss (a1, a0, a1, a0, d1, d0); + rsh2 (d1, d0, d1, d0, 1); + } + + *r1 = a1; + return a0; +} + +ATTRIBUTE_CONST +static uintmax_t +gcd_odd (uintmax_t a, uintmax_t b) +{ + if ((b & 1) == 0) + { + uintmax_t t = b; + b = a; + a = t; + } + if (a == 0) + return b; + + /* Take out least significant one bit, to make room for sign */ + b >>= 1; + + for (;;) + { + uintmax_t t; + uintmax_t bgta; + + while ((a & 1) == 0) + a >>= 1; + a >>= 1; + + t = a - b; + if (t == 0) + return (a << 1) + 1; + + bgta = HIGHBIT_TO_MASK (t); + + /* b <-- min (a, b) */ + b += (bgta & t); + + /* a <-- |a - b| */ + a = (t ^ bgta) - bgta; + } +} + +static uintmax_t +gcd2_odd (uintmax_t *r1, uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0) +{ + affirm (b0 & 1); + + if ((a0 | a1) == 0) + { + *r1 = b1; + return b0; + } + + while ((a0 & 1) == 0) + rsh2 (a1, a0, a1, a0, 1); + + for (;;) + { + if ((b1 | a1) == 0) + { + *r1 = 0; + return gcd_odd (b0, a0); + } + + if (gt2 (a1, a0, b1, b0)) + { + sub_ddmmss (a1, a0, a1, a0, b1, b0); + do + rsh2 (a1, a0, a1, a0, 1); + while ((a0 & 1) == 0); + } + else if (gt2 (b1, b0, a1, a0)) + { + sub_ddmmss (b1, b0, b1, b0, a1, a0); + do + rsh2 (b1, b0, b1, b0, 1); + while ((b0 & 1) == 0); + } + else + break; + } + + *r1 = a1; + return a0; +} + +static void +factor_insert_multiplicity (struct factors *factors, + uintmax_t prime, int m) +{ + int nfactors = factors->nfactors; + uintmax_t *p = factors->p; + unsigned char *e = factors->e; + + /* Locate position for insert new or increment e. */ + int i; + for (i = nfactors - 1; i >= 0; i--) + { + if (p[i] <= prime) + break; + } + + if (i < 0 || p[i] != prime) + { + for (int j = nfactors - 1; j > i; j--) + { + p[j + 1] = p[j]; + e[j + 1] = e[j]; + } + p[i + 1] = prime; + e[i + 1] = m; + factors->nfactors = nfactors + 1; + } + else + { + e[i] += m; + } +} + +#define factor_insert(f, p) factor_insert_multiplicity (f, p, 1) + +static void +factor_insert_large (struct factors *factors, + uintmax_t p1, uintmax_t p0) +{ + if (p1 > 0) + { + affirm (factors->plarge[1] == 0); + factors->plarge[0] = p0; + factors->plarge[1] = p1; + } + else + factor_insert (factors, p0); +} + +#ifndef mpz_inits + +# include + +# define mpz_inits(...) mpz_va_init (mpz_init, __VA_ARGS__) +# define mpz_clears(...) mpz_va_init (mpz_clear, __VA_ARGS__) + +static void +mpz_va_init (void (*mpz_single_init)(mpz_t), ...) +{ + va_list ap; + + va_start (ap, mpz_single_init); + + mpz_t *mpz; + while ((mpz = va_arg (ap, mpz_t *))) + mpz_single_init (*mpz); + + va_end (ap); +} +#endif + +static void mp_factor (mpz_t, struct mp_factors *); + +static void +mp_factor_init (struct mp_factors *factors) +{ + factors->p = nullptr; + factors->e = nullptr; + factors->nfactors = 0; +} + +static void +mp_factor_clear (struct mp_factors *factors) +{ + for (idx_t i = 0; i < factors->nfactors; i++) + mpz_clear (factors->p[i]); + + free (factors->p); + free (factors->e); +} + +static void +mp_factor_insert (struct mp_factors *factors, mpz_t prime) +{ + idx_t nfactors = factors->nfactors; + mpz_t *p = factors->p; + unsigned long int *e = factors->e; + ptrdiff_t i; + + /* Locate position for insert new or increment e. */ + for (i = nfactors - 1; i >= 0; i--) + { + if (mpz_cmp (p[i], prime) <= 0) + break; + } + + if (i < 0 || mpz_cmp (p[i], prime) != 0) + { + p = xireallocarray (p, nfactors + 1, sizeof p[0]); + e = xireallocarray (e, nfactors + 1, sizeof e[0]); + + mpz_init (p[nfactors]); + for (long j = nfactors - 1; j > i; j--) + { + mpz_set (p[j + 1], p[j]); + e[j + 1] = e[j]; + } + mpz_set (p[i + 1], prime); + e[i + 1] = 1; + + factors->p = p; + factors->e = e; + factors->nfactors = nfactors + 1; + } + else + { + e[i] += 1; + } +} + +static void +mp_factor_insert_ui (struct mp_factors *factors, unsigned long int prime) +{ + mpz_t pz; + + mpz_init_set_ui (pz, prime); + mp_factor_insert (factors, pz); + mpz_clear (pz); +} + + +/* Number of bits in an uintmax_t. */ +enum { W = sizeof (uintmax_t) * CHAR_BIT }; + +/* Verify that uintmax_t does not have holes in its representation. */ +static_assert (UINTMAX_MAX >> (W - 1) == 1); + +#define P(a,b,c,d) a, +static const unsigned char primes_diff[] = { +#include "primes.h" +0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */ +}; +#undef P + +#define PRIMES_PTAB_ENTRIES \ + (sizeof (primes_diff) / sizeof (primes_diff[0]) - 8 + 1) + +#define P(a,b,c,d) b, +static const unsigned char primes_diff8[] = { +#include "primes.h" +0,0,0,0,0,0,0 /* 7 sentinels for 8-way loop */ +}; +#undef P + +struct primes_dtab +{ + uintmax_t binv, lim; +}; + +#define P(a,b,c,d) {c,d}, +static const struct primes_dtab primes_dtab[] = { +#include "primes.h" +{1,0},{1,0},{1,0},{1,0},{1,0},{1,0},{1,0} /* 7 sentinels for 8-way loop */ +}; +#undef P + +/* Verify that uintmax_t is not wider than + the integers used to generate primes.h. */ +static_assert (W <= WIDE_UINT_BITS); + +/* debugging for developers. Enables devmsg(). + This flag is used only in the GMP code. */ +static bool dev_debug = false; + +/* Prove primality or run probabilistic tests. */ +static bool flag_prove_primality = PROVE_PRIMALITY; + +/* Number of Miller-Rabin tests to run when not proving primality. */ +#define MR_REPS 25 + +static void +factor_insert_refind (struct factors *factors, uintmax_t p, int i, int off) +{ + for (int j = 0; j < off; j++) + p += primes_diff[i + j]; + factor_insert (factors, p); +} + +/* Trial division with odd primes uses the following trick. + + Let p be an odd prime, and B = 2^{W_TYPE_SIZE}. For simplicity, + consider the case t < B (this is the second loop below). + + From our tables we get + + binv = p^{-1} (mod B) + lim = floor ((B-1) / p). + + First assume that t is a multiple of p, t = q * p. Then 0 <= q <= lim + (and all quotients in this range occur for some t). + + Then t = q * p is true also (mod B), and p is invertible we get + + q = t * binv (mod B). + + Next, assume that t is *not* divisible by p. Since multiplication + by binv (mod B) is a one-to-one mapping, + + t * binv (mod B) > lim, + + because all the smaller values are already taken. + + This can be summed up by saying that the function + + q(t) = binv * t (mod B) + + is a permutation of the range 0 <= t < B, with the curious property + that it maps the multiples of p onto the range 0 <= q <= lim, in + order, and the non-multiples of p onto the range lim < q < B. + */ + +static uintmax_t +factor_using_division (uintmax_t *t1p, uintmax_t t1, uintmax_t t0, + struct factors *factors) +{ + if (t0 % 2 == 0) + { + int cnt; + + if (t0 == 0) + { + count_trailing_zeros (cnt, t1); + t0 = t1 >> cnt; + t1 = 0; + cnt += W_TYPE_SIZE; + } + else + { + count_trailing_zeros (cnt, t0); + rsh2 (t1, t0, t1, t0, cnt); + } + + factor_insert_multiplicity (factors, 2, cnt); + } + + uintmax_t p = 3; + idx_t i; + for (i = 0; t1 > 0 && i < PRIMES_PTAB_ENTRIES; i++) + { + for (;;) + { + uintmax_t q1, q0, hi; + MAYBE_UNUSED uintmax_t lo; + + q0 = t0 * primes_dtab[i].binv; + umul_ppmm (hi, lo, q0, p); + if (hi > t1) + break; + hi = t1 - hi; + q1 = hi * primes_dtab[i].binv; + if (LIKELY (q1 > primes_dtab[i].lim)) + break; + t1 = q1; t0 = q0; + factor_insert (factors, p); + } + p += primes_diff[i + 1]; + } + if (t1p) + *t1p = t1; + +#define DIVBLOCK(I) \ + do { \ + for (;;) \ + { \ + q = t0 * pd[I].binv; \ + if (LIKELY (q > pd[I].lim)) \ + break; \ + t0 = q; \ + factor_insert_refind (factors, p, i + 1, I); \ + } \ + } while (0) + + for (; i < PRIMES_PTAB_ENTRIES; i += 8) + { + uintmax_t q; + const struct primes_dtab *pd = &primes_dtab[i]; + DIVBLOCK (0); + DIVBLOCK (1); + DIVBLOCK (2); + DIVBLOCK (3); + DIVBLOCK (4); + DIVBLOCK (5); + DIVBLOCK (6); + DIVBLOCK (7); + + p += primes_diff8[i]; + if (p * p > t0) + break; + } + + return t0; +} + +static void +mp_factor_using_division (mpz_t t, struct mp_factors *factors) +{ + mpz_t q; + mp_bitcnt_t p; + + devmsg ("[trial division] "); + + mpz_init (q); + + p = mpz_scan1 (t, 0); + mpz_fdiv_q_2exp (t, t, p); + while (p) + { + mp_factor_insert_ui (factors, 2); + --p; + } + + unsigned long int d = 3; + for (idx_t i = 1; i <= PRIMES_PTAB_ENTRIES;) + { + if (! mpz_divisible_ui_p (t, d)) + { + d += primes_diff[i++]; + if (mpz_cmp_ui (t, d * d) < 0) + break; + } + else + { + mpz_tdiv_q_ui (t, t, d); + mp_factor_insert_ui (factors, d); + } + } + + mpz_clear (q); +} + +/* Entry i contains (2i+1)^(-1) mod 2^8. */ +static const unsigned char binvert_table[128] = +{ + 0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF, + 0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF, + 0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF, + 0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF, + 0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF, + 0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F, + 0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F, + 0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F, + 0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F, + 0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F, + 0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F, + 0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F, + 0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F, + 0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F, + 0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F, + 0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF +}; + +/* Compute n^(-1) mod B, using a Newton iteration. */ +#define binv(inv,n) \ + do { \ + uintmax_t __n = (n); \ + uintmax_t __inv; \ + \ + __inv = binvert_table[(__n / 2) & 0x7F]; /* 8 */ \ + if (W_TYPE_SIZE > 8) __inv = 2 * __inv - __inv * __inv * __n; \ + if (W_TYPE_SIZE > 16) __inv = 2 * __inv - __inv * __inv * __n; \ + if (W_TYPE_SIZE > 32) __inv = 2 * __inv - __inv * __inv * __n; \ + \ + if (W_TYPE_SIZE > 64) \ + { \ + int __invbits = 64; \ + do { \ + __inv = 2 * __inv - __inv * __inv * __n; \ + __invbits *= 2; \ + } while (__invbits < W_TYPE_SIZE); \ + } \ + \ + (inv) = __inv; \ + } while (0) + +/* q = u / d, assuming d|u. */ +#define divexact_21(q1, q0, u1, u0, d) \ + do { \ + uintmax_t _di, _q0; \ + binv (_di, (d)); \ + _q0 = (u0) * _di; \ + if ((u1) >= (d)) \ + { \ + uintmax_t _p1; \ + MAYBE_UNUSED intmax_t _p0; \ + umul_ppmm (_p1, _p0, _q0, d); \ + (q1) = ((u1) - _p1) * _di; \ + (q0) = _q0; \ + } \ + else \ + { \ + (q0) = _q0; \ + (q1) = 0; \ + } \ + } while (0) + +/* x B (mod n). */ +#define redcify(r_prim, r, n) \ + do { \ + MAYBE_UNUSED uintmax_t _redcify_q; \ + udiv_qrnnd (_redcify_q, r_prim, r, 0, n); \ + } while (0) + +/* x B^2 (mod n). Requires x > 0, n1 < B/2. */ +#define redcify2(r1, r0, x, n1, n0) \ + do { \ + uintmax_t _r1, _r0, _i; \ + if ((x) < (n1)) \ + { \ + _r1 = (x); _r0 = 0; \ + _i = W_TYPE_SIZE; \ + } \ + else \ + { \ + _r1 = 0; _r0 = (x); \ + _i = 2 * W_TYPE_SIZE; \ + } \ + while (_i-- > 0) \ + { \ + lsh2 (_r1, _r0, _r1, _r0, 1); \ + if (ge2 (_r1, _r0, (n1), (n0))) \ + sub_ddmmss (_r1, _r0, _r1, _r0, (n1), (n0)); \ + } \ + (r1) = _r1; \ + (r0) = _r0; \ + } while (0) + +/* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B. + Both a and b must be in redc form, the result will be in redc form too. */ +static inline uintmax_t +mulredc (uintmax_t a, uintmax_t b, uintmax_t m, uintmax_t mi) +{ + uintmax_t rh, rl, q, th, xh; + MAYBE_UNUSED uintmax_t tl; + + umul_ppmm (rh, rl, a, b); + q = rl * mi; + umul_ppmm (th, tl, q, m); + xh = rh - th; + if (rh < th) + xh += m; + + return xh; +} + +/* Modular two-word multiplication, r = a * b mod m, with mi = m^(-1) mod B. + Both a and b must be in redc form, the result will be in redc form too. + For performance reasons, the most significant bit of m must be clear. */ +static uintmax_t +mulredc2 (uintmax_t *r1p, + uintmax_t a1, uintmax_t a0, uintmax_t b1, uintmax_t b0, + uintmax_t m1, uintmax_t m0, uintmax_t mi) +{ + uintmax_t r1, r0, q, p1, t1, t0, s1, s0; + MAYBE_UNUSED uintmax_t p0; + mi = -mi; + affirm ((a1 >> (W_TYPE_SIZE - 1)) == 0); + affirm ((b1 >> (W_TYPE_SIZE - 1)) == 0); + affirm ((m1 >> (W_TYPE_SIZE - 1)) == 0); + + /* First compute a0 * B^{-1} + +-----+ + |a0 b0| + +--+--+--+ + |a0 b1| + +--+--+--+ + |q0 m0| + +--+--+--+ + |q0 m1| + -+--+--+--+ + |r1|r0| 0| + +--+--+--+ + */ + umul_ppmm (t1, t0, a0, b0); + umul_ppmm (r1, r0, a0, b1); + q = mi * t0; + umul_ppmm (p1, p0, q, m0); + umul_ppmm (s1, s0, q, m1); + r0 += (t0 != 0); /* Carry */ + add_ssaaaa (r1, r0, r1, r0, 0, p1); + add_ssaaaa (r1, r0, r1, r0, 0, t1); + add_ssaaaa (r1, r0, r1, r0, s1, s0); + + /* Next, (a1 * + B^{-1} + +-----+ + |a1 b0| + +--+--+ + |r1|r0| + +--+--+--+ + |a1 b1| + +--+--+--+ + |q1 m0| + +--+--+--+ + |q1 m1| + -+--+--+--+ + |r1|r0| 0| + +--+--+--+ + */ + umul_ppmm (t1, t0, a1, b0); + umul_ppmm (s1, s0, a1, b1); + add_ssaaaa (t1, t0, t1, t0, 0, r0); + q = mi * t0; + add_ssaaaa (r1, r0, s1, s0, 0, r1); + umul_ppmm (p1, p0, q, m0); + umul_ppmm (s1, s0, q, m1); + r0 += (t0 != 0); /* Carry */ + add_ssaaaa (r1, r0, r1, r0, 0, p1); + add_ssaaaa (r1, r0, r1, r0, 0, t1); + add_ssaaaa (r1, r0, r1, r0, s1, s0); + + if (ge2 (r1, r0, m1, m0)) + sub_ddmmss (r1, r0, r1, r0, m1, m0); + + *r1p = r1; + return r0; +} + +ATTRIBUTE_CONST +static uintmax_t +powm (uintmax_t b, uintmax_t e, uintmax_t n, uintmax_t ni, uintmax_t one) +{ + uintmax_t y = one; + + if (e & 1) + y = b; + + while (e != 0) + { + b = mulredc (b, b, n, ni); + e >>= 1; + + if (e & 1) + y = mulredc (y, b, n, ni); + } + + return y; +} + +static uintmax_t +powm2 (uintmax_t *r1m, + const uintmax_t *bp, const uintmax_t *ep, const uintmax_t *np, + uintmax_t ni, const uintmax_t *one) +{ + uintmax_t r1, r0, b1, b0, n1, n0; + int i; + uintmax_t e; + + b0 = bp[0]; + b1 = bp[1]; + n0 = np[0]; + n1 = np[1]; + + r0 = one[0]; + r1 = one[1]; + + for (e = ep[0], i = W_TYPE_SIZE; i > 0; i--, e >>= 1) + { + if (e & 1) + { + r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni); + r1 = *r1m; + } + b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni); + b1 = *r1m; + } + for (e = ep[1]; e > 0; e >>= 1) + { + if (e & 1) + { + r0 = mulredc2 (r1m, r1, r0, b1, b0, n1, n0, ni); + r1 = *r1m; + } + b0 = mulredc2 (r1m, b1, b0, b1, b0, n1, n0, ni); + b1 = *r1m; + } + *r1m = r1; + return r0; +} + +ATTRIBUTE_CONST +static bool +millerrabin (uintmax_t n, uintmax_t ni, uintmax_t b, uintmax_t q, + int k, uintmax_t one) +{ + uintmax_t y = powm (b, q, n, ni, one); + + uintmax_t nm1 = n - one; /* -1, but in redc representation. */ + + if (y == one || y == nm1) + return true; + + for (int i = 1; i < k; i++) + { + y = mulredc (y, y, n, ni); + + if (y == nm1) + return true; + if (y == one) + return false; + } + return false; +} + +ATTRIBUTE_PURE static bool +millerrabin2 (const uintmax_t *np, uintmax_t ni, const uintmax_t *bp, + const uintmax_t *qp, int k, const uintmax_t *one) +{ + uintmax_t y1, y0, nm1_1, nm1_0, r1m; + + y0 = powm2 (&r1m, bp, qp, np, ni, one); + y1 = r1m; + + if (y0 == one[0] && y1 == one[1]) + return true; + + sub_ddmmss (nm1_1, nm1_0, np[1], np[0], one[1], one[0]); + + if (y0 == nm1_0 && y1 == nm1_1) + return true; + + for (int i = 1; i < k; i++) + { + y0 = mulredc2 (&r1m, y1, y0, y1, y0, np[1], np[0], ni); + y1 = r1m; + + if (y0 == nm1_0 && y1 == nm1_1) + return true; + if (y0 == one[0] && y1 == one[1]) + return false; + } + return false; +} + +static bool +mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y, + mpz_srcptr q, mp_bitcnt_t k) +{ + mpz_powm (y, x, q, n); + + if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0) + return true; + + for (mp_bitcnt_t i = 1; i < k; i++) + { + mpz_powm_ui (y, y, 2, n); + if (mpz_cmp (y, nm1) == 0) + return true; + if (mpz_cmp_ui (y, 1) == 0) + return false; + } + return false; +} + +/* Lucas' prime test. The number of iterations vary greatly, up to a few dozen + have been observed. The average seem to be about 2. */ +static bool ATTRIBUTE_PURE +prime_p (uintmax_t n) +{ + mp_bitcnt_t k; + bool is_prime; + uintmax_t a_prim, one, ni; + struct factors factors; + + if (n <= 1) + return false; + + /* We have already cast out small primes. */ + if (n < (uintmax_t) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) + return true; + + /* Precomputation for Miller-Rabin. */ + uintmax_t q = n - 1; + for (k = 0; (q & 1) == 0; k++) + q >>= 1; + + uintmax_t a = 2; + binv (ni, n); /* ni <- 1/n mod B */ + redcify (one, 1, n); + addmod (a_prim, one, one, n); /* i.e., redcify a = 2 */ + + /* Perform a Miller-Rabin test, finds most composites quickly. */ + if (!millerrabin (n, ni, a_prim, q, k, one)) + return false; + + if (flag_prove_primality) + { + /* Factor n-1 for Lucas. */ + factor (0, n - 1, &factors); + } + + /* Loop until Lucas proves our number prime, or Miller-Rabin proves our + number composite. */ + for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++) + { + if (flag_prove_primality) + { + is_prime = true; + for (int i = 0; i < factors.nfactors && is_prime; i++) + { + is_prime + = powm (a_prim, (n - 1) / factors.p[i], n, ni, one) != one; + } + } + else + { + /* After enough Miller-Rabin runs, be content. */ + is_prime = (r == MR_REPS - 1); + } + + if (is_prime) + return true; + + a += primes_diff[r]; /* Establish new base. */ + + /* The following is equivalent to redcify (a_prim, a, n). It runs faster + on most processors, since it avoids udiv_qrnnd. If we go down the + udiv_qrnnd_preinv path, this code should be replaced. */ + { + uintmax_t s1, s0; + umul_ppmm (s1, s0, one, a); + if (LIKELY (s1 == 0)) + a_prim = s0 % n; + else + { + MAYBE_UNUSED uintmax_t dummy; + udiv_qrnnd (dummy, a_prim, s1, s0, n); + } + } + + if (!millerrabin (n, ni, a_prim, q, k, one)) + return false; + } + + affirm (!"Lucas prime test failure. This should not happen"); +} + +static bool ATTRIBUTE_PURE +prime2_p (uintmax_t n1, uintmax_t n0) +{ + uintmax_t q[2], nm1[2]; + uintmax_t a_prim[2]; + uintmax_t one[2]; + uintmax_t na[2]; + uintmax_t ni; + int k; + struct factors factors; + + if (n1 == 0) + return prime_p (n0); + + nm1[1] = n1 - (n0 == 0); + nm1[0] = n0 - 1; + if (nm1[0] == 0) + { + count_trailing_zeros (k, nm1[1]); + + q[0] = nm1[1] >> k; + q[1] = 0; + k += W_TYPE_SIZE; + } + else + { + count_trailing_zeros (k, nm1[0]); + rsh2 (q[1], q[0], nm1[1], nm1[0], k); + } + + uintmax_t a = 2; + binv (ni, n0); + redcify2 (one[1], one[0], 1, n1, n0); + addmod2 (a_prim[1], a_prim[0], one[1], one[0], one[1], one[0], n1, n0); + + /* FIXME: Use scalars or pointers in arguments? Some consistency needed. */ + na[0] = n0; + na[1] = n1; + + if (!millerrabin2 (na, ni, a_prim, q, k, one)) + return false; + + if (flag_prove_primality) + { + /* Factor n-1 for Lucas. */ + factor (nm1[1], nm1[0], &factors); + } + + /* Loop until Lucas proves our number prime, or Miller-Rabin proves our + number composite. */ + for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++) + { + bool is_prime; + uintmax_t e[2], y[2]; + + if (flag_prove_primality) + { + is_prime = true; + if (factors.plarge[1]) + { + uintmax_t pi; + binv (pi, factors.plarge[0]); + e[0] = pi * nm1[0]; + e[1] = 0; + y[0] = powm2 (&y[1], a_prim, e, na, ni, one); + is_prime = (y[0] != one[0] || y[1] != one[1]); + } + for (int i = 0; i < factors.nfactors && is_prime; i++) + { + /* FIXME: We always have the factor 2. Do we really need to + handle it here? We have done the same powering as part + of millerrabin. */ + if (factors.p[i] == 2) + rsh2 (e[1], e[0], nm1[1], nm1[0], 1); + else + divexact_21 (e[1], e[0], nm1[1], nm1[0], factors.p[i]); + y[0] = powm2 (&y[1], a_prim, e, na, ni, one); + is_prime = (y[0] != one[0] || y[1] != one[1]); + } + } + else + { + /* After enough Miller-Rabin runs, be content. */ + is_prime = (r == MR_REPS - 1); + } + + if (is_prime) + return true; + + a += primes_diff[r]; /* Establish new base. */ + redcify2 (a_prim[1], a_prim[0], a, n1, n0); + + if (!millerrabin2 (na, ni, a_prim, q, k, one)) + return false; + } + + affirm (!"Lucas prime test failure. This should not happen"); +} + +static bool +mp_prime_p (mpz_t n) +{ + bool is_prime; + mpz_t q, a, nm1, tmp; + struct mp_factors factors; + + if (mpz_cmp_ui (n, 1) <= 0) + return false; + + /* We have already cast out small primes. */ + if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0) + return true; + + mpz_inits (q, a, nm1, tmp, nullptr); + + /* Precomputation for Miller-Rabin. */ + mpz_sub_ui (nm1, n, 1); + + /* Find q and k, where q is odd and n = 1 + 2**k * q. */ + mp_bitcnt_t k = mpz_scan1 (nm1, 0); + mpz_tdiv_q_2exp (q, nm1, k); + + mpz_set_ui (a, 2); + + /* Perform a Miller-Rabin test, finds most composites quickly. */ + if (!mp_millerrabin (n, nm1, a, tmp, q, k)) + { + is_prime = false; + goto ret2; + } + + if (flag_prove_primality) + { + /* Factor n-1 for Lucas. */ + mpz_set (tmp, nm1); + mp_factor (tmp, &factors); + } + + /* Loop until Lucas proves our number prime, or Miller-Rabin proves our + number composite. */ + for (idx_t r = 0; r < PRIMES_PTAB_ENTRIES; r++) + { + if (flag_prove_primality) + { + is_prime = true; + for (idx_t i = 0; i < factors.nfactors && is_prime; i++) + { + mpz_divexact (tmp, nm1, factors.p[i]); + mpz_powm (tmp, a, tmp, n); + is_prime = mpz_cmp_ui (tmp, 1) != 0; + } + } + else + { + /* After enough Miller-Rabin runs, be content. */ + is_prime = (r == MR_REPS - 1); + } + + if (is_prime) + goto ret1; + + mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */ + + if (!mp_millerrabin (n, nm1, a, tmp, q, k)) + { + is_prime = false; + goto ret1; + } + } + + affirm (!"Lucas prime test failure. This should not happen"); + + ret1: + if (flag_prove_primality) + mp_factor_clear (&factors); + ret2: + mpz_clears (q, a, nm1, tmp, nullptr); + + return is_prime; +} + +static void +factor_using_pollard_rho (uintmax_t n, unsigned long int a, + struct factors *factors) +{ + uintmax_t x, z, y, P, t, ni, g; + + unsigned long int k = 1; + unsigned long int l = 1; + + redcify (P, 1, n); + addmod (x, P, P, n); /* i.e., redcify(2) */ + y = z = x; + + while (n != 1) + { + affirm (a < n); + + binv (ni, n); /* FIXME: when could we use old 'ni' value? */ + + for (;;) + { + do + { + x = mulredc (x, x, n, ni); + addmod (x, x, a, n); + + submod (t, z, x, n); + P = mulredc (P, t, n, ni); + + if (k % 32 == 1) + { + if (gcd_odd (P, n) != 1) + goto factor_found; + y = x; + } + } + while (--k != 0); + + z = x; + k = l; + l = 2 * l; + for (unsigned long int i = 0; i < k; i++) + { + x = mulredc (x, x, n, ni); + addmod (x, x, a, n); + } + y = x; + } + + factor_found: + do + { + y = mulredc (y, y, n, ni); + addmod (y, y, a, n); + + submod (t, z, y, n); + g = gcd_odd (t, n); + } + while (g == 1); + + if (n == g) + { + /* Found n itself as factor. Restart with different params. */ + factor_using_pollard_rho (n, a + 1, factors); + return; + } + + n = n / g; + + if (!prime_p (g)) + factor_using_pollard_rho (g, a + 1, factors); + else + factor_insert (factors, g); + + if (prime_p (n)) + { + factor_insert (factors, n); + break; + } + + x = x % n; + z = z % n; + y = y % n; + } +} + +static void +factor_using_pollard_rho2 (uintmax_t n1, uintmax_t n0, unsigned long int a, + struct factors *factors) +{ + uintmax_t x1, x0, z1, z0, y1, y0, P1, P0, t1, t0, ni, g1, g0, r1m; + + unsigned long int k = 1; + unsigned long int l = 1; + + redcify2 (P1, P0, 1, n1, n0); + addmod2 (x1, x0, P1, P0, P1, P0, n1, n0); /* i.e., redcify(2) */ + y1 = z1 = x1; + y0 = z0 = x0; + + while (n1 != 0 || n0 != 1) + { + binv (ni, n0); + + for (;;) + { + do + { + x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni); + x1 = r1m; + addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0); + + submod2 (t1, t0, z1, z0, x1, x0, n1, n0); + P0 = mulredc2 (&r1m, P1, P0, t1, t0, n1, n0, ni); + P1 = r1m; + + if (k % 32 == 1) + { + g0 = gcd2_odd (&g1, P1, P0, n1, n0); + if (g1 != 0 || g0 != 1) + goto factor_found; + y1 = x1; y0 = x0; + } + } + while (--k != 0); + + z1 = x1; z0 = x0; + k = l; + l = 2 * l; + for (unsigned long int i = 0; i < k; i++) + { + x0 = mulredc2 (&r1m, x1, x0, x1, x0, n1, n0, ni); + x1 = r1m; + addmod2 (x1, x0, x1, x0, 0, (uintmax_t) a, n1, n0); + } + y1 = x1; y0 = x0; + } + + factor_found: + do + { + y0 = mulredc2 (&r1m, y1, y0, y1, y0, n1, n0, ni); + y1 = r1m; + addmod2 (y1, y0, y1, y0, 0, (uintmax_t) a, n1, n0); + + submod2 (t1, t0, z1, z0, y1, y0, n1, n0); + g0 = gcd2_odd (&g1, t1, t0, n1, n0); + } + while (g1 == 0 && g0 == 1); + + if (g1 == 0) + { + /* The found factor is one word, and > 1. */ + divexact_21 (n1, n0, n1, n0, g0); /* n = n / g */ + + if (!prime_p (g0)) + factor_using_pollard_rho (g0, a + 1, factors); + else + factor_insert (factors, g0); + } + else + { + /* The found factor is two words. This is highly unlikely, thus hard + to trigger. Please be careful before you change this code! */ + uintmax_t ginv; + + if (n1 == g1 && n0 == g0) + { + /* Found n itself as factor. Restart with different params. */ + factor_using_pollard_rho2 (n1, n0, a + 1, factors); + return; + } + + /* Compute n = n / g. Since the result will fit one word, + we can compute the quotient modulo B, ignoring the high + divisor word. */ + binv (ginv, g0); + n0 = ginv * n0; + n1 = 0; + + if (!prime2_p (g1, g0)) + factor_using_pollard_rho2 (g1, g0, a + 1, factors); + else + factor_insert_large (factors, g1, g0); + } + + if (n1 == 0) + { + if (prime_p (n0)) + { + factor_insert (factors, n0); + break; + } + + factor_using_pollard_rho (n0, a, factors); + return; + } + + if (prime2_p (n1, n0)) + { + factor_insert_large (factors, n1, n0); + break; + } + + x0 = mod2 (&x1, x1, x0, n1, n0); + z0 = mod2 (&z1, z1, z0, n1, n0); + y0 = mod2 (&y1, y1, y0, n1, n0); + } +} + +static void +mp_factor_using_pollard_rho (mpz_t n, unsigned long int a, + struct mp_factors *factors) +{ + mpz_t x, z, y, P; + mpz_t t, t2; + + devmsg ("[pollard-rho (%lu)] ", a); + + mpz_inits (t, t2, nullptr); + mpz_init_set_si (y, 2); + mpz_init_set_si (x, 2); + mpz_init_set_si (z, 2); + mpz_init_set_ui (P, 1); + + unsigned long long int k = 1; + unsigned long long int l = 1; + + while (mpz_cmp_ui (n, 1) != 0) + { + for (;;) + { + do + { + mpz_mul (t, x, x); + mpz_mod (x, t, n); + mpz_add_ui (x, x, a); + + mpz_sub (t, z, x); + mpz_mul (t2, P, t); + mpz_mod (P, t2, n); + + if (k % 32 == 1) + { + mpz_gcd (t, P, n); + if (mpz_cmp_ui (t, 1) != 0) + goto factor_found; + mpz_set (y, x); + } + } + while (--k != 0); + + mpz_set (z, x); + k = l; + l = 2 * l; + for (unsigned long long int i = 0; i < k; i++) + { + mpz_mul (t, x, x); + mpz_mod (x, t, n); + mpz_add_ui (x, x, a); + } + mpz_set (y, x); + } + + factor_found: + do + { + mpz_mul (t, y, y); + mpz_mod (y, t, n); + mpz_add_ui (y, y, a); + + mpz_sub (t, z, y); + mpz_gcd (t, t, n); + } + while (mpz_cmp_ui (t, 1) == 0); + + mpz_divexact (n, n, t); /* divide by t, before t is overwritten */ + + if (!mp_prime_p (t)) + { + devmsg ("[composite factor--restarting pollard-rho] "); + mp_factor_using_pollard_rho (t, a + 1, factors); + } + else + { + mp_factor_insert (factors, t); + } + + if (mp_prime_p (n)) + { + mp_factor_insert (factors, n); + break; + } + + mpz_mod (x, x, n); + mpz_mod (z, z, n); + mpz_mod (y, y, n); + } + + mpz_clears (P, t2, t, z, x, y, nullptr); +} + +#if USE_SQUFOF +/* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If + algorithm is replaced, consider also returning the remainder. */ +ATTRIBUTE_CONST +static uintmax_t +isqrt (uintmax_t n) +{ + uintmax_t x; + int c; + if (n == 0) + return 0; + + count_leading_zeros (c, n); + + /* Make x > sqrt(n). This will be invariant through the loop. */ + x = (uintmax_t) 1 << ((W_TYPE_SIZE + 1 - c) >> 1); + + for (;;) + { + uintmax_t y = (x + n / x) / 2; + if (y >= x) + return x; + + x = y; + } +} + +ATTRIBUTE_CONST +static uintmax_t +isqrt2 (uintmax_t nh, uintmax_t nl) +{ + int shift; + uintmax_t x; + + /* Ensures the remainder fits in an uintmax_t. */ + affirm (nh < ((uintmax_t) 1 << (W_TYPE_SIZE - 2))); + + if (nh == 0) + return isqrt (nl); + + count_leading_zeros (shift, nh); + shift &= ~1; + + /* Make x > sqrt (n). */ + x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1; + x <<= (W_TYPE_SIZE - shift) >> 1; + + /* Do we need more than one iteration? */ + for (;;) + { + MAYBE_UNUSED uintmax_t r; + uintmax_t q, y; + udiv_qrnnd (q, r, nh, nl, x); + y = (x + q) / 2; + + if (y >= x) + { + uintmax_t hi, lo; + umul_ppmm (hi, lo, x + 1, x + 1); + affirm (gt2 (hi, lo, nh, nl)); + + umul_ppmm (hi, lo, x, x); + affirm (ge2 (nh, nl, hi, lo)); + sub_ddmmss (hi, lo, nh, nl, hi, lo); + affirm (hi == 0); + + return x; + } + + x = y; + } +} + +/* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */ +# define MAGIC64 0x0202021202030213ULL +# define MAGIC63 0x0402483012450293ULL +# define MAGIC65 0x218a019866014613ULL +# define MAGIC11 0x23b + +/* Return the square root if the input is a square, otherwise 0. */ +ATTRIBUTE_CONST +static uintmax_t +is_square (uintmax_t x) +{ + /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before + computing the square root. */ + if (((MAGIC64 >> (x & 63)) & 1) + && ((MAGIC63 >> (x % 63)) & 1) + /* Both 0 and 64 are squares mod (65). */ + && ((MAGIC65 >> ((x % 65) & 63)) & 1) + && ((MAGIC11 >> (x % 11) & 1))) + { + uintmax_t r = isqrt (x); + if (r * r == x) + return r; + } + return 0; +} + +/* invtab[i] = floor (0x10000 / (0x100 + i) */ +static short const invtab[0x81] = + { + 0x200, + 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1, + 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7, + 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af, + 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199, + 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186, + 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174, + 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164, + 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155, + 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147, + 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b, + 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f, + 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124, + 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a, + 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111, + 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108, + 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100, + }; + +/* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case + that q < 0x40; here it instead uses a table of (Euclidean) inverses. */ +# define div_smallq(q, r, u, d) \ + do { \ + if ((u) / 0x40 < (d)) \ + { \ + int _cnt; \ + uintmax_t _dinv, _mask, _q, _r; \ + count_leading_zeros (_cnt, (d)); \ + _r = (u); \ + if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \ + { \ + _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \ + _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \ + } \ + else \ + { \ + _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \ + _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \ + } \ + _r -= _q * (d); \ + \ + _mask = -(uintmax_t) (_r >= (d)); \ + (r) = _r - (_mask & (d)); \ + (q) = _q - _mask; \ + affirm ((q) * (d) + (r) == u); \ + } \ + else \ + { \ + uintmax_t _q = (u) / (d); \ + (r) = (u) - _q * (d); \ + (q) = _q; \ + } \ + } while (0) + +/* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q + = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025), + with 3025 = 55^2. + + Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652, + representing G_0 = (-55, 2 * 4652, 8653). + + In the notation of the paper: + + S_{-1} = 55, S_0 = 8653, R_0 = 4652 + + Put + + t_0 = floor([q_0 + R_0] / S0) = 1 + R_1 = t_0 * S_0 - R_0 = 4001 + S_1 = S_{-1} +t_0 (R_0 - R_1) = 706 +*/ + +/* Multipliers, in order of efficiency: + 0.7268 3*5*7*11 = 1155 = 3 (mod 4) + 0.7317 3*5*7 = 105 = 1 + 0.7820 3*5*11 = 165 = 1 + 0.7872 3*5 = 15 = 3 + 0.8101 3*7*11 = 231 = 3 + 0.8155 3*7 = 21 = 1 + 0.8284 5*7*11 = 385 = 1 + 0.8339 5*7 = 35 = 3 + 0.8716 3*11 = 33 = 1 + 0.8774 3 = 3 = 3 + 0.8913 5*11 = 55 = 3 + 0.8972 5 = 5 = 1 + 0.9233 7*11 = 77 = 1 + 0.9295 7 = 7 = 3 + 0.9934 11 = 11 = 3 +*/ +# define QUEUE_SIZE 50 +#endif + +#if STAT_SQUFOF +# define Q_FREQ_SIZE 50 +/* Element 0 keeps the total */ +static int q_freq[Q_FREQ_SIZE + 1]; +#endif + +#if USE_SQUFOF +/* Return true on success. Expected to fail only for numbers + >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */ +static bool +factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) +{ + /* Uses algorithm and notation from + + SQUARE FORM FACTORIZATION + JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR. + + https://homes.cerias.purdue.edu/~ssw/squfof.pdf + */ + + static short const multipliers_1[] = + { /* = 1 (mod 4) */ + 105, 165, 21, 385, 33, 5, 77, 1, 0 + }; + static short const multipliers_3[] = + { /* = 3 (mod 4) */ + 1155, 15, 231, 35, 3, 55, 7, 11, 0 + }; + + struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE]; + + if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2))) + return false; + + uintmax_t sqrt_n = isqrt2 (n1, n0); + + if (n0 == sqrt_n * sqrt_n) + { + uintmax_t p1, p0; + + umul_ppmm (p1, p0, sqrt_n, sqrt_n); + affirm (p0 == n0); + + if (n1 == p1) + { + if (prime_p (sqrt_n)) + factor_insert_multiplicity (factors, sqrt_n, 2); + else + { + struct factors f; + + f.nfactors = 0; + if (!factor_using_squfof (0, sqrt_n, &f)) + { + /* Try pollard rho instead */ + factor_using_pollard_rho (sqrt_n, 1, &f); + } + /* Duplicate the new factors */ + for (unsigned int i = 0; i < f.nfactors; i++) + factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]); + } + return true; + } + } + + /* Select multipliers so we always get n * mu = 3 (mod 4) */ + for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1; + *m; m++) + { + uintmax_t S, Dh, Dl, Q1, Q, P, L, L1, B; + unsigned int i; + unsigned int mu = *m; + int qpos = 0; + + affirm (mu * n0 % 4 == 3); + + /* In the notation of the paper, with mu * n == 3 (mod 4), we + get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as + I understand it, the necessary bound is 4 \mu^3 < n, or 32 + mu^3 < n. + + However, this seems insufficient: With n = 37243139 and mu = + 105, we get a trivial factor, from the square 38809 = 197^2, + without any corresponding Q earlier in the iteration. + + Requiring 64 mu^3 < n seems sufficient. */ + if (n1 == 0) + { + if ((uintmax_t) mu * mu * mu >= n0 / 64) + continue; + } + else + { + if (n1 > ((uintmax_t) 1 << (W_TYPE_SIZE - 2)) / mu) + continue; + } + umul_ppmm (Dh, Dl, n0, mu); + Dh += n1 * mu; + + affirm (Dl % 4 != 1); + affirm (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2)); + + S = isqrt2 (Dh, Dl); + + Q1 = 1; + P = S; + + /* Square root remainder fits in one word, so ignore high part. */ + Q = Dl - P * P; + /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */ + L = isqrt (2 * S); + B = 2 * L; + L1 = mu * 2 * L; + + /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) = + 4 D. */ + + for (i = 0; i <= B; i++) + { + uintmax_t q, P1, t, rem; + + div_smallq (q, rem, S + P, Q); + P1 = S - rem; /* P1 = q*Q - P */ + + affirm (q > 0 && Q > 0); + +# if STAT_SQUFOF + q_freq[0]++; + q_freq[MIN (q, Q_FREQ_SIZE)]++; +# endif + + if (Q <= L1) + { + uintmax_t g = Q; + + if ((Q & 1) == 0) + g /= 2; + + g /= gcd_odd (g, mu); + + if (g <= L) + { + if (qpos >= QUEUE_SIZE) + error (EXIT_FAILURE, 0, _("squfof queue overflow")); + queue[qpos].Q = g; + queue[qpos].P = P % g; + qpos++; + } + } + + /* I think the difference can be either sign, but mod + 2^W_TYPE_SIZE arithmetic should be fine. */ + t = Q1 + q * (P - P1); + Q1 = Q; + Q = t; + P = P1; + + if ((i & 1) == 0) + { + uintmax_t r = is_square (Q); + if (r) + { + for (int j = 0; j < qpos; j++) + { + if (queue[j].Q == r) + { + if (r == 1) + /* Traversed entire cycle. */ + goto next_multiplier; + + /* Need the absolute value for divisibility test. */ + if (P >= queue[j].P) + t = P - queue[j].P; + else + t = queue[j].P - P; + if (t % r == 0) + { + /* Delete entries up to and including entry + j, which matched. */ + memmove (queue, queue + j + 1, + (qpos - j - 1) * sizeof (queue[0])); + qpos -= (j + 1); + } + goto next_i; + } + } + + /* We have found a square form, which should give a + factor. */ + Q1 = r; + affirm (S >= P); /* What signs are possible? */ + P += r * ((S - P) / r); + + /* Note: Paper says (N - P*P) / Q1, that seems incorrect + for the case D = 2N. */ + /* Compute Q = (D - P*P) / Q1, but we need double + precision. */ + uintmax_t hi, lo; + umul_ppmm (hi, lo, P, P); + sub_ddmmss (hi, lo, Dh, Dl, hi, lo); + udiv_qrnnd (Q, rem, hi, lo, Q1); + affirm (rem == 0); + + for (;;) + { + /* Note: There appears to by a typo in the paper, + Step 4a in the algorithm description says q <-- + floor([S+P]/\hat Q), but looking at the equations + in Sec. 3.1, it should be q <-- floor([S+P] / Q). + (In this code, \hat Q is Q1). */ + div_smallq (q, rem, S + P, Q); + P1 = S - rem; /* P1 = q*Q - P */ + +# if STAT_SQUFOF + q_freq[0]++; + q_freq[MIN (q, Q_FREQ_SIZE)]++; +# endif + if (P == P1) + break; + t = Q1 + q * (P - P1); + Q1 = Q; + Q = t; + P = P1; + } + + if ((Q & 1) == 0) + Q /= 2; + Q /= gcd_odd (Q, mu); + + affirm (Q > 1 && (n1 || Q < n0)); + + if (prime_p (Q)) + factor_insert (factors, Q); + else if (!factor_using_squfof (0, Q, factors)) + factor_using_pollard_rho (Q, 2, factors); + + divexact_21 (n1, n0, n1, n0, Q); + + if (prime2_p (n1, n0)) + factor_insert_large (factors, n1, n0); + else + { + if (!factor_using_squfof (n1, n0, factors)) + { + if (n1 == 0) + factor_using_pollard_rho (n0, 1, factors); + else + factor_using_pollard_rho2 (n1, n0, 1, factors); + } + } + + return true; + } + } + next_i:; + } + next_multiplier:; + } + return false; +} +#endif + +/* Compute the prime factors of the 128-bit number (T1,T0), and put the + results in FACTORS. */ +static void +factor (uintmax_t t1, uintmax_t t0, struct factors *factors) +{ + factors->nfactors = 0; + factors->plarge[1] = 0; + + if (t1 == 0 && t0 < 2) + return; + + t0 = factor_using_division (&t1, t1, t0, factors); + + if (t1 == 0 && t0 < 2) + return; + + if (prime2_p (t1, t0)) + factor_insert_large (factors, t1, t0); + else + { +#if USE_SQUFOF + if (factor_using_squfof (t1, t0, factors)) + return; +#endif + + if (t1 == 0) + factor_using_pollard_rho (t0, 1, factors); + else + factor_using_pollard_rho2 (t1, t0, 1, factors); + } +} + +/* Use Pollard-rho to compute the prime factors of + arbitrary-precision T, and put the results in FACTORS. */ +static void +mp_factor (mpz_t t, struct mp_factors *factors) +{ + mp_factor_init (factors); + + if (mpz_sgn (t) != 0) + { + mp_factor_using_division (t, factors); + + if (mpz_cmp_ui (t, 1) != 0) + { + devmsg ("[is number prime?] "); + if (mp_prime_p (t)) + mp_factor_insert (factors, t); + else + mp_factor_using_pollard_rho (t, 1, factors); + } + } +} + +static strtol_error +strto2uintmax (uintmax_t *hip, uintmax_t *lop, char const *s) +{ + int lo_carry; + uintmax_t hi = 0, lo = 0; + + strtol_error err = LONGINT_INVALID; + + /* Initial scan for invalid digits. */ + char const *p = s; + for (;;) + { + unsigned char c = *p++; + if (c == 0) + break; + + if (UNLIKELY (!ISDIGIT (c))) + { + err = LONGINT_INVALID; + break; + } + + err = LONGINT_OK; /* we've seen at least one valid digit */ + } + + while (err == LONGINT_OK) + { + unsigned char c = *s++; + if (c == 0) + break; + + c -= '0'; + + if (UNLIKELY (hi > ~(uintmax_t)0 / 10)) + { + err = LONGINT_OVERFLOW; + break; + } + hi = 10 * hi; + + lo_carry = (lo >> (W_TYPE_SIZE - 3)) + (lo >> (W_TYPE_SIZE - 1)); + lo_carry += 10 * lo < 2 * lo; + + lo = 10 * lo; + lo += c; + + lo_carry += lo < c; + hi += lo_carry; + if (UNLIKELY (hi < lo_carry)) + { + err = LONGINT_OVERFLOW; + break; + } + } + + *hip = hi; + *lop = lo; + + return err; +} + +/* Structure and routines for buffering and outputting full lines, + to support parallel operation efficiently. */ +static struct lbuf_ +{ + char *buf; + char *end; +} lbuf; + +/* 512 is chosen to give good performance, + and also is the max guaranteed size that + consumers can read atomically through pipes. + Also it's big enough to cater for max line length + even with 128 bit uintmax_t. */ +#define FACTOR_PIPE_BUF 512 + +static void +lbuf_alloc (void) +{ + if (lbuf.buf) + return; + + /* Double to ensure enough space for + previous numbers + next number. */ + lbuf.buf = xmalloc (FACTOR_PIPE_BUF * 2); + lbuf.end = lbuf.buf; +} + +/* Write complete LBUF to standard output. */ +static void +lbuf_flush (void) +{ + size_t size = lbuf.end - lbuf.buf; + if (full_write (STDOUT_FILENO, lbuf.buf, size) != size) + write_error (); + lbuf.end = lbuf.buf; +} + +/* Add a character C to LBUF and if it's a newline + and enough bytes are already buffered, + then write atomically to standard output. */ +static void +lbuf_putc (char c) +{ + *lbuf.end++ = c; + + if (c == '\n') + { + size_t buffered = lbuf.end - lbuf.buf; + + /* Provide immediate output for interactive use. */ + static int line_buffered = -1; + if (line_buffered == -1) + line_buffered = isatty (STDIN_FILENO) || isatty (STDOUT_FILENO); + if (line_buffered) + lbuf_flush (); + else if (buffered >= FACTOR_PIPE_BUF) + { + /* Write output in <= PIPE_BUF chunks + so consumers can read atomically. */ + char const *tend = lbuf.end; + + /* Since a umaxint_t's factors must fit in 512 + we're guaranteed to find a newline here. */ + char *tlend = lbuf.buf + FACTOR_PIPE_BUF; + while (*--tlend != '\n'); + tlend++; + + lbuf.end = tlend; + lbuf_flush (); + + /* Buffer the remainder. */ + memcpy (lbuf.buf, tlend, tend - tlend); + lbuf.end = lbuf.buf + (tend - tlend); + } + } +} + +/* Buffer an int to the internal LBUF. */ +static void +lbuf_putint (uintmax_t i, size_t min_width) +{ + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + char const *umaxstr = umaxtostr (i, buf); + size_t width = sizeof (buf) - (umaxstr - buf) - 1; + size_t z = width; + + for (; z < min_width; z++) + *lbuf.end++ = '0'; + + memcpy (lbuf.end, umaxstr, width); + lbuf.end += width; +} + +static void +print_uintmaxes (uintmax_t t1, uintmax_t t0) +{ + uintmax_t q, r; + + if (t1 == 0) + lbuf_putint (t0, 0); + else + { + /* Use very plain code here since it seems hard to write fast code + without assuming a specific word size. */ + q = t1 / 1000000000; + r = t1 % 1000000000; + udiv_qrnnd (t0, r, r, t0, 1000000000); + print_uintmaxes (q, t0); + lbuf_putint (r, 9); + } +} + +/* Single-precision factoring */ +static void +print_factors_single (uintmax_t t1, uintmax_t t0) +{ + struct factors factors; + + print_uintmaxes (t1, t0); + lbuf_putc (':'); + + factor (t1, t0, &factors); + + for (int j = 0; j < factors.nfactors; j++) + for (int k = 0; k < factors.e[j]; k++) + { + lbuf_putc (' '); + print_uintmaxes (0, factors.p[j]); + if (print_exponents && factors.e[j] > 1) + { + lbuf_putc ('^'); + lbuf_putint (factors.e[j], 0); + break; + } + } + + if (factors.plarge[1]) + { + lbuf_putc (' '); + print_uintmaxes (factors.plarge[1], factors.plarge[0]); + } + + lbuf_putc ('\n'); +} + +/* Emit the factors of the indicated number. If we have the option of using + either algorithm, we select on the basis of the length of the number. + For longer numbers, we prefer the MP algorithm even if the native algorithm + has enough digits, because the algorithm is better. The turnover point + depends on the value. */ +static bool +print_factors (char const *input) +{ + /* Skip initial spaces and '+'. */ + char const *str = input; + while (*str == ' ') + str++; + str += *str == '+'; + + uintmax_t t1, t0; + + /* Try converting the number to one or two words. If it fails, use GMP or + print an error message. The 2nd condition checks that the most + significant bit of the two-word number is clear, in a typesize neutral + way. */ + strtol_error err = strto2uintmax (&t1, &t0, str); + + switch (err) + { + case LONGINT_OK: + if (((t1 << 1) >> 1) == t1) + { + devmsg ("[using single-precision arithmetic] "); + print_factors_single (t1, t0); + return true; + } + break; + + case LONGINT_OVERFLOW: + /* Try GMP. */ + break; + + default: + error (0, 0, _("%s is not a valid positive integer"), quote (input)); + return false; + } + + devmsg ("[using arbitrary-precision arithmetic] "); + mpz_t t; + struct mp_factors factors; + + mpz_init_set_str (t, str, 10); + + mpz_out_str (stdout, 10, t); + putchar (':'); + mp_factor (t, &factors); + + for (idx_t j = 0; j < factors.nfactors; j++) + for (unsigned long int k = 0; k < factors.e[j]; k++) + { + putchar (' '); + mpz_out_str (stdout, 10, factors.p[j]); + if (print_exponents && factors.e[j] > 1) + { + printf ("^%lu", factors.e[j]); + break; + } + } + + mp_factor_clear (&factors); + mpz_clear (t); + putchar ('\n'); + fflush (stdout); + return true; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION] [NUMBER]...\n\ +"), + program_name); + fputs (_("\ +Print the prime factors of each specified integer NUMBER. If none\n\ +are specified on the command line, read them from standard input.\n\ +\n\ +"), stdout); + fputs ("\ + -h, --exponents print repeated factors in form p^e unless e is 1\n\ +", stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static bool +do_stdin (void) +{ + bool ok = true; + token_buffer tokenbuffer; + + init_tokenbuffer (&tokenbuffer); + + while (true) + { + size_t token_length = readtoken (stdin, DELIM, sizeof (DELIM) - 1, + &tokenbuffer); + if (token_length == (size_t) -1) + { + if (ferror (stdin)) + error (EXIT_FAILURE, errno, _("error reading input")); + break; + } + + ok &= print_factors (tokenbuffer.buffer); + } + free (tokenbuffer.buffer); + + return ok; +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + lbuf_alloc (); + atexit (close_stdout); + atexit (lbuf_flush); + + int c; + while ((c = getopt_long (argc, argv, "h", long_options, nullptr)) != -1) + { + switch (c) + { + case 'h': /* NetBSD used -h for this functionality first. */ + print_exponents = true; + break; + + case DEV_DEBUG_OPTION: + dev_debug = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + +#if STAT_SQUFOF + memset (q_freq, 0, sizeof (q_freq)); +#endif + + bool ok; + if (argc <= optind) + ok = do_stdin (); + else + { + ok = true; + for (int i = optind; i < argc; i++) + if (! print_factors (argv[i])) + ok = false; + } + +#if STAT_SQUFOF + if (q_freq[0] > 0) + { + double acc_f; + printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]); + for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++) + { + double f = (double) q_freq[i] / q_freq[0]; + acc_f += f; + printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i, + 100.0 * f, 100.0 * acc_f); + } + } +#endif + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/false.c b/src/false.c new file mode 100644 index 0000000..bc9c703 --- /dev/null +++ b/src/false.c @@ -0,0 +1,2 @@ +#define EXIT_STATUS EXIT_FAILURE +#include "true.c" diff --git a/src/find-mount-point.c b/src/find-mount-point.c new file mode 100644 index 0000000..30906de --- /dev/null +++ b/src/find-mount-point.c @@ -0,0 +1,111 @@ +/* find-mount-point.c -- find the root mount point for a file. + Copyright (C) 2010-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include + +#include "system.h" +#include "save-cwd.h" +#include "xgetcwd.h" +#include "find-mount-point.h" + +/* Return the root mountpoint of the file system on which FILE exists, in + malloced storage. FILE_STAT should be the result of stating FILE. + Give a diagnostic and return nullptr if unable to determine the mount point. + Exit if unable to restore current working directory. */ +extern char * +find_mount_point (char const *file, struct stat const *file_stat) +{ + struct saved_cwd cwd; + struct stat last_stat; + char *mp = nullptr; /* The malloc'd mount point. */ + + if (save_cwd (&cwd) != 0) + { + error (0, errno, _("cannot get current directory")); + return nullptr; + } + + if (S_ISDIR (file_stat->st_mode)) + /* FILE is a directory, so just chdir there directly. */ + { + last_stat = *file_stat; + if (chdir (file) < 0) + { + error (0, errno, _("cannot change to directory %s"), quoteaf (file)); + return nullptr; + } + } + else + /* FILE is some other kind of file; use its directory. */ + { + char *xdir = dir_name (file); + char *dir; + ASSIGN_STRDUPA (dir, xdir); + free (xdir); + + if (chdir (dir) < 0) + { + error (0, errno, _("cannot change to directory %s"), quoteaf (dir)); + return nullptr; + } + + if (stat (".", &last_stat) < 0) + { + error (0, errno, _("cannot stat current directory (now %s)"), + quoteaf (dir)); + goto done; + } + } + + /* Now walk up FILE's parents until we find another file system or /, + chdiring as we go. LAST_STAT holds stat information for the last place + we visited. */ + while (true) + { + struct stat st; + if (stat ("..", &st) < 0) + { + error (0, errno, _("cannot stat %s"), quoteaf ("..")); + goto done; + } + if (st.st_dev != last_stat.st_dev || st.st_ino == last_stat.st_ino) + /* cwd is the mount point. */ + break; + if (chdir ("..") < 0) + { + error (0, errno, _("cannot change to directory %s"), quoteaf ("..")); + goto done; + } + last_stat = st; + } + + /* Finally reached a mount point, see what it's called. */ + mp = xgetcwd (); + +done: + /* Restore the original cwd. */ + { + int save_errno = errno; + if (restore_cwd (&cwd) != 0) + error (EXIT_FAILURE, errno, + _("failed to return to initial working directory")); + free_cwd (&cwd); + errno = save_errno; + } + + return mp; +} diff --git a/src/find-mount-point.h b/src/find-mount-point.h new file mode 100644 index 0000000..96d562f --- /dev/null +++ b/src/find-mount-point.h @@ -0,0 +1,20 @@ +/* find-mount-point.h -- find the root mount point for a file. + Copyright (C) 2010-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +extern char *find_mount_point (char const *, struct stat const *) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE _GL_ATTRIBUTE_NONNULL (); diff --git a/src/fmt.c b/src/fmt.c new file mode 100644 index 0000000..ad7a9ce --- /dev/null +++ b/src/fmt.c @@ -0,0 +1,1045 @@ +/* GNU fmt -- simple text formatter. + Copyright (C) 1994-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Ross Paterson . */ + +#include +#include +#include +#include + +/* Redefine. Otherwise, systems (Unicos for one) with headers that define + it to be a type get syntax errors for the variable declaration below. */ +#define word unused_word_type + +#include "c-ctype.h" +#include "system.h" +#include "fadvise.h" +#include "xdectoint.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "fmt" + +#define AUTHORS proper_name ("Ross Paterson") + +/* The following parameters represent the program's idea of what is + "best". Adjust to taste, subject to the caveats given. */ + +/* Default longest permitted line length (max_width). */ +#define WIDTH 75 + +/* Prefer lines to be LEEWAY % shorter than the maximum width, giving + room for optimization. */ +#define LEEWAY 7 + +/* The default secondary indent of tagged paragraph used for unindented + one-line paragraphs not preceded by any multi-line paragraphs. */ +#define DEF_INDENT 3 + +/* Costs and bonuses are expressed as the equivalent departure from the + optimal line length, multiplied by 10. e.g. assigning something a + cost of 50 means that it is as bad as a line 5 characters too short + or too long. The definition of SHORT_COST(n) should not be changed. + However, EQUIV(n) may need tuning. */ + +/* FIXME: "fmt" misbehaves given large inputs or options. One + possible workaround for part of the problem is to change COST to be + a floating-point type. There are other problems besides COST, + though; see MAXWORDS below. */ + +typedef long int COST; + +#define MAXCOST TYPE_MAXIMUM (COST) + +#define SQR(n) ((n) * (n)) +#define EQUIV(n) SQR ((COST) (n)) + +/* Cost of a filled line n chars longer or shorter than goal_width. */ +#define SHORT_COST(n) EQUIV ((n) * 10) + +/* Cost of the difference between adjacent filled lines. */ +#define RAGGED_COST(n) (SHORT_COST (n) / 2) + +/* Basic cost per line. */ +#define LINE_COST EQUIV (70) + +/* Cost of breaking a line after the first word of a sentence, where + the length of the word is N. */ +#define WIDOW_COST(n) (EQUIV (200) / ((n) + 2)) + +/* Cost of breaking a line before the last word of a sentence, where + the length of the word is N. */ +#define ORPHAN_COST(n) (EQUIV (150) / ((n) + 2)) + +/* Bonus for breaking a line at the end of a sentence. */ +#define SENTENCE_BONUS EQUIV (50) + +/* Cost of breaking a line after a period not marking end of a sentence. + With the definition of sentence we are using (borrowed from emacs, see + get_line()) such a break would then look like a sentence break. Hence + we assign a very high cost -- it should be avoided unless things are + really bad. */ +#define NOBREAK_COST EQUIV (600) + +/* Bonus for breaking a line before open parenthesis. */ +#define PAREN_BONUS EQUIV (40) + +/* Bonus for breaking a line after other punctuation. */ +#define PUNCT_BONUS EQUIV(40) + +/* Credit for breaking a long paragraph one line later. */ +#define LINE_CREDIT EQUIV(3) + +/* Size of paragraph buffer, in words and characters. Longer paragraphs + are handled neatly (cf. flush_paragraph()), so long as these values + are considerably greater than required by the width. These values + cannot be extended indefinitely: doing so would run into size limits + and/or cause more overflows in cost calculations. FIXME: Remove these + arbitrary limits. */ + +#define MAXWORDS 1000 +#define MAXCHARS 5000 + +/* Extra ctype(3)-style macros. */ + +#define isopen(c) (strchr ("(['`\"", c) != nullptr) +#define isclose(c) (strchr (")]'\"", c) != nullptr) +#define isperiod(c) (strchr (".?!", c) != nullptr) + +/* Size of a tab stop, for expansion on input and re-introduction on + output. */ +#define TABWIDTH 8 + +/* Word descriptor structure. */ + +typedef struct Word WORD; + +struct Word + { + + /* Static attributes determined during input. */ + + char const *text; /* the text of the word */ + int length; /* length of this word */ + int space; /* the size of the following space */ + unsigned int paren:1; /* starts with open paren */ + unsigned int period:1; /* ends in [.?!])* */ + unsigned int punct:1; /* ends in punctuation */ + unsigned int final:1; /* end of sentence */ + + /* The remaining fields are computed during the optimization. */ + + int line_length; /* length of the best line starting here */ + COST best_cost; /* cost of best paragraph starting here */ + WORD *next_break; /* break which achieves best_cost */ + }; + +/* Forward declarations. */ + +static void set_prefix (char *p); +static bool fmt (FILE *f, char const *); +static bool get_paragraph (FILE *f); +static int get_line (FILE *f, int c); +static int get_prefix (FILE *f); +static int get_space (FILE *f, int c); +static int copy_rest (FILE *f, int c); +static bool same_para (int c); +static void flush_paragraph (void); +static void fmt_paragraph (void); +static void check_punctuation (WORD *w); +static COST base_cost (WORD *this); +static COST line_cost (WORD *next, int len); +static void put_paragraph (WORD *finish); +static void put_line (WORD *w, int indent); +static void put_word (WORD *w); +static void put_space (int space); + +/* Option values. */ + +/* If true, first 2 lines may have different indent (default false). */ +static bool crown; + +/* If true, first 2 lines _must_ have different indent (default false). */ +static bool tagged; + +/* If true, each line is a paragraph on its own (default false). */ +static bool split; + +/* If true, don't preserve inter-word spacing (default false). */ +static bool uniform; + +/* Prefix minus leading and trailing spaces (default ""). */ +static char const *prefix; + +/* User-supplied maximum line width (default WIDTH). The only output + lines longer than this will each comprise a single word. */ +static int max_width; + +/* Values derived from the option values. */ + +/* The length of prefix minus leading space. */ +static int prefix_full_length; + +/* The length of the leading space trimmed from the prefix. */ +static int prefix_lead_space; + +/* The length of prefix minus leading and trailing space. */ +static int prefix_length; + +/* The preferred width of text lines, set to LEEWAY % less than max_width. */ +static int goal_width; + +/* Dynamic variables. */ + +/* Start column of the character most recently read from the input file. */ +static int in_column; + +/* Start column of the next character to be written to stdout. */ +static int out_column; + +/* Space for the paragraph text -- longer paragraphs are handled neatly + (cf. flush_paragraph()). */ +static char parabuf[MAXCHARS]; + +/* A pointer into parabuf, indicating the first unused character position. */ +static char *wptr; + +/* The words of a paragraph -- longer paragraphs are handled neatly + (cf. flush_paragraph()). */ +static WORD word[MAXWORDS]; + +/* A pointer into the above word array, indicating the first position + after the last complete word. Sometimes it will point at an incomplete + word. */ +static WORD *word_limit; + +/* If true, current input file contains tab characters, and so tabs can be + used for white space on output. */ +static bool tabs; + +/* Space before trimmed prefix on each line of the current paragraph. */ +static int prefix_indent; + +/* Indentation of the first line of the current paragraph. */ +static int first_indent; + +/* Indentation of other lines of the current paragraph */ +static int other_indent; + +/* To detect the end of a paragraph, we need to look ahead to the first + non-blank character after the prefix on the next line, or the first + character on the following line that failed to match the prefix. + We can reconstruct the lookahead from that character (next_char), its + position on the line (in_column) and the amount of space before the + prefix (next_prefix_indent). See get_paragraph() and copy_rest(). */ + +/* The last character read from the input file. */ +static int next_char; + +/* The space before the trimmed prefix (or part of it) on the next line + after the current paragraph. */ +static int next_prefix_indent; + +/* If nonzero, the length of the last line output in the current + paragraph, used to charge for raggedness at the split point for long + paragraphs chosen by fmt_paragraph(). */ +static int last_line_length; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [-WIDTH] [OPTION]... [FILE]...\n"), program_name); + fputs (_("\ +Reformat each paragraph in the FILE(s), writing to standard output.\n\ +The option -WIDTH is an abbreviated form of --width=DIGITS.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -c, --crown-margin preserve indentation of first two lines\n\ + -p, --prefix=STRING reformat only lines beginning with STRING,\n\ + reattaching the prefix to reformatted lines\n\ + -s, --split-only split long lines, but do not refill\n\ +"), + stdout); + /* Tell xgettext that the "% o" below is not a printf-style + format string: xgettext:no-c-format */ + fputs (_("\ + -t, --tagged-paragraph indentation of first line different from second\n\ + -u, --uniform-spacing one space between words, two after sentences\n\ + -w, --width=WIDTH maximum line width (default of 75 columns)\n\ + -g, --goal=WIDTH goal width (default of 93% of width)\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Decode options and launch execution. */ + +static struct option const long_options[] = +{ + {"crown-margin", no_argument, nullptr, 'c'}, + {"prefix", required_argument, nullptr, 'p'}, + {"split-only", no_argument, nullptr, 's'}, + {"tagged-paragraph", no_argument, nullptr, 't'}, + {"uniform-spacing", no_argument, nullptr, 'u'}, + {"width", required_argument, nullptr, 'w'}, + {"goal", required_argument, nullptr, 'g'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0}, +}; + +int +main (int argc, char **argv) +{ + int optchar; + bool ok = true; + char const *max_width_option = nullptr; + char const *goal_width_option = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + crown = tagged = split = uniform = false; + max_width = WIDTH; + prefix = ""; + prefix_length = prefix_lead_space = prefix_full_length = 0; + + if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1])) + { + /* Old option syntax; a dash followed by one or more digits. */ + max_width_option = argv[1] + 1; + + /* Make the option we just parsed invisible to getopt. */ + argv[1] = argv[0]; + argv++; + argc--; + } + + while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:g:", + long_options, nullptr)) + != -1) + switch (optchar) + { + default: + if (ISDIGIT (optchar)) + error (0, 0, _("invalid option -- %c; -WIDTH is recognized\ + only when it is the first\noption; use -w N instead"), + optchar); + usage (EXIT_FAILURE); + + case 'c': + crown = true; + break; + + case 's': + split = true; + break; + + case 't': + tagged = true; + break; + + case 'u': + uniform = true; + break; + + case 'w': + max_width_option = optarg; + break; + + case 'g': + goal_width_option = optarg; + break; + + case 'p': + set_prefix (optarg); + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + } + + if (max_width_option) + { + /* Limit max_width to MAXCHARS / 2; otherwise, the resulting + output can be quite ugly. */ + max_width = xdectoumax (max_width_option, 0, MAXCHARS / 2, "", + _("invalid width"), 0); + } + + if (goal_width_option) + { + /* Limit goal_width to max_width. */ + goal_width = xdectoumax (goal_width_option, 0, max_width, "", + _("invalid width"), 0); + if (max_width_option == nullptr) + max_width = goal_width + 10; + } + else + { + goal_width = max_width * (2 * (100 - LEEWAY) + 1) / 200; + } + + bool have_read_stdin = false; + + if (optind == argc) + { + have_read_stdin = true; + ok = fmt (stdin, "-"); + } + else + { + for (; optind < argc; optind++) + { + char *file = argv[optind]; + if (STREQ (file, "-")) + { + ok &= fmt (stdin, file); + have_read_stdin = true; + } + else + { + FILE *in_stream; + in_stream = fopen (file, "r"); + if (in_stream != nullptr) + ok &= fmt (in_stream, file); + else + { + error (0, errno, _("cannot open %s for reading"), + quoteaf (file)); + ok = false; + } + } + } + } + + if (have_read_stdin && fclose (stdin) != 0) + error (EXIT_FAILURE, errno, "%s", _("closing standard input")); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} + +/* Trim space from the front and back of the string P, yielding the prefix, + and record the lengths of the prefix and the space trimmed. */ + +static void +set_prefix (char *p) +{ + char *s; + + prefix_lead_space = 0; + while (*p == ' ') + { + prefix_lead_space++; + p++; + } + prefix = p; + prefix_full_length = strlen (p); + s = p + prefix_full_length; + while (s > p && s[-1] == ' ') + s--; + *s = '\0'; + prefix_length = s - p; +} + +/* Read F and send formatted output to stdout. + Close F when done, unless F is stdin. Diagnose input errors, using FILE. + If !F, assume F resulted from an fopen failure and diagnose that. + Return true if successful. */ + +static bool +fmt (FILE *f, char const *file) +{ + fadvise (f, FADVISE_SEQUENTIAL); + tabs = false; + other_indent = 0; + next_char = get_prefix (f); + while (get_paragraph (f)) + { + fmt_paragraph (); + put_paragraph (word_limit); + } + + int err = ferror (f) ? 0 : -1; + if (f == stdin) + clearerr (f); + else if (fclose (f) != 0 && err < 0) + err = errno; + if (0 <= err) + error (0, err, err ? "%s" : _("read error"), quotef (file)); + return err < 0; +} + +/* Set the global variable 'other_indent' according to SAME_PARAGRAPH + and other global variables. */ + +static void +set_other_indent (bool same_paragraph) +{ + if (split) + other_indent = first_indent; + else if (crown) + { + other_indent = (same_paragraph ? in_column : first_indent); + } + else if (tagged) + { + if (same_paragraph && in_column != first_indent) + { + other_indent = in_column; + } + + /* Only one line: use the secondary indent from last time if it + splits, or 0 if there have been no multi-line paragraphs in the + input so far. But if these rules make the two indents the same, + pick a new secondary indent. */ + + else if (other_indent == first_indent) + other_indent = first_indent == 0 ? DEF_INDENT : 0; + } + else + { + other_indent = first_indent; + } +} + +/* Read a paragraph from input file F. A paragraph consists of a + maximal number of non-blank (excluding any prefix) lines subject to: + * In split mode, a paragraph is a single non-blank line. + * In crown mode, the second and subsequent lines must have the + same indentation, but possibly different from the indent of the + first line. + * Tagged mode is similar, but the first and second lines must have + different indentations. + * Otherwise, all lines of a paragraph must have the same indent. + If a prefix is in effect, it must be present at the same indent for + each line in the paragraph. + + Return false if end-of-file was encountered before the start of a + paragraph, else true. */ + +static bool +get_paragraph (FILE *f) +{ + int c; + + last_line_length = 0; + c = next_char; + + /* Scan (and copy) blank lines, and lines not introduced by the prefix. */ + + while (c == '\n' || c == EOF + || next_prefix_indent < prefix_lead_space + || in_column < next_prefix_indent + prefix_full_length) + { + c = copy_rest (f, c); + if (c == EOF) + { + next_char = EOF; + return false; + } + putchar ('\n'); + c = get_prefix (f); + } + + /* Got a suitable first line for a paragraph. */ + + prefix_indent = next_prefix_indent; + first_indent = in_column; + wptr = parabuf; + word_limit = word; + c = get_line (f, c); + set_other_indent (same_para (c)); + + /* Read rest of paragraph (unless split is specified). */ + + if (split) + { + /* empty */ + } + else if (crown) + { + if (same_para (c)) + { + do + { /* for each line till the end of the para */ + c = get_line (f, c); + } + while (same_para (c) && in_column == other_indent); + } + } + else if (tagged) + { + if (same_para (c) && in_column != first_indent) + { + do + { /* for each line till the end of the para */ + c = get_line (f, c); + } + while (same_para (c) && in_column == other_indent); + } + } + else + { + while (same_para (c) && in_column == other_indent) + c = get_line (f, c); + } + + (word_limit - 1)->period = (word_limit - 1)->final = true; + next_char = c; + return true; +} + +/* Copy to the output a line that failed to match the prefix, or that + was blank after the prefix. In the former case, C is the character + that failed to match the prefix. In the latter, C is \n or EOF. + Return the character (\n or EOF) ending the line. */ + +static int +copy_rest (FILE *f, int c) +{ + char const *s; + + out_column = 0; + if (in_column > next_prefix_indent || (c != '\n' && c != EOF)) + { + put_space (next_prefix_indent); + for (s = prefix; out_column != in_column && *s; out_column++) + putchar (*s++); + if (c != EOF && c != '\n') + put_space (in_column - out_column); + if (c == EOF && in_column >= next_prefix_indent + prefix_length) + putchar ('\n'); + } + while (c != '\n' && c != EOF) + { + putchar (c); + c = getc (f); + } + return c; +} + +/* Return true if a line whose first non-blank character after the + prefix (if any) is C could belong to the current paragraph, + otherwise false. */ + +static bool +same_para (int c) +{ + return (next_prefix_indent == prefix_indent + && in_column >= next_prefix_indent + prefix_full_length + && c != '\n' && c != EOF); +} + +/* Read a line from input file F, given first non-blank character C + after the prefix, and the following indent, and break it into words. + A word is a maximal non-empty string of non-white characters. A word + ending in [.?!][])"']* and followed by end-of-line or at least two + spaces ends a sentence, as in emacs. + + Return the first non-blank character of the next line. */ + +static int +get_line (FILE *f, int c) +{ + int start; + char *end_of_parabuf; + WORD *end_of_word; + + end_of_parabuf = ¶buf[MAXCHARS]; + end_of_word = &word[MAXWORDS - 2]; + + do + { /* for each word in a line */ + + /* Scan word. */ + + word_limit->text = wptr; + do + { + if (wptr == end_of_parabuf) + { + set_other_indent (true); + flush_paragraph (); + } + *wptr++ = c; + c = getc (f); + } + while (c != EOF && !c_isspace (c)); + in_column += word_limit->length = wptr - word_limit->text; + check_punctuation (word_limit); + + /* Scan inter-word space. */ + + start = in_column; + c = get_space (f, c); + word_limit->space = in_column - start; + word_limit->final = (c == EOF + || (word_limit->period + && (c == '\n' || word_limit->space > 1))); + if (c == '\n' || c == EOF || uniform) + word_limit->space = word_limit->final ? 2 : 1; + if (word_limit == end_of_word) + { + set_other_indent (true); + flush_paragraph (); + } + word_limit++; + } + while (c != '\n' && c != EOF); + return get_prefix (f); +} + +/* Read a prefix from input file F. Return either first non-matching + character, or first non-blank character after the prefix. */ + +static int +get_prefix (FILE *f) +{ + int c; + + in_column = 0; + c = get_space (f, getc (f)); + if (prefix_length == 0) + next_prefix_indent = prefix_lead_space < in_column ? + prefix_lead_space : in_column; + else + { + char const *p; + next_prefix_indent = in_column; + for (p = prefix; *p != '\0'; p++) + { + unsigned char pc = *p; + if (c != pc) + return c; + in_column++; + c = getc (f); + } + c = get_space (f, c); + } + return c; +} + +/* Read blank characters from input file F, starting with C, and keeping + in_column up-to-date. Return first non-blank character. */ + +static int +get_space (FILE *f, int c) +{ + while (true) + { + if (c == ' ') + in_column++; + else if (c == '\t') + { + tabs = true; + in_column = (in_column / TABWIDTH + 1) * TABWIDTH; + } + else + return c; + c = getc (f); + } +} + +/* Set extra fields in word W describing any attached punctuation. */ + +static void +check_punctuation (WORD *w) +{ + char const *start = w->text; + char const *finish = start + (w->length - 1); + unsigned char fin = *finish; + + w->paren = isopen (*start); + w->punct = !! ispunct (fin); + while (start < finish && isclose (*finish)) + finish--; + w->period = isperiod (*finish); +} + +/* Flush part of the paragraph to make room. This function is called on + hitting the limit on the number of words or characters. */ + +static void +flush_paragraph (void) +{ + WORD *split_point; + WORD *w; + int shift; + COST best_break; + + /* In the special case where it's all one word, just flush it. */ + + if (word_limit == word) + { + fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout); + wptr = parabuf; + return; + } + + /* Otherwise: + - format what you have so far as a paragraph, + - find a low-cost line break near the end, + - output to there, + - make that the start of the paragraph. */ + + fmt_paragraph (); + + /* Choose a good split point. */ + + split_point = word_limit; + best_break = MAXCOST; + for (w = word->next_break; w != word_limit; w = w->next_break) + { + if (w->best_cost - w->next_break->best_cost < best_break) + { + split_point = w; + best_break = w->best_cost - w->next_break->best_cost; + } + if (best_break <= MAXCOST - LINE_CREDIT) + best_break += LINE_CREDIT; + } + put_paragraph (split_point); + + /* Copy text of words down to start of parabuf -- we use memmove because + the source and target may overlap. */ + + memmove (parabuf, split_point->text, wptr - split_point->text); + shift = split_point->text - parabuf; + wptr -= shift; + + /* Adjust text pointers. */ + + for (w = split_point; w <= word_limit; w++) + w->text -= shift; + + /* Copy words from split_point down to word -- we use memmove because + the source and target may overlap. */ + + memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word); + word_limit -= split_point - word; +} + +/* Compute the optimal formatting for the whole paragraph by computing + and remembering the optimal formatting for each suffix from the empty + one to the whole paragraph. */ + +static void +fmt_paragraph (void) +{ + WORD *start, *w; + int len; + COST wcost, best; + int saved_length; + + word_limit->best_cost = 0; + saved_length = word_limit->length; + word_limit->length = max_width; /* sentinel */ + + for (start = word_limit - 1; start >= word; start--) + { + best = MAXCOST; + len = start == word ? first_indent : other_indent; + + /* At least one word, however long, in the line. */ + + w = start; + len += w->length; + do + { + w++; + + /* Consider breaking before w. */ + + wcost = line_cost (w, len) + w->best_cost; + if (start == word && last_line_length > 0) + wcost += RAGGED_COST (len - last_line_length); + if (wcost < best) + { + best = wcost; + start->next_break = w; + start->line_length = len; + } + + /* This is a kludge to keep us from computing 'len' as the + sum of the sentinel length and some non-zero number. + Since the sentinel w->length may be INT_MAX, adding + to that would give a negative result. */ + if (w == word_limit) + break; + + len += (w - 1)->space + w->length; /* w > start >= word */ + } + while (len < max_width); + start->best_cost = best + base_cost (start); + } + + word_limit->length = saved_length; +} + +/* Work around . */ +#if 13 <= __GNUC__ +# pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value" +#endif + +/* Return the constant component of the cost of breaking before the + word THIS. */ + +static COST +base_cost (WORD *this) +{ + COST cost; + + cost = LINE_COST; + + if (this > word) + { + if ((this - 1)->period) + { + if ((this - 1)->final) + cost -= SENTENCE_BONUS; + else + cost += NOBREAK_COST; + } + else if ((this - 1)->punct) + cost -= PUNCT_BONUS; + else if (this > word + 1 && (this - 2)->final) + cost += WIDOW_COST ((this - 1)->length); + } + + if (this->paren) + cost -= PAREN_BONUS; + else if (this->final) + cost += ORPHAN_COST (this->length); + + return cost; +} + +/* Return the component of the cost of breaking before word NEXT that + depends on LEN, the length of the line beginning there. */ + +static COST +line_cost (WORD *next, int len) +{ + int n; + COST cost; + + if (next == word_limit) + return 0; + n = goal_width - len; + cost = SHORT_COST (n); + if (next->next_break != word_limit) + { + n = len - next->line_length; + cost += RAGGED_COST (n); + } + return cost; +} + +/* Output to stdout a paragraph from word up to (but not including) + FINISH, which must be in the next_break chain from word. */ + +static void +put_paragraph (WORD *finish) +{ + WORD *w; + + put_line (word, first_indent); + for (w = word->next_break; w != finish; w = w->next_break) + put_line (w, other_indent); +} + +/* Output to stdout the line beginning with word W, beginning in column + INDENT, including the prefix (if any). */ + +static void +put_line (WORD *w, int indent) +{ + WORD *endline; + + out_column = 0; + put_space (prefix_indent); + fputs (prefix, stdout); + out_column += prefix_length; + put_space (indent - out_column); + + endline = w->next_break - 1; + for (; w != endline; w++) + { + put_word (w); + put_space (w->space); + } + put_word (w); + last_line_length = out_column; + putchar ('\n'); +} + +/* Output to stdout the word W. */ + +static void +put_word (WORD *w) +{ + char const *s; + int n; + + s = w->text; + for (n = w->length; n != 0; n--) + putchar (*s++); + out_column += w->length; +} + +/* Output to stdout SPACE spaces, or equivalent tabs. */ + +static void +put_space (int space) +{ + int space_target, tab_target; + + space_target = out_column + space; + if (tabs) + { + tab_target = space_target / TABWIDTH * TABWIDTH; + if (out_column + 1 < tab_target) + while (out_column < tab_target) + { + putchar ('\t'); + out_column = (out_column / TABWIDTH + 1) * TABWIDTH; + } + } + while (out_column < space_target) + { + putchar (' '); + out_column++; + } +} diff --git a/src/fold.c b/src/fold.c new file mode 100644 index 0000000..5c0428d --- /dev/null +++ b/src/fold.c @@ -0,0 +1,307 @@ +/* fold -- wrap each input line to fit in specified width. + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie, djm@gnu.ai.mit.edu. */ + +#include + +#include +#include +#include + +#include "system.h" +#include "fadvise.h" +#include "xdectoint.h" + +#define TAB_WIDTH 8 + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "fold" + +#define AUTHORS proper_name ("David MacKenzie") + +/* If nonzero, try to break on whitespace. */ +static bool break_spaces; + +/* If nonzero, count bytes, not column positions. */ +static bool count_bytes; + +/* If nonzero, at least one of the files we read was standard input. */ +static bool have_read_stdin; + +static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::"; + +static struct option const longopts[] = +{ + {"bytes", no_argument, nullptr, 'b'}, + {"spaces", no_argument, nullptr, 's'}, + {"width", required_argument, nullptr, 'w'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Wrap input lines in each FILE, writing to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --bytes count bytes rather than columns\n\ + -s, --spaces break at spaces\n\ + -w, --width=WIDTH use WIDTH columns instead of 80\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Assuming the current column is COLUMN, return the column that + printing C will move the cursor to. + The first column is 0. */ + +static size_t +adjust_column (size_t column, char c) +{ + if (!count_bytes) + { + if (c == '\b') + { + if (column > 0) + column--; + } + else if (c == '\r') + column = 0; + else if (c == '\t') + column += TAB_WIDTH - column % TAB_WIDTH; + else /* if (isprint (c)) */ + column++; + } + else + column++; + return column; +} + +/* Fold file FILENAME, or standard input if FILENAME is "-", + to stdout, with maximum line length WIDTH. + Return true if successful. */ + +static bool +fold_file (char const *filename, size_t width) +{ + FILE *istream; + int c; + size_t column = 0; /* Screen column where next char will go. */ + size_t offset_out = 0; /* Index in 'line_out' for next char. */ + static char *line_out = nullptr; + static size_t allocated_out = 0; + int saved_errno; + + if (STREQ (filename, "-")) + { + istream = stdin; + have_read_stdin = true; + } + else + istream = fopen (filename, "r"); + + if (istream == nullptr) + { + error (0, errno, "%s", quotef (filename)); + return false; + } + + fadvise (istream, FADVISE_SEQUENTIAL); + + while ((c = getc (istream)) != EOF) + { + if (offset_out + 1 >= allocated_out) + line_out = X2REALLOC (line_out, &allocated_out); + + if (c == '\n') + { + line_out[offset_out++] = c; + fwrite (line_out, sizeof (char), offset_out, stdout); + column = offset_out = 0; + continue; + } + + rescan: + column = adjust_column (column, c); + + if (column > width) + { + /* This character would make the line too long. + Print the line plus a newline, and make this character + start the next line. */ + if (break_spaces) + { + bool found_blank = false; + size_t logical_end = offset_out; + + /* Look for the last blank. */ + while (logical_end) + { + --logical_end; + if (isblank (to_uchar (line_out[logical_end]))) + { + found_blank = true; + break; + } + } + + if (found_blank) + { + size_t i; + + /* Found a blank. Don't output the part after it. */ + logical_end++; + fwrite (line_out, sizeof (char), (size_t) logical_end, + stdout); + putchar ('\n'); + /* Move the remainder to the beginning of the next line. + The areas being copied here might overlap. */ + memmove (line_out, line_out + logical_end, + offset_out - logical_end); + offset_out -= logical_end; + for (column = i = 0; i < offset_out; i++) + column = adjust_column (column, line_out[i]); + goto rescan; + } + } + + if (offset_out == 0) + { + line_out[offset_out++] = c; + continue; + } + + line_out[offset_out++] = '\n'; + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); + column = offset_out = 0; + goto rescan; + } + + line_out[offset_out++] = c; + } + + saved_errno = errno; + if (!ferror (istream)) + saved_errno = 0; + + if (offset_out) + fwrite (line_out, sizeof (char), (size_t) offset_out, stdout); + + if (STREQ (filename, "-")) + clearerr (istream); + else if (fclose (istream) != 0 && !saved_errno) + saved_errno = errno; + + if (saved_errno) + { + error (0, saved_errno, "%s", quotef (filename)); + return false; + } + + return true; +} + +int +main (int argc, char **argv) +{ + size_t width = 80; + int i; + int optc; + bool ok; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + break_spaces = count_bytes = have_read_stdin = false; + + while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1) + { + char optargbuf[2]; + + switch (optc) + { + case 'b': /* Count bytes rather than columns. */ + count_bytes = true; + break; + + case 's': /* Break at word boundaries. */ + break_spaces = true; + break; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (optarg) + optarg--; + else + { + optargbuf[0] = optc; + optargbuf[1] = '\0'; + optarg = optargbuf; + } + FALLTHROUGH; + case 'w': /* Line width. */ + width = xdectoumax (optarg, 1, SIZE_MAX - TAB_WIDTH - 1, "", + _("invalid number of columns"), 0); + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (argc == optind) + ok = fold_file ("-", width); + else + { + ok = true; + for (i = optind; i < argc; i++) + ok &= fold_file (argv[i], width); + } + + if (have_read_stdin && fclose (stdin) == EOF) + error (EXIT_FAILURE, errno, "-"); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/force-link.c b/src/force-link.c new file mode 100644 index 0000000..218616f --- /dev/null +++ b/src/force-link.c @@ -0,0 +1,184 @@ +/* Implement ln -f "atomically" + + Copyright 2017-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Eggert. */ + +/* A naive "ln -f A B" unlinks B and then links A to B. This module + instead links A to a randomly-named temporary T in B's directory, + and then renames T to B. This approach has a window with a + randomly-named temporary, which is safer for many applications than + a window where B does not exist. */ + +#include +#include "system.h" + +#include "force-link.h" + +#include + +/* A basename pattern suitable for a temporary file. It should work + even on file systems like FAT that support only short names. + "Cu" is short for "Coreutils" or for "Changeable unstable", + take your pick.... */ + +static char const simple_pattern[] = "CuXXXXXX"; +enum { x_suffix_len = sizeof "XXXXXX" - 1 }; + +/* A size for smallish buffers containing file names. Longer file + names can use malloc. */ + +enum { smallsize = 256 }; + +/* Return a template for a file in the same directory as DSTNAME. + Use BUF if the template fits, otherwise use malloc and return nullptr + (setting errno) if unsuccessful. */ + +static char * +samedir_template (char const *dstname, char buf[smallsize]) +{ + ptrdiff_t dstdirlen = last_component (dstname) - dstname; + size_t dsttmpsize = dstdirlen + sizeof simple_pattern; + char *dsttmp; + if (dsttmpsize <= smallsize) + dsttmp = buf; + else + { + dsttmp = malloc (dsttmpsize); + if (!dsttmp) + return dsttmp; + } + strcpy (mempcpy (dsttmp, dstname, dstdirlen), simple_pattern); + return dsttmp; +} + + +/* Auxiliaries for force_linkat. */ + +struct link_arg +{ + int srcdir; + char const *srcname; + int dstdir; + int flags; +}; + +static int +try_link (char *dest, void *arg) +{ + struct link_arg *a = arg; + return linkat (a->srcdir, a->srcname, a->dstdir, dest, a->flags); +} + +/* Hard-link directory SRCDIR's file SRCNAME to directory DSTDIR's + file DSTNAME, using linkat-style FLAGS to control the linking. + If FORCE and DSTNAME already exists, replace it atomically. + If LINKAT_ERRNO is 0, the hard link is already done; if positive, + the hard link was tried and failed with errno == LINKAT_ERRNO. Return + -1 if successful and DSTNAME already existed, + 0 if successful and DSTNAME did not already exist, and + a positive errno value on failure. */ +extern int +force_linkat (int srcdir, char const *srcname, + int dstdir, char const *dstname, int flags, bool force, + int linkat_errno) +{ + if (linkat_errno < 0) + linkat_errno = (linkat (srcdir, srcname, dstdir, dstname, flags) == 0 + ? 0 : errno); + if (!force || linkat_errno != EEXIST) + return linkat_errno; + + char buf[smallsize]; + char *dsttmp = samedir_template (dstname, buf); + if (! dsttmp) + return errno; + struct link_arg arg = { srcdir, srcname, dstdir, flags }; + int err; + + if (try_tempname_len (dsttmp, 0, &arg, try_link, x_suffix_len) != 0) + err = errno; + else + { + err = renameat (dstdir, dsttmp, dstdir, dstname) == 0 ? -1 : errno; + /* Unlink DSTTMP even if renameat succeeded, in case DSTTMP + and DSTNAME were already the same hard link and renameat + was a no-op. */ + unlinkat (dstdir, dsttmp, 0); + } + + if (dsttmp != buf) + free (dsttmp); + return err; +} + + +/* Auxiliaries for force_symlinkat. */ + +struct symlink_arg +{ + char const *srcname; + int dstdir; +}; + +static int +try_symlink (char *dest, void *arg) +{ + struct symlink_arg *a = arg; + return symlinkat (a->srcname, a->dstdir, dest); +} + +/* Create a symlink containing SRCNAME in directory DSTDIR's file DSTNAME. + If FORCE and DSTNAME already exists, replace it atomically. + If SYMLINKAT_ERRNO is 0, the symlink is already done; if positive, + the symlink was tried and failed with errno == SYMLINKAT_ERRNO. Return + -1 if successful and DSTNAME already existed, + 0 if successful and DSTNAME did not already exist, and + a positive errno value on failure. */ +extern int +force_symlinkat (char const *srcname, int dstdir, char const *dstname, + bool force, int symlinkat_errno) +{ + if (symlinkat_errno < 0) + symlinkat_errno = symlinkat (srcname, dstdir, dstname) == 0 ? 0 : errno; + if (!force || symlinkat_errno != EEXIST) + return symlinkat_errno; + + char buf[smallsize]; + char *dsttmp = samedir_template (dstname, buf); + if (!dsttmp) + return errno; + struct symlink_arg arg = { srcname, dstdir }; + int err; + + if (try_tempname_len (dsttmp, 0, &arg, try_symlink, x_suffix_len) != 0) + err = errno; + else if (renameat (dstdir, dsttmp, dstdir, dstname) != 0) + { + err = errno; + unlinkat (dstdir, dsttmp, 0); + } + else + { + /* Don't worry about renameat being a no-op, since DSTTMP is + newly created. */ + err = -1; + } + + if (dsttmp != buf) + free (dsttmp); + return err; +} diff --git a/src/force-link.h b/src/force-link.h new file mode 100644 index 0000000..f051658 --- /dev/null +++ b/src/force-link.h @@ -0,0 +1,4 @@ +extern int force_linkat (int, char const *, int, char const *, int, bool, int) + _GL_ATTRIBUTE_NONNULL (); +extern int force_symlinkat (char const *, int, char const *, bool, int) + _GL_ATTRIBUTE_NONNULL (); diff --git a/src/fs-is-local.h b/src/fs-is-local.h new file mode 100644 index 0000000..0eff814 --- /dev/null +++ b/src/fs-is-local.h @@ -0,0 +1,143 @@ +/* Map each S_MAGIC_* value to 1, 0 or -1. + 1 if it is known to be a remote file system type, + 0 if it is known to be a local file system type, or -1 otherwise. */ +static inline int +is_local_fs_type (unsigned long int magic) +{ + switch (magic) + { + case S_MAGIC_AAFS: return 1; + case S_MAGIC_ACFS: return 0; + case S_MAGIC_ADFS: return 1; + case S_MAGIC_AFFS: return 1; + case S_MAGIC_AFS: return 0; + case S_MAGIC_ANON_INODE_FS: return 1; + case S_MAGIC_AUFS: return 0; + case S_MAGIC_AUTOFS: return 1; + case S_MAGIC_BALLOON_KVM: return 1; + case S_MAGIC_BEFS: return 1; + case S_MAGIC_BDEVFS: return 1; + case S_MAGIC_BFS: return 1; + case S_MAGIC_BINDERFS: return 1; + case S_MAGIC_BPF_FS: return 1; + case S_MAGIC_BINFMTFS: return 1; + case S_MAGIC_BTRFS: return 1; + case S_MAGIC_BTRFS_TEST: return 1; + case S_MAGIC_CEPH: return 0; + case S_MAGIC_CGROUP: return 1; + case S_MAGIC_CGROUP2: return 1; + case S_MAGIC_CIFS: return 0; + case S_MAGIC_CODA: return 0; + case S_MAGIC_COH: return 1; + case S_MAGIC_CONFIGFS: return 1; + case S_MAGIC_CRAMFS: return 1; + case S_MAGIC_CRAMFS_WEND: return 1; + case S_MAGIC_DAXFS: return 1; + case S_MAGIC_DEBUGFS: return 1; + case S_MAGIC_DEVFS: return 1; + case S_MAGIC_DEVMEM: return 1; + case S_MAGIC_DEVPTS: return 1; + case S_MAGIC_DMA_BUF: return 1; + case S_MAGIC_ECRYPTFS: return 1; + case S_MAGIC_EFIVARFS: return 1; + case S_MAGIC_EFS: return 1; + case S_MAGIC_EROFS_V1: return 1; + case S_MAGIC_EXFAT: return 1; + case S_MAGIC_EXFS: return 1; + case S_MAGIC_EXOFS: return 1; + case S_MAGIC_EXT: return 1; + case S_MAGIC_EXT2: return 1; + case S_MAGIC_EXT2_OLD: return 1; + case S_MAGIC_F2FS: return 1; + case S_MAGIC_FAT: return 1; + case S_MAGIC_FHGFS: return 0; + case S_MAGIC_FUSEBLK: return 0; + case S_MAGIC_FUSECTL: return 0; + case S_MAGIC_FUTEXFS: return 1; + case S_MAGIC_GFS: return 0; + case S_MAGIC_GPFS: return 0; + case S_MAGIC_HFS: return 1; + case S_MAGIC_HFS_PLUS: return 1; + case S_MAGIC_HFS_X: return 1; + case S_MAGIC_HOSTFS: return 1; + case S_MAGIC_HPFS: return 1; + case S_MAGIC_HUGETLBFS: return 1; + case S_MAGIC_MTD_INODE_FS: return 1; + case S_MAGIC_IBRIX: return 0; + case S_MAGIC_INOTIFYFS: return 1; + case S_MAGIC_ISOFS: return 1; + case S_MAGIC_ISOFS_R_WIN: return 1; + case S_MAGIC_ISOFS_WIN: return 1; + case S_MAGIC_JFFS: return 1; + case S_MAGIC_JFFS2: return 1; + case S_MAGIC_JFS: return 1; + case S_MAGIC_KAFS: return 0; + case S_MAGIC_LOGFS: return 1; + case S_MAGIC_LUSTRE: return 0; + case S_MAGIC_M1FS: return 1; + case S_MAGIC_MINIX: return 1; + case S_MAGIC_MINIX_30: return 1; + case S_MAGIC_MINIX_V2: return 1; + case S_MAGIC_MINIX_V2_30: return 1; + case S_MAGIC_MINIX_V3: return 1; + case S_MAGIC_MQUEUE: return 1; + case S_MAGIC_MSDOS: return 1; + case S_MAGIC_NCP: return 0; + case S_MAGIC_NFS: return 0; + case S_MAGIC_NFSD: return 0; + case S_MAGIC_NILFS: return 1; + case S_MAGIC_NSFS: return 1; + case S_MAGIC_NTFS: return 1; + case S_MAGIC_OPENPROM: return 1; + case S_MAGIC_OCFS2: return 0; + case S_MAGIC_OVERLAYFS: return 0; + case S_MAGIC_PANFS: return 0; + case S_MAGIC_PIPEFS: return 0; + case S_MAGIC_PPC_CMM: return 1; + case S_MAGIC_PRL_FS: return 0; + case S_MAGIC_PROC: return 1; + case S_MAGIC_PSTOREFS: return 1; + case S_MAGIC_QNX4: return 1; + case S_MAGIC_QNX6: return 1; + case S_MAGIC_RAMFS: return 1; + case S_MAGIC_RDTGROUP: return 1; + case S_MAGIC_REISERFS: return 1; + case S_MAGIC_ROMFS: return 1; + case S_MAGIC_RPC_PIPEFS: return 1; + case S_MAGIC_SDCARDFS: return 1; + case S_MAGIC_SECRETMEM: return 1; + case S_MAGIC_SECURITYFS: return 1; + case S_MAGIC_SELINUX: return 1; + case S_MAGIC_SMACK: return 1; + case S_MAGIC_SMB: return 0; + case S_MAGIC_SMB2: return 0; + case S_MAGIC_SNFS: return 0; + case S_MAGIC_SOCKFS: return 1; + case S_MAGIC_SQUASHFS: return 1; + case S_MAGIC_SYSFS: return 1; + case S_MAGIC_SYSV2: return 1; + case S_MAGIC_SYSV4: return 1; + case S_MAGIC_TMPFS: return 1; + case S_MAGIC_TRACEFS: return 1; + case S_MAGIC_UBIFS: return 1; + case S_MAGIC_UDF: return 1; + case S_MAGIC_UFS: return 1; + case S_MAGIC_UFS_BYTESWAPPED: return 1; + case S_MAGIC_USBDEVFS: return 1; + case S_MAGIC_V9FS: return 1; + case S_MAGIC_VBOXSF: return 0; + case S_MAGIC_VMHGFS: return 0; + case S_MAGIC_VXFS: return 0; + case S_MAGIC_VZFS: return 1; + case S_MAGIC_WSLFS: return 1; + case S_MAGIC_XENFS: return 1; + case S_MAGIC_XENIX: return 1; + case S_MAGIC_XFS: return 1; + case S_MAGIC_XIAFS: return 1; + case S_MAGIC_Z3FOLD: return 1; + case S_MAGIC_ZFS: return 1; + case S_MAGIC_ZONEFS: return 1; + case S_MAGIC_ZSMALLOC: return 1; + default: return -1; + } +} diff --git a/src/fs.h b/src/fs.h new file mode 100644 index 0000000..bae3f02 --- /dev/null +++ b/src/fs.h @@ -0,0 +1,140 @@ +/* Define the magic numbers as given by statfs(2). + Please send additions to bug-coreutils@gnu.org and meskes@debian.org. + This file is generated automatically from ./src/stat.c. */ + +#if defined __linux__ || defined __ANDROID__ +# define S_MAGIC_AAFS 0x5A3C69F0 +# define S_MAGIC_ACFS 0x61636673 +# define S_MAGIC_ADFS 0xADF5 +# define S_MAGIC_AFFS 0xADFF +# define S_MAGIC_AFS 0x5346414F +# define S_MAGIC_ANON_INODE_FS 0x09041934 +# define S_MAGIC_AUFS 0x61756673 +# define S_MAGIC_AUTOFS 0x0187 +# define S_MAGIC_BALLOON_KVM 0x13661366 +# define S_MAGIC_BEFS 0x42465331 +# define S_MAGIC_BDEVFS 0x62646576 +# define S_MAGIC_BFS 0x1BADFACE +# define S_MAGIC_BINDERFS 0x6C6F6F70 +# define S_MAGIC_BPF_FS 0xCAFE4A11 +# define S_MAGIC_BINFMTFS 0x42494E4D +# define S_MAGIC_BTRFS 0x9123683E +# define S_MAGIC_BTRFS_TEST 0x73727279 +# define S_MAGIC_CEPH 0x00C36400 +# define S_MAGIC_CGROUP 0x0027E0EB +# define S_MAGIC_CGROUP2 0x63677270 +# define S_MAGIC_CIFS 0xFF534D42 +# define S_MAGIC_CODA 0x73757245 +# define S_MAGIC_COH 0x012FF7B7 +# define S_MAGIC_CONFIGFS 0x62656570 +# define S_MAGIC_CRAMFS 0x28CD3D45 +# define S_MAGIC_CRAMFS_WEND 0x453DCD28 +# define S_MAGIC_DAXFS 0x64646178 +# define S_MAGIC_DEBUGFS 0x64626720 +# define S_MAGIC_DEVFS 0x1373 +# define S_MAGIC_DEVMEM 0x454D444D +# define S_MAGIC_DEVPTS 0x1CD1 +# define S_MAGIC_DMA_BUF 0x444D4142 +# define S_MAGIC_ECRYPTFS 0xF15F +# define S_MAGIC_EFIVARFS 0xDE5E81E4 +# define S_MAGIC_EFS 0x00414A53 +# define S_MAGIC_EROFS_V1 0xE0F5E1E2 +# define S_MAGIC_EXFAT 0x2011BAB0 +# define S_MAGIC_EXFS 0x45584653 +# define S_MAGIC_EXOFS 0x5DF5 +# define S_MAGIC_EXT 0x137D +# define S_MAGIC_EXT2 0xEF53 +# define S_MAGIC_EXT2_OLD 0xEF51 +# define S_MAGIC_F2FS 0xF2F52010 +# define S_MAGIC_FAT 0x4006 +# define S_MAGIC_FHGFS 0x19830326 +# define S_MAGIC_FUSEBLK 0x65735546 +# define S_MAGIC_FUSECTL 0x65735543 +# define S_MAGIC_FUTEXFS 0x0BAD1DEA +# define S_MAGIC_GFS 0x01161970 +# define S_MAGIC_GPFS 0x47504653 +# define S_MAGIC_HFS 0x4244 +# define S_MAGIC_HFS_PLUS 0x482B +# define S_MAGIC_HFS_X 0x4858 +# define S_MAGIC_HOSTFS 0x00C0FFEE +# define S_MAGIC_HPFS 0xF995E849 +# define S_MAGIC_HUGETLBFS 0x958458F6 +# define S_MAGIC_MTD_INODE_FS 0x11307854 +# define S_MAGIC_IBRIX 0x013111A8 +# define S_MAGIC_INOTIFYFS 0x2BAD1DEA +# define S_MAGIC_ISOFS 0x9660 +# define S_MAGIC_ISOFS_R_WIN 0x4004 +# define S_MAGIC_ISOFS_WIN 0x4000 +# define S_MAGIC_JFFS 0x07C0 +# define S_MAGIC_JFFS2 0x72B6 +# define S_MAGIC_JFS 0x3153464A +# define S_MAGIC_KAFS 0x6B414653 +# define S_MAGIC_LOGFS 0xC97E8168 +# define S_MAGIC_LUSTRE 0x0BD00BD0 +# define S_MAGIC_M1FS 0x5346314D +# define S_MAGIC_MINIX 0x137F +# define S_MAGIC_MINIX_30 0x138F +# define S_MAGIC_MINIX_V2 0x2468 +# define S_MAGIC_MINIX_V2_30 0x2478 +# define S_MAGIC_MINIX_V3 0x4D5A +# define S_MAGIC_MQUEUE 0x19800202 +# define S_MAGIC_MSDOS 0x4D44 +# define S_MAGIC_NCP 0x564C +# define S_MAGIC_NFS 0x6969 +# define S_MAGIC_NFSD 0x6E667364 +# define S_MAGIC_NILFS 0x3434 +# define S_MAGIC_NSFS 0x6E736673 +# define S_MAGIC_NTFS 0x5346544E +# define S_MAGIC_OPENPROM 0x9FA1 +# define S_MAGIC_OCFS2 0x7461636F +# define S_MAGIC_OVERLAYFS 0x794C7630 +# define S_MAGIC_PANFS 0xAAD7AAEA +# define S_MAGIC_PIPEFS 0x50495045 +# define S_MAGIC_PPC_CMM 0xC7571590 +# define S_MAGIC_PRL_FS 0x7C7C6673 +# define S_MAGIC_PROC 0x9FA0 +# define S_MAGIC_PSTOREFS 0x6165676C +# define S_MAGIC_QNX4 0x002F +# define S_MAGIC_QNX6 0x68191122 +# define S_MAGIC_RAMFS 0x858458F6 +# define S_MAGIC_RDTGROUP 0x07655821 +# define S_MAGIC_REISERFS 0x52654973 +# define S_MAGIC_ROMFS 0x7275 +# define S_MAGIC_RPC_PIPEFS 0x67596969 +# define S_MAGIC_SDCARDFS 0x5DCA2DF5 +# define S_MAGIC_SECRETMEM 0x5345434D +# define S_MAGIC_SECURITYFS 0x73636673 +# define S_MAGIC_SELINUX 0xF97CFF8C +# define S_MAGIC_SMACK 0x43415D53 +# define S_MAGIC_SMB 0x517B +# define S_MAGIC_SMB2 0xFE534D42 +# define S_MAGIC_SNFS 0xBEEFDEAD +# define S_MAGIC_SOCKFS 0x534F434B +# define S_MAGIC_SQUASHFS 0x73717368 +# define S_MAGIC_SYSFS 0x62656572 +# define S_MAGIC_SYSV2 0x012FF7B6 +# define S_MAGIC_SYSV4 0x012FF7B5 +# define S_MAGIC_TMPFS 0x01021994 +# define S_MAGIC_TRACEFS 0x74726163 +# define S_MAGIC_UBIFS 0x24051905 +# define S_MAGIC_UDF 0x15013346 +# define S_MAGIC_UFS 0x00011954 +# define S_MAGIC_UFS_BYTESWAPPED 0x54190100 +# define S_MAGIC_USBDEVFS 0x9FA2 +# define S_MAGIC_V9FS 0x01021997 +# define S_MAGIC_VBOXSF 0x786F4256 +# define S_MAGIC_VMHGFS 0xBACBACBC +# define S_MAGIC_VXFS 0xA501FCF5 +# define S_MAGIC_VZFS 0x565A4653 +# define S_MAGIC_WSLFS 0x53464846 +# define S_MAGIC_XENFS 0xABBA1974 +# define S_MAGIC_XENIX 0x012FF7B4 +# define S_MAGIC_XFS 0x58465342 +# define S_MAGIC_XIAFS 0x012FD16D +# define S_MAGIC_Z3FOLD 0x0033 +# define S_MAGIC_ZFS 0x2FC12FC1 +# define S_MAGIC_ZONEFS 0x5A4F4653 +# define S_MAGIC_ZSMALLOC 0x58295829 +#elif defined __GNU__ +# include +#endif diff --git a/src/getlimits.c b/src/getlimits.c new file mode 100644 index 0000000..4a9dffe --- /dev/null +++ b/src/getlimits.c @@ -0,0 +1,172 @@ +/* getlimits - print various platform dependent limits. + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady */ + +#include /* sets _FILE_OFFSET_BITS=64 etc. */ +#include +#include +#include + +#include "ftoastr.h" +#include "system.h" +#include "long-options.h" + +#define PROGRAM_NAME "getlimits" + +#define AUTHORS proper_name_lite ("Padraig Brady", "P\303\241draig Brady") + +#ifndef TIME_T_MAX +# define TIME_T_MAX TYPE_MAXIMUM (time_t) +#endif + +#ifndef TIME_T_MIN +# define TIME_T_MIN TYPE_MINIMUM (time_t) +#endif + +#ifndef SSIZE_MIN +# define SSIZE_MIN TYPE_MINIMUM (ssize_t) +#endif + +#ifndef PID_T_MIN +# define PID_T_MIN TYPE_MINIMUM (pid_t) +#endif + +/* These are not interesting to print. + * Instead of these defines it would be nice to be able to do + * #ifdef (TYPE##_MIN) in function macro below. */ +#define SIZE_MIN 0 +#define UCHAR_MIN 0 +#define UINT_MIN 0 +#define ULONG_MIN 0 +#define UINTMAX_MIN 0 +#define UID_T_MIN 0 +#define GID_T_MIN 0 + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s\n\ +"), program_name); + + fputs (_("\ +Output platform dependent limits in a format useful for shell scripts.\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Add one to the absolute value of the number whose textual + representation is BUF + 1. Do this in-place, in the buffer. + Return a pointer to the result, which is normally BUF + 1, but is + BUF if the representation grew in size. */ +static char const * +decimal_absval_add_one (char *buf) +{ + bool negative = (buf[1] == '-'); + char *absnum = buf + 1 + negative; + char *p = absnum + strlen (absnum); + absnum[-1] = '0'; + while (*--p == '9') + *p = '0'; + ++*p; + char *result = MIN (absnum, p); + if (negative) + *--result = '-'; + return result; +} + +#define PRINT_FLOATTYPE(N, T, FTOASTR, BUFSIZE) \ +static void \ +N (T x) \ +{ \ + char buf[BUFSIZE]; \ + FTOASTR (buf, sizeof buf, FTOASTR_LEFT_JUSTIFY, 0, x); \ + puts (buf); \ +} + +PRINT_FLOATTYPE (print_FLT, float, ftoastr, FLT_BUFSIZE_BOUND) +PRINT_FLOATTYPE (print_DBL, double, dtoastr, DBL_BUFSIZE_BOUND) +PRINT_FLOATTYPE (print_LDBL, long double, ldtoastr, LDBL_BUFSIZE_BOUND) + +int +main (int argc, char **argv) +{ + char limit[1 + MAX (INT_BUFSIZE_BOUND (intmax_t), + INT_BUFSIZE_BOUND (uintmax_t))]; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + VERSION, true, usage, AUTHORS, + (char const *) nullptr); + +#define print_int(TYPE) \ + sprintf (limit + 1, "%"PRIuMAX, (uintmax_t) TYPE##_MAX); \ + printf (#TYPE"_MAX=%s\n", limit + 1); \ + printf (#TYPE"_OFLOW=%s\n", decimal_absval_add_one (limit)); \ + if (TYPE##_MIN) \ + { \ + sprintf (limit + 1, "%"PRIdMAX, (intmax_t) TYPE##_MIN); \ + printf (#TYPE"_MIN=%s\n", limit + 1); \ + printf (#TYPE"_UFLOW=%s\n", decimal_absval_add_one (limit)); \ + } + +#define print_float(TYPE) \ + printf (#TYPE"_MIN="); print_##TYPE (TYPE##_MIN); \ + printf (#TYPE"_MAX="); print_##TYPE (TYPE##_MAX); + + /* Variable sized ints */ + print_int (CHAR); + print_int (SCHAR); + print_int (UCHAR); + print_int (SHRT); + print_int (INT); + print_int (UINT); + print_int (LONG); + print_int (ULONG); + print_int (SIZE); + print_int (SSIZE); + print_int (TIME_T); + print_int (UID_T); + print_int (GID_T); + print_int (PID_T); + print_int (OFF_T); + print_int (INTMAX); + print_int (UINTMAX); + + /* Variable sized floats */ + print_float (FLT); + print_float (DBL); + print_float (LDBL); + + return EXIT_SUCCESS; +} diff --git a/src/group-list.c b/src/group-list.c new file mode 100644 index 0000000..5230123 --- /dev/null +++ b/src/group-list.c @@ -0,0 +1,129 @@ +/* group-list.c --Print a list of group IDs or names. + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Arnold Robbins. + Major rewrite by David MacKenzie, djm@gnu.ai.mit.edu. + Extracted from id.c by James Youngman. */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "mgetgroups.h" +#include "quote.h" +#include "group-list.h" + + +/* Print all of the distinct groups the user is in. */ +extern bool +print_group_list (char const *username, + uid_t ruid, gid_t rgid, gid_t egid, + bool use_names, char delim) +{ + bool ok = true; + struct passwd *pwd = nullptr; + + if (username) + { + pwd = getpwuid (ruid); + if (pwd == nullptr) + ok = false; + } + + if (!print_group (rgid, use_names)) + ok = false; + + if (egid != rgid) + { + putchar (delim); + if (!print_group (egid, use_names)) + ok = false; + } + + { + gid_t *groups; + + int n_groups = xgetgroups (username, (pwd ? pwd->pw_gid : egid), &groups); + if (n_groups < 0) + { + if (username) + { + error (0, errno, _("failed to get groups for user %s"), + quote (username)); + } + else + { + error (0, errno, _("failed to get groups for the current process")); + } + return false; + } + + for (int i = 0; i < n_groups; i++) + if (groups[i] != rgid && groups[i] != egid) + { + putchar (delim); + if (!print_group (groups[i], use_names)) + ok = false; + } + free (groups); + } + return ok; +} + +/* Convert a gid_t to string. Do not use this function directly. + Instead, use it via the gidtostr macro. + Beware that it returns a pointer to static storage. */ +static char * +gidtostr_ptr (gid_t const *gid) +{ + static char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + return umaxtostr (*gid, buf); +} +#define gidtostr(g) gidtostr_ptr (&(g)) + +/* Print the name or value of group ID GID. */ +extern bool +print_group (gid_t gid, bool use_name) +{ + struct group *grp = nullptr; + bool ok = true; + + if (use_name) + { + grp = getgrgid (gid); + if (grp == nullptr) + { + if (TYPE_SIGNED (gid_t)) + { + intmax_t g = gid; + error (0, 0, _("cannot find name for group ID %"PRIdMAX), g); + } + else + { + uintmax_t g = gid; + error (0, 0, _("cannot find name for group ID %"PRIuMAX), g); + } + ok = false; + } + } + + char *s = grp ? grp->gr_name : gidtostr (gid); + fputs (s, stdout); + return ok; +} diff --git a/src/group-list.h b/src/group-list.h new file mode 100644 index 0000000..399a07a --- /dev/null +++ b/src/group-list.h @@ -0,0 +1,19 @@ +/* group-list.h -- prototypes shared by id and groups. + + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +bool print_group (gid_t, bool); +bool print_group_list (char const *, uid_t, gid_t, gid_t, bool, char); diff --git a/src/groups.c b/src/groups.c new file mode 100644 index 0000000..cc90694 --- /dev/null +++ b/src/groups.c @@ -0,0 +1,144 @@ +/* groups -- print the groups a user is in + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by James Youngman based on id.c and groups.sh, + which were written by Arnold Robbins and David MacKenzie. */ + +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "group-list.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "groups" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("James Youngman") + + +static struct option const longopts[] = +{ + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [USERNAME]...\n"), program_name); + fputs (_("\ +Print group memberships for each USERNAME or, if no USERNAME is specified, for\ +\n\ +the current process (which may differ if the groups database has changed).\n"), + stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int optc; + bool ok = true; + gid_t rgid, egid; + uid_t ruid; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + /* Processing the arguments this way makes groups.c behave differently to + * groups.sh if one of the arguments is "--". + */ + while ((optc = getopt_long (argc, argv, "", longopts, nullptr)) != -1) + { + switch (optc) + { + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (optind == argc) + { + /* No arguments. Divulge the details of the current process. */ + uid_t NO_UID = -1; + gid_t NO_GID = -1; + + errno = 0; + ruid = getuid (); + if (ruid == NO_UID && errno) + error (EXIT_FAILURE, errno, _("cannot get real UID")); + + errno = 0; + egid = getegid (); + if (egid == NO_GID && errno) + error (EXIT_FAILURE, errno, _("cannot get effective GID")); + + errno = 0; + rgid = getgid (); + if (rgid == NO_GID && errno) + error (EXIT_FAILURE, errno, _("cannot get real GID")); + + if (!print_group_list (nullptr, ruid, rgid, egid, true, ' ')) + ok = false; + putchar ('\n'); + } + else + { + /* At least one argument. Divulge the details of the specified users. */ + for ( ; optind < argc; optind++) + { + struct passwd *pwd = getpwnam (argv[optind]); + if (pwd == nullptr) + { + error (0, 0, _("%s: no such user"), quote (argv[optind])); + ok = false; + continue; + } + ruid = pwd->pw_uid; + rgid = egid = pwd->pw_gid; + + printf ("%s : ", argv[optind]); + if (!print_group_list (argv[optind], ruid, rgid, egid, true, ' ')) + ok = false; + putchar ('\n'); + } + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/head.c b/src/head.c new file mode 100644 index 0000000..da32c88 --- /dev/null +++ b/src/head.c @@ -0,0 +1,1097 @@ +/* head -- output first part of file(s) + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Options: (see usage) + Reads from standard input if no files are given or when a filename of + ''-'' is encountered. + By default, filename headers are printed only if more than one file + is given. + By default, prints the first 10 lines (head -n 10). + + David MacKenzie */ + +#include + +#include +#include +#include + +#include "system.h" + +#include "assure.h" +#include "full-read.h" +#include "quote.h" +#include "safe-read.h" +#include "stat-size.h" +#include "xbinary-io.h" +#include "xdectoint.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "head" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +/* Number of lines/chars/blocks to head. */ +#define DEFAULT_NUMBER 10 + +/* Useful only when eliding tail bytes or lines. + If true, skip the is-regular-file test used to determine whether + to use the lseek optimization. Instead, use the more general (and + more expensive) code unconditionally. Intended solely for testing. */ +static bool presume_input_pipe; + +/* If true, print filename headers. */ +static bool print_headers; + +/* Character to split lines by. */ +static char line_end; + +/* When to print the filename banners. */ +enum header_mode +{ + multiple_files, always, never +}; + +/* Have we ever read standard input? */ +static bool have_read_stdin; + +enum Copy_fd_status + { + COPY_FD_OK = 0, + COPY_FD_READ_ERROR, + COPY_FD_UNEXPECTED_EOF + }; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + PRESUME_INPUT_PIPE_OPTION = CHAR_MAX + 1 +}; + +static struct option const long_options[] = +{ + {"bytes", required_argument, nullptr, 'c'}, + {"lines", required_argument, nullptr, 'n'}, + {"-presume-input-pipe", no_argument, nullptr, + PRESUME_INPUT_PIPE_OPTION}, /* do not document */ + {"quiet", no_argument, nullptr, 'q'}, + {"silent", no_argument, nullptr, 'q'}, + {"verbose", no_argument, nullptr, 'v'}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + printf (_("\ +Print the first %d lines of each FILE to standard output.\n\ +With more than one FILE, precede each with a header giving the file name.\n\ +"), DEFAULT_NUMBER); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + printf (_("\ + -c, --bytes=[-]NUM print the first NUM bytes of each file;\n\ + with the leading '-', print all but the last\n\ + NUM bytes of each file\n\ + -n, --lines=[-]NUM print the first NUM lines instead of the first %d;\n\ + with the leading '-', print all but the last\n\ + NUM lines of each file\n\ +"), DEFAULT_NUMBER); + fputs (_("\ + -q, --quiet, --silent never print headers giving file names\n\ + -v, --verbose always print headers giving file names\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +NUM may have a multiplier suffix:\n\ +b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\ +GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y, R, Q.\n\ +Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +diagnose_copy_fd_failure (enum Copy_fd_status err, char const *filename) +{ + switch (err) + { + case COPY_FD_READ_ERROR: + error (0, errno, _("error reading %s"), quoteaf (filename)); + break; + case COPY_FD_UNEXPECTED_EOF: + error (0, errno, _("%s: file has shrunk too much"), quotef (filename)); + break; + default: + affirm (false); + } +} + +static void +write_header (char const *filename) +{ + static bool first_file = true; + + printf ("%s==> %s <==\n", (first_file ? "" : "\n"), filename); + first_file = false; +} + +/* Write N_BYTES from BUFFER to stdout. + Exit immediately on error with a single diagnostic. */ + +static void +xwrite_stdout (char const *buffer, size_t n_bytes) +{ + if (n_bytes > 0 && fwrite (buffer, 1, n_bytes, stdout) < n_bytes) + { + clearerr (stdout); /* To avoid redundant close_stdout diagnostic. */ + fpurge (stdout); + error (EXIT_FAILURE, errno, _("error writing %s"), + quoteaf ("standard output")); + } +} + +/* Copy no more than N_BYTES from file descriptor SRC_FD to stdout. + Return an appropriate indication of success or read failure. */ + +static enum Copy_fd_status +copy_fd (int src_fd, uintmax_t n_bytes) +{ + char buf[BUFSIZ]; + const size_t buf_size = sizeof (buf); + + /* Copy the file contents. */ + while (0 < n_bytes) + { + size_t n_to_read = MIN (buf_size, n_bytes); + size_t n_read = safe_read (src_fd, buf, n_to_read); + if (n_read == SAFE_READ_ERROR) + return COPY_FD_READ_ERROR; + + n_bytes -= n_read; + + if (n_read == 0 && n_bytes != 0) + return COPY_FD_UNEXPECTED_EOF; + + xwrite_stdout (buf, n_read); + } + + return COPY_FD_OK; +} + +/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD + corresponds to the file FILENAME. WHENCE must be SEEK_SET or + SEEK_CUR. Return the resulting offset. Give a diagnostic and + return -1 if lseek fails. */ + +static off_t +elseek (int fd, off_t offset, int whence, char const *filename) +{ + off_t new_offset = lseek (fd, offset, whence); + char buf[INT_BUFSIZE_BOUND (offset)]; + + if (new_offset < 0) + error (0, errno, + _(whence == SEEK_SET + ? N_("%s: cannot seek to offset %s") + : N_("%s: cannot seek to relative offset %s")), + quotef (filename), + offtostr (offset, buf)); + + return new_offset; +} + +/* For an input file with name FILENAME and descriptor FD, + output all but the last N_ELIDE_0 bytes. + If CURRENT_POS is nonnegative, assume that the input file is + positioned at CURRENT_POS and that it should be repositioned to + just before the elided bytes before returning. + Return true upon success. + Give a diagnostic and return false upon error. */ +static bool +elide_tail_bytes_pipe (char const *filename, int fd, uintmax_t n_elide_0, + off_t current_pos) +{ + size_t n_elide = n_elide_0; + uintmax_t desired_pos = current_pos; + bool ok = true; + +#ifndef HEAD_TAIL_PIPE_READ_BUFSIZE +# define HEAD_TAIL_PIPE_READ_BUFSIZE BUFSIZ +#endif +#define READ_BUFSIZE HEAD_TAIL_PIPE_READ_BUFSIZE + + /* If we're eliding no more than this many bytes, then it's ok to allocate + more memory in order to use a more time-efficient algorithm. + FIXME: use a fraction of available memory instead, as in sort. + FIXME: is this even worthwhile? */ +#ifndef HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD +# define HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD 1024 * 1024 +#endif + +#if HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD < 2 * READ_BUFSIZE + "HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD must be at least 2 * READ_BUFSIZE" +#endif + + if (SIZE_MAX < n_elide_0 + READ_BUFSIZE) + { + char umax_buf[INT_BUFSIZE_BOUND (n_elide_0)]; + error (EXIT_FAILURE, 0, _("%s: number of bytes is too large"), + umaxtostr (n_elide_0, umax_buf)); + } + + /* Two cases to consider... + 1) n_elide is small enough that we can afford to double-buffer: + allocate 2 * (READ_BUFSIZE + n_elide) bytes + 2) n_elide is too big for that, so we allocate only + (READ_BUFSIZE + n_elide) bytes + + FIXME: profile, to see if double-buffering is worthwhile + + CAUTION: do not fail (out of memory) when asked to elide + a ridiculous amount, but when given only a small input. */ + + if (n_elide <= HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD) + { + bool first = true; + bool eof = false; + size_t n_to_read = READ_BUFSIZE + n_elide; + bool i; + char *b[2]; + b[0] = xnmalloc (2, n_to_read); + b[1] = b[0] + n_to_read; + + for (i = false; ! eof ; i = !i) + { + size_t n_read = full_read (fd, b[i], n_to_read); + size_t delta = 0; + if (n_read < n_to_read) + { + if (errno != 0) + { + error (0, errno, _("error reading %s"), quoteaf (filename)); + ok = false; + break; + } + + /* reached EOF */ + if (n_read <= n_elide) + { + if (first) + { + /* The input is no larger than the number of bytes + to elide. So there's nothing to output, and + we're done. */ + } + else + { + delta = n_elide - n_read; + } + } + eof = true; + } + + /* Output any (but maybe just part of the) elided data from + the previous round. */ + if (! first) + { + desired_pos += n_elide - delta; + xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta); + } + first = false; + + if (n_elide < n_read) + { + desired_pos += n_read - n_elide; + xwrite_stdout (b[i], n_read - n_elide); + } + } + + free (b[0]); + } + else + { + /* Read blocks of size READ_BUFSIZE, until we've read at least n_elide + bytes. Then, for each new buffer we read, also write an old one. */ + + bool eof = false; + size_t n_read; + bool buffered_enough; + size_t i, i_next; + char **b = nullptr; + /* Round n_elide up to a multiple of READ_BUFSIZE. */ + size_t rem = READ_BUFSIZE - (n_elide % READ_BUFSIZE); + size_t n_elide_round = n_elide + rem; + size_t n_bufs = n_elide_round / READ_BUFSIZE + 1; + size_t n_alloc = 0; + size_t n_array_alloc = 0; + + buffered_enough = false; + for (i = 0, i_next = 1; !eof; i = i_next, i_next = (i_next + 1) % n_bufs) + { + if (n_array_alloc == i) + { + /* reallocate between 16 and n_bufs entries. */ + if (n_array_alloc == 0) + n_array_alloc = MIN (n_bufs, 16); + else if (n_array_alloc <= n_bufs / 2) + n_array_alloc *= 2; + else + n_array_alloc = n_bufs; + b = xnrealloc (b, n_array_alloc, sizeof *b); + } + + if (! buffered_enough) + { + b[i] = xmalloc (READ_BUFSIZE); + n_alloc = i + 1; + } + n_read = full_read (fd, b[i], READ_BUFSIZE); + if (n_read < READ_BUFSIZE) + { + if (errno != 0) + { + error (0, errno, _("error reading %s"), quoteaf (filename)); + ok = false; + goto free_mem; + } + eof = true; + } + + if (i + 1 == n_bufs) + buffered_enough = true; + + if (buffered_enough) + { + desired_pos += n_read; + xwrite_stdout (b[i_next], n_read); + } + } + + /* Output any remainder: rem bytes from b[i] + n_read. */ + if (rem) + { + if (buffered_enough) + { + size_t n_bytes_left_in_b_i = READ_BUFSIZE - n_read; + desired_pos += rem; + if (rem < n_bytes_left_in_b_i) + { + xwrite_stdout (b[i] + n_read, rem); + } + else + { + xwrite_stdout (b[i] + n_read, n_bytes_left_in_b_i); + xwrite_stdout (b[i_next], rem - n_bytes_left_in_b_i); + } + } + else if (i + 1 == n_bufs) + { + /* This happens when n_elide < file_size < n_elide_round. + + |READ_BUF.| + | | rem | + |---------!---------!---------!---------| + |---- n_elide ---------| + | | x | + | |y | + |---- file size -----------| + | |n_read| + |---- n_elide_round ----------| + */ + size_t y = READ_BUFSIZE - rem; + size_t x = n_read - y; + desired_pos += x; + xwrite_stdout (b[i_next], x); + } + } + + free_mem: + for (i = 0; i < n_alloc; i++) + free (b[i]); + free (b); + } + + if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0) + ok = false; + return ok; +} + +/* For the file FILENAME with descriptor FD, output all but the last N_ELIDE + bytes. If SIZE is nonnegative, this is a regular file positioned + at CURRENT_POS with SIZE bytes. Return true on success. + Give a diagnostic and return false upon error. */ + +/* NOTE: if the input file shrinks by more than N_ELIDE bytes between + the length determination and the actual reading, then head fails. */ + +static bool +elide_tail_bytes_file (char const *filename, int fd, uintmax_t n_elide, + struct stat const *st, off_t current_pos) +{ + off_t size = st->st_size; + if (presume_input_pipe || current_pos < 0 || size <= ST_BLKSIZE (*st)) + return elide_tail_bytes_pipe (filename, fd, n_elide, current_pos); + else + { + /* Be careful here. The current position may actually be + beyond the end of the file. */ + off_t diff = size - current_pos; + off_t bytes_remaining = diff < 0 ? 0 : diff; + + if (bytes_remaining <= n_elide) + return true; + + enum Copy_fd_status err = copy_fd (fd, bytes_remaining - n_elide); + if (err == COPY_FD_OK) + return true; + + diagnose_copy_fd_failure (err, filename); + return false; + } +} + +/* For an input file with name FILENAME and descriptor FD, + output all but the last N_ELIDE_0 bytes. + If CURRENT_POS is nonnegative, the input file is positioned there + and should be repositioned to just before the elided bytes. + Buffer the specified number of lines as a linked list of LBUFFERs, + adding them as needed. Return true if successful. */ + +static bool +elide_tail_lines_pipe (char const *filename, int fd, uintmax_t n_elide, + off_t current_pos) +{ + struct linebuffer + { + char buffer[BUFSIZ + 1]; + size_t nbytes; + size_t nlines; + struct linebuffer *next; + }; + uintmax_t desired_pos = current_pos; + typedef struct linebuffer LBUFFER; + LBUFFER *first, *last, *tmp; + size_t total_lines = 0; /* Total number of newlines in all buffers. */ + bool ok = true; + size_t n_read; /* Size in bytes of most recent read */ + + first = last = xmalloc (sizeof (LBUFFER)); + first->nbytes = first->nlines = 0; + first->next = nullptr; + tmp = xmalloc (sizeof (LBUFFER)); + + /* Always read into a fresh buffer. + Read, (producing no output) until we've accumulated at least + n_elide newlines, or until EOF, whichever comes first. */ + while (true) + { + n_read = safe_read (fd, tmp->buffer, BUFSIZ); + if (n_read == 0 || n_read == SAFE_READ_ERROR) + break; + + if (! n_elide) + { + desired_pos += n_read; + xwrite_stdout (tmp->buffer, n_read); + continue; + } + + tmp->nbytes = n_read; + tmp->nlines = 0; + tmp->next = nullptr; + + /* Count the number of newlines just read. */ + { + char *buffer_end = tmp->buffer + n_read; + *buffer_end = line_end; + char const *p = tmp->buffer; + while ((p = rawmemchr (p, line_end)) < buffer_end) + { + ++p; + ++tmp->nlines; + } + } + total_lines += tmp->nlines; + + /* If there is enough room in the last buffer read, just append the new + one to it. This is because when reading from a pipe, 'n_read' can + often be very small. */ + if (tmp->nbytes + last->nbytes < BUFSIZ) + { + memcpy (&last->buffer[last->nbytes], tmp->buffer, tmp->nbytes); + last->nbytes += tmp->nbytes; + last->nlines += tmp->nlines; + } + else + { + /* If there's not enough room, link the new buffer onto the end of + the list, then either free up the oldest buffer for the next + read if that would leave enough lines, or else malloc a new one. + Some compaction mechanism is possible but probably not + worthwhile. */ + last = last->next = tmp; + if (n_elide < total_lines - first->nlines) + { + desired_pos += first->nbytes; + xwrite_stdout (first->buffer, first->nbytes); + tmp = first; + total_lines -= first->nlines; + first = first->next; + } + else + tmp = xmalloc (sizeof (LBUFFER)); + } + } + + free (tmp); + + if (n_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (filename)); + ok = false; + goto free_lbuffers; + } + + /* If we read any bytes at all, count the incomplete line + on files that don't end with a newline. */ + if (last->nbytes && last->buffer[last->nbytes - 1] != line_end) + { + ++last->nlines; + ++total_lines; + } + + for (tmp = first; n_elide < total_lines - tmp->nlines; tmp = tmp->next) + { + desired_pos += tmp->nbytes; + xwrite_stdout (tmp->buffer, tmp->nbytes); + total_lines -= tmp->nlines; + } + + /* Print the first 'total_lines - n_elide' lines of tmp->buffer. */ + if (n_elide < total_lines) + { + size_t n = total_lines - n_elide; + char const *buffer_end = tmp->buffer + tmp->nbytes; + char const *p = tmp->buffer; + while (n && (p = memchr (p, line_end, buffer_end - p))) + { + ++p; + ++tmp->nlines; + --n; + } + desired_pos += p - tmp->buffer; + xwrite_stdout (tmp->buffer, p - tmp->buffer); + } + +free_lbuffers: + while (first) + { + tmp = first->next; + free (first); + first = tmp; + } + + if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0) + ok = false; + return ok; +} + +/* Output all but the last N_LINES lines of the input stream defined by + FD, START_POS, and SIZE. + START_POS is the starting position of the read pointer for the file + associated with FD (may be nonzero). + SIZE is the file size in bytes. + Return true upon success. + Give a diagnostic and return false upon error. + + NOTE: this code is very similar to that of tail.c's file_lines function. + Unfortunately, factoring out some common core looks like it'd result + in a less efficient implementation or a messy interface. */ +static bool +elide_tail_lines_seekable (char const *pretty_filename, int fd, + uintmax_t n_lines, + off_t start_pos, off_t size) +{ + char buffer[BUFSIZ]; + size_t bytes_read; + off_t pos = size; + + /* Set 'bytes_read' to the size of the last, probably partial, buffer; + 0 < 'bytes_read' <= 'BUFSIZ'. */ + bytes_read = (pos - start_pos) % BUFSIZ; + if (bytes_read == 0) + bytes_read = BUFSIZ; + /* Make 'pos' a multiple of 'BUFSIZ' (0 if the file is short), so that all + reads will be on block boundaries, which might increase efficiency. */ + pos -= bytes_read; + if (elseek (fd, pos, SEEK_SET, pretty_filename) < 0) + return false; + bytes_read = safe_read (fd, buffer, bytes_read); + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + return false; + } + + /* n_lines == 0 case needs special treatment. */ + const bool all_lines = !n_lines; + + /* Count the incomplete line on files that don't end with a newline. */ + if (n_lines && bytes_read && buffer[bytes_read - 1] != line_end) + --n_lines; + + while (true) + { + /* Scan backward, counting the newlines in this bufferfull. */ + + size_t n = bytes_read; + while (n) + { + if (all_lines) + n -= 1; + else + { + char const *nl; + nl = memrchr (buffer, line_end, n); + if (nl == nullptr) + break; + n = nl - buffer; + } + if (n_lines-- == 0) + { + /* Found it. */ + /* If necessary, restore the file pointer and copy + input to output up to position, POS. */ + if (start_pos < pos) + { + enum Copy_fd_status err; + if (elseek (fd, start_pos, SEEK_SET, pretty_filename) < 0) + return false; + + err = copy_fd (fd, pos - start_pos); + if (err != COPY_FD_OK) + { + diagnose_copy_fd_failure (err, pretty_filename); + return false; + } + } + + /* Output the initial portion of the buffer + in which we found the desired newline byte. */ + xwrite_stdout (buffer, n + 1); + + /* Set file pointer to the byte after what we've output. */ + return 0 <= elseek (fd, pos + n + 1, SEEK_SET, pretty_filename); + } + } + + /* Not enough newlines in that bufferfull. */ + if (pos == start_pos) + { + /* Not enough lines in the file. */ + return true; + } + pos -= BUFSIZ; + if (elseek (fd, pos, SEEK_SET, pretty_filename) < 0) + return false; + + bytes_read = safe_read (fd, buffer, BUFSIZ); + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + return false; + } + + /* FIXME: is this dead code? + Consider the test, pos == start_pos, above. */ + if (bytes_read == 0) + return true; + } +} + +/* For the file FILENAME with descriptor FD, output all but the last N_ELIDE + lines. If SIZE is nonnegative, this is a regular file positioned + at START_POS with SIZE bytes. Return true on success. + Give a diagnostic and return nonzero upon error. */ + +static bool +elide_tail_lines_file (char const *filename, int fd, uintmax_t n_elide, + struct stat const *st, off_t current_pos) +{ + off_t size = st->st_size; + if (presume_input_pipe || current_pos < 0 || size <= ST_BLKSIZE (*st)) + return elide_tail_lines_pipe (filename, fd, n_elide, current_pos); + else + { + /* Find the offset, OFF, of the Nth newline from the end, + but not counting the last byte of the file. + If found, write from current position to OFF, inclusive. + Otherwise, just return true. */ + + return (size <= current_pos + || elide_tail_lines_seekable (filename, fd, n_elide, + current_pos, size)); + } +} + +static bool +head_bytes (char const *filename, int fd, uintmax_t bytes_to_write) +{ + char buffer[BUFSIZ]; + size_t bytes_to_read = BUFSIZ; + + while (bytes_to_write) + { + size_t bytes_read; + if (bytes_to_write < bytes_to_read) + bytes_to_read = bytes_to_write; + bytes_read = safe_read (fd, buffer, bytes_to_read); + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (filename)); + return false; + } + if (bytes_read == 0) + break; + xwrite_stdout (buffer, bytes_read); + bytes_to_write -= bytes_read; + } + return true; +} + +static bool +head_lines (char const *filename, int fd, uintmax_t lines_to_write) +{ + char buffer[BUFSIZ]; + + while (lines_to_write) + { + size_t bytes_read = safe_read (fd, buffer, BUFSIZ); + size_t bytes_to_write = 0; + + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (filename)); + return false; + } + if (bytes_read == 0) + break; + while (bytes_to_write < bytes_read) + if (buffer[bytes_to_write++] == line_end && --lines_to_write == 0) + { + off_t n_bytes_past_EOL = bytes_read - bytes_to_write; + /* If we have read more data than that on the specified number + of lines, try to seek back to the position we would have + gotten to had we been reading one byte at a time. */ + if (lseek (fd, -n_bytes_past_EOL, SEEK_CUR) < 0) + { + struct stat st; + if (fstat (fd, &st) != 0 || S_ISREG (st.st_mode)) + elseek (fd, -n_bytes_past_EOL, SEEK_CUR, filename); + } + break; + } + xwrite_stdout (buffer, bytes_to_write); + } + return true; +} + +static bool +head (char const *filename, int fd, uintmax_t n_units, bool count_lines, + bool elide_from_end) +{ + if (print_headers) + write_header (filename); + + if (elide_from_end) + { + off_t current_pos = -1; + struct stat st; + if (fstat (fd, &st) != 0) + { + error (0, errno, _("cannot fstat %s"), + quoteaf (filename)); + return false; + } + if (! presume_input_pipe && usable_st_size (&st)) + { + current_pos = elseek (fd, 0, SEEK_CUR, filename); + if (current_pos < 0) + return false; + } + if (count_lines) + return elide_tail_lines_file (filename, fd, n_units, &st, current_pos); + else + return elide_tail_bytes_file (filename, fd, n_units, &st, current_pos); + } + if (count_lines) + return head_lines (filename, fd, n_units); + else + return head_bytes (filename, fd, n_units); +} + +static bool +head_file (char const *filename, uintmax_t n_units, bool count_lines, + bool elide_from_end) +{ + int fd; + bool ok; + bool is_stdin = STREQ (filename, "-"); + + if (is_stdin) + { + have_read_stdin = true; + fd = STDIN_FILENO; + filename = _("standard input"); + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + else + { + fd = open (filename, O_RDONLY | O_BINARY); + if (fd < 0) + { + error (0, errno, _("cannot open %s for reading"), quoteaf (filename)); + return false; + } + } + + ok = head (filename, fd, n_units, count_lines, elide_from_end); + if (!is_stdin && close (fd) != 0) + { + error (0, errno, _("failed to close %s"), quoteaf (filename)); + return false; + } + return ok; +} + +/* Convert a string of decimal digits, N_STRING, with an optional suffix + to an integral value. Upon successful conversion, + return that value. If it cannot be converted, give a diagnostic and exit. + COUNT_LINES indicates whether N_STRING is a number of bytes or a number + of lines. It is used solely to give a more specific diagnostic. */ + +static uintmax_t +string_to_integer (bool count_lines, char const *n_string) +{ + return xdectoumax (n_string, 0, UINTMAX_MAX, "bkKmMGTPEZYRQ0", + count_lines ? _("invalid number of lines") + : _("invalid number of bytes"), 0); +} + +int +main (int argc, char **argv) +{ + enum header_mode header_mode = multiple_files; + bool ok = true; + int c; + size_t i; + + /* Number of items to print. */ + uintmax_t n_units = DEFAULT_NUMBER; + + /* If true, interpret the numeric argument as the number of lines. + Otherwise, interpret it as the number of bytes. */ + bool count_lines = true; + + /* Elide the specified number of lines or bytes, counting from + the end of the file. */ + bool elide_from_end = false; + + /* Initializer for file_list if no file-arguments + were specified on the command line. */ + static char const *const default_file_list[] = {"-", nullptr}; + char const *const *file_list; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + have_read_stdin = false; + + print_headers = false; + + line_end = '\n'; + + if (1 < argc && argv[1][0] == '-' && ISDIGIT (argv[1][1])) + { + char *a = argv[1]; + char *n_string = ++a; + char *end_n_string; + char multiplier_char = 0; + + /* Old option syntax; a dash, one or more digits, and one or + more option letters. Move past the number. */ + do ++a; + while (ISDIGIT (*a)); + + /* Pointer to the byte after the last digit. */ + end_n_string = a; + + /* Parse any appended option letters. */ + for (; *a; a++) + { + switch (*a) + { + case 'c': + count_lines = false; + multiplier_char = 0; + break; + + case 'b': + case 'k': + case 'm': + count_lines = false; + multiplier_char = *a; + break; + + case 'l': + count_lines = true; + break; + + case 'q': + header_mode = never; + break; + + case 'v': + header_mode = always; + break; + + case 'z': + line_end = '\0'; + break; + + default: + error (0, 0, _("invalid trailing option -- %c"), *a); + usage (EXIT_FAILURE); + } + } + + /* Append the multiplier character (if any) onto the end of + the digit string. Then add NUL byte if necessary. */ + *end_n_string = multiplier_char; + if (multiplier_char) + *(++end_n_string) = 0; + + n_units = string_to_integer (count_lines, n_string); + + /* Make the options we just parsed invisible to getopt. */ + argv[1] = argv[0]; + argv++; + argc--; + } + + while ((c = getopt_long (argc, argv, "c:n:qvz0123456789", + long_options, nullptr)) + != -1) + { + switch (c) + { + case PRESUME_INPUT_PIPE_OPTION: + presume_input_pipe = true; + break; + + case 'c': + count_lines = false; + elide_from_end = (*optarg == '-'); + if (elide_from_end) + ++optarg; + n_units = string_to_integer (count_lines, optarg); + break; + + case 'n': + count_lines = true; + elide_from_end = (*optarg == '-'); + if (elide_from_end) + ++optarg; + n_units = string_to_integer (count_lines, optarg); + break; + + case 'q': + header_mode = never; + break; + + case 'v': + header_mode = always; + break; + + case 'z': + line_end = '\0'; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + if (ISDIGIT (c)) + error (0, 0, _("invalid trailing option -- %c"), c); + usage (EXIT_FAILURE); + } + } + + if (header_mode == always + || (header_mode == multiple_files && optind < argc - 1)) + print_headers = true; + + if ( ! count_lines && elide_from_end && OFF_T_MAX < n_units) + { + char umax_buf[INT_BUFSIZE_BOUND (n_units)]; + error (EXIT_FAILURE, EOVERFLOW, "%s: %s", _("invalid number of bytes"), + quote (umaxtostr (n_units, umax_buf))); + } + + file_list = (optind < argc + ? (char const *const *) &argv[optind] + : default_file_list); + + xset_binary_mode (STDOUT_FILENO, O_BINARY); + + for (i = 0; file_list[i]; ++i) + ok &= head_file (file_list[i], n_units, count_lines, elide_from_end); + + if (have_read_stdin && close (STDIN_FILENO) < 0) + error (EXIT_FAILURE, errno, "-"); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/hostid.c b/src/hostid.c new file mode 100644 index 0000000..79f83b3 --- /dev/null +++ b/src/hostid.c @@ -0,0 +1,85 @@ +/* print the hexadecimal identifier for the current host + + Copyright (C) 1997-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering. */ + +#include +#include +#include + +#include "system.h" +#include "long-options.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "hostid" + +#define AUTHORS proper_name ("Jim Meyering") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]\n\ +Print the numeric identifier (in hexadecimal) for the current host.\n\ +\n\ +"), program_name); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + unsigned int id; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + id = gethostid (); + + /* POSIX says gethostid returns a "32-bit identifier" but is silent + whether it's sign-extended. Turn off any sign-extension. This + is a no-op unless unsigned int is wider than 32 bits. */ + id &= 0xffffffff; + + printf ("%08x\n", id); + + return EXIT_SUCCESS; +} diff --git a/src/hostname.c b/src/hostname.c new file mode 100644 index 0000000..115cac2 --- /dev/null +++ b/src/hostname.c @@ -0,0 +1,112 @@ +/* hostname - set or print the name of current host system + Copyright (C) 1994-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering. */ + +#include +#include +#include + +#include "system.h" +#include "long-options.h" +#include "quote.h" +#include "xgethostname.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "hostname" + +#define AUTHORS proper_name ("Jim Meyering") + +#ifndef HAVE_SETHOSTNAME +# if defined HAVE_SYSINFO && defined HAVE_SYS_SYSTEMINFO_H +# include +# endif + +static int +sethostname (char const *name, size_t namelen) +{ +# if defined HAVE_SYSINFO && defined HAVE_SYS_SYSTEMINFO_H + /* Using sysinfo() is the SVR4 mechanism to set a hostname. */ + return (sysinfo (SI_SET_HOSTNAME, name, namelen) < 0 ? -1 : 0); +# else + errno = ENOTSUP; + return -1; +# endif +} +#endif + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [NAME]\n\ + or: %s OPTION\n\ +Print or set the hostname of the current system.\n\ +\n\ +"), + program_name, program_name); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + char *hostname; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (optind + 1 < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + if (optind + 1 == argc) + { + /* Set hostname to operand. */ + char const *name = argv[optind]; + if (sethostname (name, strlen (name)) != 0) + error (EXIT_FAILURE, errno, _("cannot set name to %s"), + quote (name)); + } + else + { + hostname = xgethostname (); + if (hostname == nullptr) + error (EXIT_FAILURE, errno, _("cannot determine hostname")); + puts (hostname); + } + + main_exit (EXIT_SUCCESS); +} diff --git a/src/id.c b/src/id.c new file mode 100644 index 0000000..cf153ae --- /dev/null +++ b/src/id.c @@ -0,0 +1,460 @@ +/* id -- print real and effective UIDs and GIDs + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Arnold Robbins. + Major rewrite by David MacKenzie, djm@gnu.ai.mit.edu. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "mgetgroups.h" +#include "quote.h" +#include "group-list.h" +#include "smack.h" +#include "userspec.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "id" + +#define AUTHORS \ + proper_name ("Arnold Robbins"), \ + proper_name ("David MacKenzie") + +/* If nonzero, output only the SELinux context. */ +static bool just_context = 0; +/* If true, delimit entries with NUL characters, not whitespace */ +static bool opt_zero = false; +/* If true, output the list of all group IDs. -G */ +static bool just_group_list = false; +/* If true, output only the group ID(s). -g */ +static bool just_group = false; +/* If true, output real UID/GID instead of default effective UID/GID. -r */ +static bool use_real = false; +/* If true, output only the user ID(s). -u */ +static bool just_user = false; +/* True unless errors have been encountered. */ +static bool ok = true; +/* If true, we are using multiple users. Terminate -G with double NUL. */ +static bool multiple_users = false; +/* If true, output user/group name instead of ID number. -n */ +static bool use_name = false; + +/* The real and effective IDs of the user to print. */ +static uid_t ruid, euid; +static gid_t rgid, egid; + +/* The SELinux context. Start with a known invalid value so print_full_info + knows when 'context' has not been set to a meaningful value. */ +static char *context = nullptr; + +static void print_user (uid_t uid); +static void print_full_info (char const *username); +static void print_stuff (char const *pw_name); + +static struct option const longopts[] = +{ + {"context", no_argument, nullptr, 'Z'}, + {"group", no_argument, nullptr, 'g'}, + {"groups", no_argument, nullptr, 'G'}, + {"name", no_argument, nullptr, 'n'}, + {"real", no_argument, nullptr, 'r'}, + {"user", no_argument, nullptr, 'u'}, + {"zero", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [USER]...\n"), program_name); + fputs (_("\ +Print user and group information for each specified USER,\n\ +or (when USER omitted) for the current process.\n\ +\n"), + stdout); + fputs (_("\ + -a ignore, for compatibility with other versions\n\ + -Z, --context print only the security context of the process\n\ + -g, --group print only the effective group ID\n\ + -G, --groups print all group IDs\n\ + -n, --name print a name instead of a number, for -ugG\n\ + -r, --real print the real ID instead of the effective ID, with -ugG\n\ + -u, --user print only the effective user ID\n\ + -z, --zero delimit entries with NUL characters, not whitespace;\n\ + not permitted in default format\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Without any OPTION, print some useful set of identified information.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int optc; + int selinux_enabled = (is_selinux_enabled () > 0); + bool smack_enabled = is_smack_enabled (); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "agnruzGZ", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'a': + /* Ignore -a, for compatibility with SVR4. */ + break; + + case 'Z': + /* politely decline if we're not on a SELinux/SMACK-enabled kernel. */ +#ifdef HAVE_SMACK + if (!selinux_enabled && !smack_enabled) + error (EXIT_FAILURE, 0, + _("--context (-Z) works only on " + "an SELinux/SMACK-enabled kernel")); +#else + if (!selinux_enabled) + error (EXIT_FAILURE, 0, + _("--context (-Z) works only on an SELinux-enabled kernel")); +#endif + just_context = true; + break; + + case 'g': + just_group = true; + break; + case 'n': + use_name = true; + break; + case 'r': + use_real = true; + break; + case 'u': + just_user = true; + break; + case 'z': + opt_zero = true; + break; + case 'G': + just_group_list = true; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + size_t n_ids = argc - optind; + + if (n_ids && just_context) + error (EXIT_FAILURE, 0, + _("cannot print security context when user specified")); + + if (just_user + just_group + just_group_list + just_context > 1) + error (EXIT_FAILURE, 0, _("cannot print \"only\" of more than one choice")); + + bool default_format = ! (just_user + || just_group + || just_group_list + || just_context); + + if (default_format && (use_real || use_name)) + error (EXIT_FAILURE, 0, + _("cannot print only names or real IDs in default format")); + + if (default_format && opt_zero) + error (EXIT_FAILURE, 0, + _("option --zero not permitted in default format")); + + /* If we are on a SELinux/SMACK-enabled kernel, no user is specified, and + either --context is specified or none of (-u,-g,-G) is specified, + and we're not in POSIXLY_CORRECT mode, get our context. Otherwise, + leave the context variable alone - it has been initialized to an + invalid value that will be not displayed in print_full_info(). */ + if (n_ids == 0 + && (just_context + || (default_format && ! getenv ("POSIXLY_CORRECT")))) + { + /* Report failure only if --context (-Z) was explicitly requested. */ + if ((selinux_enabled && getcon (&context) && just_context) + || (smack_enabled + && smack_new_label_from_self (&context) < 0 + && just_context)) + error (EXIT_FAILURE, 0, _("can't get process context")); + } + + if (n_ids >= 1) + { + multiple_users = n_ids > 1 ? true : false; + /* Changing the value of n_ids to the last index in the array where we + have the last possible user id. This helps us because we don't have + to declare a different variable to keep a track of where the + last username lies in argv[]. */ + n_ids += optind; + /* For each username/userid to get its pw_name field */ + for (; optind < n_ids; optind++) + { + char *pw_name = nullptr; + struct passwd *pwd = nullptr; + char const *spec = argv[optind]; + /* Disallow an empty spec here as parse_user_spec() doesn't + give an error for that as it seems it's a valid way to + specify a noop or "reset special bits" depending on the system. */ + if (*spec) + { + if (! parse_user_spec (spec, &euid, nullptr, &pw_name, nullptr)) + pwd = pw_name ? getpwnam (pw_name) : getpwuid (euid); + } + if (pwd == nullptr) + { + error (0, errno, _("%s: no such user"), quote (spec)); + ok &= false; + } + else + { + if (!pw_name) + pw_name = xstrdup (pwd->pw_name); + ruid = euid = pwd->pw_uid; + rgid = egid = pwd->pw_gid; + print_stuff (pw_name); + } + free (pw_name); + } + } + else + { + /* POSIX says identification functions (getuid, getgid, and + others) cannot fail, but they can fail under GNU/Hurd and a + few other systems. Test for failure by checking errno. */ + uid_t NO_UID = -1; + gid_t NO_GID = -1; + + if (just_user ? !use_real + : !just_group && !just_group_list && !just_context) + { + errno = 0; + euid = geteuid (); + if (euid == NO_UID && errno) + error (EXIT_FAILURE, errno, _("cannot get effective UID")); + } + + if (just_user ? use_real + : !just_group && (just_group_list || !just_context)) + { + errno = 0; + ruid = getuid (); + if (ruid == NO_UID && errno) + error (EXIT_FAILURE, errno, _("cannot get real UID")); + } + + if (!just_user && (just_group || just_group_list || !just_context)) + { + errno = 0; + egid = getegid (); + if (egid == NO_GID && errno) + error (EXIT_FAILURE, errno, _("cannot get effective GID")); + + errno = 0; + rgid = getgid (); + if (rgid == NO_GID && errno) + error (EXIT_FAILURE, errno, _("cannot get real GID")); + } + print_stuff (nullptr); + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} + +/* Convert a gid_t to string. Do not use this function directly. + Instead, use it via the gidtostr macro. + Beware that it returns a pointer to static storage. */ +static char * +gidtostr_ptr (gid_t const *gid) +{ + static char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + return umaxtostr (*gid, buf); +} +#define gidtostr(g) gidtostr_ptr (&(g)) + +/* Convert a uid_t to string. Do not use this function directly. + Instead, use it via the uidtostr macro. + Beware that it returns a pointer to static storage. */ +static char * +uidtostr_ptr (uid_t const *uid) +{ + static char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + return umaxtostr (*uid, buf); +} +#define uidtostr(u) uidtostr_ptr (&(u)) + +/* Print the name or value of user ID UID. */ + +static void +print_user (uid_t uid) +{ + struct passwd *pwd = nullptr; + + if (use_name) + { + pwd = getpwuid (uid); + if (pwd == nullptr) + { + error (0, 0, _("cannot find name for user ID %s"), + uidtostr (uid)); + ok &= false; + } + } + + char *s = pwd ? pwd->pw_name : uidtostr (uid); + fputs (s, stdout); +} + +/* Print all of the info about the user's user and group IDs. */ + +static void +print_full_info (char const *username) +{ + struct passwd *pwd; + struct group *grp; + + printf (_("uid=%s"), uidtostr (ruid)); + pwd = getpwuid (ruid); + if (pwd) + printf ("(%s)", pwd->pw_name); + + printf (_(" gid=%s"), gidtostr (rgid)); + grp = getgrgid (rgid); + if (grp) + printf ("(%s)", grp->gr_name); + + if (euid != ruid) + { + printf (_(" euid=%s"), uidtostr (euid)); + pwd = getpwuid (euid); + if (pwd) + printf ("(%s)", pwd->pw_name); + } + + if (egid != rgid) + { + printf (_(" egid=%s"), gidtostr (egid)); + grp = getgrgid (egid); + if (grp) + printf ("(%s)", grp->gr_name); + } + + { + gid_t *groups; + + gid_t primary_group; + if (username) + primary_group = pwd ? pwd->pw_gid : -1; + else + primary_group = egid; + + int n_groups = xgetgroups (username, primary_group, &groups); + if (n_groups < 0) + { + if (username) + error (0, errno, _("failed to get groups for user %s"), + quote (username)); + else + error (0, errno, _("failed to get groups for the current process")); + ok &= false; + return; + } + + if (n_groups > 0) + fputs (_(" groups="), stdout); + for (int i = 0; i < n_groups; i++) + { + if (i > 0) + putchar (','); + fputs (gidtostr (groups[i]), stdout); + grp = getgrgid (groups[i]); + if (grp) + printf ("(%s)", grp->gr_name); + } + free (groups); + } + + /* POSIX mandates the precise output format, and that it not include + any context=... part, so skip that if POSIXLY_CORRECT is set. */ + if (context) + printf (_(" context=%s"), context); +} + +/* Print information about the user based on the arguments passed. */ + +static void +print_stuff (char const *pw_name) +{ + if (just_user) + print_user (use_real ? ruid : euid); + + /* print_group and print_group_list return true on successful + execution but false if something goes wrong. We then AND this value with + the current value of 'ok' because we want to know if one of the previous + users faced a problem in these functions. This value of 'ok' is later used + to understand what status program should exit with. */ + else if (just_group) + ok &= print_group (use_real ? rgid : egid, use_name); + else if (just_group_list) + ok &= print_group_list (pw_name, ruid, rgid, egid, + use_name, opt_zero ? '\0' : ' '); + else if (just_context) + fputs (context, stdout); + else + print_full_info (pw_name); + + /* When printing records for more than 1 user, at the end of groups + of each user terminate the record with two consequent NUL characters + to make parsing and distinguishing between two records possible. */ + if (opt_zero && just_group_list && multiple_users) + { + putchar ('\0'); + putchar ('\0'); + } + else + { + putchar (opt_zero ? '\0' : '\n'); + } +} diff --git a/src/install.c b/src/install.c new file mode 100644 index 0000000..31a48f1 --- /dev/null +++ b/src/install.c @@ -0,0 +1,1050 @@ +/* install - copy files and set attributes + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "backupfile.h" +#include "cp-hash.h" +#include "copy.h" +#include "filenamecat.h" +#include "full-read.h" +#include "mkancesdirs.h" +#include "mkdir-p.h" +#include "modechange.h" +#include "prog-fprintf.h" +#include "quote.h" +#include "savewd.h" +#include "selinux.h" +#include "stat-time.h" +#include "targetdir.h" +#include "utimens.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "install" + +#define AUTHORS proper_name ("David MacKenzie") + +static int selinux_enabled = 0; +static bool use_default_selinux_context = true; + +#if ! HAVE_ENDGRENT +# define endgrent() ((void) 0) +#endif + +#if ! HAVE_ENDPWENT +# define endpwent() ((void) 0) +#endif + +/* The user name that will own the files, or nullptr to make the owner + the current user ID. */ +static char *owner_name; + +/* The user ID corresponding to 'owner_name'. */ +static uid_t owner_id; + +/* The group name that will own the files, or nullptr to make the group + the current group ID. */ +static char *group_name; + +/* The group ID corresponding to 'group_name'. */ +static gid_t group_id; + +#define DEFAULT_MODE (S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) + +/* The file mode bits to which non-directory files will be set. The umask has + no effect. */ +static mode_t mode = DEFAULT_MODE; + +/* Similar, but for directories. */ +static mode_t dir_mode = DEFAULT_MODE; + +/* The file mode bits that the user cares about. This should be a + superset of DIR_MODE and a subset of CHMOD_MODE_BITS. This matters + for directories, since otherwise directories may keep their S_ISUID + or S_ISGID bits. */ +static mode_t dir_mode_bits = CHMOD_MODE_BITS; + +/* Compare files before installing (-C) */ +static bool copy_only_if_needed; + +/* If true, strip executable files after copying them. */ +static bool strip_files; + +/* If true, install a directory instead of a regular file. */ +static bool dir_arg; + +/* Program used to strip binaries, "strip" is default */ +static char const *strip_program = "strip"; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEBUG_OPTION = CHAR_MAX + 1, + PRESERVE_CONTEXT_OPTION, + STRIP_PROGRAM_OPTION +}; + +static struct option const long_options[] = +{ + {"backup", optional_argument, nullptr, 'b'}, + {"compare", no_argument, nullptr, 'C'}, + {GETOPT_SELINUX_CONTEXT_OPTION_DECL}, + {"debug", no_argument, nullptr, DEBUG_OPTION}, + {"directory", no_argument, nullptr, 'd'}, + {"group", required_argument, nullptr, 'g'}, + {"mode", required_argument, nullptr, 'm'}, + {"no-target-directory", no_argument, nullptr, 'T'}, + {"owner", required_argument, nullptr, 'o'}, + {"preserve-timestamps", no_argument, nullptr, 'p'}, + {"preserve-context", no_argument, nullptr, PRESERVE_CONTEXT_OPTION}, + {"strip", no_argument, nullptr, 's'}, + {"strip-program", required_argument, nullptr, STRIP_PROGRAM_OPTION}, + {"suffix", required_argument, nullptr, 'S'}, + {"target-directory", required_argument, nullptr, 't'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Compare content of opened files using file descriptors A_FD and B_FD. Return + true if files are equal. */ +static bool +have_same_content (int a_fd, int b_fd) +{ + enum { CMP_BLOCK_SIZE = 4096 }; + static char a_buff[CMP_BLOCK_SIZE]; + static char b_buff[CMP_BLOCK_SIZE]; + + size_t size; + while (0 < (size = full_read (a_fd, a_buff, sizeof a_buff))) { + if (size != full_read (b_fd, b_buff, sizeof b_buff)) + return false; + + if (memcmp (a_buff, b_buff, size) != 0) + return false; + } + + return size == 0; +} + +/* Return true for mode with non-permission bits. */ +static bool +extra_mode (mode_t input) +{ + mode_t mask = S_IRWXUGO | S_IFMT; + return !! (input & ~ mask); +} + +/* Return true if copy of file SRC_NAME to file DEST_NAME aka + DEST_DIRFD+DEST_RELNAME is necessary. */ +static bool +need_copy (char const *src_name, char const *dest_name, + int dest_dirfd, char const *dest_relname, + const struct cp_options *x) +{ + struct stat src_sb, dest_sb; + int src_fd, dest_fd; + bool content_match; + + if (extra_mode (mode)) + return true; + + /* compare files using stat */ + if (lstat (src_name, &src_sb) != 0) + return true; + + if (fstatat (dest_dirfd, dest_relname, &dest_sb, AT_SYMLINK_NOFOLLOW) != 0) + return true; + + if (!S_ISREG (src_sb.st_mode) || !S_ISREG (dest_sb.st_mode) + || extra_mode (src_sb.st_mode) || extra_mode (dest_sb.st_mode)) + return true; + + if (src_sb.st_size != dest_sb.st_size + || (dest_sb.st_mode & CHMOD_MODE_BITS) != mode) + return true; + + if (owner_id == (uid_t) -1) + { + errno = 0; + uid_t ruid = getuid (); + if ((ruid == (uid_t) -1 && errno) || dest_sb.st_uid != ruid) + return true; + } + else if (dest_sb.st_uid != owner_id) + return true; + + if (group_id == (uid_t) -1) + { + errno = 0; + gid_t rgid = getgid (); + if ((rgid == (uid_t) -1 && errno) || dest_sb.st_gid != rgid) + return true; + } + else if (dest_sb.st_gid != group_id) + return true; + + /* compare SELinux context if preserving */ + if (selinux_enabled && x->preserve_security_context) + { + char *file_scontext = nullptr; + char *to_scontext = nullptr; + bool scontext_match; + + if (getfilecon (src_name, &file_scontext) == -1) + return true; + + if (getfilecon (dest_name, &to_scontext) == -1) + { + freecon (file_scontext); + return true; + } + + scontext_match = STREQ (file_scontext, to_scontext); + + freecon (file_scontext); + freecon (to_scontext); + if (!scontext_match) + return true; + } + + /* compare files content */ + src_fd = open (src_name, O_RDONLY | O_BINARY); + if (src_fd < 0) + return true; + + dest_fd = openat (dest_dirfd, dest_relname, O_RDONLY | O_BINARY); + if (dest_fd < 0) + { + close (src_fd); + return true; + } + + content_match = have_same_content (src_fd, dest_fd); + + close (src_fd); + close (dest_fd); + return !content_match; +} + +static void +cp_option_init (struct cp_options *x) +{ + cp_options_default (x); + x->copy_as_regular = true; + x->reflink_mode = REFLINK_AUTO; + x->dereference = DEREF_ALWAYS; + x->unlink_dest_before_opening = true; + x->unlink_dest_after_failed_open = false; + x->hard_link = false; + x->interactive = I_UNSPECIFIED; + x->move_mode = false; + x->install_mode = true; + x->one_file_system = false; + x->preserve_ownership = false; + x->preserve_links = false; + x->preserve_mode = false; + x->preserve_timestamps = false; + x->explicit_no_preserve_mode = false; + x->reduce_diagnostics=false; + x->data_copy_required = true; + x->require_preserve = false; + x->require_preserve_xattr = false; + x->recursive = false; + x->sparse_mode = SPARSE_AUTO; + x->symbolic_link = false; + x->backup_type = no_backups; + + /* Create destination files initially writable so we can run strip on them. + Although GNU strip works fine on read-only files, some others + would fail. */ + x->set_mode = true; + x->mode = S_IRUSR | S_IWUSR; + x->stdin_tty = false; + + x->open_dangling_dest_symlink = false; + x->update = false; + x->require_preserve_context = false; /* Not used by install currently. */ + x->preserve_security_context = false; /* Whether to copy context from src. */ + x->set_security_context = nullptr; /* Whether to set sys default context. */ + x->preserve_xattr = false; + x->verbose = false; + x->dest_info = nullptr; + x->src_info = nullptr; +} + +static struct selabel_handle * +get_labeling_handle (void) +{ + static bool initialized; + static struct selabel_handle *hnd; + if (!initialized) + { + initialized = true; + hnd = selabel_open (SELABEL_CTX_FILE, nullptr, 0); + if (!hnd) + error (0, errno, _("warning: security labeling handle failed")); + } + return hnd; +} + +/* Modify file context to match the specified policy. + If an error occurs the file will remain with the default directory + context. Note this sets the context to that returned by selabel_lookup + and thus discards MLS levels and user identity of the FILE. */ +static void +setdefaultfilecon (char const *file) +{ + struct stat st; + char *scontext = nullptr; + + if (selinux_enabled != 1) + { + /* Indicate no context found. */ + return; + } + if (lstat (file, &st) != 0) + return; + + struct selabel_handle *hnd = get_labeling_handle (); + if (!hnd) + return; + if (selabel_lookup (hnd, &scontext, file, st.st_mode) != 0) + { + if (errno != ENOENT && ! ignorable_ctx_err (errno)) + error (0, errno, _("warning: %s: context lookup failed"), + quotef (file)); + return; + } + + if (lsetfilecon (file, scontext) < 0 && errno != ENOTSUP) + error (0, errno, + _("warning: %s: failed to change context to %s"), + quotef_n (0, file), quote_n (1, scontext)); + + freecon (scontext); +} + +/* Report that directory DIR was made, if OPTIONS requests this. */ +static void +announce_mkdir (char const *dir, void *options) +{ + struct cp_options const *x = options; + if (x->verbose) + prog_fprintf (stdout, _("creating directory %s"), quoteaf (dir)); +} + +/* Make ancestor directory DIR, whose last file name component is + COMPONENT, with options OPTIONS. Assume the working directory is + COMPONENT's parent. */ +static int +make_ancestor (char const *dir, char const *component, void *options) +{ + struct cp_options const *x = options; + if (x->set_security_context + && defaultcon (x->set_security_context, component, S_IFDIR) < 0 + && ! ignorable_ctx_err (errno)) + error (0, errno, _("failed to set default creation context for %s"), + quoteaf (dir)); + + int r = mkdir (component, DEFAULT_MODE); + if (r == 0) + announce_mkdir (dir, options); + return r; +} + +/* Process a command-line file name, for the -d option. */ +static int +process_dir (char *dir, struct savewd *wd, void *options) +{ + struct cp_options const *x = options; + + int ret = (make_dir_parents (dir, wd, make_ancestor, options, + dir_mode, announce_mkdir, + dir_mode_bits, owner_id, group_id, false) + ? EXIT_SUCCESS + : EXIT_FAILURE); + + /* FIXME: Due to the current structure of make_dir_parents() + we don't have the facility to call defaultcon() before the + final component of DIR is created. So for now, create the + final component with the context from previous component + and here we set the context for the final component. */ + if (ret == EXIT_SUCCESS && x->set_security_context) + { + if (! restorecon (x->set_security_context, last_component (dir), false) + && ! ignorable_ctx_err (errno)) + error (0, errno, _("failed to restore context for %s"), + quoteaf (dir)); + } + + return ret; +} + +/* Copy file FROM onto file TO aka TO_DIRFD+TO_RELNAME, creating TO if + necessary. Return true if successful. */ + +static bool +copy_file (char const *from, char const *to, + int to_dirfd, char const *to_relname, const struct cp_options *x) +{ + bool copy_into_self; + + if (copy_only_if_needed && !need_copy (from, to, to_dirfd, to_relname, x)) + return true; + + /* Allow installing from non-regular files like /dev/null. + Charles Karney reported that some Sun version of install allows that + and that sendmail's installation process relies on the behavior. + However, since !x->recursive, the call to "copy" will fail if FROM + is a directory. */ + + return copy (from, to, to_dirfd, to_relname, 0, x, ©_into_self, nullptr); +} + +/* Set the attributes of file or directory NAME aka DIRFD+RELNAME. + Return true if successful. */ + +static bool +change_attributes (char const *name, int dirfd, char const *relname) +{ + bool ok = false; + /* chown must precede chmod because on some systems, + chown clears the set[ug]id bits for non-superusers, + resulting in incorrect permissions. + On System V, users can give away files with chown and then not + be able to chmod them. So don't give files away. + + We don't normally ignore errors from chown because the idea of + the install command is that the file is supposed to end up with + precisely the attributes that the user specified (or defaulted). + If the file doesn't end up with the group they asked for, they'll + want to know. */ + + if (! (owner_id == (uid_t) -1 && group_id == (gid_t) -1) + && lchownat (dirfd, relname, owner_id, group_id) != 0) + error (0, errno, _("cannot change ownership of %s"), quoteaf (name)); + else if (chmodat (dirfd, relname, mode) != 0) + error (0, errno, _("cannot change permissions of %s"), quoteaf (name)); + else + ok = true; + + if (use_default_selinux_context) + setdefaultfilecon (name); + + return ok; +} + +/* Set the timestamps of file DEST aka DIRFD+RELNAME to match those of SRC_SB. + Return true if successful. */ + +static bool +change_timestamps (struct stat const *src_sb, char const *dest, + int dirfd, char const *relname) +{ + struct timespec timespec[2]; + timespec[0] = get_stat_atime (src_sb); + timespec[1] = get_stat_mtime (src_sb); + + if (utimensat (dirfd, relname, timespec, 0)) + { + error (0, errno, _("cannot set timestamps for %s"), quoteaf (dest)); + return false; + } + return true; +} + +/* Strip the symbol table from the file NAME. + We could dig the magic number out of the file first to + determine whether to strip it, but the header files and + magic numbers vary so much from system to system that making + it portable would be very difficult. Not worth the effort. */ + +static bool +strip (char const *name) +{ + int status; + bool ok = false; + pid_t pid = fork (); + + switch (pid) + { + case -1: + error (0, errno, _("fork system call failed")); + break; + case 0: /* Child. */ + { + char const *safe_name = name; + if (name && *name == '-') + safe_name = file_name_concat (".", name, nullptr); + execlp (strip_program, strip_program, safe_name, nullptr); + error (EXIT_FAILURE, errno, _("cannot run %s"), + quoteaf (strip_program)); + } + default: /* Parent. */ + if (waitpid (pid, &status, 0) < 0) + error (0, errno, _("waiting for strip")); + else if (! WIFEXITED (status) || WEXITSTATUS (status)) + error (0, 0, _("strip process terminated abnormally")); + else + ok = true; /* strip succeeded */ + break; + } + return ok; +} + +/* Initialize the user and group ownership of the files to install. */ + +static void +get_ids (void) +{ + struct passwd *pw; + struct group *gr; + + if (owner_name) + { + pw = getpwnam (owner_name); + if (pw == nullptr) + { + uintmax_t tmp; + if (xstrtoumax (owner_name, nullptr, 0, &tmp, "") != LONGINT_OK + || UID_T_MAX < tmp) + error (EXIT_FAILURE, 0, _("invalid user %s"), + quoteaf (owner_name)); + owner_id = tmp; + } + else + owner_id = pw->pw_uid; + endpwent (); + } + else + owner_id = (uid_t) -1; + + if (group_name) + { + gr = getgrnam (group_name); + if (gr == nullptr) + { + uintmax_t tmp; + if (xstrtoumax (group_name, nullptr, 0, &tmp, "") != LONGINT_OK + || GID_T_MAX < tmp) + error (EXIT_FAILURE, 0, _("invalid group %s"), + quoteaf (group_name)); + group_id = tmp; + } + else + group_id = gr->gr_gid; + endgrent (); + } + else + group_id = (gid_t) -1; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [-T] SOURCE DEST\n\ + or: %s [OPTION]... SOURCE... DIRECTORY\n\ + or: %s [OPTION]... -t DIRECTORY SOURCE...\n\ + or: %s [OPTION]... -d DIRECTORY...\n\ +"), + program_name, program_name, program_name, program_name); + fputs (_("\ +\n\ +This install program copies files (often just compiled) into destination\n\ +locations you choose. If you want to download and install a ready-to-use\n\ +package on a GNU/Linux system, you should instead be using a package manager\n\ +like yum(1) or apt-get(1).\n\ +\n\ +In the first three forms, copy SOURCE to DEST or multiple SOURCE(s) to\n\ +the existing DIRECTORY, while setting permission modes and owner/group.\n\ +In the 4th form, create all components of the given DIRECTORY(ies).\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + --backup[=CONTROL] make a backup of each existing destination file\n\ + -b like --backup but does not accept an argument\n\ + -c (ignored)\n\ + -C, --compare compare content of source and destination files, and\n\ + if no change to content, ownership, and permissions,\n\ + do not modify the destination at all\n\ + -d, --directory treat all arguments as directory names; create all\n\ + components of the specified directories\n\ +"), stdout); + fputs (_("\ + -D create all leading components of DEST except the last,\n\ + or all components of --target-directory,\n\ + then copy SOURCE to DEST\n\ +"), stdout); + fputs (_("\ + --debug explain how a file is copied. Implies -v\n\ +"), stdout); + fputs (_("\ + -g, --group=GROUP set group ownership, instead of process' current group\n\ + -m, --mode=MODE set permission mode (as in chmod), instead of rwxr-xr-x\n\ + -o, --owner=OWNER set ownership (super-user only)\n\ +"), stdout); + fputs (_("\ + -p, --preserve-timestamps apply access/modification times of SOURCE files\n\ + to corresponding destination files\n\ + -s, --strip strip symbol tables\n\ + --strip-program=PROGRAM program used to strip binaries\n\ + -S, --suffix=SUFFIX override the usual backup suffix\n\ + -t, --target-directory=DIRECTORY copy all SOURCE arguments into DIRECTORY\n\ + -T, --no-target-directory treat DEST as a normal file\n\ +"), stdout); + fputs (_("\ + -v, --verbose print the name of each created file or directory\n\ +"), stdout); + fputs (_("\ + --preserve-context preserve SELinux security context\n\ + -Z set SELinux security context of destination\n\ + file and each created directory to default type\n\ + --context[=CTX] like -Z, or if CTX is specified then set the\n\ + SELinux or SMACK security context to CTX\n\ +"), stdout); + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_backup_suffix_note (); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Copy file FROM onto file TO aka TO_DIRFD+TO_RELNAME and give TO the + appropriate attributes. X gives the command options. + Return true if successful. */ + +static bool +install_file_in_file (char const *from, char const *to, + int to_dirfd, char const *to_relname, + const struct cp_options *x) +{ + struct stat from_sb; + if (x->preserve_timestamps && stat (from, &from_sb) != 0) + { + error (0, errno, _("cannot stat %s"), quoteaf (from)); + return false; + } + if (! copy_file (from, to, to_dirfd, to_relname, x)) + return false; + if (strip_files) + if (! strip (to)) + { + if (unlinkat (to_dirfd, to_relname, 0) != 0) /* Cleanup. */ + error (EXIT_FAILURE, errno, _("cannot unlink %s"), quoteaf (to)); + return false; + } + if (x->preserve_timestamps && (strip_files || ! S_ISREG (from_sb.st_mode)) + && ! change_timestamps (&from_sb, to, to_dirfd, to_relname)) + return false; + return change_attributes (to, to_dirfd, to_relname); +} + +/* Create any missing parent directories of TO, + while maintaining the current Working Directory. + Return true if successful. */ + +static bool +mkancesdirs_safe_wd (char const *from, char *to, struct cp_options *x, + bool save_always) +{ + bool save_working_directory = + save_always + || ! (IS_ABSOLUTE_FILE_NAME (from) && IS_ABSOLUTE_FILE_NAME (to)); + int status = EXIT_SUCCESS; + + struct savewd wd; + savewd_init (&wd); + if (! save_working_directory) + savewd_finish (&wd); + + if (mkancesdirs (to, &wd, make_ancestor, x) == -1) + { + error (0, errno, _("cannot create directory %s"), quoteaf (to)); + status = EXIT_FAILURE; + } + + if (save_working_directory) + { + int restore_result = savewd_restore (&wd, status); + int restore_errno = errno; + savewd_finish (&wd); + if (EXIT_SUCCESS < restore_result) + return false; + if (restore_result < 0 && status == EXIT_SUCCESS) + { + error (0, restore_errno, _("cannot create directory %s"), + quoteaf (to)); + return false; + } + } + return status == EXIT_SUCCESS; +} + +/* Copy file FROM onto file TO, creating any missing parent directories of TO. + Return true if successful. */ + +static bool +install_file_in_file_parents (char const *from, char *to, + const struct cp_options *x) +{ + return (mkancesdirs_safe_wd (from, to, (struct cp_options *)x, false) + && install_file_in_file (from, to, AT_FDCWD, to, x)); +} + +/* Copy file FROM into directory TO_DIR, keeping its same name, + and give the copy the appropriate attributes. + Return true if successful. */ + +static bool +install_file_in_dir (char const *from, char const *to_dir, + const struct cp_options *x, bool mkdir_and_install, + int *target_dirfd) +{ + char const *from_base = last_component (from); + char *to_relname; + char *to = file_name_concat (to_dir, from_base, &to_relname); + bool ret = true; + + if (!target_dirfd_valid (*target_dirfd) + && (ret = mkdir_and_install) + && (ret = mkancesdirs_safe_wd (from, to, (struct cp_options *) x, true))) + { + int fd = open (to_dir, O_PATHSEARCH | O_DIRECTORY); + if (fd < 0) + { + error (0, errno, _("cannot open %s"), quoteaf (to)); + ret = false; + } + else + *target_dirfd = fd; + } + + if (ret) + { + int to_dirfd = *target_dirfd; + if (!target_dirfd_valid (to_dirfd)) + { + to_dirfd = AT_FDCWD; + to_relname = to; + } + ret = install_file_in_file (from, to, to_dirfd, to_relname, x); + } + + free (to); + return ret; +} + +int +main (int argc, char **argv) +{ + int optc; + int exit_status = EXIT_SUCCESS; + char const *specified_mode = nullptr; + bool make_backups = false; + char const *backup_suffix = nullptr; + char *version_control_string = nullptr; + bool mkdir_and_install = false; + struct cp_options x; + char const *target_directory = nullptr; + bool no_target_directory = false; + int n_files; + char **file; + bool strip_program_specified = false; + char const *scontext = nullptr; + /* set iff kernel has extra selinux system calls */ + selinux_enabled = (0 < is_selinux_enabled ()); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdin); + + cp_option_init (&x); + + owner_name = nullptr; + group_name = nullptr; + strip_files = false; + dir_arg = false; + umask (0); + + while ((optc = getopt_long (argc, argv, "bcCsDdg:m:o:pt:TvS:Z", long_options, + nullptr)) + != -1) + { + switch (optc) + { + case 'b': + make_backups = true; + if (optarg) + version_control_string = optarg; + break; + case 'c': + break; + case 'C': + copy_only_if_needed = true; + break; + case 's': + strip_files = true; +#ifdef SIGCHLD + /* System V fork+wait does not work if SIGCHLD is ignored. */ + signal (SIGCHLD, SIG_DFL); +#endif + break; + case DEBUG_OPTION: + x.debug = x.verbose = true; + break; + case STRIP_PROGRAM_OPTION: + strip_program = xstrdup (optarg); + strip_program_specified = true; + break; + case 'd': + dir_arg = true; + break; + case 'D': + mkdir_and_install = true; + break; + case 'v': + x.verbose = true; + break; + case 'g': + group_name = optarg; + break; + case 'm': + specified_mode = optarg; + break; + case 'o': + owner_name = optarg; + break; + case 'p': + x.preserve_timestamps = true; + break; + case 'S': + make_backups = true; + backup_suffix = optarg; + break; + case 't': + if (target_directory) + error (EXIT_FAILURE, 0, + _("multiple target directories specified")); + target_directory = optarg; + break; + case 'T': + no_target_directory = true; + break; + + case PRESERVE_CONTEXT_OPTION: + if (! selinux_enabled) + { + error (0, 0, _("WARNING: ignoring --preserve-context; " + "this kernel is not SELinux-enabled")); + break; + } + x.preserve_security_context = true; + use_default_selinux_context = false; + break; + case 'Z': + if (selinux_enabled) + { + /* Disable use of the install(1) specific setdefaultfilecon(). + Note setdefaultfilecon() is different from the newer and more + generic restorecon() in that the former sets the context of + the dest files to that returned by selabel_lookup directly, + thus discarding MLS level and user identity of the file. + TODO: consider removing setdefaultfilecon() in future. */ + use_default_selinux_context = false; + + if (optarg) + scontext = optarg; + else + x.set_security_context = get_labeling_handle (); + } + else if (optarg) + { + error (0, 0, + _("warning: ignoring --context; " + "it requires an SELinux-enabled kernel")); + } + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + /* Check for invalid combinations of arguments. */ + if (dir_arg && strip_files) + error (EXIT_FAILURE, 0, + _("the strip option may not be used when installing a directory")); + if (dir_arg && target_directory) + error (EXIT_FAILURE, 0, + _("target directory not allowed when installing a directory")); + + x.backup_type = (make_backups + ? xget_version (_("backup type"), + version_control_string) + : no_backups); + set_simple_backup_suffix (backup_suffix); + + if (x.preserve_security_context && (x.set_security_context || scontext)) + error (EXIT_FAILURE, 0, + _("cannot set target context and preserve it")); + + if (scontext && setfscreatecon (scontext) < 0) + error (EXIT_FAILURE, errno, + _("failed to set default file creation context to %s"), + quote (scontext)); + + n_files = argc - optind; + file = argv + optind; + + if (n_files <= ! (dir_arg || target_directory)) + { + if (n_files <= 0) + error (0, 0, _("missing file operand")); + else + error (0, 0, _("missing destination file operand after %s"), + quoteaf (file[0])); + usage (EXIT_FAILURE); + } + + struct stat sb; + int target_dirfd = AT_FDCWD; + if (no_target_directory) + { + if (target_directory) + error (EXIT_FAILURE, 0, + _("cannot combine --target-directory (-t) " + "and --no-target-directory (-T)")); + if (2 < n_files) + { + error (0, 0, _("extra operand %s"), quoteaf (file[2])); + usage (EXIT_FAILURE); + } + } + else if (target_directory) + { + target_dirfd = target_directory_operand (target_directory, &sb); + if (! (target_dirfd_valid (target_dirfd) + || (mkdir_and_install && errno == ENOENT))) + error (EXIT_FAILURE, errno, _("failed to access %s"), + quoteaf (target_directory)); + } + else if (!dir_arg) + { + char const *lastfile = file[n_files - 1]; + int fd = target_directory_operand (lastfile, &sb); + if (target_dirfd_valid (fd)) + { + target_dirfd = fd; + target_directory = lastfile; + n_files--; + } + else if (2 < n_files) + error (EXIT_FAILURE, errno, _("target %s"), quoteaf (lastfile)); + } + + if (specified_mode) + { + struct mode_change *change = mode_compile (specified_mode); + if (!change) + error (EXIT_FAILURE, 0, _("invalid mode %s"), quote (specified_mode)); + mode = mode_adjust (0, false, 0, change, nullptr); + dir_mode = mode_adjust (0, true, 0, change, &dir_mode_bits); + free (change); + } + + if (strip_program_specified && !strip_files) + error (0, 0, _("WARNING: ignoring --strip-program option as -s option was " + "not specified")); + + if (copy_only_if_needed && x.preserve_timestamps) + { + error (0, 0, _("options --compare (-C) and --preserve-timestamps are " + "mutually exclusive")); + usage (EXIT_FAILURE); + } + + if (copy_only_if_needed && strip_files) + { + error (0, 0, _("options --compare (-C) and --strip are mutually " + "exclusive")); + usage (EXIT_FAILURE); + } + + if (copy_only_if_needed && extra_mode (mode)) + error (0, 0, _("the --compare (-C) option is ignored when you" + " specify a mode with non-permission bits")); + + get_ids (); + + if (dir_arg) + exit_status = savewd_process_files (n_files, file, process_dir, &x); + else + { + /* FIXME: it's a little gross that this initialization is + required by copy.c::copy. */ + hash_init (); + + if (!target_directory) + { + if (! (mkdir_and_install + ? install_file_in_file_parents (file[0], file[1], &x) + : install_file_in_file (file[0], file[1], AT_FDCWD, + file[1], &x))) + exit_status = EXIT_FAILURE; + } + else + { + int i; + dest_info_init (&x); + for (i = 0; i < n_files; i++) + if (! install_file_in_dir (file[i], target_directory, &x, + i == 0 && mkdir_and_install, + &target_dirfd)) + exit_status = EXIT_FAILURE; + } + } + + main_exit (exit_status); +} diff --git a/src/ioblksize.h b/src/ioblksize.h new file mode 100644 index 0000000..59c1653 --- /dev/null +++ b/src/ioblksize.h @@ -0,0 +1,107 @@ +/* I/O block size definitions for coreutils + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Include this file _after_ system headers if possible. */ + +/* sys/stat.h and minmax.h will already have been included by system.h. */ +#include "count-leading-zeros.h" +#include "stat-size.h" + + +/* As of May 2014, 128KiB is determined to be the minimum + blksize to best minimize system call overhead. + This can be tested with this script: + + for i in $(seq 0 10); do + bs=$((1024*2**$i)) + printf "%7s=" $bs + timeout --foreground -sINT 2 \ + dd bs=$bs if=/dev/zero of=/dev/null 2>&1 \ + | sed -n 's/.* \([0-9.]* [GM]B\/s\)/\1/p' + done + + With the results shown for these systems: + system #1: 1.7GHz pentium-m with 400MHz DDR2 RAM, arch=i686 + system #2: 2.1GHz i3-2310M with 1333MHz DDR3 RAM, arch=x86_64 + system #3: 3.2GHz i7-970 with 1333MHz DDR3, arch=x86_64 + system #4: 2.20GHz Xeon E5-2660 with 1333MHz DDR3, arch=x86_64 + system #5: 2.30GHz i7-3615QM with 1600MHz DDR3, arch=x86_64 + system #6: 1.30GHz i5-4250U with 1-channel 1600MHz DDR3, arch=x86_64 + system #7: 3.55GHz IBM,8231-E2B with 1066MHz DDR3, POWER7 revision 2.1 + + per-system transfer rate (GB/s) + blksize #1 #2 #3 #4 #5 #6 #7 + ------------------------------------------------------------------------ + 1024 .73 1.7 2.6 .64 1.0 2.5 1.3 + 2048 1.3 3.0 4.4 1.2 2.0 4.4 2.5 + 4096 2.4 5.1 6.5 2.3 3.7 7.4 4.8 + 8192 3.5 7.3 8.5 4.0 6.0 10.4 9.2 + 16384 3.9 9.4 10.1 6.3 8.3 13.3 16.8 + 32768 5.2 9.9 11.1 8.1 10.7 13.2 28.0 + 65536 5.3 11.2 12.0 10.6 12.8 16.1 41.4 + 131072 5.5 11.8 12.3 12.1 14.0 16.7 54.8 + 262144 5.7 11.6 12.5 12.3 14.7 16.4 40.0 + 524288 5.7 11.4 12.5 12.1 14.7 15.5 34.5 + 1048576 5.8 11.4 12.6 12.2 14.9 15.7 36.5 + + + Note that this is to minimize system call overhead. + Other values may be appropriate to minimize file system + overhead. For example on my current GNU/Linux system + the readahead setting is 128KiB which was read using: + + file="." + device=$(df --output=source --local "$file" | tail -n1) + echo $(( $(blockdev --getra $device) * 512 )) + + However there isn't a portable way to get the above. + In the future we could use the above method if available + and default to io_blksize() if not. + */ +enum { IO_BUFSIZE = 128 * 1024 }; +static inline idx_t +io_blksize (struct stat sb) +{ + /* Treat impossible blocksizes as if they were IO_BUFSIZE. */ + idx_t blocksize = ST_BLKSIZE (sb) <= 0 ? IO_BUFSIZE : ST_BLKSIZE (sb); + + /* Use a blocksize of at least IO_BUFSIZE bytes, keeping it a + multiple of the original blocksize. */ + blocksize += (IO_BUFSIZE - 1) - (IO_BUFSIZE - 1) % blocksize; + + /* For regular files we can ignore the blocksize if we think we know better. + ZFS sometimes understates the blocksize, because it thinks + apps stupidly allocate a block that large even for small files. + This misinformation can cause coreutils to use wrong-sized blocks. + Work around some of the performance bug by substituting the next + power of two when the reported blocksize is not a power of two. */ + if (S_ISREG (sb.st_mode) + && blocksize & (blocksize - 1)) + { + int leading_zeros = count_leading_zeros_ll (blocksize); + if (IDX_MAX < ULLONG_MAX || leading_zeros) + { + unsigned long long power = 1ull << (ULLONG_WIDTH - leading_zeros); + if (power <= IDX_MAX) + blocksize = power; + } + } + + /* Don’t go above the largest power of two that fits in idx_t and size_t, + as that is asking for trouble. */ + return MIN (MIN (IDX_MAX, SIZE_MAX) / 2 + 1, + blocksize); +} diff --git a/src/iopoll.c b/src/iopoll.c new file mode 100644 index 0000000..e60e019 --- /dev/null +++ b/src/iopoll.c @@ -0,0 +1,239 @@ +/* iopoll.c -- broken pipe detection / non blocking output handling + Copyright (C) 2022 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Carl Edquist in collaboration with Arsen Arsenović. */ + +#include + +/* poll(2) is needed on AIX (where 'select' gives a readable + event immediately) and Solaris (where 'select' never gave + a readable event). Also use poll(2) on systems we know work + and/or are already using poll (linux). */ + +#if defined _AIX || defined __sun || defined __APPLE__ || \ + defined __linux__ || defined __ANDROID__ +# define IOPOLL_USES_POLL 1 + /* Check we've not enabled gnulib's poll module + as that will emulate poll() in a way not + currently compatible with our usage. */ +# if defined HAVE_POLL +# error "gnulib's poll() replacement is currently incompatible" +# endif +#endif + +#if IOPOLL_USES_POLL +# include +#else +# include +#endif + +#include "system.h" +#include "assure.h" +#include "iopoll.h" +#include "isapipe.h" + + +/* BROKEN_OUTPUT selects the mode of operation of this function. + If BROKEN_OUTPUT, wait for FDIN to become ready for reading + or FDOUT to become a broken pipe. + If !BROKEN_OUTPUT, wait for FDIN or FDOUT to become ready for writing. + If either of those are -1, then they're not checked. Set BLOCK to true + to wait for an event, otherwise return the status immediately. + Return 0 if not BLOCKing and there is no event on the requested descriptors. + Return 0 if FDIN can be read() without blocking, or IOPOLL_BROKEN_OUTPUT if + FDOUT becomes a broken pipe. If !BROKEN_OUTPUT return 0 if FDOUT writable. + Otherwise return IOPOLL_ERROR if there is a poll() or select() error. */ + +static int +iopoll_internal (int fdin, int fdout, bool block, bool broken_output) +{ + affirm (fdin != -1 || fdout != -1); + +#if IOPOLL_USES_POLL + struct pollfd pfds[2] = { /* POLLRDBAND needed for illumos, macOS. */ + { .fd = fdin, .events = POLLIN | POLLRDBAND, .revents = 0 }, + { .fd = fdout, .events = POLLRDBAND, .revents = 0 }, + }; + int check_out_events = POLLERR | POLLHUP | POLLNVAL; + int ret = 0; + + if (! broken_output) + { + pfds[0].events = pfds[1].events = POLLOUT; + check_out_events = POLLOUT; + } + + while (0 <= ret || errno == EINTR) + { + ret = poll (pfds, 2, block ? -1 : 0); + + if (ret < 0) + continue; + if (ret == 0 && ! block) + return 0; + affirm (0 < ret); + if (pfds[0].revents) /* input available or pipe closed indicating EOF; */ + return 0; /* should now be able to read() without blocking */ + if (pfds[1].revents & check_out_events) + return broken_output ? IOPOLL_BROKEN_OUTPUT : 0; + } + +#else /* fall back to select()-based implementation */ + + int nfds = (fdin > fdout ? fdin : fdout) + 1; + int ret = 0; + + if (FD_SETSIZE < nfds) + { + errno = EINVAL; + ret = -1; + } + + /* If fdout has an error condition (like a broken pipe) it will be seen + as ready for reading. Assumes fdout is not actually readable. */ + while (0 <= ret || errno == EINTR) + { + fd_set fds; + FD_ZERO (&fds); + if (0 <= fdin) + FD_SET (fdin, &fds); + if (0 <= fdout) + FD_SET (fdout, &fds); + + struct timeval delay = { .tv_sec = 0, .tv_usec = 0 }; + ret = select (nfds, + broken_output ? &fds : nullptr, + broken_output ? nullptr : &fds, + nullptr, block ? nullptr : &delay); + + if (ret < 0) + continue; + if (ret == 0 && ! block) + return 0; + affirm (0 < ret); + if (0 <= fdin && FD_ISSET (fdin, &fds)) /* input available or EOF; */ + return 0; /* should now be able to read() without blocking */ + if (0 <= fdout && FD_ISSET (fdout, &fds)) /* equiv to POLLERR */ + return broken_output ? IOPOLL_BROKEN_OUTPUT : 0; + } + +#endif + return IOPOLL_ERROR; +} + +extern int +iopoll (int fdin, int fdout, bool block) +{ + return iopoll_internal (fdin, fdout, block, true); +} + + + +/* Return true if fdin is relevant for iopoll(). + An fd is not relevant for iopoll() if it is always ready for reading, + which is the case for a regular file or block device. */ + +extern bool +iopoll_input_ok (int fdin) +{ + struct stat st; + bool always_ready = fstat (fdin, &st) == 0 + && (S_ISREG (st.st_mode) + || S_ISBLK (st.st_mode)); + return ! always_ready; +} + +/* Return true if fdout is suitable for iopoll(). + Namely, fdout refers to a pipe. */ + +extern bool +iopoll_output_ok (int fdout) +{ + return isapipe (fdout) > 0; +} + +#ifdef EWOULDBLOCK +# define IS_EAGAIN(errcode) ((errcode) == EAGAIN || (errcode) == EWOULDBLOCK) +#else +# define IS_EAGAIN(errcode) ((errcode) == EAGAIN) +#endif + +/* Inspect the errno of the previous syscall. + On EAGAIN, wait for the underlying file descriptor to become writable. + Return true, if EAGAIN has been successfully handled. */ + +static bool +fwait_for_nonblocking_write (FILE *f) +{ + if (! IS_EAGAIN (errno)) + /* non-recoverable write error */ + return false; + + int fd = fileno (f); + if (fd == -1) + goto fail; + + /* wait for the file descriptor to become writable */ + if (iopoll_internal (-1, fd, true, false) != 0) + goto fail; + + /* successfully waited for the descriptor to become writable */ + clearerr (f); + return true; + +fail: + errno = EAGAIN; + return false; +} + + +/* wrapper for fclose() that also waits for F if non blocking. */ + +extern bool +fclose_wait (FILE *f) +{ + for (;;) + { + if (fflush (f) == 0) + break; + + if (! fwait_for_nonblocking_write (f)) + break; + } + + return fclose (f) == 0; +} + + +/* wrapper for fwrite() that also waits for F if non blocking. */ + +extern bool +fwrite_wait (char const *buf, ssize_t size, FILE *f) +{ + for (;;) + { + const size_t written = fwrite (buf, 1, size, f); + size -= written; + affirm (size >= 0); + if (size <= 0) /* everything written */ + return true; + + if (! fwait_for_nonblocking_write (f)) + return false; + + buf += written; + } +} diff --git a/src/iopoll.h b/src/iopoll.h new file mode 100644 index 0000000..0177a4d --- /dev/null +++ b/src/iopoll.h @@ -0,0 +1,9 @@ +#define IOPOLL_BROKEN_OUTPUT -2 +#define IOPOLL_ERROR -3 + +int iopoll (int fdin, int fdout, bool block); +bool iopoll_input_ok (int fdin); +bool iopoll_output_ok (int fdout); + +bool fclose_wait (FILE *f); +bool fwrite_wait (char const *buf, ssize_t size, FILE *f); diff --git a/src/join.c b/src/join.c new file mode 100644 index 0000000..0bcfa75 --- /dev/null +++ b/src/join.c @@ -0,0 +1,1186 @@ +/* join - join lines of two files on a common field + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Mike Haertel, mike@gnu.ai.mit.edu. */ + +#include + +#include +#include + +#include "system.h" +#include "assure.h" +#include "fadvise.h" +#include "hard-locale.h" +#include "linebuffer.h" +#include "memcasecmp.h" +#include "quote.h" +#include "stdio--.h" +#include "xmemcoll.h" +#include "xstrtol.h" +#include "argmatch.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "join" + +#define AUTHORS proper_name ("Mike Haertel") + +#define join system_join + +#define SWAPLINES(a, b) do { \ + struct line *tmp = a; \ + a = b; \ + b = tmp; \ +} while (0); + +/* An element of the list identifying which fields to print for each + output line. */ +struct outlist + { + /* File number: 0, 1, or 2. 0 means use the join field. + 1 means use the first file argument, 2 the second. */ + int file; + + /* Field index (zero-based), specified only when FILE is 1 or 2. */ + idx_t field; + + struct outlist *next; + }; + +/* A field of a line. */ +struct field + { + char *beg; /* First character in field. */ + idx_t len; /* The length of the field. */ + }; + +/* A line read from an input file. */ +struct line + { + struct linebuffer buf; /* The line itself. */ + idx_t nfields; /* Number of elements in 'fields'. */ + idx_t nfields_allocated; /* Number of elements allocated for 'fields'. */ + struct field *fields; + }; + +/* One or more consecutive lines read from a file that all have the + same join field value. */ +struct seq + { + idx_t count; /* Elements used in 'lines'. */ + idx_t alloc; /* Elements allocated in 'lines'. */ + struct line **lines; + }; + +/* The previous line read from each file. */ +static struct line *prevline[2] = {nullptr, nullptr}; + +/* The number of lines read from each file. */ +static uintmax_t line_no[2] = {0, 0}; + +/* The input file names. */ +static char *g_names[2]; + +/* This provides an extra line buffer for each file. We need these if we + try to read two consecutive lines into the same buffer, since we don't + want to overwrite the previous buffer before we check order. */ +static struct line *spareline[2] = {nullptr, nullptr}; + +/* True if the LC_COLLATE locale is hard. */ +static bool hard_LC_COLLATE; + +/* If nonzero, print unpairable lines in file 1 or 2. */ +static bool print_unpairables_1, print_unpairables_2; + +/* If nonzero, print pairable lines. */ +static bool print_pairables; + +/* If nonzero, we have seen at least one unpairable line. */ +static bool seen_unpairable; + +/* If nonzero, we have warned about disorder in that file. */ +static bool issued_disorder_warning[2]; + +/* Empty output field filler. */ +static char const *empty_filler; + +/* Whether to ensure the same number of fields are output from each line. */ +static bool autoformat; +/* The number of fields to output for each line. + Only significant when autoformat is true. */ +static idx_t autocount_1; +static idx_t autocount_2; + +/* Field to join on; -1 means they haven't been determined yet. */ +static ptrdiff_t join_field_1 = -1; +static ptrdiff_t join_field_2 = -1; + +/* List of fields to print. */ +static struct outlist outlist_head; + +/* Last element in 'outlist', where a new element can be added. */ +static struct outlist *outlist_end = &outlist_head; + +/* Tab character separating fields. If negative, fields are separated + by any nonempty string of blanks, otherwise by exactly one + tab character whose value (when cast to unsigned char) equals TAB. */ +static int tab = -1; + +/* If nonzero, check that the input is correctly ordered. */ +static enum + { + CHECK_ORDER_DEFAULT, + CHECK_ORDER_ENABLED, + CHECK_ORDER_DISABLED + } check_input_order; + +enum +{ + CHECK_ORDER_OPTION = CHAR_MAX + 1, + NOCHECK_ORDER_OPTION, + HEADER_LINE_OPTION +}; + + +static struct option const longopts[] = +{ + {"ignore-case", no_argument, nullptr, 'i'}, + {"check-order", no_argument, nullptr, CHECK_ORDER_OPTION}, + {"nocheck-order", no_argument, nullptr, NOCHECK_ORDER_OPTION}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {"header", no_argument, nullptr, HEADER_LINE_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Used to print non-joining lines */ +static struct line uni_blank; + +/* If nonzero, ignore case when comparing join fields. */ +static bool ignore_case; + +/* If nonzero, treat the first line of each file as column headers -- + join them without checking for ordering */ +static bool join_header_lines; + +/* The character marking end of line. Default to \n. */ +static char eolchar = '\n'; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... FILE1 FILE2\n\ +"), + program_name); + fputs (_("\ +For each pair of input lines with identical join fields, write a line to\n\ +standard output. The default join field is the first, delimited by blanks.\ +\n\ +"), stdout); + fputs (_("\ +\n\ +When FILE1 or FILE2 (not both) is -, read standard input.\n\ +"), stdout); + fputs (_("\ +\n\ + -a FILENUM also print unpairable lines from file FILENUM, where\n\ + FILENUM is 1 or 2, corresponding to FILE1 or FILE2\n\ +"), stdout); + fputs (_("\ + -e STRING replace missing (empty) input fields with STRING;\n\ + I.e., missing fields specified with '-12jo' options\ +\n\ +"), stdout); + fputs (_("\ + -i, --ignore-case ignore differences in case when comparing fields\n\ + -j FIELD equivalent to '-1 FIELD -2 FIELD'\n\ + -o FORMAT obey FORMAT while constructing output line\n\ + -t CHAR use CHAR as input and output field separator\n\ +"), stdout); + fputs (_("\ + -v FILENUM like -a FILENUM, but suppress joined output lines\n\ + -1 FIELD join on this FIELD of file 1\n\ + -2 FIELD join on this FIELD of file 2\n\ + --check-order check that the input is correctly sorted, even\n\ + if all input lines are pairable\n\ + --nocheck-order do not check that the input is correctly sorted\n\ + --header treat the first line in each file as field headers,\n\ + print them without trying to pair them\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Unless -t CHAR is given, leading blanks separate fields and are ignored,\n\ +else fields are separated by CHAR. Any FIELD is a field number counted\n\ +from 1. FORMAT is one or more comma or blank separated specifications,\n\ +each being 'FILENUM.FIELD' or '0'. Default FORMAT outputs the join field,\n\ +the remaining fields from FILE1, the remaining fields from FILE2, all\n\ +separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\ +line of each file determines the number of fields output for each line.\n\ +\n\ +Important: FILE1 and FILE2 must be sorted on the join fields.\n\ +E.g., use \"sort -k 1b,1\" if 'join' has no options,\n\ +or use \"join -t ''\" if 'sort' has no options.\n\ +Note, comparisons honor the rules specified by 'LC_COLLATE'.\n\ +If the input is not sorted and some lines cannot be joined, a\n\ +warning message will be given.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Record a field in LINE, with location FIELD and size LEN. */ + +static void +extract_field (struct line *line, char *field, idx_t len) +{ + if (line->nfields >= line->nfields_allocated) + line->fields = xpalloc (line->fields, &line->nfields_allocated, 1, + -1, sizeof *line->fields); + line->fields[line->nfields].beg = field; + line->fields[line->nfields].len = len; + ++(line->nfields); +} + +/* Fill in the 'fields' structure in LINE. */ + +static void +xfields (struct line *line) +{ + char *ptr = line->buf.buffer; + char const *lim = ptr + line->buf.length - 1; + + if (ptr == lim) + return; + + if (0 <= tab && tab != '\n') + { + char *sep; + for (; (sep = memchr (ptr, tab, lim - ptr)) != nullptr; ptr = sep + 1) + extract_field (line, ptr, sep - ptr); + } + else if (tab < 0) + { + /* Skip leading blanks before the first field. */ + while (field_sep (*ptr)) + if (++ptr == lim) + return; + + do + { + char *sep; + for (sep = ptr + 1; sep != lim && ! field_sep (*sep); sep++) + continue; + extract_field (line, ptr, sep - ptr); + if (sep == lim) + return; + for (ptr = sep + 1; ptr != lim && field_sep (*ptr); ptr++) + continue; + } + while (ptr != lim); + } + + extract_field (line, ptr, lim - ptr); +} + +static void +freeline (struct line *line) +{ + if (line == nullptr) + return; + free (line->fields); + line->fields = nullptr; + free (line->buf.buffer); + line->buf.buffer = nullptr; +} + +/* Return <0 if the join field in LINE1 compares less than the one in LINE2; + >0 if it compares greater; 0 if it compares equal. + Report an error and exit if the comparison fails. + Use join fields JF_1 and JF_2 respectively. */ + +static int +keycmp (struct line const *line1, struct line const *line2, + idx_t jf_1, idx_t jf_2) +{ + /* Start of field to compare in each file. */ + char *beg1; + char *beg2; + + idx_t len1; + idx_t len2; /* Length of fields to compare. */ + int diff; + + if (jf_1 < line1->nfields) + { + beg1 = line1->fields[jf_1].beg; + len1 = line1->fields[jf_1].len; + } + else + { + beg1 = nullptr; + len1 = 0; + } + + if (jf_2 < line2->nfields) + { + beg2 = line2->fields[jf_2].beg; + len2 = line2->fields[jf_2].len; + } + else + { + beg2 = nullptr; + len2 = 0; + } + + if (len1 == 0) + return len2 == 0 ? 0 : -1; + if (len2 == 0) + return 1; + + if (ignore_case) + { + /* FIXME: ignore_case does not work with NLS (in particular, + with multibyte chars). */ + diff = memcasecmp (beg1, beg2, MIN (len1, len2)); + } + else + { + if (hard_LC_COLLATE) + return xmemcoll (beg1, len1, beg2, len2); + diff = memcmp (beg1, beg2, MIN (len1, len2)); + } + + if (diff) + return diff; + return (len1 > len2) - (len1 < len2); +} + +/* Check that successive input lines PREV and CURRENT from input file + WHATFILE are presented in order, unless the user may be relying on + the GNU extension that input lines may be out of order if no input + lines are unpairable. + + If the user specified --nocheck-order, the check is not made. + If the user specified --check-order, the problem is fatal. + Otherwise (the default), the message is simply a warning. + + A message is printed at most once per input file. */ + +static void +check_order (const struct line *prev, + const struct line *current, + int whatfile) +{ + if (check_input_order != CHECK_ORDER_DISABLED + && ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) + { + if (!issued_disorder_warning[whatfile - 1]) + { + idx_t join_field = whatfile == 1 ? join_field_1 : join_field_2; + if (keycmp (prev, current, join_field, join_field) > 0) + { + /* Exclude any trailing newline. */ + idx_t len = current->buf.length; + if (0 < len && current->buf.buffer[len - 1] == '\n') + --len; + + /* If the offending line is longer than INT_MAX, output + only the first INT_MAX bytes in this diagnostic. */ + len = MIN (INT_MAX, len); + + error ((check_input_order == CHECK_ORDER_ENABLED + ? EXIT_FAILURE : 0), + 0, _("%s:%"PRIuMAX": is not sorted: %.*s"), + g_names[whatfile - 1], line_no[whatfile - 1], + (int) len, current->buf.buffer); + + /* If we get to here, the message was merely a warning. + Arrange to issue it only once per file. */ + issued_disorder_warning[whatfile - 1] = true; + } + } + } +} + +static inline void +reset_line (struct line *line) +{ + line->nfields = 0; +} + +static struct line * +init_linep (struct line **linep) +{ + struct line *line = xzalloc (sizeof *line); + *linep = line; + return line; +} + +/* Read a line from FP into LINE and split it into fields. + Return true if successful. */ + +static bool +get_line (FILE *fp, struct line **linep, int which) +{ + struct line *line = *linep; + + if (line == prevline[which - 1]) + { + SWAPLINES (line, spareline[which - 1]); + *linep = line; + } + + if (line) + reset_line (line); + else + line = init_linep (linep); + + if (! readlinebuffer_delim (&line->buf, fp, eolchar)) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("read error")); + freeline (line); + return false; + } + ++line_no[which - 1]; + + xfields (line); + + if (prevline[which - 1]) + check_order (prevline[which - 1], line, which); + + prevline[which - 1] = line; + return true; +} + +static void +free_spareline (void) +{ + for (idx_t i = 0; i < ARRAY_CARDINALITY (spareline); i++) + { + if (spareline[i]) + { + freeline (spareline[i]); + free (spareline[i]); + } + } +} + +static void +initseq (struct seq *seq) +{ + seq->count = 0; + seq->alloc = 0; + seq->lines = nullptr; +} + +/* Read a line from FP and add it to SEQ. Return true if successful. */ + +static bool +getseq (FILE *fp, struct seq *seq, int whichfile) +{ + if (seq->count == seq->alloc) + { + seq->lines = xpalloc (seq->lines, &seq->alloc, 1, -1, sizeof *seq->lines); + for (idx_t i = seq->count; i < seq->alloc; i++) + seq->lines[i] = nullptr; + } + + if (get_line (fp, &seq->lines[seq->count], whichfile)) + { + ++seq->count; + return true; + } + return false; +} + +/* Read a line from FP and add it to SEQ, as the first item if FIRST is + true, else as the next. */ +static bool +advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile) +{ + if (first) + seq->count = 0; + + return getseq (fp, seq, whichfile); +} + +static void +delseq (struct seq *seq) +{ + for (idx_t i = 0; i < seq->alloc; i++) + { + freeline (seq->lines[i]); + free (seq->lines[i]); + } + free (seq->lines); +} + + +/* Print field N of LINE if it exists and is nonempty, otherwise + 'empty_filler' if it is nonempty. */ + +static void +prfield (idx_t n, struct line const *line) +{ + if (n < line->nfields) + { + idx_t len = line->fields[n].len; + if (len) + fwrite (line->fields[n].beg, 1, len, stdout); + else if (empty_filler) + fputs (empty_filler, stdout); + } + else if (empty_filler) + fputs (empty_filler, stdout); +} + +/* Output all the fields in line, other than the join field. */ + +static void +prfields (struct line const *line, idx_t join_field, idx_t autocount) +{ + idx_t i; + idx_t nfields = autoformat ? autocount : line->nfields; + char output_separator = tab < 0 ? ' ' : tab; + + for (i = 0; i < join_field && i < nfields; ++i) + { + putchar (output_separator); + prfield (i, line); + } + for (i = join_field + 1; i < nfields; ++i) + { + putchar (output_separator); + prfield (i, line); + } +} + +/* Print the join of LINE1 and LINE2. */ + +static void +prjoin (struct line const *line1, struct line const *line2) +{ + const struct outlist *outlist; + char output_separator = tab < 0 ? ' ' : tab; + idx_t field; + struct line const *line; + + outlist = outlist_head.next; + if (outlist) + { + const struct outlist *o; + + o = outlist; + while (true) + { + if (o->file == 0) + { + if (line1 == &uni_blank) + { + line = line2; + field = join_field_2; + } + else + { + line = line1; + field = join_field_1; + } + } + else + { + line = (o->file == 1 ? line1 : line2); + field = o->field; + } + prfield (field, line); + o = o->next; + if (o == nullptr) + break; + putchar (output_separator); + } + putchar (eolchar); + } + else + { + if (line1 == &uni_blank) + { + line = line2; + field = join_field_2; + } + else + { + line = line1; + field = join_field_1; + } + + /* Output the join field. */ + prfield (field, line); + + /* Output other fields. */ + prfields (line1, join_field_1, autocount_1); + prfields (line2, join_field_2, autocount_2); + + putchar (eolchar); + } + + if (ferror (stdout)) + write_error (); +} + +/* Print the join of the files in FP1 and FP2. */ + +static void +join (FILE *fp1, FILE *fp2) +{ + struct seq seq1, seq2; + int diff; + bool eof1, eof2; + + fadvise (fp1, FADVISE_SEQUENTIAL); + fadvise (fp2, FADVISE_SEQUENTIAL); + + /* Read the first line of each file. */ + initseq (&seq1); + getseq (fp1, &seq1, 1); + initseq (&seq2); + getseq (fp2, &seq2, 2); + + if (autoformat) + { + autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0; + autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0; + } + + if (join_header_lines && (seq1.count || seq2.count)) + { + struct line const *hline1 = seq1.count ? seq1.lines[0] : &uni_blank; + struct line const *hline2 = seq2.count ? seq2.lines[0] : &uni_blank; + prjoin (hline1, hline2); + prevline[0] = nullptr; + prevline[1] = nullptr; + if (seq1.count) + advance_seq (fp1, &seq1, true, 1); + if (seq2.count) + advance_seq (fp2, &seq2, true, 2); + } + + while (seq1.count && seq2.count) + { + diff = keycmp (seq1.lines[0], seq2.lines[0], + join_field_1, join_field_2); + if (diff < 0) + { + if (print_unpairables_1) + prjoin (seq1.lines[0], &uni_blank); + advance_seq (fp1, &seq1, true, 1); + seen_unpairable = true; + continue; + } + if (diff > 0) + { + if (print_unpairables_2) + prjoin (&uni_blank, seq2.lines[0]); + advance_seq (fp2, &seq2, true, 2); + seen_unpairable = true; + continue; + } + + /* Keep reading lines from file1 as long as they continue to + match the current line from file2. */ + eof1 = false; + do + if (!advance_seq (fp1, &seq1, false, 1)) + { + eof1 = true; + ++seq1.count; + break; + } + while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0], + join_field_1, join_field_2)); + + /* Keep reading lines from file2 as long as they continue to + match the current line from file1. */ + eof2 = false; + do + if (!advance_seq (fp2, &seq2, false, 2)) + { + eof2 = true; + ++seq2.count; + break; + } + while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1], + join_field_1, join_field_2)); + + if (print_pairables) + { + for (idx_t i = 0; i < seq1.count - 1; ++i) + { + idx_t j; + for (j = 0; j < seq2.count - 1; ++j) + prjoin (seq1.lines[i], seq2.lines[j]); + } + } + + if (!eof1) + { + SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]); + seq1.count = 1; + } + else + seq1.count = 0; + + if (!eof2) + { + SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]); + seq2.count = 1; + } + else + seq2.count = 0; + } + + /* If the user did not specify --nocheck-order, then we read the + tail ends of both inputs to verify that they are in order. We + skip the rest of the tail once we have issued a warning for that + file, unless we actually need to print the unpairable lines. */ + struct line *line = nullptr; + bool checktail = false; + + if (check_input_order != CHECK_ORDER_DISABLED + && !(issued_disorder_warning[0] && issued_disorder_warning[1])) + checktail = true; + + if ((print_unpairables_1 || checktail) && seq1.count) + { + if (print_unpairables_1) + prjoin (seq1.lines[0], &uni_blank); + if (seq2.count) + seen_unpairable = true; + while (get_line (fp1, &line, 1)) + { + if (print_unpairables_1) + prjoin (line, &uni_blank); + if (issued_disorder_warning[0] && !print_unpairables_1) + break; + } + } + + if ((print_unpairables_2 || checktail) && seq2.count) + { + if (print_unpairables_2) + prjoin (&uni_blank, seq2.lines[0]); + if (seq1.count) + seen_unpairable = true; + while (get_line (fp2, &line, 2)) + { + if (print_unpairables_2) + prjoin (&uni_blank, line); + if (issued_disorder_warning[1] && !print_unpairables_2) + break; + } + } + + freeline (line); + free (line); + + delseq (&seq1); + delseq (&seq2); +} + +/* Add a field spec for field FIELD of file FILE to 'outlist'. */ + +static void +add_field (int file, idx_t field) +{ + struct outlist *o; + + affirm (file == 0 || file == 1 || file == 2); + affirm (file != 0 || field == 0); + + o = xmalloc (sizeof *o); + o->file = file; + o->field = field; + o->next = nullptr; + + /* Add to the end of the list so the fields are in the right order. */ + outlist_end->next = o; + outlist_end = o; +} + +/* Convert a string of decimal digits, STR (the 1-based join field number), + to an integral value. Upon successful conversion, return one less + (the zero-based field number). Silently convert too-large values + to PTRDIFF_MAX. Otherwise, if a value cannot be converted, give a + diagnostic and exit. */ + +static idx_t +string_to_join_field (char const *str) +{ + intmax_t val; + + strtol_error s_err = xstrtoimax (str, nullptr, 10, &val, ""); + if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && PTRDIFF_MAX < val)) + val = PTRDIFF_MAX; + else if (s_err != LONGINT_OK || val <= 0) + error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str)); + + return val - 1; +} + +/* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX + pair. In S, the field index string is 1-based; *FIELD_INDEX is zero-based. + If S is valid, return true. Otherwise, give a diagnostic and exit. */ + +static void +decode_field_spec (char const *s, int *file_index, idx_t *field_index) +{ + /* The first character must be 0, 1, or 2. */ + switch (s[0]) + { + case '0': + if (s[1]) + { + /* '0' must be all alone -- no '.FIELD'. */ + error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); + } + *file_index = 0; + *field_index = 0; + break; + + case '1': + case '2': + if (s[1] != '.') + error (EXIT_FAILURE, 0, _("invalid field specifier: %s"), quote (s)); + *file_index = s[0] - '0'; + *field_index = string_to_join_field (s + 2); + break; + + default: + error (EXIT_FAILURE, 0, + _("invalid file number in field spec: %s"), quote (s)); + } +} + +/* Add the comma or blank separated field spec(s) in STR to 'outlist'. */ + +static void +add_field_list (char *str) +{ + char *p = str; + + do + { + int file_index; + idx_t field_index; + char const *spec_item = p; + + p = strpbrk (p, ", \t"); + if (p) + *p++ = '\0'; + decode_field_spec (spec_item, &file_index, &field_index); + add_field (file_index, field_index); + } + while (p); +} + +/* Set the join field *VAR to VAL, but report an error if *VAR is set + more than once to incompatible values. */ + +static void +set_join_field (ptrdiff_t *var, idx_t val) +{ + if (0 <= *var && *var != val) + error (EXIT_FAILURE, 0, + _("incompatible join fields %td, %td"), *var, val); + *var = val; +} + +/* Status of command-line arguments. */ + +enum operand_status + { + /* This argument must be an operand, i.e., one of the files to be + joined. */ + MUST_BE_OPERAND, + + /* This might be the argument of the preceding -j1 or -j2 option, + or it might be an operand. */ + MIGHT_BE_J1_ARG, + MIGHT_BE_J2_ARG, + + /* This might be the argument of the preceding -o option, or it might be + an operand. */ + MIGHT_BE_O_ARG + }; + +/* Add NAME to the array of input file NAMES with operand statuses + OPERAND_STATUS; currently there are NFILES names in the list. */ + +static void +add_file_name (char *name, char *names[2], + int operand_status[2], int joption_count[2], int *nfiles, + int *prev_optc_status, int *optc_status) +{ + int n = *nfiles; + + if (n == 2) + { + bool op0 = (operand_status[0] == MUST_BE_OPERAND); + char *arg = names[op0]; + switch (operand_status[op0]) + { + case MUST_BE_OPERAND: + error (0, 0, _("extra operand %s"), quoteaf (name)); + usage (EXIT_FAILURE); + + case MIGHT_BE_J1_ARG: + joption_count[0]--; + set_join_field (&join_field_1, string_to_join_field (arg)); + break; + + case MIGHT_BE_J2_ARG: + joption_count[1]--; + set_join_field (&join_field_2, string_to_join_field (arg)); + break; + + case MIGHT_BE_O_ARG: + add_field_list (arg); + break; + } + if (!op0) + { + operand_status[0] = operand_status[1]; + names[0] = names[1]; + } + n = 1; + } + + operand_status[n] = *prev_optc_status; + names[n] = name; + *nfiles = n + 1; + if (*prev_optc_status == MIGHT_BE_O_ARG) + *optc_status = MIGHT_BE_O_ARG; +} + +int +main (int argc, char **argv) +{ + int optc_status; + int prev_optc_status = MUST_BE_OPERAND; + int operand_status[2]; + int joption_count[2] = { 0, 0 }; + FILE *fp1, *fp2; + int optc; + int nfiles = 0; + int i; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + hard_LC_COLLATE = hard_locale (LC_COLLATE); + + atexit (close_stdout); + atexit (free_spareline); + + print_pairables = true; + seen_unpairable = false; + issued_disorder_warning[0] = issued_disorder_warning[1] = false; + check_input_order = CHECK_ORDER_DEFAULT; + + while ((optc = getopt_long (argc, argv, "-a:e:i1:2:j:o:t:v:z", + longopts, nullptr)) + != -1) + { + optc_status = MUST_BE_OPERAND; + + switch (optc) + { + case 'v': + print_pairables = false; + FALLTHROUGH; + + case 'a': + { + long int val; + if (xstrtol (optarg, nullptr, 10, &val, "") != LONGINT_OK + || (val != 1 && val != 2)) + error (EXIT_FAILURE, 0, + _("invalid field number: %s"), quote (optarg)); + if (val == 1) + print_unpairables_1 = true; + else + print_unpairables_2 = true; + } + break; + + case 'e': + if (empty_filler && ! STREQ (empty_filler, optarg)) + error (EXIT_FAILURE, 0, + _("conflicting empty-field replacement strings")); + empty_filler = optarg; + break; + + case 'i': + ignore_case = true; + break; + + case '1': + set_join_field (&join_field_1, string_to_join_field (optarg)); + break; + + case '2': + set_join_field (&join_field_2, string_to_join_field (optarg)); + break; + + case 'j': + if ((optarg[0] == '1' || optarg[0] == '2') && !optarg[1] + && optarg == argv[optind - 1] + 2) + { + /* The argument was either "-j1" or "-j2". */ + bool is_j2 = (optarg[0] == '2'); + joption_count[is_j2]++; + optc_status = MIGHT_BE_J1_ARG + is_j2; + } + else + { + set_join_field (&join_field_1, string_to_join_field (optarg)); + set_join_field (&join_field_2, join_field_1); + } + break; + + case 'o': + if (STREQ (optarg, "auto")) + autoformat = true; + else + { + add_field_list (optarg); + optc_status = MIGHT_BE_O_ARG; + } + break; + + case 't': + { + unsigned char newtab = optarg[0]; + if (! newtab) + newtab = '\n'; /* '' => process the whole line. */ + else if (optarg[1]) + { + if (STREQ (optarg, "\\0")) + newtab = '\0'; + else + error (EXIT_FAILURE, 0, _("multi-character tab %s"), + quote (optarg)); + } + if (0 <= tab && tab != newtab) + error (EXIT_FAILURE, 0, _("incompatible tabs")); + tab = newtab; + } + break; + + case 'z': + eolchar = 0; + break; + + case NOCHECK_ORDER_OPTION: + check_input_order = CHECK_ORDER_DISABLED; + break; + + case CHECK_ORDER_OPTION: + check_input_order = CHECK_ORDER_ENABLED; + break; + + case 1: /* Non-option argument. */ + add_file_name (optarg, g_names, operand_status, joption_count, + &nfiles, &prev_optc_status, &optc_status); + break; + + case HEADER_LINE_OPTION: + join_header_lines = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + + prev_optc_status = optc_status; + } + + /* Process any operands after "--". */ + prev_optc_status = MUST_BE_OPERAND; + while (optind < argc) + add_file_name (argv[optind++], g_names, operand_status, joption_count, + &nfiles, &prev_optc_status, &optc_status); + + if (nfiles != 2) + { + if (nfiles == 0) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + usage (EXIT_FAILURE); + } + + /* If "-j1" was specified and it turns out not to have had an argument, + treat it as "-j 1". Likewise for -j2. */ + for (i = 0; i < 2; i++) + if (joption_count[i] != 0) + { + set_join_field (&join_field_1, i); + set_join_field (&join_field_2, i); + } + + if (join_field_1 < 0) + join_field_1 = 0; + if (join_field_2 < 0) + join_field_2 = 0; + + fp1 = STREQ (g_names[0], "-") ? stdin : fopen (g_names[0], "r"); + if (!fp1) + error (EXIT_FAILURE, errno, "%s", quotef (g_names[0])); + fp2 = STREQ (g_names[1], "-") ? stdin : fopen (g_names[1], "r"); + if (!fp2) + error (EXIT_FAILURE, errno, "%s", quotef (g_names[1])); + if (fp1 == fp2) + error (EXIT_FAILURE, errno, _("both files cannot be standard input")); + join (fp1, fp2); + + if (fclose (fp1) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (g_names[0])); + if (fclose (fp2) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (g_names[1])); + + if (issued_disorder_warning[0] || issued_disorder_warning[1]) + error (EXIT_FAILURE, 0, _("input is not in sorted order")); + else + return EXIT_SUCCESS; +} diff --git a/src/kill.c b/src/kill.c new file mode 100644 index 0000000..47aaa16 --- /dev/null +++ b/src/kill.c @@ -0,0 +1,315 @@ +/* kill -- send a signal to a process + Copyright (C) 2002-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Eggert. */ + +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "sig2str.h" +#include "operand2sig.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "kill" + +#define AUTHORS proper_name ("Paul Eggert") + +#if ! (HAVE_DECL_STRSIGNAL || defined strsignal) +# if ! (HAVE_DECL_SYS_SIGLIST || defined sys_siglist) +# if HAVE_DECL__SYS_SIGLIST || defined _sys_siglist +# define sys_siglist _sys_siglist +# elif HAVE_DECL___SYS_SIGLIST || defined __sys_siglist +# define sys_siglist __sys_siglist +# endif +# endif +# if HAVE_DECL_SYS_SIGLIST || defined sys_siglist +# define strsignal(signum) (0 <= (signum) && (signum) <= SIGNUM_BOUND \ + ? sys_siglist[signum] \ + : 0) +# endif +# ifndef strsignal +# define strsignal(signum) 0 +# endif +#endif + +static char const short_options[] = + "0::1::2::3::4::5::6::7::8::9::" + "A::B::C::D::E::F::G::H::I::J::K::M::" + "N::O::P::Q::R::S::T::U::V::W::X::Y::Z::" + "Lln:s:t"; + +static struct option const long_options[] = +{ + {"list", no_argument, nullptr, 'l'}, + {"signal", required_argument, nullptr, 's'}, + {"table", no_argument, nullptr, 't'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [-s SIGNAL | -SIGNAL] PID...\n\ + or: %s -l [SIGNAL]...\n\ + or: %s -t [SIGNAL]...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Send signals to processes, or list signals.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -s, --signal=SIGNAL, -SIGNAL\n\ + specify the name or number of the signal to be sent\n\ + -l, --list list signal names, or convert signal names to/from numbers\n\ + -t, --table print a table of signal information\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\n\ +SIGNAL may be a signal name like 'HUP', or a signal number like '1',\n\ +or the exit status of a process terminated by a signal.\n\ +PID is an integer; if negative it identifies a process group.\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Print a row of 'kill -t' output. NUM_WIDTH is the maximum signal + number width, and SIGNUM is the signal number to print. The + maximum name width is NAME_WIDTH, and SIGNAME is the name to print. */ + +static void +print_table_row (int num_width, int signum, + int name_width, char const *signame) +{ + char const *description = strsignal (signum); + printf ("%*d %-*s %s\n", num_width, signum, name_width, signame, + description ? description : "?"); +} + +/* Print a list of signal names. If TABLE, print a table. + Print the names specified by ARGV if nonzero; otherwise, + print all known names. Return a suitable exit status. */ + +static int +list_signals (bool table, char *const *argv) +{ + int signum; + int status = EXIT_SUCCESS; + char signame[SIG2STR_MAX]; + + if (table) + { + int name_width = 0; + + /* Compute the maximum width of a signal number. */ + int num_width = 1; + for (signum = 1; signum <= SIGNUM_BOUND / 10; signum *= 10) + num_width++; + + /* Compute the maximum width of a signal name. */ + for (signum = 1; signum <= SIGNUM_BOUND; signum++) + if (sig2str (signum, signame) == 0) + { + idx_t len = strlen (signame); + if (name_width < len) + name_width = len; + } + + if (argv) + for (; *argv; argv++) + { + signum = operand2sig (*argv, signame); + if (signum < 0) + status = EXIT_FAILURE; + else + print_table_row (num_width, signum, name_width, signame); + } + else + for (signum = 1; signum <= SIGNUM_BOUND; signum++) + if (sig2str (signum, signame) == 0) + print_table_row (num_width, signum, name_width, signame); + } + else + { + if (argv) + for (; *argv; argv++) + { + signum = operand2sig (*argv, signame); + if (signum < 0) + status = EXIT_FAILURE; + else + { + if (ISDIGIT (**argv)) + puts (signame); + else + printf ("%d\n", signum); + } + } + else + for (signum = 1; signum <= SIGNUM_BOUND; signum++) + if (sig2str (signum, signame) == 0) + puts (signame); + } + + return status; +} + +/* Send signal SIGNUM to all the processes or process groups specified + by ARGV. Return a suitable exit status. */ + +static int +send_signals (int signum, char *const *argv) +{ + int status = EXIT_SUCCESS; + char const *arg = *argv; + + do + { + char *endp; + intmax_t n = (errno = 0, strtoimax (arg, &endp, 10)); + pid_t pid; + + if (errno == ERANGE || ckd_add (&pid, n, 0) + || arg == endp || *endp) + { + error (0, 0, _("%s: invalid process id"), quote (arg)); + status = EXIT_FAILURE; + } + else if (kill (pid, signum) != 0) + { + error (0, errno, "%s", quote (arg)); + status = EXIT_FAILURE; + } + } + while ((arg = *++argv)); + + return status; +} + +int +main (int argc, char **argv) +{ + int optc; + bool list = false; + bool table = false; + int signum = -1; + char signame[SIG2STR_MAX]; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, short_options, long_options, nullptr)) + != -1) + switch (optc) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (optind != 2) + { + /* This option is actually a process-id. */ + optind--; + goto no_more_options; + } + FALLTHROUGH; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': /*case 'L':*/ case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + if (! optarg) + optarg = argv[optind - 1] + strlen (argv[optind - 1]); + if (optarg != argv[optind - 1] + 2) + { + error (0, 0, _("invalid option -- %c"), optc); + usage (EXIT_FAILURE); + } + optarg--; + FALLTHROUGH; + case 'n': /* -n is not documented, but is for Bash compatibility. */ + case 's': + if (0 <= signum) + { + error (0, 0, _("%s: multiple signals specified"), quote (optarg)); + usage (EXIT_FAILURE); + } + signum = operand2sig (optarg, signame); + if (signum < 0) + usage (EXIT_FAILURE); + break; + + case 'L': /* -L is not documented, but is for procps compatibility. */ + case 't': + table = true; + FALLTHROUGH; + case 'l': + if (list) + { + error (0, 0, _("multiple -l or -t options specified")); + usage (EXIT_FAILURE); + } + list = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + no_more_options: + + if (signum < 0) + signum = SIGTERM; + else if (list) + { + error (0, 0, _("cannot combine signal with -l or -t")); + usage (EXIT_FAILURE); + } + + if ( ! list && argc <= optind) + { + error (0, 0, _("no process ID specified")); + usage (EXIT_FAILURE); + } + + return (list + ? list_signals (table, optind < argc ? argv + optind : nullptr) + : send_signals (signum, argv + optind)); +} diff --git a/src/lbracket.c b/src/lbracket.c new file mode 100644 index 0000000..b57ca9b --- /dev/null +++ b/src/lbracket.c @@ -0,0 +1,2 @@ +#define LBRACKET 1 +#include "test.c" diff --git a/src/libstdbuf.c b/src/libstdbuf.c new file mode 100644 index 0000000..16b65fa --- /dev/null +++ b/src/libstdbuf.c @@ -0,0 +1,150 @@ +/* libstdbuf -- a shared lib to preload to setup stdio buffering for a command + Copyright (C) 2009-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady. LD_PRELOAD idea from Brian Dessent. */ + +#include +#include +#include +#include "system.h" + +/* Deactivate config.h's "rpl_"-prefixed definitions, since we don't + link gnulib here, and the replacements aren't needed. */ +#undef fprintf +#undef free +#undef malloc +#undef strtoumax + +/* Note currently for glibc (2.3.5) the following call does not change + the buffer size, and more problematically does not give any indication + that the new size request was ignored: + + setvbuf (stdout, nullptr, _IOFBF, 8192); + + The ISO C99 standard section 7.19.5.6 on the setvbuf function says: + + ... If buf is not a null pointer, the array it points to _may_ be used + instead of a buffer allocated by the setvbuf function and the argument + size specifies the size of the array; otherwise, size _may_ determine + the size of a buffer allocated by the setvbuf function. ... + + Obviously some interpret the above to mean setvbuf(....,size) + is only a hint from the application which I don't agree with. + + FreeBSD's libc seems more sensible in this regard. From the man page: + + The size argument may be given as zero to obtain deferred optimal-size + buffer allocation as usual. If it is not zero, then except for + unbuffered files, the buf argument should point to a buffer at least size + bytes long; this buffer will be used instead of the current buffer. (If + the size argument is not zero but buf is null, a buffer of the given size + will be allocated immediately, and released on close. This is an extension + to ANSI C; portable code should use a size of 0 with any null buffer.) + -------------------- + Another issue is that on glibc-2.7 the following doesn't buffer + the first write if it's greater than 1 byte. + + setvbuf(stdout,buf,_IOFBF,127); + + Now the POSIX standard says that "allocating a buffer of size bytes does + not necessarily imply that all of size bytes are used for the buffer area". + However I think it's just a buggy implementation due to the various + inconsistencies with write sizes and subsequent writes. */ + +static char const * +fileno_to_name (const int fd) +{ + char const *ret = nullptr; + + switch (fd) + { + case 0: + ret = "stdin"; + break; + case 1: + ret = "stdout"; + break; + case 2: + ret = "stderr"; + break; + default: + ret = "unknown"; + break; + } + + return ret; +} + +static void +apply_mode (FILE *stream, char const *mode) +{ + char *buf = nullptr; + int setvbuf_mode; + uintmax_t size = 0; + + if (*mode == '0') + setvbuf_mode = _IONBF; + else if (*mode == 'L') + setvbuf_mode = _IOLBF; /* FIXME: should we allow 1ML */ + else + { + setvbuf_mode = _IOFBF; + char *mode_end; + size = strtoumax (mode, &mode_end, 10); + if (size == 0 || *mode_end) + { + fprintf (stderr, _("invalid buffering mode %s for %s\n"), + mode, fileno_to_name (fileno (stream))); + return; + } + + buf = size <= SIZE_MAX ? malloc (size) : nullptr; + if (!buf) + { + /* We could defer the allocation to libc, however since + glibc currently ignores the combination of null buffer + with non zero size, we'll fail here. */ + fprintf (stderr, + _("failed to allocate a %" PRIuMAX + " byte stdio buffer\n"), + size); + return; + } + /* buf will be freed by fclose. */ + } + + if (setvbuf (stream, buf, setvbuf_mode, size) != 0) + { + fprintf (stderr, _("could not set buffering of %s to mode %s\n"), + fileno_to_name (fileno (stream)), mode); + free (buf); + } +} + +/* Use __attribute to avoid elision of __attribute__ on SUNPRO_C etc. */ +static void __attribute ((constructor)) +stdbuf (void) +{ + char *e_mode = getenv ("_STDBUF_E"); + char *i_mode = getenv ("_STDBUF_I"); + char *o_mode = getenv ("_STDBUF_O"); + if (e_mode) /* Do first so can write errors to stderr */ + apply_mode (stderr, e_mode); + if (i_mode) + apply_mode (stdin, i_mode); + if (o_mode) + apply_mode (stdout, o_mode); +} diff --git a/src/link.c b/src/link.c new file mode 100644 index 0000000..1f92773 --- /dev/null +++ b/src/link.c @@ -0,0 +1,91 @@ +/* link utility for GNU. + Copyright (C) 2001-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Michael Stone */ + +/* Implementation overview: + + Simply call the system 'link' function */ + +#include +#include +#include + +#include "system.h" +#include "long-options.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "link" + +#define AUTHORS proper_name ("Michael Stone") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s FILE1 FILE2\n\ + or: %s OPTION\n"), program_name, program_name); + fputs (_("Call the link function to create a link named FILE2\ + to an existing FILE1.\n\n"), + stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (argc < optind + 2) + { + if (argc < optind + 1) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + if (optind + 2 < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 2])); + usage (EXIT_FAILURE); + } + + if (link (argv[optind], argv[optind + 1]) != 0) + error (EXIT_FAILURE, errno, _("cannot create link %s to %s"), + quoteaf_n (0, argv[optind + 1]), quoteaf_n (1, argv[optind])); + + return EXIT_SUCCESS; +} diff --git a/src/ln.c b/src/ln.c new file mode 100644 index 0000000..3b34fec --- /dev/null +++ b/src/ln.c @@ -0,0 +1,681 @@ +/* 'ln' program to create links between files. + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Mike Parker and David MacKenzie. */ + +#include +#include +#include +#include + +#include "system.h" +#include "backupfile.h" +#include "fcntl-safer.h" +#include "filenamecat.h" +#include "file-set.h" +#include "force-link.h" +#include "hash.h" +#include "hash-triple.h" +#include "priv-set.h" +#include "relpath.h" +#include "same.h" +#include "unlinkdir.h" +#include "yesno.h" +#include "canonicalize.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "ln" + +#define AUTHORS \ + proper_name ("Mike Parker"), \ + proper_name ("David MacKenzie") + +/* FIXME: document */ +static enum backup_type backup_type; + +/* If true, make symbolic links; otherwise, make hard links. */ +static bool symbolic_link; + +/* If true, make symbolic links relative */ +static bool relative; + +/* If true, hard links are logical rather than physical. */ +static bool logical = !!LINK_FOLLOWS_SYMLINKS; + +/* If true, ask the user before removing existing files. */ +static bool interactive; + +/* If true, remove existing files unconditionally. */ +static bool remove_existing_files; + +/* If true, list each file as it is moved. */ +static bool verbose; + +/* If true, allow the superuser to *attempt* to make hard links + to directories. However, it appears that this option is not useful + in practice, since even the superuser is prohibited from hard-linking + directories on most existing systems (Solaris being an exception). */ +static bool hard_dir_link; + +/* If true, watch out for creating or removing hard links to directories. */ +static bool beware_hard_dir_link; + +/* If nonzero, and the specified destination is a symbolic link to a + directory, treat it just as if it were a directory. Otherwise, the + command 'ln --force --no-dereference file symlink-to-dir' deletes + symlink-to-dir before creating the new link. */ +static bool dereference_dest_dir_symlinks = true; + +/* This is a set of destination name/inode/dev triples for hard links + created by ln. Use this data structure to avoid data loss via a + sequence of commands like this: + rm -rf a b c; mkdir a b c; touch a/f b/f; ln -f a/f b/f c && rm -r a b */ +static Hash_table *dest_set; + +/* Initial size of the dest_set hash table. */ +enum { DEST_INFO_INITIAL_CAPACITY = 61 }; + +static struct option const long_options[] = +{ + {"backup", optional_argument, nullptr, 'b'}, + {"directory", no_argument, nullptr, 'F'}, + {"no-dereference", no_argument, nullptr, 'n'}, + {"no-target-directory", no_argument, nullptr, 'T'}, + {"force", no_argument, nullptr, 'f'}, + {"interactive", no_argument, nullptr, 'i'}, + {"suffix", required_argument, nullptr, 'S'}, + {"target-directory", required_argument, nullptr, 't'}, + {"logical", no_argument, nullptr, 'L'}, + {"physical", no_argument, nullptr, 'P'}, + {"relative", no_argument, nullptr, 'r'}, + {"symbolic", no_argument, nullptr, 's'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Return an errno value for a system call that returned STATUS. + This is zero if STATUS is zero, and is errno otherwise. */ + +static int +errnoize (int status) +{ + return status < 0 ? errno : 0; +} + +/* Return FROM represented as relative to the dir of TARGET. + The result is malloced. */ + +static char * +convert_abs_rel (char const *from, char const *target) +{ + /* Get dirname to generate paths relative to. We don't resolve + the full TARGET as the last component could be an existing symlink. */ + char *targetdir = dir_name (target); + + char *realdest = canonicalize_filename_mode (targetdir, CAN_MISSING); + char *realfrom = canonicalize_filename_mode (from, CAN_MISSING); + + char *relative_from = nullptr; + if (realdest && realfrom) + { + /* Write to a PATH_MAX buffer. */ + relative_from = xmalloc (PATH_MAX); + + if (!relpath (realfrom, realdest, relative_from, PATH_MAX)) + { + free (relative_from); + relative_from = nullptr; + } + } + + free (targetdir); + free (realdest); + free (realfrom); + + return relative_from ? relative_from : xstrdup (from); +} + +/* Link SOURCE to DESTDIR_FD + DEST_BASE atomically. DESTDIR_FD is + the directory containing DEST_BASE. Return 0 if successful, a + positive errno value on failure, and -1 if an atomic link cannot be + done. This handles the common case where the destination does not + already exist and -r is not specified. */ + +static int +atomic_link (char const *source, int destdir_fd, char const *dest_base) +{ + return (symbolic_link + ? (relative ? -1 + : errnoize (symlinkat (source, destdir_fd, dest_base))) + : beware_hard_dir_link ? -1 + : errnoize (linkat (AT_FDCWD, source, destdir_fd, dest_base, + logical ? AT_SYMLINK_FOLLOW : 0))); +} + +/* Link SOURCE to a directory entry under DESTDIR_FD named DEST_BASE. + DEST is the full name of the destination, useful for diagnostics. + LINK_ERRNO is zero if the link has already been made, + positive if attempting the link failed with errno == LINK_ERRNO, + -1 if no attempt has been made to create the link. + Return true if successful. */ + +static bool +do_link (char const *source, int destdir_fd, char const *dest_base, + char const *dest, int link_errno) +{ + struct stat source_stats; + int source_status = 1; + char *backup_base = nullptr; + char *rel_source = nullptr; + int nofollow_flag = logical ? 0 : AT_SYMLINK_NOFOLLOW; + if (link_errno < 0) + link_errno = atomic_link (source, destdir_fd, dest_base); + + /* Get SOURCE_STATS if later code will need it, if only for sharper + diagnostics. */ + if ((link_errno || dest_set) && !symbolic_link) + { + source_status = fstatat (AT_FDCWD, source, &source_stats, nofollow_flag); + if (source_status != 0) + { + error (0, errno, _("failed to access %s"), quoteaf (source)); + return false; + } + } + + if (link_errno) + { + if (!symbolic_link && !hard_dir_link && S_ISDIR (source_stats.st_mode)) + { + error (0, 0, _("%s: hard link not allowed for directory"), + quotef (source)); + return false; + } + + if (relative) + source = rel_source = convert_abs_rel (source, dest); + + bool force = (remove_existing_files || interactive + || backup_type != no_backups); + if (force) + { + struct stat dest_stats; + if (fstatat (destdir_fd, dest_base, &dest_stats, AT_SYMLINK_NOFOLLOW) + != 0) + { + if (errno != ENOENT) + { + error (0, errno, _("failed to access %s"), quoteaf (dest)); + goto fail; + } + force = false; + } + else if (S_ISDIR (dest_stats.st_mode)) + { + error (0, 0, _("%s: cannot overwrite directory"), quotef (dest)); + goto fail; + } + else if (seen_file (dest_set, dest, &dest_stats)) + { + /* The current target was created as a hard link to another + source file. */ + error (0, 0, + _("will not overwrite just-created %s with %s"), + quoteaf_n (0, dest), quoteaf_n (1, source)); + goto fail; + } + else + { + /* Beware removing DEST if it is the same directory entry as + SOURCE, because in that case removing DEST can cause the + subsequent link creation either to fail (for hard links), or + to replace a non-symlink DEST with a self-loop (for symbolic + links) which loses the contents of DEST. So, when backing + up, worry about creating hard links (since the backups cover + the symlink case); otherwise, worry about about -f. */ + if (backup_type != no_backups + ? !symbolic_link + : remove_existing_files) + { + /* Detect whether removing DEST would also remove SOURCE. + If the file has only one link then both are surely the + same directory entry. Otherwise check whether they point + to the same name in the same directory. */ + if (source_status != 0) + source_status = stat (source, &source_stats); + if (source_status == 0 + && SAME_INODE (source_stats, dest_stats) + && (source_stats.st_nlink == 1 + || same_nameat (AT_FDCWD, source, + destdir_fd, dest_base))) + { + error (0, 0, _("%s and %s are the same file"), + quoteaf_n (0, source), quoteaf_n (1, dest)); + goto fail; + } + } + + if (link_errno < 0 || link_errno == EEXIST) + { + if (interactive) + { + fprintf (stderr, _("%s: replace %s? "), + program_name, quoteaf (dest)); + if (!yesno ()) + { + free (rel_source); + return false; + } + } + + if (backup_type != no_backups) + { + backup_base = find_backup_file_name (destdir_fd, + dest_base, + backup_type); + if (renameat (destdir_fd, dest_base, + destdir_fd, backup_base) + != 0) + { + int rename_errno = errno; + free (backup_base); + backup_base = nullptr; + if (rename_errno != ENOENT) + { + error (0, rename_errno, _("cannot backup %s"), + quoteaf (dest)); + goto fail; + } + force = false; + } + } + } + } + } + + /* If the attempt to create a link fails and we are removing or + backing up destinations, unlink the destination and try again. + + On the surface, POSIX states that 'ln -f A B' unlinks B before trying + to link A to B. But strictly following this has the counterintuitive + effect of losing the contents of B if A does not exist. Fortunately, + POSIX 2008 clarified that an application is free to fail early if it + can prove that continuing onward cannot succeed, so we can try to + link A to B before blindly unlinking B, thus sometimes attempting to + link a second time during a successful 'ln -f A B'. + + Try to unlink DEST even if we may have backed it up successfully. + In some unusual cases (when DEST and the backup are hard-links + that refer to the same file), rename succeeds and DEST remains. + If we didn't remove DEST in that case, the subsequent symlink or + link call would fail. */ + link_errno + = (symbolic_link + ? force_symlinkat (source, destdir_fd, dest_base, + force, link_errno) + : force_linkat (AT_FDCWD, source, destdir_fd, dest_base, + logical ? AT_SYMLINK_FOLLOW : 0, + force, link_errno)); + /* Until now, link_errno < 0 meant the link has not been tried. + From here on, link_errno < 0 means the link worked but + required removing the destination first. */ + } + + if (link_errno <= 0) + { + /* Right after creating a hard link, do this: (note dest name and + source_stats, which are also the just-linked-destinations stats) */ + if (! symbolic_link) + record_file (dest_set, dest, &source_stats); + + if (verbose) + { + char const *quoted_backup = ""; + char const *backup_sep = ""; + if (backup_base) + { + char *backup = backup_base; + void *alloc = nullptr; + ptrdiff_t destdirlen = dest_base - dest; + if (0 < destdirlen) + { + alloc = xmalloc (destdirlen + strlen (backup_base) + 1); + backup = memcpy (alloc, dest, destdirlen); + strcpy (backup + destdirlen, backup_base); + } + quoted_backup = quoteaf_n (2, backup); + backup_sep = " ~ "; + free (alloc); + } + printf ("%s%s%s %c> %s\n", quoted_backup, backup_sep, + quoteaf_n (0, dest), symbolic_link ? '-' : '=', + quoteaf_n (1, source)); + } + } + else + { + error (0, link_errno, + (symbolic_link + ? (link_errno != ENAMETOOLONG && *source + ? _("failed to create symbolic link %s") + : _("failed to create symbolic link %s -> %s")) + : (link_errno == EMLINK + ? _("failed to create hard link to %.0s%s") + : (link_errno == EDQUOT || link_errno == EEXIST + || link_errno == ENOSPC || link_errno == EROFS) + ? _("failed to create hard link %s") + : _("failed to create hard link %s => %s"))), + quoteaf_n (0, dest), quoteaf_n (1, source)); + + if (backup_base) + { + if (renameat (destdir_fd, backup_base, destdir_fd, dest_base) != 0) + error (0, errno, _("cannot un-backup %s"), quoteaf (dest)); + } + } + + free (backup_base); + free (rel_source); + return link_errno <= 0; + +fail: + free (rel_source); + return false; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [-T] TARGET LINK_NAME\n\ + or: %s [OPTION]... TARGET\n\ + or: %s [OPTION]... TARGET... DIRECTORY\n\ + or: %s [OPTION]... -t DIRECTORY TARGET...\n\ +"), + program_name, program_name, program_name, program_name); + fputs (_("\ +In the 1st form, create a link to TARGET with the name LINK_NAME.\n\ +In the 2nd form, create a link to TARGET in the current directory.\n\ +In the 3rd and 4th forms, create links to each TARGET in DIRECTORY.\n\ +Create hard links by default, symbolic links with --symbolic.\n\ +By default, each destination (name of new link) should not already exist.\n\ +When creating hard links, each TARGET must exist. Symbolic links\n\ +can hold arbitrary text; if later resolved, a relative link is\n\ +interpreted in relation to its parent directory.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + --backup[=CONTROL] make a backup of each existing destination file\n\ + -b like --backup but does not accept an argument\n\ + -d, -F, --directory allow the superuser to attempt to hard link\n\ + directories (note: will probably fail due to\n\ + system restrictions, even for the superuser)\n\ + -f, --force remove existing destination files\n\ +"), stdout); + fputs (_("\ + -i, --interactive prompt whether to remove destinations\n\ + -L, --logical dereference TARGETs that are symbolic links\n\ + -n, --no-dereference treat LINK_NAME as a normal file if\n\ + it is a symbolic link to a directory\n\ + -P, --physical make hard links directly to symbolic links\n\ + -r, --relative with -s, create links relative to link location\n\ + -s, --symbolic make symbolic links instead of hard links\n\ +"), stdout); + fputs (_("\ + -S, --suffix=SUFFIX override the usual backup suffix\n\ + -t, --target-directory=DIRECTORY specify the DIRECTORY in which to create\n\ + the links\n\ + -T, --no-target-directory treat LINK_NAME as a normal file always\n\ + -v, --verbose print name of each linked file\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_backup_suffix_note (); + printf (_("\ +\n\ +Using -s ignores -L and -P. Otherwise, the last option specified controls\n\ +behavior when a TARGET is a symbolic link, defaulting to %s.\n\ +"), LINK_FOLLOWS_SYMLINKS ? "-L" : "-P"); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int c; + bool ok; + bool make_backups = false; + char const *backup_suffix = nullptr; + char *version_control_string = nullptr; + char const *target_directory = nullptr; + int destdir_fd; + bool no_target_directory = false; + int n_files; + char **file; + int link_errno = -1; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdin); + + symbolic_link = remove_existing_files = interactive = verbose + = hard_dir_link = false; + + while ((c = getopt_long (argc, argv, "bdfinrst:vFLPS:T", + long_options, nullptr)) + != -1) + { + switch (c) + { + case 'b': + make_backups = true; + if (optarg) + version_control_string = optarg; + break; + case 'd': + case 'F': + hard_dir_link = true; + break; + case 'f': + remove_existing_files = true; + interactive = false; + break; + case 'i': + remove_existing_files = false; + interactive = true; + break; + case 'L': + logical = true; + break; + case 'n': + dereference_dest_dir_symlinks = false; + break; + case 'P': + logical = false; + break; + case 'r': + relative = true; + break; + case 's': + symbolic_link = true; + break; + case 't': + if (target_directory) + error (EXIT_FAILURE, 0, _("multiple target directories specified")); + else + { + struct stat st; + if (stat (optarg, &st) != 0) + error (EXIT_FAILURE, errno, _("failed to access %s"), + quoteaf (optarg)); + if (! S_ISDIR (st.st_mode)) + error (EXIT_FAILURE, 0, _("target %s is not a directory"), + quoteaf (optarg)); + } + target_directory = optarg; + break; + case 'T': + no_target_directory = true; + break; + case 'v': + verbose = true; + break; + case 'S': + make_backups = true; + backup_suffix = optarg; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + break; + } + } + + n_files = argc - optind; + file = argv + optind; + + if (n_files <= 0) + { + error (0, 0, _("missing file operand")); + usage (EXIT_FAILURE); + } + + if (relative && !symbolic_link) + error (EXIT_FAILURE, 0, _("cannot do --relative without --symbolic")); + + if (!hard_dir_link) + { + priv_set_remove_linkdir (); + beware_hard_dir_link = !cannot_unlink_dir (); + } + + if (no_target_directory) + { + if (target_directory) + error (EXIT_FAILURE, 0, + _("cannot combine --target-directory " + "and --no-target-directory")); + if (n_files != 2) + { + if (n_files < 2) + error (0, 0, + _("missing destination file operand after %s"), + quoteaf (file[0])); + else + error (0, 0, _("extra operand %s"), quoteaf (file[2])); + usage (EXIT_FAILURE); + } + } + else if (n_files < 2 && !target_directory) + { + target_directory = "."; + destdir_fd = AT_FDCWD; + } + else + { + if (n_files == 2 && !target_directory) + link_errno = atomic_link (file[0], AT_FDCWD, file[1]); + if (link_errno < 0 || link_errno == EEXIST || link_errno == ENOTDIR + || link_errno == EINVAL) + { + char const *d + = target_directory ? target_directory : file[n_files - 1]; + int flags = (O_PATHSEARCH | O_DIRECTORY + | (dereference_dest_dir_symlinks ? 0 : O_NOFOLLOW)); + destdir_fd = openat_safer (AT_FDCWD, d, flags); + int err = errno; + if (!O_DIRECTORY && 0 <= destdir_fd) + { + struct stat st; + err = (fstat (destdir_fd, &st) != 0 ? errno + : S_ISDIR (st.st_mode) ? 0 : ENOTDIR); + if (err != 0) + { + close (destdir_fd); + destdir_fd = -1; + } + } + if (0 <= destdir_fd) + { + n_files -= !target_directory; + target_directory = d; + } + else if (! (n_files == 2 && !target_directory)) + error (EXIT_FAILURE, err, _("target %s"), quoteaf (d)); + } + } + + backup_type = (make_backups + ? xget_version (_("backup type"), version_control_string) + : no_backups); + set_simple_backup_suffix (backup_suffix); + + + if (target_directory) + { + /* Create the data structure we'll use to record which hard links we + create. Used to ensure that ln detects an obscure corner case that + might result in user data loss. Create it only if needed. */ + if (2 <= n_files + && remove_existing_files + /* Don't bother trying to protect symlinks, since ln clobbering + a just-created symlink won't ever lead to real data loss. */ + && ! symbolic_link + /* No destination hard link can be clobbered when making + numbered backups. */ + && backup_type != numbered_backups) + { + dest_set = hash_initialize (DEST_INFO_INITIAL_CAPACITY, + nullptr, + triple_hash, + triple_compare, + triple_free); + if (dest_set == nullptr) + xalloc_die (); + } + + ok = true; + for (int i = 0; i < n_files; ++i) + { + char *dest_base; + char *dest = file_name_concat (target_directory, + last_component (file[i]), + &dest_base); + strip_trailing_slashes (dest_base); + ok &= do_link (file[i], destdir_fd, dest_base, dest, -1); + free (dest); + } + } + else + ok = do_link (file[0], AT_FDCWD, file[1], file[1], link_errno); + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/local.mk b/src/local.mk new file mode 100644 index 0000000..f45b911 --- /dev/null +++ b/src/local.mk @@ -0,0 +1,704 @@ +# Make coreutils programs. -*-Makefile-*- +# This is included by the top-level Makefile.am. + +## Copyright (C) 1990-2023 Free Software Foundation, Inc. + +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program. If not, see . + +# FIXME: once lib/ and gnulib-tests/ are also converted, hoist to Makefile.am +AM_CFLAGS = $(WERROR_CFLAGS) + +# The list of all programs (separated in different variables to express +# the how and when they should be installed) is defined in this makefile +# fragment, autogenerated by the 'gen-lists-of-programs.sh' auxiliary +# script. +include $(srcdir)/src/cu-progs.mk + +EXTRA_PROGRAMS = \ + $(no_install__progs) \ + $(build_if_possible__progs) \ + $(default__progs) + +# The user can tweak these lists at configure time. +bin_PROGRAMS = @bin_PROGRAMS@ +pkglibexec_PROGRAMS = @pkglibexec_PROGRAMS@ + +# Needed by the testsuite. +noinst_PROGRAMS = \ + src/getlimits \ + src/make-prime-list + +noinst_HEADERS = \ + src/chown-core.h \ + src/copy.h \ + src/cp-hash.h \ + src/dircolors.h \ + src/expand-common.h \ + src/find-mount-point.h \ + src/fs.h \ + src/fs-is-local.h \ + src/group-list.h \ + src/ioblksize.h \ + src/iopoll.h \ + src/longlong.h \ + src/ls.h \ + src/operand2sig.h \ + src/prog-fprintf.h \ + src/remove.h \ + src/set-fields.h \ + src/statx.h \ + src/system.h \ + src/temp-stream.h \ + src/uname.h + +EXTRA_DIST += \ + src/dcgen \ + src/dircolors.hin \ + src/primes.h \ + src/tac-pipe.c \ + src/extract-magic + +CLEANFILES += $(SCRIPTS) + +# Also remove these sometimes-built programs. +# For example, even when excluded, they're built via 'sc_check-AUTHORS' +# or 'dist'. +CLEANFILES += $(no_install__progs) + +noinst_LIBRARIES += src/libver.a +nodist_src_libver_a_SOURCES = src/version.c src/version.h + +# Tell the linker to omit references to unused shared libraries. +AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS) + +# Extra libraries needed by more than one program. Will be updated later. +copy_ldadd = +remove_ldadd = + +# Sometimes, the expansion of $(LIBINTL) includes -lc which may +# include modules defining variables like 'optind', so libcoreutils.a +# must precede $(LIBINTL) in order to ensure we use GNU getopt. +# But libcoreutils.a must also follow $(LIBINTL), since libintl uses +# replacement functions defined in libcoreutils.a. +# Similarly for $(MBRTOWC_LIB). +LDADD = src/libver.a lib/libcoreutils.a $(LIBINTL) $(MBRTOWC_LIB) \ + lib/libcoreutils.a + +# First, list all programs, to make listing per-program libraries easier. +# See [ below. +src_arch_LDADD = $(LDADD) +src_base64_LDADD = $(LDADD) +src_base32_LDADD = $(LDADD) +src_basenc_LDADD = $(LDADD) +src_basename_LDADD = $(LDADD) +src_cat_LDADD = $(LDADD) +src_chcon_LDADD = $(LDADD) +src_chgrp_LDADD = $(LDADD) +src_chmod_LDADD = $(LDADD) +src_chown_LDADD = $(LDADD) +src_chroot_LDADD = $(LDADD) +src_cksum_LDADD = $(LDADD) +src_comm_LDADD = $(LDADD) +src_nproc_LDADD = $(LDADD) +src_cp_LDADD = $(LDADD) +if !SINGLE_BINARY +src_coreutils_LDADD = $(LDADD) +endif +src_csplit_LDADD = $(LDADD) +src_cut_LDADD = $(LDADD) +src_date_LDADD = $(LDADD) +src_dd_LDADD = $(LDADD) +src_df_LDADD = $(LDADD) +# See dir_LDADD below +src_dircolors_LDADD = $(LDADD) +src_dirname_LDADD = $(LDADD) +src_du_LDADD = $(LDADD) +src_echo_LDADD = $(LDADD) +src_env_LDADD = $(LDADD) +src_expand_LDADD = $(LDADD) +src_expr_LDADD = $(LDADD) +src_factor_LDADD = $(LDADD) +src_false_LDADD = $(LDADD) +src_fmt_LDADD = $(LDADD) +src_fold_LDADD = $(LDADD) +src_getlimits_LDADD = $(LDADD) +src_ginstall_LDADD = $(LDADD) +src_groups_LDADD = $(LDADD) +src_head_LDADD = $(LDADD) +src_hostid_LDADD = $(LDADD) +src_hostname_LDADD = $(LDADD) +src_id_LDADD = $(LDADD) +src_join_LDADD = $(LDADD) +src_kill_LDADD = $(LDADD) +src_link_LDADD = $(LDADD) +src_ln_LDADD = $(LDADD) +src_logname_LDADD = $(LDADD) +src_ls_LDADD = $(LDADD) + +# This must *not* depend on anything in lib/, since it is used to generate +# src/primes.h. If it depended on libcoreutils.a, that would pull all lib/*.c +# into BUILT_SOURCES. +src_make_prime_list_LDADD = + +src_md5sum_LDADD = $(LDADD) +src_mkdir_LDADD = $(LDADD) +src_mkfifo_LDADD = $(LDADD) +src_mknod_LDADD = $(LDADD) +src_mktemp_LDADD = $(LDADD) +src_mv_LDADD = $(LDADD) +src_nice_LDADD = $(LDADD) +src_nl_LDADD = $(LDADD) +src_nohup_LDADD = $(LDADD) +src_numfmt_LDADD = $(LDADD) +src_od_LDADD = $(LDADD) +src_paste_LDADD = $(LDADD) +src_pathchk_LDADD = $(LDADD) +src_pinky_LDADD = $(LDADD) +src_pr_LDADD = $(LDADD) +src_printenv_LDADD = $(LDADD) +src_printf_LDADD = $(LDADD) +src_ptx_LDADD = $(LDADD) +src_pwd_LDADD = $(LDADD) +src_readlink_LDADD = $(LDADD) +src_realpath_LDADD = $(LDADD) +src_rm_LDADD = $(LDADD) +src_rmdir_LDADD = $(LDADD) +src_runcon_LDADD = $(LDADD) +src_seq_LDADD = $(LDADD) +src_sha1sum_LDADD = $(LDADD) +src_sha224sum_LDADD = $(LDADD) +src_sha256sum_LDADD = $(LDADD) +src_sha384sum_LDADD = $(LDADD) +src_sha512sum_LDADD = $(LDADD) +src_shred_LDADD = $(LDADD) +src_shuf_LDADD = $(LDADD) +src_sleep_LDADD = $(LDADD) +src_sort_LDADD = $(LDADD) +src_split_LDADD = $(LDADD) +src_stat_LDADD = $(LDADD) +src_stdbuf_LDADD = $(LDADD) +src_stty_LDADD = $(LDADD) +src_sum_LDADD = $(LDADD) +src_sync_LDADD = $(LDADD) +src_tac_LDADD = $(LDADD) +src_tail_LDADD = $(LDADD) +src_tee_LDADD = $(LDADD) +src_test_LDADD = $(LDADD) +src_timeout_LDADD = $(LDADD) +src_touch_LDADD = $(LDADD) +src_tr_LDADD = $(LDADD) +src_true_LDADD = $(LDADD) +src_truncate_LDADD = $(LDADD) +src_tsort_LDADD = $(LDADD) +src_tty_LDADD = $(LDADD) +src_uname_LDADD = $(LDADD) +src_unexpand_LDADD = $(LDADD) +src_uniq_LDADD = $(LDADD) +src_unlink_LDADD = $(LDADD) +src_uptime_LDADD = $(LDADD) +src_users_LDADD = $(LDADD) +# See vdir_LDADD below +src_wc_LDADD = $(LDADD) +src_who_LDADD = $(LDADD) +src_whoami_LDADD = $(LDADD) +src_yes_LDADD = $(LDADD) + +# Synonyms. Recall that Automake transliterates '[' and '/' to '_'. +src___LDADD = $(src_test_LDADD) +src_dir_LDADD = $(src_ls_LDADD) +src_vdir_LDADD = $(src_ls_LDADD) + +src_cp_LDADD += $(copy_ldadd) +src_ginstall_LDADD += $(copy_ldadd) +src_mv_LDADD += $(copy_ldadd) + +src_mv_LDADD += $(remove_ldadd) +src_rm_LDADD += $(remove_ldadd) + +# for eaccess, euidaccess +copy_ldadd += $(EUIDACCESS_LIBGEN) +remove_ldadd += $(EUIDACCESS_LIBGEN) +src_sort_LDADD += $(EUIDACCESS_LIBGEN) +src_test_LDADD += $(EUIDACCESS_LIBGEN) + +# for selinux use +copy_ldadd += $(LIB_SELINUX) +src_chcon_LDADD += $(LIB_SELINUX) +src_ginstall_LDADD += $(LIB_SELINUX) +src_id_LDADD += $(LIB_SELINUX) +src_id_LDADD += $(LIB_SMACK) +src_ls_LDADD += $(LIB_SELINUX) +src_ls_LDADD += $(LIB_SMACK) +src_mkdir_LDADD += $(LIB_SELINUX) +src_mkdir_LDADD += $(LIB_SMACK) +src_mkfifo_LDADD += $(LIB_SELINUX) +src_mkfifo_LDADD += $(LIB_SMACK) +src_mknod_LDADD += $(LIB_SELINUX) +src_mknod_LDADD += $(LIB_SMACK) +src_runcon_LDADD += $(LIB_SELINUX) +src_stat_LDADD += $(LIB_SELINUX) + +# for nvlist_lookup_uint64_array +src_stat_LDADD += $(LIB_NVPAIR) + +# for gettime, settime, tempname, utimecmp, utimens +copy_ldadd += $(CLOCK_TIME_LIB) +src_date_LDADD += $(CLOCK_TIME_LIB) +src_ginstall_LDADD += $(CLOCK_TIME_LIB) +src_ln_LDADD += $(CLOCK_TIME_LIB) +src_ls_LDADD += $(CLOCK_TIME_LIB) +src_mktemp_LDADD += $(CLOCK_TIME_LIB) +src_pr_LDADD += $(CLOCK_TIME_LIB) +src_sort_LDADD += $(CLOCK_TIME_LIB) +src_split_LDADD += $(CLOCK_TIME_LIB) +src_tac_LDADD += $(CLOCK_TIME_LIB) +src_timeout_LDADD += $(LIB_TIMER_TIME) +src_touch_LDADD += $(CLOCK_TIME_LIB) + +# for gethrxtime +src_dd_LDADD += $(GETHRXTIME_LIB) + +# for cap_get_file +src_ls_LDADD += $(LIB_CAP) + +# for fdatasync +src_dd_LDADD += $(LIB_FDATASYNC) +src_shred_LDADD += $(LIB_FDATASYNC) +src_sync_LDADD += $(LIB_FDATASYNC) + +# for xnanosleep +src_sleep_LDADD += $(NANOSLEEP_LIB) +src_sort_LDADD += $(NANOSLEEP_LIB) +src_tail_LDADD += $(NANOSLEEP_LIB) + +# for various GMP functions +src_expr_LDADD += $(LIBGMP) +src_factor_LDADD += $(LIBGMP) + +# for getloadavg +src_uptime_LDADD += $(GETLOADAVG_LIBS) + +# for various ACL functions +copy_ldadd += $(LIB_ACL) +src_ls_LDADD += $(FILE_HAS_ACL_LIB) + +# for various xattr functions +copy_ldadd += $(LIB_XATTR) + +# for print_unicode_char +src_printf_LDADD += $(LIBICONV) + +# for libcrypto hash routines +src_md5sum_LDADD += $(LIB_CRYPTO) +src_sort_LDADD += $(LIB_CRYPTO) +src_sha1sum_LDADD += $(LIB_CRYPTO) +src_sha224sum_LDADD += $(LIB_CRYPTO) +src_sha256sum_LDADD += $(LIB_CRYPTO) +src_sha384sum_LDADD += $(LIB_CRYPTO) +src_sha512sum_LDADD += $(LIB_CRYPTO) +src_cksum_LDADD += $(LIB_CRYPTO) + +# for canon_host +src_pinky_LDADD += $(GETADDRINFO_LIB) +src_who_LDADD += $(GETADDRINFO_LIB) + +# for gethostname, uname +src_hostname_LDADD += $(GETHOSTNAME_LIB) +src_uname_LDADD += $(GETHOSTNAME_LIB) + +# for read_utmp +src_pinky_LDADD += $(READUTMP_LIB) +src_uptime_LDADD += $(READUTMP_LIB) +src_users_LDADD += $(READUTMP_LIB) +src_who_LDADD += $(READUTMP_LIB) + +# for strsignal +src_kill_LDADD += $(LIBTHREAD) + +# for pthread-cond, pthread-mutex, pthread-thread +src_sort_LDADD += $(LIBPMULTITHREAD) + +# for pthread_sigmask +src_sort_LDADD += $(PTHREAD_SIGMASK_LIB) + +# Get the release year from lib/version-etc.c. +RELEASE_YEAR = \ + `sed -n '/.*COPYRIGHT_YEAR = \([0-9][0-9][0-9][0-9]\) };/s//\1/p' \ + $(top_srcdir)/lib/version-etc.c` + +selinux_sources = \ + src/selinux.c \ + src/selinux.h + +copy_sources = \ + src/copy.c \ + src/cp-hash.c \ + src/force-link.c \ + src/force-link.h + +# Use 'ginstall' in the definition of PROGRAMS and in dependencies to avoid +# confusion with the 'install' target. The install rule transforms 'ginstall' +# to install before applying any user-specified name transformations. + +# Don't apply prefix transformations to libstdbuf shared lib +# as that's not generally needed, and we need to reference the +# name directly in LD_PRELOAD etc. In general it's surprising +# that $(transform) is applied to libexec at all given that is +# for internal package naming, not privy to $(transform). + +transform = s/ginstall/install/;/libstdbuf/!$(program_transform_name) + +src_ginstall_SOURCES = src/install.c src/prog-fprintf.c $(copy_sources) \ + $(selinux_sources) + +# This is for the '[' program. Automake transliterates '[' and '/' to '_'. +src___SOURCES = src/lbracket.c + +nodist_src_coreutils_SOURCES = src/coreutils.h +src_coreutils_SOURCES = src/coreutils.c + +src_cp_SOURCES = src/cp.c $(copy_sources) $(selinux_sources) +src_dir_SOURCES = src/ls.c src/ls-dir.c +src_env_SOURCES = src/env.c src/operand2sig.c +src_vdir_SOURCES = src/ls.c src/ls-vdir.c +src_id_SOURCES = src/id.c src/group-list.c +src_groups_SOURCES = src/groups.c src/group-list.c +src_ls_SOURCES = src/ls.c src/ls-ls.c +src_ln_SOURCES = src/ln.c \ + src/force-link.c src/force-link.h \ + src/relpath.c src/relpath.h +src_chown_SOURCES = src/chown.c src/chown-core.c +src_chgrp_SOURCES = src/chgrp.c src/chown-core.c +src_kill_SOURCES = src/kill.c src/operand2sig.c +src_realpath_SOURCES = src/realpath.c src/relpath.c src/relpath.h +src_timeout_SOURCES = src/timeout.c src/operand2sig.c + +src_mv_SOURCES = src/mv.c src/remove.c $(copy_sources) $(selinux_sources) +src_rm_SOURCES = src/rm.c src/remove.c + +src_mkdir_SOURCES = src/mkdir.c src/prog-fprintf.c $(selinux_sources) +src_rmdir_SOURCES = src/rmdir.c src/prog-fprintf.c + +src_mkfifo_SOURCES = src/mkfifo.c $(selinux_sources) +src_mknod_SOURCES = src/mknod.c $(selinux_sources) + +src_df_SOURCES = src/df.c src/find-mount-point.c +src_stat_SOURCES = src/stat.c src/find-mount-point.c + +src_uname_SOURCES = src/uname.c src/uname-uname.c +src_arch_SOURCES = src/uname.c src/uname-arch.c + +src_cut_SOURCES = src/cut.c src/set-fields.c +src_numfmt_SOURCES = src/numfmt.c src/set-fields.c + +src_split_SOURCES = src/split.c src/temp-stream.c +src_tac_SOURCES = src/tac.c src/temp-stream.c + +src_tail_SOURCES = src/tail.c src/iopoll.c +src_tee_SOURCES = src/tee.c src/iopoll.c + +src_sum_SOURCES = src/sum.c src/sum.h src/digest.c +src_sum_CPPFLAGS = -DHASH_ALGO_SUM=1 $(AM_CPPFLAGS) + +src_md5sum_SOURCES = src/digest.c +src_md5sum_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS) +src_sha1sum_SOURCES = src/digest.c +src_sha1sum_CPPFLAGS = -DHASH_ALGO_SHA1=1 $(AM_CPPFLAGS) +src_sha224sum_SOURCES = src/digest.c +src_sha224sum_CPPFLAGS = -DHASH_ALGO_SHA224=1 $(AM_CPPFLAGS) +src_sha256sum_SOURCES = src/digest.c +src_sha256sum_CPPFLAGS = -DHASH_ALGO_SHA256=1 $(AM_CPPFLAGS) +src_sha384sum_SOURCES = src/digest.c +src_sha384sum_CPPFLAGS = -DHASH_ALGO_SHA384=1 $(AM_CPPFLAGS) +src_sha512sum_SOURCES = src/digest.c +src_sha512sum_CPPFLAGS = -DHASH_ALGO_SHA512=1 $(AM_CPPFLAGS) +src_b2sum_CPPFLAGS = -DHASH_ALGO_BLAKE2=1 -DHAVE_CONFIG_H $(AM_CPPFLAGS) +src_b2sum_SOURCES = src/digest.c \ + src/blake2/blake2.h src/blake2/blake2-impl.h \ + src/blake2/blake2b-ref.c \ + src/blake2/b2sum.c src/blake2/b2sum.h + +src_cksum_SOURCES = $(src_b2sum_SOURCES) src/sum.c src/sum.h \ + src/cksum.c src/cksum.h src/crctab.c +src_cksum_CPPFLAGS = -DHASH_ALGO_CKSUM=1 -DHAVE_CONFIG_H $(AM_CPPFLAGS) +if USE_PCLMUL_CRC32 +noinst_LIBRARIES += src/libcksum_pclmul.a +src_libcksum_pclmul_a_SOURCES = src/cksum_pclmul.c src/cksum.h +cksum_pclmul_ldadd = src/libcksum_pclmul.a +src_cksum_LDADD += $(cksum_pclmul_ldadd) +src_libcksum_pclmul_a_CFLAGS = -mavx -mpclmul $(AM_CFLAGS) +endif + +src_base64_SOURCES = src/basenc.c +src_base64_CPPFLAGS = -DBASE_TYPE=64 $(AM_CPPFLAGS) +src_base32_SOURCES = src/basenc.c +src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +src_basenc_SOURCES = src/basenc.c +src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) + +src_expand_SOURCES = src/expand.c src/expand-common.c +src_unexpand_SOURCES = src/unexpand.c src/expand-common.c + +src_wc_SOURCES = src/wc.c +if USE_AVX2_WC_LINECOUNT +noinst_LIBRARIES += src/libwc_avx2.a +src_libwc_avx2_a_SOURCES = src/wc_avx2.c +wc_avx2_ldadd = src/libwc_avx2.a +src_wc_LDADD += $(wc_avx2_ldadd) +src_libwc_avx2_a_CFLAGS = -mavx2 $(AM_CFLAGS) +endif + +# Ensure we don't link against libcoreutils.a as that lib is +# not compiled with -fPIC which causes issues on 64 bit at least +src_libstdbuf_so_LDADD = $(LIBINTL) + +# Note libstdbuf is only compiled if GCC is available +# (as per the check in configure.ac), so these flags should be available. +# libtool is probably required to relax this dependency. +src_libstdbuf_so_LDFLAGS = -shared +src_libstdbuf_so_CFLAGS = -fPIC $(AM_CFLAGS) + +BUILT_SOURCES += src/coreutils.h +if SINGLE_BINARY +# Single binary dependencies +src_coreutils_CFLAGS = -DSINGLE_BINARY $(AM_CFLAGS) +#src_coreutils_LDFLAGS = $(AM_LDFLAGS) +src_coreutils_LDADD = $(single_binary_deps) $(LDADD) $(single_binary_libs) +src_coreutils_DEPENDENCIES = $(LDADD) $(single_binary_deps) + +include $(top_srcdir)/src/single-binary.mk + +# Creates symlinks or shebangs to the installed programs when building +# coreutils single binary. +EXTRA_src_coreutils_DEPENDENCIES = src/coreutils_$(single_binary_install_type) +endif SINGLE_BINARY + +CLEANFILES += src/coreutils_symlinks +src/coreutils_symlinks: Makefile + $(AM_V_GEN)touch $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)for i in x $(single_binary_progs); do \ + test $$i = x && continue; \ + rm -f src/$$i$(EXEEXT) || exit $$?; \ + $(LN_S) -s coreutils$(EXEEXT) src/$$i$(EXEEXT) || exit $$?; \ + done + +CLEANFILES += src/coreutils_shebangs +src/coreutils_shebangs: Makefile + $(AM_V_GEN)touch $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)for i in x $(single_binary_progs); do \ + test $$i = x && continue; \ + rm -f src/$$i$(EXEEXT) || exit $$?; \ + printf '#!%s --coreutils-prog-shebang=%s\n' \ + $(abs_top_builddir)/src/coreutils$(EXEEXT) $$i \ + >src/$$i$(EXEEXT) || exit $$?; \ + chmod a+x,a-w src/$$i$(EXEEXT) || exit $$?; \ + done + +clean-local: + $(AM_V_at)for i in x $(single_binary_progs); do \ + test $$i = x && continue; \ + rm -f src/$$i$(EXEEXT) || exit $$?; \ + done + + +BUILT_SOURCES += src/dircolors.h +src/dircolors.h: src/dcgen src/dircolors.hin + $(AM_V_GEN)rm -f $@ $@-t + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)$(PERL) -w -- $(srcdir)/src/dcgen \ + $(srcdir)/src/dircolors.hin > $@-t + $(AM_V_at)chmod a-w $@-t + $(AM_V_at)mv $@-t $@ + +# This file is built by maintainers. It's architecture-independent, +# and it needs to be built on a widest-known-int architecture, so it's +# built only if absent. It is not cleaned because we don't want to +# insist that maintainers must build on hosts that support the widest +# known ints (currently 128-bit). +BUILT_SOURCES += $(top_srcdir)/src/primes.h +$(top_srcdir)/src/primes.h: + $(AM_V_at)${MKDIR_P} src + $(MAKE) src/make-prime-list$(EXEEXT) + $(AM_V_GEN)rm -f $@ $@-t + $(AM_V_at)src/make-prime-list$(EXEEXT) 5000 > $@-t + $(AM_V_at)chmod a-w $@-t + $(AM_V_at)mv $@-t $@ + +# false exits nonzero even with --help or --version. +# test doesn't support --help or --version. +# Tell automake to exempt then from that installcheck test. +AM_INSTALLCHECK_STD_OPTIONS_EXEMPT = src/false src/test + +# Compare fs.h with the list of file system names/magic-numbers in the +# Linux statfs man page. This target prints any new name/number pairs. +# Also compare against /usr/include/linux/magic.h +.PHONY: src/fs-magic-compare +src/fs-magic-compare: src/fs-magic src/fs-kernel-magic src/fs-def + @join -v1 -t@ src/fs-magic src/fs-def + @join -v1 -t@ src/fs-kernel-magic src/fs-def + +CLEANFILES += src/fs-def +src/fs-def: src/fs.h + @grep '^# *define ' src/fs.h | $(ASSORT) > $@-t && mv $@-t $@ + +# Massage bits of the statfs man page and definitions from +# /usr/include/linux/magic.h to be in a form consistent with what's in fs.h. +fs_normalize_perl_subst = \ + -e 's/MINIX_SUPER_MAGIC\b/MINIX/;' \ + -e 's/MINIX_SUPER_MAGIC2\b/MINIX_30/;' \ + -e 's/MINIX2_SUPER_MAGIC\b/MINIX_V2/;' \ + -e 's/MINIX2_SUPER_MAGIC2\b/MINIX_V2_30/;' \ + -e 's/MINIX3_SUPER_MAGIC\b/MINIX_V3/;' \ + -e 's/CIFS_MAGIC_NUMBER/CIFS/;' \ + -e 's/AFS_FS/KAFS/;' \ + -e 's/(_SUPER)?_MAGIC//;' \ + -e 's/\s+0x(\S+)/" 0x" . uc $$1/e;' \ + -e 's/(\s+0x)(\X{2})\b/$${1}00$$2/;' \ + -e 's/(\s+0x)(\X{3})\b/$${1}0$$2/;' \ + -e 's/(\s+0x)(\X{6})\b/$${1}00$$2/;' \ + -e 's/(\s+0x)(\X{7})\b/$${1}0$$2/;' \ + -e 's/^\s+//;' \ + -e 's/^\043define\s+//;' \ + -e 's/^_(XIAFS)/$$1/;' \ + -e 's/^USBDEVICE/USBDEVFS/;' \ + -e 's/NTFS_SB/NTFS/;' \ + -e 's/^/\043 define S_MAGIC_/;' \ + -e 's,\s*/\* .*? \*/,,;' + +CLEANFILES += src/fs-magic +src/fs-magic: Makefile + @MANPAGER= man statfs \ + |perl -ne '/File system types:/.../Nobody kno/ and print' \ + |grep 0x | perl -p \ + $(fs_normalize_perl_subst) \ + | grep -Ev 'S_MAGIC_EXT[34]|STACK_END' \ + | $(ASSORT) \ + > $@-t && mv $@-t $@ + +DISTCLEANFILES += src/fs-latest-magic.h +# This rule currently gets the latest header, but probably isn't general +# enough to enable by default. +# @kgit='https://git.kernel.org/cgit/linux/kernel/git'; \ +# wget -q $$kgit/torvalds/linux.git/plain/include/uapi/linux/magic.h \ +# -O $@ +src/fs-latest-magic.h: + @touch $@ + +CLEANFILES += src/fs-kernel-magic +src/fs-kernel-magic: Makefile src/fs-latest-magic.h + @perl -ne '/^#define.*0x/ and print' \ + /usr/include/linux/magic.h src/fs-latest-magic.h \ + | perl -p \ + $(fs_normalize_perl_subst) \ + | grep -Ev 'S_MAGIC_EXT[34]|STACK_END' \ + | $(ASSORT) -u \ + > $@-t && mv $@-t $@ + +BUILT_SOURCES += src/fs-is-local.h +src/fs-is-local.h: src/stat.c src/extract-magic + $(AM_V_GEN)rm -f $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)$(PERL) $(srcdir)/src/extract-magic \ + --local $(srcdir)/src/stat.c > $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +BUILT_SOURCES += src/fs.h +src/fs.h: src/stat.c src/extract-magic + $(AM_V_GEN)rm -f $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)$(PERL) $(srcdir)/src/extract-magic \ + $(srcdir)/src/stat.c > $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +BUILT_SOURCES += src/version.c +src/version.c: Makefile + $(AM_V_GEN)rm -f $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)printf '#include \n' > $@t + $(AM_V_at)printf 'char const *Version = "$(PACKAGE_VERSION)";\n' >> $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +BUILT_SOURCES += src/version.h +src/version.h: Makefile + $(AM_V_GEN)rm -f $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)printf 'extern char const *Version;\n' > $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +# Generates a list of macro invocations like: +# SINGLE_BINARY_PROGRAM(program_name_str, main_name) +# once for each program list on $(single_binary_progs). Note that +# for [ the macro invocation is: +# SINGLE_BINARY_PROGRAM("[", _) +DISTCLEANFILES += src/coreutils.h +src/coreutils.h: Makefile + $(AM_V_GEN)rm -f $@ + $(AM_V_at)${MKDIR_P} src + $(AM_V_at)for prog in x $(single_binary_progs); do \ + test $$prog = x && continue; \ + prog=`basename $$prog`; \ + main=`echo $$prog | tr '[' '_'`; \ + echo "SINGLE_BINARY_PROGRAM(\"$$prog\", $$main)"; \ + done | sort > $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +DISTCLEANFILES += src/version.c src/version.h +MAINTAINERCLEANFILES += $(BUILT_SOURCES) + +all_programs = \ + $(bin_PROGRAMS) \ + $(bin_SCRIPTS) \ + $(EXTRA_PROGRAMS) + +pm = progs-makefile +pr = progs-readme +# Ensure that the list of programs in README matches the list +# of programs we can build. +check-local: check-README check-duplicate-no-install +.PHONY: check-README +check-README: + $(AM_V_GEN)rm -rf $(pr) $(pm) + $(AM_V_at)echo $(all_programs) \ + | tr -s ' ' '\n' \ + | sed -e 's,$(EXEEXT)$$,,' \ + -e 's,^src/,,' \ + -e 's/^ginstall$$/install/' \ + | sed /libstdbuf/d \ + | $(ASSORT) -u > $(pm) && \ + sed -n '/^The programs .* are:/,/^[a-zA-Z]/p' $(top_srcdir)/README \ + | sed -n '/^ */s///p' | tr -s ' ' '\n' > $(pr) + $(AM_V_at)diff $(pm) $(pr) && rm -rf $(pr) $(pm) + +# Ensure that a by-default-not-installed program (listed in +# $(no_install__progs) is not also listed as another $(EXTRA_PROGRAMS) +# entry, because if that were to happen, it *would* be installed +# by default. +.PHONY: check-duplicate-no-install +check-duplicate-no-install: + $(AM_V_GEN)test -z "`echo '$(EXTRA_PROGRAMS)' | tr ' ' '\n' | uniq -d`" + +# Use the just-built 'ginstall', when not cross-compiling. +if CROSS_COMPILING +cu_install_program = @INSTALL@ +else +cu_install_program = src/ginstall +endif +INSTALL = $(cu_install_program) -c diff --git a/src/logname.c b/src/logname.c new file mode 100644 index 0000000..1d74ad9 --- /dev/null +++ b/src/logname.c @@ -0,0 +1,80 @@ +/* logname -- print user's login name + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include + +#include "system.h" +#include "long-options.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "logname" + +#define AUTHORS proper_name ("FIXME: unknown") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]\n"), program_name); + fputs (_("\ +Print the user's login name.\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + char *cp; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + /* POSIX requires using getlogin (or equivalent code) and prohibits + using a fallback technique. */ + cp = getlogin (); + if (! cp) + error (EXIT_FAILURE, 0, _("no login name")); + + puts (cp); + return EXIT_SUCCESS; +} diff --git a/src/longlong.h b/src/longlong.h new file mode 100644 index 0000000..d5f3831 --- /dev/null +++ b/src/longlong.h @@ -0,0 +1,2275 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + +Copyright 1991-2023 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it under the +terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +details. + +You should have received a copy of the GNU Lesser General Public License +along with this file. If not, see https://www.gnu.org/licenses/. */ + +/* You have to define the following before including this file: + + UWtype -- An unsigned type, default type for operations (typically a "word") + UHWtype -- An unsigned type, at least half the size of UWtype + UDWtype -- An unsigned type, at least twice as large a UWtype + W_TYPE_SIZE -- size in bits of UWtype + + SItype, USItype -- Signed and unsigned 32 bit types + DItype, UDItype -- Signed and unsigned 64 bit types + + On a 32 bit machine UWtype should typically be USItype; + on a 64 bit machine, UWtype should typically be UDItype. + + Optionally, define: + + LONGLONG_STANDALONE -- Avoid code that needs machine-dependent support files + NO_ASM -- Disable inline asm + + + CAUTION! Using this version of longlong.h outside of GMP is not safe. You + need to include gmp.h and gmp-impl.h, or certain things might not work as + expected. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two + UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype + word product in HIGH_PROD and LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + UDWtype product. This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a UDWtype, composed by the UWtype integers + HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + than DENOMINATOR for correct operation. If, in addition, the most + significant bit of DENOMINATOR must be 1, then the pre-processor symbol + UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The quotient + is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from the + msb to the first non-zero bit in the UWtype X. This is the number of + steps X needs to be shifted left to set the msb. Undefined for X == 0, + unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + + 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + from the least significant end. + + 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two UWtype integers, composed by + HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + (i.e. carry out) is not stored anywhere, and is lost. + + 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. + + + Notes: + + For add_ssaaaa the two high and two low addends can both commute, but + unfortunately gcc only supports one "%" commutative in each asm block. + This has always been so but is only documented in recent versions + (eg. pre-release 3.3). Having two or more "%"s can cause an internal + compiler error in certain rare circumstances. + + Apparently it was only the last "%" that was ever actually respected, so + the code has been updated to leave just that. Clearly there's a free + choice whether high or low should get it, if there's a reason to favour + one over the other. Also obviously when the constraints on the two + operands are identical there's no benefit to the reloader in any "%" at + all. + + */ + +/* The CPUs come in alphabetical order below. + + Please add support for more CPUs here, or improve the current support + for the CPUs below! */ + + +/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc + 3.4 __builtin_clzl or __builtin_clzll, according to our limb size. + Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or + __builtin_ctzll. + + These builtins are only used when we check what code comes out, on some + chips they're merely libgcc calls, where we will instead want an inline + in that case (either asm or generic C). + + These builtins are better than an asm block of the same insn, since an + asm block doesn't give gcc any information about scheduling or resource + usage. We keep an asm block for use on prior versions of gcc though. + + For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but + it's not used (for count_leading_zeros) because it generally gives extra + code to ensure the result is 0 when the input is 0, which we don't need + or want. */ + +#ifdef _LONG_LONG_LIMB +#define count_leading_zeros_gcc_clz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_clzll (x); \ + } while (0) +#else +#define count_leading_zeros_gcc_clz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_clzl (x); \ + } while (0) +#endif + +#ifdef _LONG_LONG_LIMB +#define count_trailing_zeros_gcc_ctz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_ctzll (x); \ + } while (0) +#else +#define count_trailing_zeros_gcc_ctz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_ctzl (x); \ + } while (0) +#endif + + +/* FIXME: The macros using external routines like __MPN(count_leading_zeros) + don't need to be under !NO_ASM */ +#if ! defined (NO_ASM) + +#if defined (__alpha) && W_TYPE_SIZE == 64 +/* Most alpha-based machines, except Cray systems. */ +#if defined (__GNUC__) +#if __GMP_GNUC_PREREQ (3,3) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_alpha_umulh (__m0, __m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#else +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("umulh %r1,%2,%0" \ + : "=r" (ph) \ + : "%rJ" (__m0), "rI" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#else /* ! __GNUC__ */ +#include +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __UMULH (__m0, __m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __di; \ + __di = __MPN(invert_limb) (d); \ + udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ + } while (0) +#define UDIV_PREINV_ALWAYS 1 +#define UDIV_NEEDS_NORMALIZATION 1 +#endif /* LONGLONG_STANDALONE */ + +/* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm + always goes into libgmp.so, even when not actually used. */ +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB + +#if defined (__GNUC__) && HAVE_HOST_CPU_alpha_CIX +#define count_leading_zeros(COUNT,X) \ + __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X)) +#define count_trailing_zeros(COUNT,X) \ + __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X)) +#endif /* clz/ctz using cix */ + +#if ! defined (count_leading_zeros) \ + && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE) +/* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0. + "$31" is written explicitly in the asm, since an "r" constraint won't + select reg 31. There seems no need to worry about "r31" syntax for cray, + since gcc itself (pre-release 3.4) emits just $31 in various places. */ +#define ALPHA_CMPBGE_0(dst, src) \ + do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0) +/* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts + them, locating the highest non-zero byte. A second __clz_tab lookup + counts the leading zero bits in that byte, giving the result. */ +#define count_leading_zeros(count, x) \ + do { \ + UWtype __clz__b, __clz__c, __clz__x = (x); \ + ALPHA_CMPBGE_0 (__clz__b, __clz__x); /* zero bytes */ \ + __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F]; /* 8 to 1 byte */ \ + __clz__b = __clz__b * 8 - 7; /* 57 to 1 shift */ \ + __clz__x >>= __clz__b; \ + __clz__c = __clz_tab [__clz__x]; /* 8 to 1 bit */ \ + __clz__b = 65 - __clz__b; \ + (count) = __clz__b - __clz__c; \ + } while (0) +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB +#endif /* clz using cmpbge */ + +#if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE) +#if HAVE_ATTRIBUTE_CONST +long __MPN(count_leading_zeros) (UDItype) __attribute__ ((const)); +#else +long __MPN(count_leading_zeros) (UDItype); +#endif +#define count_leading_zeros(count, x) \ + ((count) = __MPN(count_leading_zeros) (x)) +#endif /* clz using mpn */ +#endif /* __alpha */ + +#if defined (__AVR) && W_TYPE_SIZE == 8 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + unsigned short __p = (unsigned short) (m0) * (m1); \ + (ph) = __p >> 8; \ + (pl) = __p; \ + } while (0) +#endif /* AVR */ + +#if defined (_CRAY) && W_TYPE_SIZE == 64 +#include +#define UDIV_PREINV_ALWAYS 1 +#define UDIV_NEEDS_NORMALIZATION 1 +long __MPN(count_leading_zeros) (UDItype); +#define count_leading_zeros(count, x) \ + ((count) = _leadz ((UWtype) (x))) +#if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */ +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = _int_mult_upper (__m0, __m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __di; \ + __di = __MPN(invert_limb) (d); \ + udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ + } while (0) +#endif /* LONGLONG_STANDALONE */ +#endif /* _CRAYIEEE */ +#endif /* _CRAY */ + +#if defined (__ia64) && W_TYPE_SIZE == 64 +/* This form encourages gcc (pre-release 3.4 at least) to emit predicated + "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic + code using "al>= _c; \ + if (_x >= 1 << 4) \ + _x >>= 4, _c += 4; \ + if (_x >= 1 << 2) \ + _x >>= 2, _c += 2; \ + _c += _x >> 1; \ + (count) = W_TYPE_SIZE - 1 - _c; \ + } while (0) +/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 + based, and we don't need a special case for x==0 here */ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + __asm__ ("popcnt %0 = %1" \ + : "=r" (count) \ + : "r" ((__ctz_x-1) & ~__ctz_x)); \ + } while (0) +#endif +#if defined (__INTEL_COMPILER) +#include +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UWtype __m0 = (m0), __m1 = (m1); \ + ph = _m64_xmahu (__m0, __m1, 0); \ + pl = __m0 * __m1; \ + } while (0) +#endif +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __di; \ + __di = __MPN(invert_limb) (d); \ + udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ + } while (0) +#define UDIV_PREINV_ALWAYS 1 +#define UDIV_NEEDS_NORMALIZATION 1 +#endif +#endif + + +#if defined (__GNUC__) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. */ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl)) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" (xl) \ + : "r" (__m0), "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" (xh) \ + : "r" (__m0), "r" (__m1)); \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" (q), "=q" (r) \ + : "1" (n1), "r" (n0), "r" (d)) +#define count_leading_zeros(count, x) \ + __asm__ ("clz %0,%1" \ + : "=r" (count) \ + : "r" (x)) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* __a29k__ */ + +#if defined (__arc__) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype) (ah)), \ + "rICal" ((USItype) (bh)), \ + "%r" ((USItype) (al)), \ + "rICal" ((USItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), \ + "=&r" (sl) \ + : "r" ((USItype) (ah)), \ + "rICal" ((USItype) (bh)), \ + "r" ((USItype) (al)), \ + "rICal" ((USItype) (bl))) +#endif + +#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ + && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bl) && -(USItype)(bl) < (USItype)(bl)) \ + __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), \ + "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC); \ + else \ + __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC); \ + } while (0) +/* FIXME: Extend the immediate range for the low word by using both ADDS and + SUBS, since they set carry in the same way. We need separate definitions + for thumb and non-thumb since thumb lacks RSC. */ +#if defined (__thumb__) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && __builtin_constant_p (bh) \ + && (ah) == (bh)) \ + __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0" \ + : "=r" (sh), "=r" (sl) \ + : "r" (al), "rI" (bl) __CLOBBER_CC); \ + else if (__builtin_constant_p (al)) \ + __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + else \ + __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ + } while (0) +#else +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && __builtin_constant_p (bh) \ + && (ah) == (bh)) \ + __asm__ ("subs\t%1, %2, %3\n\tsbc\t%0, %0, %0" \ + : "=r" (sh), "=r" (sl) \ + : "r" (al), "rI" (bl) __CLOBBER_CC); \ + else if (__builtin_constant_p (al)) \ + { \ + if (__builtin_constant_p (ah)) \ + __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + else \ + __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + } \ + else if (__builtin_constant_p (ah)) \ + { \ + if (__builtin_constant_p (bl)) \ + __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ + else \ + __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + } \ + else \ + __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ + } while (0) +#endif +#if defined (__ARM_ARCH_2__) || defined (__ARM_ARCH_2A__) \ + || defined (__ARM_ARCH_3__) +#define umul_ppmm(xh, xl, a, b) \ + do { \ + register USItype __t0, __t1, __t2; \ + __asm__ ("%@ Inlined umul_ppmm\n" \ + " mov %2, %5, lsr #16\n" \ + " mov %0, %6, lsr #16\n" \ + " bic %3, %5, %2, lsl #16\n" \ + " bic %4, %6, %0, lsl #16\n" \ + " mul %1, %3, %4\n" \ + " mul %4, %2, %4\n" \ + " mul %3, %0, %3\n" \ + " mul %0, %2, %0\n" \ + " adds %3, %4, %3\n" \ + " addcs %0, %0, #65536\n" \ + " adds %1, %1, %3, lsl #16\n" \ + " adc %0, %0, %3, lsr #16" \ + : "=&r" ((USItype) (xh)), "=r" ((USItype) (xl)), \ + "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ + : "r" ((USItype) (a)), "r" ((USItype) (b)) __CLOBBER_CC); \ + } while (0) +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __r; \ + (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); +#endif /* LONGLONG_STANDALONE */ +#else /* ARMv4 or newer */ +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) +#define smul_ppmm(xh, xl, a, b) \ + __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __di; \ + __di = __MPN(invert_limb) (d); \ + udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ + } while (0) +#define UDIV_PREINV_ALWAYS 1 +#define UDIV_NEEDS_NORMALIZATION 1 +#endif /* LONGLONG_STANDALONE */ +#endif /* defined(__ARM_ARCH_2__) ... */ +#define count_leading_zeros(count, x) count_leading_zeros_gcc_clz(count, x) +#define count_trailing_zeros(count, x) count_trailing_zeros_gcc_ctz(count, x) +#endif /* __arm__ */ + +#if defined (__aarch64__) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl)) \ + __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \ + : "=r" (sh), "=&r" (sl) \ + : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \ + "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\ + else \ + __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \ + : "=r" (sh), "=&r" (sl) \ + : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)), \ + "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bl) && ~(UDItype)(bl) <= (UDItype)(bl)) \ + __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \ + : "=r,r" (sh), "=&r,&r" (sl) \ + : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \ + "r,Z" ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\ + else \ + __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \ + : "=r,r" (sh), "=&r,&r" (sl) \ + : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)), \ + "r,Z" ((UDItype)(al)), "rI,r" ((UDItype)(bl)) __CLOBBER_CC);\ + } while(0); +#if __GMP_GNUC_PREREQ (4,9) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ + __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ + w1 = __ll >> 64; \ + w0 = __ll; \ + } while (0) +#endif +#if !defined (umul_ppmm) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (__m0), "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#define count_leading_zeros(count, x) count_leading_zeros_gcc_clz(count, x) +#define count_trailing_zeros(count, x) count_trailing_zeros_gcc_ctz(count, x) +#endif /* __aarch64__ */ + +#if defined (__clipper__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__x.__ll) \ + : "%0" ((USItype)(u)), "r" ((USItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __x; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__x.__ll) \ + : "%0" ((SItype)(u)), "r" ((SItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + +/* Fujitsu vector computers. */ +#if defined (__uxp__) && W_TYPE_SIZE == 32 +#define umul_ppmm(ph, pl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\ + (ph) = __x.__i.__h; \ + (pl) = __x.__i.__l; \ + } while (0) +#define smul_ppmm(ph, pl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \ + (ph) = __x.__i.__h; \ + (pl) = __x.__i.__l; \ + } while (0) +#endif + +#if defined (__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1\n\taddx %3,%0" \ + : "=g" (sh), "=&g" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \ + : "=g" (sh), "=&g" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" (ph), "=r" (pl) \ + : "%0" ((USItype)(m0)), "g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" (q), "=r" (r) \ + : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("bsch/1 %1,%0" \ + : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0)) +#endif + +#if defined (__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%I5 %5,%r4,%1\n\taddc %r2,%r3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%I4 %4,%r5,%1\n\tsubb %r2,%r3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl)) +#if defined (_PA_RISC1_1) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ + } while (0) +#endif +#define count_leading_zeros(count, x) \ + do { \ + USItype __tmp; \ + __asm__ ( \ + "ldi 1,%0\n" \ +" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ +" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ +" ldo 16(%0),%0 ; Yes. Perform add.\n" \ +" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ +" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ +" ldo 8(%0),%0 ; Yes. Perform add.\n" \ +" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ +" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ +" ldo 4(%0),%0 ; Yes. Perform add.\n" \ +" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ +" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ +" ldo 2(%0),%0 ; Yes. Perform add.\n" \ +" extru %1,30,1,%1 ; Extract bit 1.\n" \ +" sub %0,%1,%0 ; Subtract it.\n" \ + : "=r" (count), "=r" (__tmp) : "1" (x)); \ + } while (0) +#endif /* hppa */ + +/* These macros are for ABI=2.0w. In ABI=2.0n they can't be used, since GCC + (3.2) puts longlong into two adjacent 32-bit registers. Presumably this + is just a case of no direct support for 2.0n but treating it like 1.0. */ +#if defined (__hppa) && W_TYPE_SIZE == 64 && ! defined (_LONG_LONG_LIMB) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%I5 %5,%r4,%1\n\tadd,dc %r2,%r3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%I4 %4,%r5,%1\n\tsub,db %r2,%r3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl)) +#endif /* hppa */ + +#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 +#if defined (__zarch__) || defined (HAVE_HOST_CPU_s390_zarch) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ +/* if (__builtin_constant_p (bl)) \ + __asm__ ("alfi\t%1,%o5\n\talcr\t%0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" (ah), "r" (bh), "%1" (al), "n" (bl) __CLOBBER_CC);\ + else \ +*/ __asm__ ("alr\t%1,%5\n\talcr\t%0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" (ah), "r" (bh), "%1" (al), "r" (bl)__CLOBBER_CC); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ +/* if (__builtin_constant_p (bl)) \ + __asm__ ("slfi\t%1,%o5\n\tslbr\t%0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" (ah), "r" (bh), "1" (al), "n" (bl) __CLOBBER_CC); \ + else \ +*/ __asm__ ("slr\t%1,%5\n\tslbr\t%0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" (ah), "r" (bh), "1" (al), "r" (bl) __CLOBBER_CC); \ + } while (0) +#if __GMP_GNUC_PREREQ (4,5) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __x.__ll = (UDItype) (m0) * (UDItype) (m1); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#else +#if 0 +/* FIXME: this fails if gcc knows about the 64-bit registers. Use only + with a new enough processor pretending we have 32-bit registers. */ +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mlr\t%0,%2" \ + : "=r" (__x.__ll) \ + : "%0" (m0), "r" (m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#else +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + /* When we have 64-bit regs and gcc is aware of that, we cannot simply use + DImode for the product, since that would be allocated to a single 64-bit + register, whereas mlr uses the low 32-bits of an even-odd register pair. + */ \ + register USItype __r0 __asm__ ("0"); \ + register USItype __r1 __asm__ ("1") = (m0); \ + __asm__ ("mlr\t%0,%3" \ + : "=r" (__r0), "=r" (__r1) \ + : "r" (__r1), "r" (m1)); \ + (xh) = __r0; (xl) = __r1; \ + } while (0) +#endif /* if 0 */ +#endif +#if 0 +/* FIXME: this fails if gcc knows about the 64-bit registers. Use only + with a new enough processor pretending we have 32-bit registers. */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("dlr\t%0,%2" \ + : "=r" (__x.__ll) \ + : "0" (__x.__ll), "r" (d)); \ + (q) = __x.__i.__l; (r) = __x.__i.__h; \ + } while (0) +#else +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + register USItype __r0 __asm__ ("0") = (n1); \ + register USItype __r1 __asm__ ("1") = (n0); \ + __asm__ ("dlr\t%0,%4" \ + : "=r" (__r0), "=r" (__r1) \ + : "r" (__r0), "r" (__r1), "r" (d)); \ + (q) = __r1; (r) = __r0; \ + } while (0) +#endif /* if 0 */ +#else /* if __zarch__ */ +/* FIXME: this fails if gcc knows about the 64-bit registers. */ +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mr\t%0,%2" \ + : "=r" (__x.__ll) \ + : "%0" (m0), "r" (m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +/* FIXME: this fails if gcc knows about the 64-bit registers. */ +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("dr\t%0,%2" \ + : "=r" (__x.__ll) \ + : "0" (__x.__ll), "r" (d)); \ + (q) = __x.__i.__l; (r) = __x.__i.__h; \ + } while (0) +#endif /* if __zarch__ */ +#endif + +#if defined (__s390x__) && W_TYPE_SIZE == 64 +/* We need to cast operands with register constraints, otherwise their types + will be assumed to be SImode by gcc. For these machines, such operations + will insert a value into the low 32 bits, and leave the high 32 bits with + garbage. */ +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + __asm__ ("algr\t%1,%5\n\talcgr\t%0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((UDItype)(ah)), "r" ((UDItype)(bh)), \ + "%1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + __asm__ ("slgr\t%1,%5\n\tslbgr\t%0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((UDItype)(ah)), "r" ((UDItype)(bh)), \ + "1" ((UDItype)(al)), "r" ((UDItype)(bl)) __CLOBBER_CC); \ + } while (0) +#if !defined (__clang__) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {unsigned int __attribute__ ((mode(TI))) __ll; \ + struct {UDItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mlgr\t%0,%2" \ + : "=r" (__x.__ll) \ + : "%0" ((UDItype)(m0)), "r" ((UDItype)(m1))); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {unsigned int __attribute__ ((mode(TI))) __ll; \ + struct {UDItype __h, __l;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("dlgr\t%0,%2" \ + : "=r" (__x.__ll) \ + : "0" (__x.__ll), "r" ((UDItype)(d))); \ + (q) = __x.__i.__l; (r) = __x.__i.__h; \ + } while (0) +#endif +#if 0 /* FIXME: Enable for z10 (?) */ +#define count_leading_zeros(cnt, x) \ + do { \ + union {unsigned int __attribute__ ((mode(TI))) __ll; \ + struct {UDItype __h, __l;} __i; \ + } __clr_cnt; \ + __asm__ ("flogr\t%0,%1" \ + : "=r" (__clr_cnt.__ll) \ + : "r" (x) __CLOBBER_CC); \ + (cnt) = __clr_cnt.__i.__h; \ + } while (0) +#endif +#endif + +/* On x86 and x86_64, every asm implicitly clobbers "flags" and "fpsr", + so we don't need __CLOBBER_CC. */ +#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%k1\n\tadcl %3,%k0" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%k1\n\tsbbl %3,%k0" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" (w0), "=d" (w1) \ + : "%0" ((USItype)(u)), "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\ + __asm__ ("divl %4" /* stringification in K&R C */ \ + : "=a" (q), "=d" (r) \ + : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx))) + +#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium || HAVE_HOST_CPU_pentiummmx +/* Pentium bsrl takes between 10 and 72 cycles depending where the most + significant 1 bit is, hence the use of the following alternatives. bsfl + is slow too, between 18 and 42 depending where the least significant 1 + bit is, so let the generic count_trailing_zeros below make use of the + count_leading_zeros here too. */ + +#if HAVE_HOST_CPU_pentiummmx && ! defined (LONGLONG_STANDALONE) +/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1 + cache miss reading from __clz_tab. For P55 it's favoured over the float + below so as to avoid mixing MMX and x87, since the penalty for switching + between the two is about 100 cycles. + + The asm block sets __shift to -3 if the high 24 bits are clear, -2 for + 16, -1 for 8, or 0 otherwise. This could be written equivalently as + follows, but as of gcc 2.95.2 it results in conditional jumps. + + __shift = -(__n < 0x1000000); + __shift -= (__n < 0x10000); + __shift -= (__n < 0x100); + + The middle two sbbl and cmpl's pair, and with luck something gcc + generates might pair with the first cmpl and the last sbbl. The "32+1" + constant could be folded into __clz_tab[], but it doesn't seem worth + making a different table just for that. */ + +#define count_leading_zeros(c,n) \ + do { \ + USItype __n = (n); \ + USItype __shift; \ + __asm__ ("cmpl $0x1000000, %1\n" \ + "sbbl %0, %0\n" \ + "cmpl $0x10000, %1\n" \ + "sbbl $0, %0\n" \ + "cmpl $0x100, %1\n" \ + "sbbl $0, %0\n" \ + : "=&r" (__shift) : "r" (__n)); \ + __shift = __shift*8 + 24 + 1; \ + (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift]; \ + } while (0) +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB +#define COUNT_LEADING_ZEROS_0 31 /* n==0 indistinguishable from n==1 */ + +#else /* ! pentiummmx || LONGLONG_STANDALONE */ +/* The following should be a fixed 14 cycles or so. Some scheduling + opportunities should be available between the float load/store too. This + sort of code is used in gcc 3 for __builtin_ffs (with "n&-n") and is + apparently suggested by the Intel optimizing manual (don't know exactly + where). gcc 2.95 or up will be best for this, so the "double" is + correctly aligned on the stack. */ +#define count_leading_zeros(c,n) \ + do { \ + union { \ + double d; \ + unsigned a[2]; \ + } __u; \ + __u.d = (UWtype) (n); \ + (c) = 0x3FF + 31 - (__u.a[1] >> 20); \ + } while (0) +#define COUNT_LEADING_ZEROS_0 (0x3FF + 31) +#endif /* pentiummx */ + +#else /* ! pentium */ + +#if __GMP_GNUC_PREREQ (3,4) /* using bsrl */ +#define count_leading_zeros(count,x) count_leading_zeros_gcc_clz(count,x) +#endif /* gcc clz */ + +/* On P6, gcc prior to 3.0 generates a partial register stall for + __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former + being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the + cost of one extra instruction. Do this for "i386" too, since that means + generic x86. */ +#if ! defined (count_leading_zeros) && __GNUC__ < 3 \ + && (HAVE_HOST_CPU_i386 \ + || HAVE_HOST_CPU_i686 \ + || HAVE_HOST_CPU_pentiumpro \ + || HAVE_HOST_CPU_pentium2 \ + || HAVE_HOST_CPU_pentium3) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + ASSERT ((x) != 0); \ + __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ + (count) = 31 - __cbtmp; \ + } while (0) +#endif /* gcc<3 asm bsrl */ + +#ifndef count_leading_zeros +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + ASSERT ((x) != 0); \ + __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#endif /* asm bsrl */ + +#if __GMP_GNUC_PREREQ (3,4) /* using bsfl */ +#define count_trailing_zeros(count,x) count_trailing_zeros_gcc_ctz(count,x) +#endif /* gcc ctz */ + +#ifndef count_trailing_zeros +#define count_trailing_zeros(count, x) \ + do { \ + ASSERT ((x) != 0); \ + __asm__ ("bsfl %1,%k0" : "=r" (count) : "rm" ((USItype)(x))); \ + } while (0) +#endif /* asm bsfl */ + +#endif /* ! pentium */ + +#endif /* 80x86 */ + +#if defined (__amd64__) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addq %5,%q1\n\tadcq %3,%q0" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \ + "%1" ((UDItype)(al)), "rme" ((UDItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subq %5,%q1\n\tsbbq %3,%q0" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \ + "1" ((UDItype)(al)), "rme" ((UDItype)(bl))) +#if X86_ASM_MULX \ + && (HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell \ + || HAVE_HOST_CPU_skylake || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulx\t%3, %q0, %q1" \ + : "=r" (w0), "=r" (w1) \ + : "%d" ((UDItype)(u)), "rm" ((UDItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulq\t%3" \ + : "=a" (w0), "=d" (w1) \ + : "%0" ((UDItype)(u)), "rm" ((UDItype)(v))) +#endif +#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\ + __asm__ ("divq %4" /* stringification in K&R C */ \ + : "=a" (q), "=d" (r) \ + : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx))) + +#if HAVE_HOST_CPU_haswell || HAVE_HOST_CPU_broadwell || HAVE_HOST_CPU_skylake \ + || HAVE_HOST_CPU_k10 || HAVE_HOST_CPU_bd1 || HAVE_HOST_CPU_bd2 \ + || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 || HAVE_HOST_CPU_zen \ + || HAVE_HOST_CPU_bobcat || HAVE_HOST_CPU_jaguar +#define count_leading_zeros(count, x) \ + do { \ + /* This is lzcnt, spelled for older assemblers. Destination and */ \ + /* source must be a 64-bit registers, hence cast and %q. */ \ + __asm__ ("rep;bsr\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \ + } while (0) +#define COUNT_LEADING_ZEROS_0 64 +#else +#define count_leading_zeros(count, x) \ + do { \ + UDItype __cbtmp; \ + ASSERT ((x) != 0); \ + __asm__ ("bsr\t%1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \ + (count) = __cbtmp ^ 63; \ + } while (0) +#endif + +#if HAVE_HOST_CPU_bd2 || HAVE_HOST_CPU_bd3 || HAVE_HOST_CPU_bd4 \ + || HAVE_HOST_CPU_zen || HAVE_HOST_CPU_jaguar +#define count_trailing_zeros(count, x) \ + do { \ + /* This is tzcnt, spelled for older assemblers. Destination and */ \ + /* source must be a 64-bit registers, hence cast and %q. */ \ + __asm__ ("rep;bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \ + } while (0) +#define COUNT_TRAILING_ZEROS_0 64 +#else +#define count_trailing_zeros(count, x) \ + do { \ + ASSERT ((x) != 0); \ + __asm__ ("bsf\t%1, %q0" : "=r" (count) : "rm" ((UDItype)(x))); \ + } while (0) +#endif +#endif /* __amd64__ */ + +#if defined (__i860__) && W_TYPE_SIZE == 32 +#define rshift_rhlc(r,h,l,c) \ + __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \ + "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +#if defined (__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl)) +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \ + __w; }) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ + } while (0) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 (-32) /* sic */ +#if defined (__i960mx) /* what is the proper symbol to test??? */ +#define rshift_rhlc(r,h,l,c) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ + } +#endif /* i960mx */ +#endif /* i960 */ + + +#if defined (__loongarch64) && W_TYPE_SIZE == 64 +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __u = (u), __v = (v); \ + (w0) = __u * __v; \ + (w1) = (unsigned __int128__) __u * __v >> 64; \ + } while (0) +#endif + + +#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \ + || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \ + || defined (__mc5307__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ + : "=d" (sh), "=&d" (sl) \ + : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ + : "=d" (sh), "=&d" (sl) \ + : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */ +#if defined (__mc68020__) || defined(mc68020) \ + || defined (__mc68030__) || defined (mc68030) \ + || defined (__mc68040__) || defined (mc68040) \ + || defined (__mcpu32__) || defined (mcpu32) \ + || defined (__NeXT__) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" (w0), "=d" (w1) \ + : "%0" ((USItype)(u)), "dmi" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" (q), "=d" (r) \ + : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" (q), "=d" (r) \ + : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) +#else /* for other 68k family members use 16x16->32 multiplication */ +#define umul_ppmm(xh, xl, a, b) \ + do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm\n" \ +" move%.l %5,%3\n" \ +" move%.l %2,%0\n" \ +" move%.w %3,%1\n" \ +" swap %3\n" \ +" swap %0\n" \ +" mulu%.w %2,%1\n" \ +" mulu%.w %3,%0\n" \ +" mulu%.w %2,%3\n" \ +" swap %2\n" \ +" mulu%.w %5,%2\n" \ +" add%.l %3,%2\n" \ +" jcc 1f\n" \ +" add%.l %#0x10000,%0\n" \ +"1: move%.l %2,%3\n" \ +" clr%.w %2\n" \ +" swap %2\n" \ +" swap %3\n" \ +" clr%.w %3\n" \ +" add%.l %3,%1\n" \ +" addx%.l %2,%0\n" \ +" | End inlined umul_ppmm" \ + : "=&d" (xh), "=&d" (xl), \ + "=&d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ + } while (0) +#endif /* not mc68020 */ +/* The '020, '030, '040 and '060 have bitfield insns. + GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to + exclude bfffo on that chip (bitfield insns not available). */ +#if (defined (__mc68020__) || defined (mc68020) \ + || defined (__mc68030__) || defined (mc68030) \ + || defined (__mc68040__) || defined (mc68040) \ + || defined (__mc68060__) || defined (mc68060) \ + || defined (__NeXT__)) \ + && ! defined (__mcpu32__) +#define count_leading_zeros(count, x) \ + __asm__ ("bfffo %1{%b2:%b2},%0" \ + : "=d" (count) \ + : "od" ((USItype) (x)), "n" (0)) +#define COUNT_LEADING_ZEROS_0 32 +#endif +#endif /* mc68000 */ + +#if defined (__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl)) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl)) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 63 /* sic */ +#if defined (__m88110__) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +#if defined (__mips) && W_TYPE_SIZE == 32 +#if __GMP_GNUC_PREREQ (4,4) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __ll = (UDItype)(u) * (v); \ + w1 = __ll >> 32; \ + w0 = __ll; \ + } while (0) +#endif +#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) && !defined (__clang__) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) +#endif +#if !defined (umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \ + : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) +#endif +#endif /* __mips */ + +#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if defined (_MIPS_ARCH_MIPS64R6) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __m0 = (u), __m1 = (v); \ + (w0) = __m0 * __m1; \ + __asm__ ("dmuhu\t%0, %1, %2" : "=d" (w1) : "d" (__m0), "d" (__m1)); \ + } while (0) +#endif +#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (4,4) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ + __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ + w1 = __ll >> 64; \ + w0 = __ll; \ + } while (0) +#endif +#if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) && !defined (__clang__) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" (w0), "=h" (w1) \ + : "d" ((UDItype)(u)), "d" ((UDItype)(v))) +#endif +#if !defined (umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \ + : "=d" (w0), "=d" (w1) \ + : "d" ((UDItype)(u)), "d" ((UDItype)(v))) +#endif +#endif /* __mips */ + +#if defined (__mmix__) && W_TYPE_SIZE == 64 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("MULU %0,%2,%3" : "=r" (w0), "=z" (w1) : "r" (u), "r" (v)) +#endif + +#if defined (__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __asm__ ("meid %2,%0" \ + : "=g" (__x.__ll) \ + : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__x.__ll) \ + : "0" (__x.__ll), "g" ((USItype)(d))); \ + (r) = __x.__i.__l; (q) = __x.__i.__h; }) +#define count_trailing_zeros(count,x) \ + do { \ + __asm__ ("ffsd %2,%0" \ + : "=r" (count) \ + : "0" ((USItype) 0), "r" ((USItype) (x))); \ + } while (0) +#endif /* __ns32000__ */ + +/* In the past we had a block of various #defines tested + _ARCH_PPC - AIX + _ARCH_PWR - AIX + __powerpc__ - gcc + __POWERPC__ - BEOS + __ppc__ - Darwin + PPC - old gcc, GNU/Linux, SysV + The plain PPC test was not good for vxWorks, since PPC is defined on all + CPUs there (eg. m68k too), as a constant one is expected to compare + CPU_FAMILY against. + + At any rate, this was pretty unattractive and a bit fragile. The use of + HAVE_HOST_CPU_FAMILY is designed to cut through it all and be sure of + getting the desired effect. + + ENHANCE-ME: We should test _IBMR2 here when we add assembly support for + the system vendor compilers. (Is that vendor compilers with inline asm, + or what?) */ + +#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc) \ + && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl) \ + __CLOBBER_CC); \ + else \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "%r" (al), "rI" (bl) \ + __CLOBBER_CC); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl) \ + __CLOBBER_CC); \ + else \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl) \ + __CLOBBER_CC); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 32 +#if HAVE_HOST_CPU_FAMILY_powerpc +#if __GMP_GNUC_PREREQ (4,4) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UDItype __ll = (UDItype)(u) * (v); \ + w1 = __ll >> 32; \ + w0 = __ll; \ + } while (0) +#endif +#if !defined (umul_ppmm) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#else +#define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1)) +#define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d)) +#endif +#endif /* 32-bit POWER architecture variants. */ + +/* We should test _IBMR2 here when we add assembly support for the system + vendor compilers. */ +#if HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64 +#if !defined (_LONG_LONG_LIMB) +/* _LONG_LONG_LIMB is ABI=mode32 where adde operates on 32-bit values. So + use adde etc only when not _LONG_LONG_LIMB. */ +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), \ + "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), \ + "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + else \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \ + "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + } while (0) +/* We use "*rI" for the constant operand here, since with just "I", gcc barfs. + This might seem strange, but gcc folds away the dead code late. */ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bl) \ + && (bl) > -0x8000 && (bl) <= 0x8000 && (bl) != 0) { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("addic %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), \ + "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("addic %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), \ + "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \ + __CLOBBER_CC); \ + else \ + __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \ + "r" ((UDItype)(al)), "*rI" (-((UDItype)(bl))) \ + __CLOBBER_CC); \ + } else { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(bh)), \ + "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(bh)), \ + "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), \ + "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), \ + "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + else \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" ((UDItype)(ah)), "r" ((UDItype)(bh)), \ + "rI" ((UDItype)(al)), "r" ((UDItype)(bl)) \ + __CLOBBER_CC); \ + } \ + } while (0) +#endif /* ! _LONG_LONG_LIMB */ +#define count_leading_zeros(count, x) \ + __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) +#define COUNT_LEADING_ZEROS_0 64 +#if __GMP_GNUC_PREREQ (4,8) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ + __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ + w1 = __ll >> 64; \ + w0 = __ll; \ + } while (0) +#endif +#if !defined (umul_ppmm) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + DItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#endif /* 64-bit PowerPC. */ + +#if defined (__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1\n\taddwc %3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1\n\tsubwb %3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("movw %1,%R0\n\tuemul %2,%0" \ + : "=&r" (__x.__ll) \ + : "g" ((USItype) (u)), "g" ((USItype)(v))); \ + (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) +#endif /* __pyr__ */ + +#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5\n\tae %0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5\n\tse %0,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), "r" ((USItype)(bl))) +#define smul_ppmm(ph, pl, m0, m1) \ + __asm__ ( \ + "s r2,r2\n" \ +" mts r10,%2\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" m r2,%3\n" \ +" cas %0,r2,r0\n" \ +" mfs r10,%1" \ + : "=r" (ph), "=r" (pl) \ + : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \ + : "r2") +#define count_leading_zeros(count, x) \ + do { \ + if ((x) >= 0x10000) \ + __asm__ ("clz %0,%1" \ + : "=r" (count) : "r" ((USItype)(x) >> 16)); \ + else \ + { \ + __asm__ ("clz %0,%1" \ + : "=r" (count) : "r" ((USItype)(x))); \ + (count) += 16; \ + } \ + } while (0) +#endif /* RT/ROMP */ + +#if defined (__riscv) && defined (__riscv_mul) && W_TYPE_SIZE == 64 +#define umul_ppmm(ph, pl, u, v) \ + do { \ + UDItype __u = (u), __v = (v); \ + (pl) = __u * __v; \ + __asm__ ("mulhu\t%0, %1, %2" : "=r" (ph) : "%r" (__u), "r" (__v)); \ + } while (0) +#endif + +#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \ + : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach") +#endif + +#if defined (__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \ + __CLOBBER_CC) +/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h + doesn't define anything to indicate that to us, it only sets __sparcv8. */ +#if defined (__sparc_v9__) || defined (__sparcv9) +/* Perhaps we should use floating-point operations here? */ +#if 0 +/* Triggers a bug making mpz/tests/t-gcd.c fail. + Perhaps we simply need explicitly zero-extend the inputs? */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \ + "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1") +#else +/* Use v8 umul until above bug is fixed. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) +#endif +/* Use a plain v8 divide for v9. */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) +#else +#if defined (__sparc_v8__) /* gcc normal */ \ + || defined (__sparcv8) /* gcc solaris */ \ + || HAVE_HOST_CPU_supersparc +/* Don't match immediate range because, 1) it is not often useful, + 2) the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) + +#if HAVE_HOST_CPU_supersparc +#else +/* Don't use this on SuperSPARC because its udiv only handles 53 bit + dividends and will trap to the kernel for the rest. */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) +#endif /* HAVE_HOST_CPU_supersparc */ + +#else /* ! __sparc_v8__ */ +#if defined (__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd\n" \ +" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ +" tst %%g0\n" \ +" divscc %3,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%%g1\n" \ +" divscc %%g1,%4,%0\n" \ +" rd %%y,%1\n" \ +" bl,a 1f\n" \ +" add %1,%4,%1\n" \ +"1: ! End of inline udiv_qrnnd" \ + : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \ + : "%g1" __AND_CLOBBER_CC) +#define count_leading_zeros(count, x) \ + __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x)) +/* Early sparclites return 63 for an argument of 0, but they warn that future + implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 + undefined. */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +#endif /* __sparc_v9__ */ +/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm\n" \ +" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ +" sra %3,31,%%g2 ! Don't move this insn\n" \ +" and %2,%%g2,%%g2 ! Don't move this insn\n" \ +" andcc %%g0,0,%%g1 ! Don't move this insn\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,%3,%%g1\n" \ +" mulscc %%g1,0,%%g1\n" \ +" add %%g1,%%g2,%0\n" \ +" rd %%y,%1" \ + : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +#endif +#ifndef udiv_qrnnd +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __r; \ + (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); +#endif /* LONGLONG_STANDALONE */ +#endif /* udiv_qrnnd */ +#endif /* __sparc__ */ + +#if defined (__sparc__) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ( \ + "addcc %r4,%5,%1\n" \ + " addccc %r6,%7,%%g0\n" \ + " addc %r2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" ((UDItype)(ah)), "rI" ((UDItype)(bh)), \ + "%rJ" ((UDItype)(al)), "rI" ((UDItype)(bl)), \ + "%rJ" ((UDItype)(al) >> 32), "rI" ((UDItype)(bl) >> 32) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ( \ + "subcc %r4,%5,%1\n" \ + " subccc %r6,%7,%%g0\n" \ + " subc %r2,%3,%0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" ((UDItype)(ah)), "rI" ((UDItype)(bh)), \ + "rJ" ((UDItype)(al)), "rI" ((UDItype)(bl)), \ + "rJ" ((UDItype)(al) >> 32), "rI" ((UDItype)(bl) >> 32) \ + __CLOBBER_CC) +#if __VIS__ >= 0x300 +#undef add_ssaaaa +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ( \ + "addcc %r4, %5, %1\n" \ + " addxc %r2, %r3, %0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" ((UDItype)(ah)), "rJ" ((UDItype)(bh)), \ + "%rJ" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (pl) = __m0 * __m1; \ + __asm__ ("umulxhi\t%2, %1, %0" \ + : "=r" (ph) \ + : "%r" (__m0), "r" (__m1)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("lzd\t%1,%0" : "=r" (count) : "r" (x)) +/* Needed by count_leading_zeros_32 in sparc64.h. */ +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB +#endif +#endif + +#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ + : "=g" (sh), "=&g" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ + : "=g" (sh), "=&g" (sl) \ + : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), "g" ((USItype)(bl))) +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __x; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __x; \ + __x.__i.__h = n1; __x.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \ + } while (0) +#if 0 +/* FIXME: This instruction appears to be unimplemented on some systems (vax + 8800 maybe). */ +#define count_trailing_zeros(count,x) \ + do { \ + __asm__ ("ffs 0, 31, %1, %0" \ + : "=g" (count) \ + : "g" ((USItype) (x))); \ + } while (0) +#endif +#endif /* vax */ + +#if defined (__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" (sh), "=&r" (sl) \ + : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {long int __ll; \ + struct {unsigned int __h, __l;} __i; \ + } __x; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \ + : "%1" (m0), "rQR" (m1)); \ + (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ + } while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + +#endif /* NO_ASM */ + + +/* FIXME: "sidi" here is highly doubtful, should sometimes be "diti". */ +#if !defined (umul_ppmm) && defined (__umulsidi3) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDWtype __ll = __umulsidi3 (m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ + } while (0) +#endif + +#if !defined (__umulsidi3) +#define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + umul_ppmm (__hi, __lo, u, v); \ + ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + + +#if defined (__cplusplus) +#define __longlong_h_C "C" +#else +#define __longlong_h_C +#endif + +/* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist. The "_r" + forms have "reversed" arguments, meaning the pointer is last, which + sometimes allows better parameter passing, in particular on 64-bit + hppa. */ + +#define mpn_umul_ppmm __MPN(umul_ppmm) +extern __longlong_h_C UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype); + +#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm \ + && ! defined (LONGLONG_STANDALONE) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + UWtype __umul_ppmm__p0; \ + (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));\ + (wl) = __umul_ppmm__p0; \ + } while (0) +#endif + +#define mpn_umul_ppmm_r __MPN(umul_ppmm_r) +extern __longlong_h_C UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *); + +#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r \ + && ! defined (LONGLONG_STANDALONE) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + UWtype __umul_p0; \ + (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_p0); \ + (wl) = __umul_p0; \ + } while (0) +#endif + +#define mpn_udiv_qrnnd __MPN(udiv_qrnnd) +extern __longlong_h_C UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype); + +#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd \ + && ! defined (LONGLONG_STANDALONE) +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + UWtype __udiv_qrnnd_r; \ + (q) = mpn_udiv_qrnnd (&__udiv_qrnnd_r, \ + (UWtype) (n1), (UWtype) (n0), (UWtype) d); \ + (r) = __udiv_qrnnd_r; \ + } while (0) +#endif + +#define mpn_udiv_qrnnd_r __MPN(udiv_qrnnd_r) +extern __longlong_h_C UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *); + +#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r \ + && ! defined (LONGLONG_STANDALONE) +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + UWtype __udiv_qrnnd_r; \ + (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d, \ + &__udiv_qrnnd_r); \ + (r) = __udiv_qrnnd_r; \ + } while (0) +#endif + + +/* If this machine has no inline assembler, use C macros. */ + +#if !defined (add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + UWtype __al = (al); \ + UWtype __bl = (bl); \ + __x = __al + __bl; \ + (sh) = (ah) + (bh) + (__x < __al); \ + (sl) = __x; \ + } while (0) +#endif + +#if !defined (sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + UWtype __al = (al); \ + UWtype __bl = (bl); \ + __x = __al - __bl; \ + (sh) = (ah) - (bh) - (__al < __bl); \ + (sl) = __x; \ + } while (0) +#endif + +/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of + smul_ppmm. */ +#if !defined (umul_ppmm) && defined (smul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __xm0 = (u), __xm1 = (v); \ + smul_ppmm (__w1, w0, __xm0, __xm1); \ + (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ + + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ + } while (0) +#endif + +/* If we still don't have umul_ppmm, define it using plain C. + + For reference, when this code is used for squaring (ie. u and v identical + expressions), gcc recognises __x1 and __x2 are the same and generates 3 + multiplies, not 4. The subsequent additions could be optimized a bit, + but the only place GMP currently uses such a square is mpn_sqr_basecase, + and chips obliged to use this generic C umul will have plenty of worse + performance problems than a couple of extra instructions on the diagonal + of sqr_basecase. */ + +#if !defined (umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ + \ + __ul = __ll_lowpart (__u); \ + __uh = __ll_highpart (__u); \ + __vl = __ll_lowpart (__v); \ + __vh = __ll_highpart (__v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \ + } while (0) +#endif + +/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will + exist in one form or another. */ +#if !defined (smul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __xm0 = (u), __xm1 = (v); \ + umul_ppmm (__w1, w0, __xm0, __xm1); \ + (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \ + - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \ + } while (0) +#endif + +/* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + \ + ASSERT ((d) != 0); \ + ASSERT ((n1) < (d)); \ + \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __q1 = (n1) / __d1; \ + __r1 = (n1) - __q1 * __d1; \ + __m = __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __q0 = __r1 / __d1; \ + __r0 = __r1 - __q0 * __d1; \ + __m = __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through + __udiv_w_sdiv (defined in libgcc or elsewhere). */ +#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) \ + && ! defined (LONGLONG_STANDALONE) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + UWtype __r; \ + (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ + (r) = __r; \ + } while (0) +__GMP_DECLSPEC UWtype __MPN(udiv_w_sdiv) (UWtype *, UWtype, UWtype, UWtype); +#endif + +/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined (udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#if !defined (count_leading_zeros) +#define count_leading_zeros(count, x) \ + do { \ + UWtype __xr = (x); \ + UWtype __a; \ + \ + if (W_TYPE_SIZE == 32) \ + { \ + __a = __xr < ((UWtype) 1 << 2*__BITS4) \ + ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1) \ + : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1 \ + : 3*__BITS4 + 1); \ + } \ + else \ + { \ + for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + ++__a; \ + } \ + \ + (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \ + } while (0) +/* This version gives a well-defined value for zero. */ +#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1) +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB +#define COUNT_LEADING_ZEROS_SLOW +#endif + +/* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */ +#if HAVE_HOST_CPU_FAMILY_x86 && WANT_FAT_BINARY +#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB +#endif + +#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB +extern const unsigned char __GMP_DECLSPEC __clz_tab[129]; +#endif + +#if !defined (count_trailing_zeros) +#if !defined (COUNT_LEADING_ZEROS_SLOW) +/* Define count_trailing_zeros using an asm count_leading_zeros. */ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + UWtype __ctz_c; \ + ASSERT (__ctz_x != 0); \ + count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ + (count) = W_TYPE_SIZE - 1 - __ctz_c; \ + } while (0) +#else +/* Define count_trailing_zeros in plain C, assuming small counts are common. + We use clz_tab without ado, since the C count_leading_zeros above will have + pulled it in. */ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + int __ctz_c; \ + \ + if (LIKELY ((__ctz_x & 0xff) != 0)) \ + (count) = __clz_tab[__ctz_x & -__ctz_x] - 2; \ + else \ + { \ + for (__ctz_c = 8 - 2; __ctz_c < W_TYPE_SIZE - 2; __ctz_c += 8) \ + { \ + __ctz_x >>= 8; \ + if (LIKELY ((__ctz_x & 0xff) != 0)) \ + break; \ + } \ + \ + (count) = __ctz_c + __clz_tab[__ctz_x & -__ctz_x]; \ + } \ + } while (0) +#endif +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif + +/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and + that hence the latter should always be used. */ +#ifndef UDIV_PREINV_ALWAYS +#define UDIV_PREINV_ALWAYS 0 +#endif diff --git a/src/ls-dir.c b/src/ls-dir.c new file mode 100644 index 0000000..85fe242 --- /dev/null +++ b/src/ls-dir.c @@ -0,0 +1,2 @@ +#include "ls.h" +int ls_mode = LS_MULTI_COL; diff --git a/src/ls-ls.c b/src/ls-ls.c new file mode 100644 index 0000000..f33fbbc --- /dev/null +++ b/src/ls-ls.c @@ -0,0 +1,2 @@ +#include "ls.h" +int ls_mode = LS_LS; diff --git a/src/ls-vdir.c b/src/ls-vdir.c new file mode 100644 index 0000000..36ebf91 --- /dev/null +++ b/src/ls-vdir.c @@ -0,0 +1,2 @@ +#include "ls.h" +int ls_mode = LS_LONG_FORMAT; diff --git a/src/ls.c b/src/ls.c new file mode 100644 index 0000000..336d5bd --- /dev/null +++ b/src/ls.c @@ -0,0 +1,5647 @@ +/* 'dir', 'vdir' and 'ls' directory listing programs for GNU. + Copyright (C) 1985-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* If ls_mode is LS_MULTI_COL, + the multi-column format is the default regardless + of the type of output device. + This is for the 'dir' program. + + If ls_mode is LS_LONG_FORMAT, + the long format is the default regardless of the + type of output device. + This is for the 'vdir' program. + + If ls_mode is LS_LS, + the output format depends on whether the output + device is a terminal. + This is for the 'ls' program. */ + +/* Written by Richard Stallman and David MacKenzie. */ + +/* Color support by Peter Anvin and Dennis + Flaherty based on original patches by + Greg Lee . */ + +#include +#include + +#include +#if HAVE_STROPTS_H +# include +#endif +#include + +#ifdef WINSIZE_IN_PTEM +# include +# include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#if HAVE_LANGINFO_CODESET +# include +#endif + +/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is + present. */ +#ifndef SA_NOCLDSTOP +# define SA_NOCLDSTOP 0 +# define sigprocmask(How, Set, Oset) /* empty */ +# define sigset_t int +# if ! HAVE_SIGINTERRUPT +# define siginterrupt(sig, flag) /* empty */ +# endif +#endif + +/* NonStop circa 2011 lacks both SA_RESTART and siginterrupt, so don't + restart syscalls after a signal handler fires. This may cause + colors to get messed up on the screen if 'ls' is interrupted, but + that's the best we can do on such a platform. */ +#ifndef SA_RESTART +# define SA_RESTART 0 +#endif + +#include "system.h" +#include + +#include "acl.h" +#include "argmatch.h" +#include "assure.h" +#include "c-strcase.h" +#include "dev-ino.h" +#include "filenamecat.h" +#include "hard-locale.h" +#include "hash.h" +#include "human.h" +#include "filemode.h" +#include "filevercmp.h" +#include "idcache.h" +#include "ls.h" +#include "mbswidth.h" +#include "mpsort.h" +#include "obstack.h" +#include "quote.h" +#include "smack.h" +#include "stat-size.h" +#include "stat-time.h" +#include "strftime.h" +#include "xdectoint.h" +#include "xstrtol.h" +#include "xstrtol-error.h" +#include "areadlink.h" +#include "mbsalign.h" +#include "dircolors.h" +#include "xgethostname.h" +#include "c-ctype.h" +#include "canonicalize.h" +#include "statx.h" + +/* Include last to avoid a clash of + include guards with some premature versions of libcap. + For more details, see . */ +#ifdef HAVE_CAP +# include +#endif + +#define PROGRAM_NAME (ls_mode == LS_LS ? "ls" \ + : (ls_mode == LS_MULTI_COL \ + ? "dir" : "vdir")) + +#define AUTHORS \ + proper_name ("Richard M. Stallman"), \ + proper_name ("David MacKenzie") + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +/* Unix-based readdir implementations have historically returned a dirent.d_ino + value that is sometimes not equal to the stat-obtained st_ino value for + that same entry. This error occurs for a readdir entry that refers + to a mount point. readdir's error is to return the inode number of + the underlying directory -- one that typically cannot be stat'ed, as + long as a file system is mounted on that directory. RELIABLE_D_INO + encapsulates whether we can use the more efficient approach of relying + on readdir-supplied d_ino values, or whether we must incur the cost of + calling stat or lstat to obtain each guaranteed-valid inode number. */ + +#ifndef READDIR_LIES_ABOUT_MOUNTPOINT_D_INO +# define READDIR_LIES_ABOUT_MOUNTPOINT_D_INO 1 +#endif + +#if READDIR_LIES_ABOUT_MOUNTPOINT_D_INO +# define RELIABLE_D_INO(dp) NOT_AN_INODE_NUMBER +#else +# define RELIABLE_D_INO(dp) D_INO (dp) +#endif + +#if ! HAVE_STRUCT_STAT_ST_AUTHOR +# define st_author st_uid +#endif + +enum filetype + { + unknown, + fifo, + chardev, + directory, + blockdev, + normal, + symbolic_link, + sock, + whiteout, + arg_directory + }; + +/* Display letters and indicators for each filetype. + Keep these in sync with enum filetype. */ +static char const filetype_letter[] = "?pcdb-lswd"; + +/* Ensure that filetype and filetype_letter have the same + number of elements. */ +static_assert (sizeof filetype_letter - 1 == arg_directory + 1); + +#define FILETYPE_INDICATORS \ + { \ + C_ORPHAN, C_FIFO, C_CHR, C_DIR, C_BLK, C_FILE, \ + C_LINK, C_SOCK, C_FILE, C_DIR \ + } + +enum acl_type + { + ACL_T_NONE, + ACL_T_LSM_CONTEXT_ONLY, + ACL_T_YES + }; + +struct fileinfo + { + /* The file name. */ + char *name; + + /* For symbolic link, name of the file linked to, otherwise zero. */ + char *linkname; + + /* For terminal hyperlinks. */ + char *absolute_name; + + struct stat stat; + + enum filetype filetype; + + /* For symbolic link and long listing, st_mode of file linked to, otherwise + zero. */ + mode_t linkmode; + + /* security context. */ + char *scontext; + + bool stat_ok; + + /* For symbolic link and color printing, true if linked-to file + exists, otherwise false. */ + bool linkok; + + /* For long listings, true if the file has an access control list, + or a security context. */ + enum acl_type acl_type; + + /* For color listings, true if a regular file has capability info. */ + bool has_capability; + + /* Whether file name needs quoting. tri-state with -1 == unknown. */ + int quoted; + + /* Cached screen width (including quoting). */ + size_t width; + }; + +#define LEN_STR_PAIR(s) sizeof (s) - 1, s + +/* Null is a valid character in a color indicator (think about Epson + printers, for example) so we have to use a length/buffer string + type. */ + +struct bin_str + { + size_t len; /* Number of bytes */ + char const *string; /* Pointer to the same */ + }; + +#if ! HAVE_TCGETPGRP +# define tcgetpgrp(Fd) 0 +#endif + +static size_t quote_name (char const *name, + struct quoting_options const *options, + int needs_general_quoting, + const struct bin_str *color, + bool allow_pad, struct obstack *stack, + char const *absolute_name); +static size_t quote_name_buf (char **inbuf, size_t bufsize, char *name, + struct quoting_options const *options, + int needs_general_quoting, size_t *width, + bool *pad); +static int decode_switches (int argc, char **argv); +static bool file_ignored (char const *name); +static uintmax_t gobble_file (char const *name, enum filetype type, + ino_t inode, bool command_line_arg, + char const *dirname); +static const struct bin_str * get_color_indicator (const struct fileinfo *f, + bool symlink_target); +static bool print_color_indicator (const struct bin_str *ind); +static void put_indicator (const struct bin_str *ind); +static void add_ignore_pattern (char const *pattern); +static void attach (char *dest, char const *dirname, char const *name); +static void clear_files (void); +static void extract_dirs_from_files (char const *dirname, + bool command_line_arg); +static void get_link_name (char const *filename, struct fileinfo *f, + bool command_line_arg); +static void indent (size_t from, size_t to); +static size_t calculate_columns (bool by_columns); +static void print_current_files (void); +static void print_dir (char const *name, char const *realname, + bool command_line_arg); +static size_t print_file_name_and_frills (const struct fileinfo *f, + size_t start_col); +static void print_horizontal (void); +static int format_user_width (uid_t u); +static int format_group_width (gid_t g); +static void print_long_format (const struct fileinfo *f); +static void print_many_per_line (void); +static size_t print_name_with_quoting (const struct fileinfo *f, + bool symlink_target, + struct obstack *stack, + size_t start_col); +static void prep_non_filename_text (void); +static bool print_type_indicator (bool stat_ok, mode_t mode, + enum filetype type); +static void print_with_separator (char sep); +static void queue_directory (char const *name, char const *realname, + bool command_line_arg); +static void sort_files (void); +static void parse_ls_color (void); + +static int getenv_quoting_style (void); + +static size_t quote_name_width (char const *name, + struct quoting_options const *options, + int needs_general_quoting); + +/* Initial size of hash table. + Most hierarchies are likely to be shallower than this. */ +enum { INITIAL_TABLE_SIZE = 30 }; + +/* The set of 'active' directories, from the current command-line argument + to the level in the hierarchy at which files are being listed. + A directory is represented by its device and inode numbers (struct dev_ino). + A directory is added to this set when ls begins listing it or its + entries, and it is removed from the set just after ls has finished + processing it. This set is used solely to detect loops, e.g., with + mkdir loop; cd loop; ln -s ../loop sub; ls -RL */ +static Hash_table *active_dir_set; + +#define LOOP_DETECT (!!active_dir_set) + +/* The table of files in the current directory: + + 'cwd_file' points to a vector of 'struct fileinfo', one per file. + 'cwd_n_alloc' is the number of elements space has been allocated for. + 'cwd_n_used' is the number actually in use. */ + +/* Address of block containing the files that are described. */ +static struct fileinfo *cwd_file; + +/* Length of block that 'cwd_file' points to, measured in files. */ +static size_t cwd_n_alloc; + +/* Index of first unused slot in 'cwd_file'. */ +static size_t cwd_n_used; + +/* Whether files needs may need padding due to quoting. */ +static bool cwd_some_quoted; + +/* Whether quoting style _may_ add outer quotes, + and whether aligning those is useful. */ +static bool align_variable_outer_quotes; + +/* Vector of pointers to files, in proper sorted order, and the number + of entries allocated for it. */ +static void **sorted_file; +static size_t sorted_file_alloc; + +/* When true, in a color listing, color each symlink name according to the + type of file it points to. Otherwise, color them according to the 'ln' + directive in LS_COLORS. Dangling (orphan) symlinks are treated specially, + regardless. This is set when 'ln=target' appears in LS_COLORS. */ + +static bool color_symlink_as_referent; + +static char const *hostname; + +/* Mode of appropriate file for coloring. */ +static mode_t +file_or_link_mode (struct fileinfo const *file) +{ + return (color_symlink_as_referent && file->linkok + ? file->linkmode : file->stat.st_mode); +} + + +/* Record of one pending directory waiting to be listed. */ + +struct pending + { + char *name; + /* If the directory is actually the file pointed to by a symbolic link we + were told to list, 'realname' will contain the name of the symbolic + link, otherwise zero. */ + char *realname; + bool command_line_arg; + struct pending *next; + }; + +static struct pending *pending_dirs; + +/* Current time in seconds and nanoseconds since 1970, updated as + needed when deciding whether a file is recent. */ + +static struct timespec current_time; + +static bool print_scontext; +static char UNKNOWN_SECURITY_CONTEXT[] = "?"; + +/* Whether any of the files has an ACL. This affects the width of the + mode column. */ + +static bool any_has_acl; + +/* The number of columns to use for columns containing inode numbers, + block sizes, link counts, owners, groups, authors, major device + numbers, minor device numbers, and file sizes, respectively. */ + +static int inode_number_width; +static int block_size_width; +static int nlink_width; +static int scontext_width; +static int owner_width; +static int group_width; +static int author_width; +static int major_device_number_width; +static int minor_device_number_width; +static int file_size_width; + +/* Option flags */ + +/* long_format for lots of info, one per line. + one_per_line for just names, one per line. + many_per_line for just names, many per line, sorted vertically. + horizontal for just names, many per line, sorted horizontally. + with_commas for just names, many per line, separated by commas. + + -l (and other options that imply -l), -1, -C, -x and -m control + this parameter. */ + +enum format + { + long_format, /* -l and other options that imply -l */ + one_per_line, /* -1 */ + many_per_line, /* -C */ + horizontal, /* -x */ + with_commas /* -m */ + }; + +static enum format format; + +/* 'full-iso' uses full ISO-style dates and times. 'long-iso' uses longer + ISO-style timestamps, though shorter than 'full-iso'. 'iso' uses shorter + ISO-style timestamps. 'locale' uses locale-dependent timestamps. */ +enum time_style + { + full_iso_time_style, /* --time-style=full-iso */ + long_iso_time_style, /* --time-style=long-iso */ + iso_time_style, /* --time-style=iso */ + locale_time_style /* --time-style=locale */ + }; + +static char const *const time_style_args[] = +{ + "full-iso", "long-iso", "iso", "locale", nullptr +}; +static enum time_style const time_style_types[] = +{ + full_iso_time_style, long_iso_time_style, iso_time_style, + locale_time_style +}; +ARGMATCH_VERIFY (time_style_args, time_style_types); + +/* Type of time to print or sort by. Controlled by -c and -u. + The values of each item of this enum are important since they are + used as indices in the sort functions array (see sort_files()). */ + +enum time_type + { + time_mtime = 0, /* default */ + time_ctime, /* -c */ + time_atime, /* -u */ + time_btime, /* birth time */ + time_numtypes /* the number of elements of this enum */ + }; + +static enum time_type time_type; + +/* The file characteristic to sort by. Controlled by -t, -S, -U, -X, -v. + The values of each item of this enum are important since they are + used as indices in the sort functions array (see sort_files()). */ + +enum sort_type + { + sort_name = 0, /* default */ + sort_extension, /* -X */ + sort_width, + sort_size, /* -S */ + sort_version, /* -v */ + sort_time, /* -t; must be second to last */ + sort_none, /* -U; must be last */ + sort_numtypes /* the number of elements of this enum */ + }; + +static enum sort_type sort_type; + +/* Direction of sort. + false means highest first if numeric, + lowest first if alphabetic; + these are the defaults. + true means the opposite order in each case. -r */ + +static bool sort_reverse; + +/* True means to display owner information. -g turns this off. */ + +static bool print_owner = true; + +/* True means to display author information. */ + +static bool print_author; + +/* True means to display group information. -G and -o turn this off. */ + +static bool print_group = true; + +/* True means print the user and group id's as numbers rather + than as names. -n */ + +static bool numeric_ids; + +/* True means mention the size in blocks of each file. -s */ + +static bool print_block_size; + +/* Human-readable options for output, when printing block counts. */ +static int human_output_opts; + +/* The units to use when printing block counts. */ +static uintmax_t output_block_size; + +/* Likewise, but for file sizes. */ +static int file_human_output_opts; +static uintmax_t file_output_block_size = 1; + +/* Follow the output with a special string. Using this format, + Emacs' dired mode starts up twice as fast, and can handle all + strange characters in file names. */ +static bool dired; + +/* 'none' means don't mention the type of files. + 'slash' means mention directories only, with a '/'. + 'file_type' means mention file types. + 'classify' means mention file types and mark executables. + + Controlled by -F, -p, and --indicator-style. */ + +enum indicator_style + { + none = 0, /* --indicator-style=none (default) */ + slash, /* -p, --indicator-style=slash */ + file_type, /* --indicator-style=file-type */ + classify /* -F, --indicator-style=classify */ + }; + +static enum indicator_style indicator_style; + +/* Names of indicator styles. */ +static char const *const indicator_style_args[] = +{ + "none", "slash", "file-type", "classify", nullptr +}; +static enum indicator_style const indicator_style_types[] = +{ + none, slash, file_type, classify +}; +ARGMATCH_VERIFY (indicator_style_args, indicator_style_types); + +/* True means use colors to mark types. Also define the different + colors as well as the stuff for the LS_COLORS environment variable. + The LS_COLORS variable is now in a termcap-like format. */ + +static bool print_with_color; + +static bool print_hyperlink; + +/* Whether we used any colors in the output so far. If so, we will + need to restore the default color later. If not, we will need to + call prep_non_filename_text before using color for the first time. */ + +static bool used_color = false; + +enum when_type + { + when_never, /* 0: default or --color=never */ + when_always, /* 1: --color=always */ + when_if_tty /* 2: --color=tty */ + }; + +enum Dereference_symlink + { + DEREF_UNDEFINED = 0, /* default */ + DEREF_NEVER, + DEREF_COMMAND_LINE_ARGUMENTS, /* -H */ + DEREF_COMMAND_LINE_SYMLINK_TO_DIR, /* the default, in certain cases */ + DEREF_ALWAYS /* -L */ + }; + +enum indicator_no + { + C_LEFT, C_RIGHT, C_END, C_RESET, C_NORM, C_FILE, C_DIR, C_LINK, + C_FIFO, C_SOCK, + C_BLK, C_CHR, C_MISSING, C_ORPHAN, C_EXEC, C_DOOR, C_SETUID, C_SETGID, + C_STICKY, C_OTHER_WRITABLE, C_STICKY_OTHER_WRITABLE, C_CAP, C_MULTIHARDLINK, + C_CLR_TO_EOL + }; + +static char const *const indicator_name[]= + { + "lc", "rc", "ec", "rs", "no", "fi", "di", "ln", "pi", "so", + "bd", "cd", "mi", "or", "ex", "do", "su", "sg", "st", + "ow", "tw", "ca", "mh", "cl", nullptr + }; + +struct color_ext_type + { + struct bin_str ext; /* The extension we're looking for */ + struct bin_str seq; /* The sequence to output when we do */ + bool exact_match; /* Whether to compare case insensitively */ + struct color_ext_type *next; /* Next in list */ + }; + +static struct bin_str color_indicator[] = + { + { LEN_STR_PAIR ("\033[") }, /* lc: Left of color sequence */ + { LEN_STR_PAIR ("m") }, /* rc: Right of color sequence */ + { 0, nullptr }, /* ec: End color (replaces lc+rs+rc) */ + { LEN_STR_PAIR ("0") }, /* rs: Reset to ordinary colors */ + { 0, nullptr }, /* no: Normal */ + { 0, nullptr }, /* fi: File: default */ + { LEN_STR_PAIR ("01;34") }, /* di: Directory: bright blue */ + { LEN_STR_PAIR ("01;36") }, /* ln: Symlink: bright cyan */ + { LEN_STR_PAIR ("33") }, /* pi: Pipe: yellow/brown */ + { LEN_STR_PAIR ("01;35") }, /* so: Socket: bright magenta */ + { LEN_STR_PAIR ("01;33") }, /* bd: Block device: bright yellow */ + { LEN_STR_PAIR ("01;33") }, /* cd: Char device: bright yellow */ + { 0, nullptr }, /* mi: Missing file: undefined */ + { 0, nullptr }, /* or: Orphaned symlink: undefined */ + { LEN_STR_PAIR ("01;32") }, /* ex: Executable: bright green */ + { LEN_STR_PAIR ("01;35") }, /* do: Door: bright magenta */ + { LEN_STR_PAIR ("37;41") }, /* su: setuid: white on red */ + { LEN_STR_PAIR ("30;43") }, /* sg: setgid: black on yellow */ + { LEN_STR_PAIR ("37;44") }, /* st: sticky: black on blue */ + { LEN_STR_PAIR ("34;42") }, /* ow: other-writable: blue on green */ + { LEN_STR_PAIR ("30;42") }, /* tw: ow w/ sticky: black on green */ + { 0, nullptr }, /* ca: disabled by default */ + { 0, nullptr }, /* mh: disabled by default */ + { LEN_STR_PAIR ("\033[K") }, /* cl: clear to end of line */ + }; + +/* A list mapping file extensions to corresponding display sequence. */ +static struct color_ext_type *color_ext_list = nullptr; + +/* Buffer for color sequences */ +static char *color_buf; + +/* True means to check for orphaned symbolic link, for displaying + colors, or to group symlink to directories with other dirs. */ + +static bool check_symlink_mode; + +/* True means mention the inode number of each file. -i */ + +static bool print_inode; + +/* What to do with symbolic links. Affected by -d, -F, -H, -l (and + other options that imply -l), and -L. */ + +static enum Dereference_symlink dereference; + +/* True means when a directory is found, display info on its + contents. -R */ + +static bool recursive; + +/* True means when an argument is a directory name, display info + on it itself. -d */ + +static bool immediate_dirs; + +/* True means that directories are grouped before files. */ + +static bool directories_first; + +/* Which files to ignore. */ + +static enum +{ + /* Ignore files whose names start with '.', and files specified by + --hide and --ignore. */ + IGNORE_DEFAULT = 0, + + /* Ignore '.', '..', and files specified by --ignore. */ + IGNORE_DOT_AND_DOTDOT, + + /* Ignore only files specified by --ignore. */ + IGNORE_MINIMAL +} ignore_mode; + +/* A linked list of shell-style globbing patterns. If a non-argument + file name matches any of these patterns, it is ignored. + Controlled by -I. Multiple -I options accumulate. + The -B option adds '*~' and '.*~' to this list. */ + +struct ignore_pattern + { + char const *pattern; + struct ignore_pattern *next; + }; + +static struct ignore_pattern *ignore_patterns; + +/* Similar to IGNORE_PATTERNS, except that -a or -A causes this + variable itself to be ignored. */ +static struct ignore_pattern *hide_patterns; + +/* True means output nongraphic chars in file names as '?'. + (-q, --hide-control-chars) + qmark_funny_chars and the quoting style (-Q, --quoting-style=WORD) are + independent. The algorithm is: first, obey the quoting style to get a + string representing the file name; then, if qmark_funny_chars is set, + replace all nonprintable chars in that string with '?'. It's necessary + to replace nonprintable chars even in quoted strings, because we don't + want to mess up the terminal if control chars get sent to it, and some + quoting methods pass through control chars as-is. */ +static bool qmark_funny_chars; + +/* Quoting options for file and dir name output. */ + +static struct quoting_options *filename_quoting_options; +static struct quoting_options *dirname_quoting_options; + +/* The number of chars per hardware tab stop. Setting this to zero + inhibits the use of TAB characters for separating columns. -T */ +static size_t tabsize; + +/* True means print each directory name before listing it. */ + +static bool print_dir_name; + +/* The line length to use for breaking lines in many-per-line format. + Can be set with -w. If zero, there is no limit. */ + +static size_t line_length; + +/* The local time zone rules, as per the TZ environment variable. */ + +static timezone_t localtz; + +/* If true, the file listing format requires that stat be called on + each file. */ + +static bool format_needs_stat; + +/* Similar to 'format_needs_stat', but set if only the file type is + needed. */ + +static bool format_needs_type; + +/* An arbitrary limit on the number of bytes in a printed timestamp. + This is set to a relatively small value to avoid the need to worry + about denial-of-service attacks on servers that run "ls" on behalf + of remote clients. 1000 bytes should be enough for any practical + timestamp format. */ + +enum { TIME_STAMP_LEN_MAXIMUM = MAX (1000, INT_STRLEN_BOUND (time_t)) }; + +/* strftime formats for non-recent and recent files, respectively, in + -l output. */ + +static char const *long_time_format[2] = + { + /* strftime format for non-recent files (older than 6 months), in + -l output. This should contain the year, month and day (at + least), in an order that is understood by people in your + locale's territory. Please try to keep the number of used + screen columns small, because many people work in windows with + only 80 columns. But make this as wide as the other string + below, for recent files. */ + /* TRANSLATORS: ls output needs to be aligned for ease of reading, + so be wary of using variable width fields from the locale. + Note %b is handled specially by ls and aligned correctly. + Note also that specifying a width as in %5b is erroneous as strftime + will count bytes rather than characters in multibyte locales. */ + N_("%b %e %Y"), + /* strftime format for recent files (younger than 6 months), in -l + output. This should contain the month, day and time (at + least), in an order that is understood by people in your + locale's territory. Please try to keep the number of used + screen columns small, because many people work in windows with + only 80 columns. But make this as wide as the other string + above, for non-recent files. */ + /* TRANSLATORS: ls output needs to be aligned for ease of reading, + so be wary of using variable width fields from the locale. + Note %b is handled specially by ls and aligned correctly. + Note also that specifying a width as in %5b is erroneous as strftime + will count bytes rather than characters in multibyte locales. */ + N_("%b %e %H:%M") + }; + +/* The set of signals that are caught. */ + +static sigset_t caught_signals; + +/* If nonzero, the value of the pending fatal signal. */ + +static sig_atomic_t volatile interrupt_signal; + +/* A count of the number of pending stop signals that have been received. */ + +static sig_atomic_t volatile stop_signal_count; + +/* Desired exit status. */ + +static int exit_status; + +/* Exit statuses. */ +enum + { + /* "ls" had a minor problem. E.g., while processing a directory, + ls obtained the name of an entry via readdir, yet was later + unable to stat that name. This happens when listing a directory + in which entries are actively being removed or renamed. */ + LS_MINOR_PROBLEM = 1, + + /* "ls" had more serious trouble (e.g., memory exhausted, invalid + option or failure to stat a command line argument. */ + LS_FAILURE = 2 + }; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + AUTHOR_OPTION = CHAR_MAX + 1, + BLOCK_SIZE_OPTION, + COLOR_OPTION, + DEREFERENCE_COMMAND_LINE_SYMLINK_TO_DIR_OPTION, + FILE_TYPE_INDICATOR_OPTION, + FORMAT_OPTION, + FULL_TIME_OPTION, + GROUP_DIRECTORIES_FIRST_OPTION, + HIDE_OPTION, + HYPERLINK_OPTION, + INDICATOR_STYLE_OPTION, + QUOTING_STYLE_OPTION, + SHOW_CONTROL_CHARS_OPTION, + SI_OPTION, + SORT_OPTION, + TIME_OPTION, + TIME_STYLE_OPTION, + ZERO_OPTION, +}; + +static struct option const long_options[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"escape", no_argument, nullptr, 'b'}, + {"directory", no_argument, nullptr, 'd'}, + {"dired", no_argument, nullptr, 'D'}, + {"full-time", no_argument, nullptr, FULL_TIME_OPTION}, + {"group-directories-first", no_argument, nullptr, + GROUP_DIRECTORIES_FIRST_OPTION}, + {"human-readable", no_argument, nullptr, 'h'}, + {"inode", no_argument, nullptr, 'i'}, + {"kibibytes", no_argument, nullptr, 'k'}, + {"numeric-uid-gid", no_argument, nullptr, 'n'}, + {"no-group", no_argument, nullptr, 'G'}, + {"hide-control-chars", no_argument, nullptr, 'q'}, + {"reverse", no_argument, nullptr, 'r'}, + {"size", no_argument, nullptr, 's'}, + {"width", required_argument, nullptr, 'w'}, + {"almost-all", no_argument, nullptr, 'A'}, + {"ignore-backups", no_argument, nullptr, 'B'}, + {"classify", optional_argument, nullptr, 'F'}, + {"file-type", no_argument, nullptr, FILE_TYPE_INDICATOR_OPTION}, + {"si", no_argument, nullptr, SI_OPTION}, + {"dereference-command-line", no_argument, nullptr, 'H'}, + {"dereference-command-line-symlink-to-dir", no_argument, nullptr, + DEREFERENCE_COMMAND_LINE_SYMLINK_TO_DIR_OPTION}, + {"hide", required_argument, nullptr, HIDE_OPTION}, + {"ignore", required_argument, nullptr, 'I'}, + {"indicator-style", required_argument, nullptr, INDICATOR_STYLE_OPTION}, + {"dereference", no_argument, nullptr, 'L'}, + {"literal", no_argument, nullptr, 'N'}, + {"quote-name", no_argument, nullptr, 'Q'}, + {"quoting-style", required_argument, nullptr, QUOTING_STYLE_OPTION}, + {"recursive", no_argument, nullptr, 'R'}, + {"format", required_argument, nullptr, FORMAT_OPTION}, + {"show-control-chars", no_argument, nullptr, SHOW_CONTROL_CHARS_OPTION}, + {"sort", required_argument, nullptr, SORT_OPTION}, + {"tabsize", required_argument, nullptr, 'T'}, + {"time", required_argument, nullptr, TIME_OPTION}, + {"time-style", required_argument, nullptr, TIME_STYLE_OPTION}, + {"zero", no_argument, nullptr, ZERO_OPTION}, + {"color", optional_argument, nullptr, COLOR_OPTION}, + {"hyperlink", optional_argument, nullptr, HYPERLINK_OPTION}, + {"block-size", required_argument, nullptr, BLOCK_SIZE_OPTION}, + {"context", no_argument, 0, 'Z'}, + {"author", no_argument, nullptr, AUTHOR_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static char const *const format_args[] = +{ + "verbose", "long", "commas", "horizontal", "across", + "vertical", "single-column", nullptr +}; +static enum format const format_types[] = +{ + long_format, long_format, with_commas, horizontal, horizontal, + many_per_line, one_per_line +}; +ARGMATCH_VERIFY (format_args, format_types); + +static char const *const sort_args[] = +{ + "none", "time", "size", "extension", "version", "width", nullptr +}; +static enum sort_type const sort_types[] = +{ + sort_none, sort_time, sort_size, sort_extension, sort_version, sort_width +}; +ARGMATCH_VERIFY (sort_args, sort_types); + +static char const *const time_args[] = +{ + "atime", "access", "use", + "ctime", "status", + "mtime", "modification", + "birth", "creation", + nullptr +}; +static enum time_type const time_types[] = +{ + time_atime, time_atime, time_atime, + time_ctime, time_ctime, + time_mtime, time_mtime, + time_btime, time_btime, +}; +ARGMATCH_VERIFY (time_args, time_types); + +static char const *const when_args[] = +{ + /* force and none are for compatibility with another color-ls version */ + "always", "yes", "force", + "never", "no", "none", + "auto", "tty", "if-tty", nullptr +}; +static enum when_type const when_types[] = +{ + when_always, when_always, when_always, + when_never, when_never, when_never, + when_if_tty, when_if_tty, when_if_tty +}; +ARGMATCH_VERIFY (when_args, when_types); + +/* Information about filling a column. */ +struct column_info +{ + bool valid_len; + size_t line_len; + size_t *col_arr; +}; + +/* Array with information about column fullness. */ +static struct column_info *column_info; + +/* Maximum number of columns ever possible for this display. */ +static size_t max_idx; + +/* The minimum width of a column is 3: 1 character for the name and 2 + for the separating white space. */ +enum { MIN_COLUMN_WIDTH = 3 }; + + +/* This zero-based index is for the --dired option. It is incremented + for each byte of output generated by this program so that the beginning + and ending indices (in that output) of every file name can be recorded + and later output themselves. */ +static off_t dired_pos; + +static void +dired_outbyte (char c) +{ + dired_pos++; + putchar (c); +} + +/* Output the buffer S, of length S_LEN, and increment DIRED_POS by S_LEN. */ +static void +dired_outbuf (char const *s, size_t s_len) +{ + dired_pos += s_len; + fwrite (s, sizeof *s, s_len, stdout); +} + +/* Output the string S, and increment DIRED_POS by its length. */ +static void +dired_outstring (char const *s) +{ + dired_outbuf (s, strlen (s)); +} + +static void +dired_indent (void) +{ + if (dired) + dired_outstring (" "); +} + +/* With --dired, store pairs of beginning and ending indices of file names. */ +static struct obstack dired_obstack; + +/* With --dired, store pairs of beginning and ending indices of any + directory names that appear as headers (just before 'total' line) + for lists of directory entries. Such directory names are seen when + listing hierarchies using -R and when a directory is listed with at + least one other command line argument. */ +static struct obstack subdired_obstack; + +/* Save the current index on the specified obstack, OBS. */ +static void +push_current_dired_pos (struct obstack *obs) +{ + if (dired) + obstack_grow (obs, &dired_pos, sizeof dired_pos); +} + +/* With -R, this stack is used to help detect directory cycles. + The device/inode pairs on this stack mirror the pairs in the + active_dir_set hash table. */ +static struct obstack dev_ino_obstack; + +/* Push a pair onto the device/inode stack. */ +static void +dev_ino_push (dev_t dev, ino_t ino) +{ + void *vdi; + struct dev_ino *di; + int dev_ino_size = sizeof *di; + obstack_blank (&dev_ino_obstack, dev_ino_size); + vdi = obstack_next_free (&dev_ino_obstack); + di = vdi; + di--; + di->st_dev = dev; + di->st_ino = ino; +} + +/* Pop a dev/ino struct off the global dev_ino_obstack + and return that struct. */ +static struct dev_ino +dev_ino_pop (void) +{ + void *vdi; + struct dev_ino *di; + int dev_ino_size = sizeof *di; + affirm (dev_ino_size <= obstack_object_size (&dev_ino_obstack)); + obstack_blank_fast (&dev_ino_obstack, -dev_ino_size); + vdi = obstack_next_free (&dev_ino_obstack); + di = vdi; + return *di; +} + +static void +assert_matching_dev_ino (char const *name, struct dev_ino di) +{ + MAYBE_UNUSED struct stat sb; + assure (0 <= stat (name, &sb)); + assure (sb.st_dev == di.st_dev); + assure (sb.st_ino == di.st_ino); +} + +static char eolbyte = '\n'; + +/* Write to standard output PREFIX, followed by the quoting style and + a space-separated list of the integers stored in OS all on one line. */ + +static void +dired_dump_obstack (char const *prefix, struct obstack *os) +{ + size_t n_pos; + + n_pos = obstack_object_size (os) / sizeof (dired_pos); + if (n_pos > 0) + { + off_t *pos = obstack_finish (os); + fputs (prefix, stdout); + for (size_t i = 0; i < n_pos; i++) + { + intmax_t p = pos[i]; + printf (" %"PRIdMAX, p); + } + putchar ('\n'); + } +} + +/* Return the platform birthtime member of the stat structure, + or fallback to the mtime member, which we have populated + from the statx structure or reset to an invalid timestamp + where birth time is not supported. */ +static struct timespec +get_stat_btime (struct stat const *st) +{ + struct timespec btimespec; + +#if HAVE_STATX && defined STATX_INO + btimespec = get_stat_mtime (st); +#else + btimespec = get_stat_birthtime (st); +#endif + + return btimespec; +} + +#if HAVE_STATX && defined STATX_INO +ATTRIBUTE_PURE +static unsigned int +time_type_to_statx (void) +{ + switch (time_type) + { + case time_ctime: + return STATX_CTIME; + case time_mtime: + return STATX_MTIME; + case time_atime: + return STATX_ATIME; + case time_btime: + return STATX_BTIME; + default: + unreachable (); + } + return 0; +} + +ATTRIBUTE_PURE +static unsigned int +calc_req_mask (void) +{ + unsigned int mask = STATX_MODE; + + if (print_inode) + mask |= STATX_INO; + + if (print_block_size) + mask |= STATX_BLOCKS; + + if (format == long_format) { + mask |= STATX_NLINK | STATX_SIZE | time_type_to_statx (); + if (print_owner || print_author) + mask |= STATX_UID; + if (print_group) + mask |= STATX_GID; + } + + switch (sort_type) + { + case sort_none: + case sort_name: + case sort_version: + case sort_extension: + case sort_width: + break; + case sort_time: + mask |= time_type_to_statx (); + break; + case sort_size: + mask |= STATX_SIZE; + break; + default: + unreachable (); + } + + return mask; +} + +static int +do_statx (int fd, char const *name, struct stat *st, int flags, + unsigned int mask) +{ + struct statx stx; + bool want_btime = mask & STATX_BTIME; + int ret = statx (fd, name, flags | AT_NO_AUTOMOUNT, mask, &stx); + if (ret >= 0) + { + statx_to_stat (&stx, st); + /* Since we only need one timestamp type, + store birth time in st_mtim. */ + if (want_btime) + { + if (stx.stx_mask & STATX_BTIME) + st->st_mtim = statx_timestamp_to_timespec (stx.stx_btime); + else + st->st_mtim.tv_sec = st->st_mtim.tv_nsec = -1; + } + } + + return ret; +} + +static int +do_stat (char const *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, 0, calc_req_mask ()); +} + +static int +do_lstat (char const *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, AT_SYMLINK_NOFOLLOW, calc_req_mask ()); +} + +static int +stat_for_mode (char const *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, 0, STATX_MODE); +} + +/* dev+ino should be static, so no need to sync with backing store */ +static int +stat_for_ino (char const *name, struct stat *st) +{ + return do_statx (AT_FDCWD, name, st, 0, STATX_INO); +} + +static int +fstat_for_ino (int fd, struct stat *st) +{ + return do_statx (fd, "", st, AT_EMPTY_PATH, STATX_INO); +} +#else +static int +do_stat (char const *name, struct stat *st) +{ + return stat (name, st); +} + +static int +do_lstat (char const *name, struct stat *st) +{ + return lstat (name, st); +} + +static int +stat_for_mode (char const *name, struct stat *st) +{ + return stat (name, st); +} + +static int +stat_for_ino (char const *name, struct stat *st) +{ + return stat (name, st); +} + +static int +fstat_for_ino (int fd, struct stat *st) +{ + return fstat (fd, st); +} +#endif + +/* Return the address of the first plain %b spec in FMT, or nullptr if + there is no such spec. %5b etc. do not match, so that user + widths/flags are honored. */ + +ATTRIBUTE_PURE +static char const * +first_percent_b (char const *fmt) +{ + for (; *fmt; fmt++) + if (fmt[0] == '%') + switch (fmt[1]) + { + case 'b': return fmt; + case '%': fmt++; break; + } + return nullptr; +} + +static char RFC3986[256]; +static void +file_escape_init (void) +{ + for (int i = 0; i < 256; i++) + RFC3986[i] |= c_isalnum (i) || i == '~' || i == '-' || i == '.' || i == '_'; +} + +/* Read the abbreviated month names from the locale, to align them + and to determine the max width of the field and to truncate names + greater than our max allowed. + Note even though this handles multibyte locales correctly + it's not restricted to them as single byte locales can have + variable width abbreviated months and also precomputing/caching + the names was seen to increase the performance of ls significantly. */ + +/* max number of display cells to use. + As of 2018 the abmon for Arabic has entries with width 12. + It doesn't make much sense to support wider than this + and locales should aim for abmon entries of width <= 5. */ +enum { MAX_MON_WIDTH = 12 }; +/* abformat[RECENT][MON] is the format to use for timestamps with + recentness RECENT and month MON. */ +enum { ABFORMAT_SIZE = 128 }; +static char abformat[2][12][ABFORMAT_SIZE]; +/* True if precomputed formats should be used. This can be false if + nl_langinfo fails, if a format or month abbreviation is unusually + long, or if a month abbreviation contains '%'. */ +static bool use_abformat; + +/* Store into ABMON the abbreviated month names, suitably aligned. + Return true if successful. */ + +static bool +abmon_init (char abmon[12][ABFORMAT_SIZE]) +{ +#ifndef HAVE_NL_LANGINFO + return false; +#else + size_t required_mon_width = MAX_MON_WIDTH; + size_t curr_max_width; + do + { + curr_max_width = required_mon_width; + required_mon_width = 0; + for (int i = 0; i < 12; i++) + { + size_t width = curr_max_width; + char const *abbr = nl_langinfo (ABMON_1 + i); + if (strchr (abbr, '%')) + return false; + mbs_align_t alignment = isdigit (to_uchar (*abbr)) + ? MBS_ALIGN_RIGHT : MBS_ALIGN_LEFT; + size_t req = mbsalign (abbr, abmon[i], ABFORMAT_SIZE, + &width, alignment, 0); + if (! (req < ABFORMAT_SIZE)) + return false; + required_mon_width = MAX (required_mon_width, width); + } + } + while (curr_max_width > required_mon_width); + + return true; +#endif +} + +/* Initialize ABFORMAT and USE_ABFORMAT. */ + +static void +abformat_init (void) +{ + char const *pb[2]; + for (int recent = 0; recent < 2; recent++) + pb[recent] = first_percent_b (long_time_format[recent]); + if (! (pb[0] || pb[1])) + return; + + char abmon[12][ABFORMAT_SIZE]; + if (! abmon_init (abmon)) + return; + + for (int recent = 0; recent < 2; recent++) + { + char const *fmt = long_time_format[recent]; + for (int i = 0; i < 12; i++) + { + char *nfmt = abformat[recent][i]; + int nbytes; + + if (! pb[recent]) + nbytes = snprintf (nfmt, ABFORMAT_SIZE, "%s", fmt); + else + { + if (! (pb[recent] - fmt <= MIN (ABFORMAT_SIZE, INT_MAX))) + return; + int prefix_len = pb[recent] - fmt; + nbytes = snprintf (nfmt, ABFORMAT_SIZE, "%.*s%s%s", + prefix_len, fmt, abmon[i], pb[recent] + 2); + } + + if (! (0 <= nbytes && nbytes < ABFORMAT_SIZE)) + return; + } + } + + use_abformat = true; +} + +static size_t +dev_ino_hash (void const *x, size_t table_size) +{ + struct dev_ino const *p = x; + return (uintmax_t) p->st_ino % table_size; +} + +static bool +dev_ino_compare (void const *x, void const *y) +{ + struct dev_ino const *a = x; + struct dev_ino const *b = y; + return SAME_INODE (*a, *b) ? true : false; +} + +static void +dev_ino_free (void *x) +{ + free (x); +} + +/* Add the device/inode pair (P->st_dev/P->st_ino) to the set of + active directories. Return true if there is already a matching + entry in the table. */ + +static bool +visit_dir (dev_t dev, ino_t ino) +{ + struct dev_ino *ent; + struct dev_ino *ent_from_table; + bool found_match; + + ent = xmalloc (sizeof *ent); + ent->st_ino = ino; + ent->st_dev = dev; + + /* Attempt to insert this entry into the table. */ + ent_from_table = hash_insert (active_dir_set, ent); + + if (ent_from_table == nullptr) + { + /* Insertion failed due to lack of memory. */ + xalloc_die (); + } + + found_match = (ent_from_table != ent); + + if (found_match) + { + /* ent was not inserted, so free it. */ + free (ent); + } + + return found_match; +} + +static void +free_pending_ent (struct pending *p) +{ + free (p->name); + free (p->realname); + free (p); +} + +static bool +is_colored (enum indicator_no type) +{ + size_t len = color_indicator[type].len; + char const *s = color_indicator[type].string; + return ! (len == 0 + || (len == 1 && STRNCMP_LIT (s, "0") == 0) + || (len == 2 && STRNCMP_LIT (s, "00") == 0)); +} + +static void +restore_default_color (void) +{ + put_indicator (&color_indicator[C_LEFT]); + put_indicator (&color_indicator[C_RIGHT]); +} + +static void +set_normal_color (void) +{ + if (print_with_color && is_colored (C_NORM)) + { + put_indicator (&color_indicator[C_LEFT]); + put_indicator (&color_indicator[C_NORM]); + put_indicator (&color_indicator[C_RIGHT]); + } +} + +/* An ordinary signal was received; arrange for the program to exit. */ + +static void +sighandler (int sig) +{ + if (! SA_NOCLDSTOP) + signal (sig, SIG_IGN); + if (! interrupt_signal) + interrupt_signal = sig; +} + +/* A SIGTSTP was received; arrange for the program to suspend itself. */ + +static void +stophandler (int sig) +{ + if (! SA_NOCLDSTOP) + signal (sig, stophandler); + if (! interrupt_signal) + stop_signal_count++; +} + +/* Process any pending signals. If signals are caught, this function + should be called periodically. Ideally there should never be an + unbounded amount of time when signals are not being processed. + Signal handling can restore the default colors, so callers must + immediately change colors after invoking this function. */ + +static void +process_signals (void) +{ + while (interrupt_signal || stop_signal_count) + { + int sig; + int stops; + sigset_t oldset; + + if (used_color) + restore_default_color (); + fflush (stdout); + + sigprocmask (SIG_BLOCK, &caught_signals, &oldset); + + /* Reload interrupt_signal and stop_signal_count, in case a new + signal was handled before sigprocmask took effect. */ + sig = interrupt_signal; + stops = stop_signal_count; + + /* SIGTSTP is special, since the application can receive that signal + more than once. In this case, don't set the signal handler to the + default. Instead, just raise the uncatchable SIGSTOP. */ + if (stops) + { + stop_signal_count = stops - 1; + sig = SIGSTOP; + } + else + signal (sig, SIG_DFL); + + /* Exit or suspend the program. */ + raise (sig); + sigprocmask (SIG_SETMASK, &oldset, nullptr); + + /* If execution reaches here, then the program has been + continued (after being suspended). */ + } +} + +/* Setup signal handlers if INIT is true, + otherwise restore to the default. */ + +static void +signal_setup (bool init) +{ + /* The signals that are trapped, and the number of such signals. */ + static int const sig[] = + { + /* This one is handled specially. */ + SIGTSTP, + + /* The usual suspects. */ + SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, +#ifdef SIGPOLL + SIGPOLL, +#endif +#ifdef SIGPROF + SIGPROF, +#endif +#ifdef SIGVTALRM + SIGVTALRM, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + enum { nsigs = ARRAY_CARDINALITY (sig) }; + +#if ! SA_NOCLDSTOP + static bool caught_sig[nsigs]; +#endif + + int j; + + if (init) + { +#if SA_NOCLDSTOP + struct sigaction act; + + sigemptyset (&caught_signals); + for (j = 0; j < nsigs; j++) + { + sigaction (sig[j], nullptr, &act); + if (act.sa_handler != SIG_IGN) + sigaddset (&caught_signals, sig[j]); + } + + act.sa_mask = caught_signals; + act.sa_flags = SA_RESTART; + + for (j = 0; j < nsigs; j++) + if (sigismember (&caught_signals, sig[j])) + { + act.sa_handler = sig[j] == SIGTSTP ? stophandler : sighandler; + sigaction (sig[j], &act, nullptr); + } +#else + for (j = 0; j < nsigs; j++) + { + caught_sig[j] = (signal (sig[j], SIG_IGN) != SIG_IGN); + if (caught_sig[j]) + { + signal (sig[j], sig[j] == SIGTSTP ? stophandler : sighandler); + siginterrupt (sig[j], 0); + } + } +#endif + } + else /* restore. */ + { +#if SA_NOCLDSTOP + for (j = 0; j < nsigs; j++) + if (sigismember (&caught_signals, sig[j])) + signal (sig[j], SIG_DFL); +#else + for (j = 0; j < nsigs; j++) + if (caught_sig[j]) + signal (sig[j], SIG_DFL); +#endif + } +} + +static void +signal_init (void) +{ + signal_setup (true); +} + +static void +signal_restore (void) +{ + signal_setup (false); +} + +int +main (int argc, char **argv) +{ + int i; + struct pending *thispend; + int n_files; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (LS_FAILURE); + atexit (close_stdout); + + static_assert (ARRAY_CARDINALITY (color_indicator) + 1 + == ARRAY_CARDINALITY (indicator_name)); + + exit_status = EXIT_SUCCESS; + print_dir_name = true; + pending_dirs = nullptr; + + current_time.tv_sec = TYPE_MINIMUM (time_t); + current_time.tv_nsec = -1; + + i = decode_switches (argc, argv); + + if (print_with_color) + parse_ls_color (); + + /* Test print_with_color again, because the call to parse_ls_color + may have just reset it -- e.g., if LS_COLORS is invalid. */ + + if (print_with_color) + { + /* Don't use TAB characters in output. Some terminal + emulators can't handle the combination of tabs and + color codes on the same line. */ + tabsize = 0; + } + + if (directories_first) + check_symlink_mode = true; + else if (print_with_color) + { + /* Avoid following symbolic links when possible. */ + if (is_colored (C_ORPHAN) + || (is_colored (C_EXEC) && color_symlink_as_referent) + || (is_colored (C_MISSING) && format == long_format)) + check_symlink_mode = true; + } + + if (dereference == DEREF_UNDEFINED) + dereference = ((immediate_dirs + || indicator_style == classify + || format == long_format) + ? DEREF_NEVER + : DEREF_COMMAND_LINE_SYMLINK_TO_DIR); + + /* When using -R, initialize a data structure we'll use to + detect any directory cycles. */ + if (recursive) + { + active_dir_set = hash_initialize (INITIAL_TABLE_SIZE, nullptr, + dev_ino_hash, + dev_ino_compare, + dev_ino_free); + if (active_dir_set == nullptr) + xalloc_die (); + + obstack_init (&dev_ino_obstack); + } + + localtz = tzalloc (getenv ("TZ")); + + format_needs_stat = sort_type == sort_time || sort_type == sort_size + || format == long_format + || print_scontext + || print_block_size; + format_needs_type = (! format_needs_stat + && (recursive + || print_with_color + || indicator_style != none + || directories_first)); + + if (dired) + { + obstack_init (&dired_obstack); + obstack_init (&subdired_obstack); + } + + if (print_hyperlink) + { + file_escape_init (); + + hostname = xgethostname (); + /* The hostname is generally ignored, + so ignore failures obtaining it. */ + if (! hostname) + hostname = ""; + } + + cwd_n_alloc = 100; + cwd_file = xnmalloc (cwd_n_alloc, sizeof *cwd_file); + cwd_n_used = 0; + + clear_files (); + + n_files = argc - i; + + if (n_files <= 0) + { + if (immediate_dirs) + gobble_file (".", directory, NOT_AN_INODE_NUMBER, true, ""); + else + queue_directory (".", nullptr, true); + } + else + do + gobble_file (argv[i++], unknown, NOT_AN_INODE_NUMBER, true, ""); + while (i < argc); + + if (cwd_n_used) + { + sort_files (); + if (!immediate_dirs) + extract_dirs_from_files (nullptr, true); + /* 'cwd_n_used' might be zero now. */ + } + + /* In the following if/else blocks, it is sufficient to test 'pending_dirs' + (and not pending_dirs->name) because there may be no markers in the queue + at this point. A marker may be enqueued when extract_dirs_from_files is + called with a non-empty string or via print_dir. */ + if (cwd_n_used) + { + print_current_files (); + if (pending_dirs) + dired_outbyte ('\n'); + } + else if (n_files <= 1 && pending_dirs && pending_dirs->next == 0) + print_dir_name = false; + + while (pending_dirs) + { + thispend = pending_dirs; + pending_dirs = pending_dirs->next; + + if (LOOP_DETECT) + { + if (thispend->name == nullptr) + { + /* thispend->name == nullptr means this is a marker entry + indicating we've finished processing the directory. + Use its dev/ino numbers to remove the corresponding + entry from the active_dir_set hash table. */ + struct dev_ino di = dev_ino_pop (); + struct dev_ino *found = hash_remove (active_dir_set, &di); + if (false) + assert_matching_dev_ino (thispend->realname, di); + affirm (found); + dev_ino_free (found); + free_pending_ent (thispend); + continue; + } + } + + print_dir (thispend->name, thispend->realname, + thispend->command_line_arg); + + free_pending_ent (thispend); + print_dir_name = true; + } + + if (print_with_color && used_color) + { + int j; + + /* Skip the restore when it would be a no-op, i.e., + when left is "\033[" and right is "m". */ + if (!(color_indicator[C_LEFT].len == 2 + && memcmp (color_indicator[C_LEFT].string, "\033[", 2) == 0 + && color_indicator[C_RIGHT].len == 1 + && color_indicator[C_RIGHT].string[0] == 'm')) + restore_default_color (); + + fflush (stdout); + + signal_restore (); + + /* Act on any signals that arrived before the default was restored. + This can process signals out of order, but there doesn't seem to + be an easy way to do them in order, and the order isn't that + important anyway. */ + for (j = stop_signal_count; j; j--) + raise (SIGSTOP); + j = interrupt_signal; + if (j) + raise (j); + } + + if (dired) + { + /* No need to free these since we're about to exit. */ + dired_dump_obstack ("//DIRED//", &dired_obstack); + dired_dump_obstack ("//SUBDIRED//", &subdired_obstack); + printf ("//DIRED-OPTIONS// --quoting-style=%s\n", + quoting_style_args[get_quoting_style (filename_quoting_options)]); + } + + if (LOOP_DETECT) + { + assure (hash_get_n_entries (active_dir_set) == 0); + hash_free (active_dir_set); + } + + return exit_status; +} + +/* Return the line length indicated by the value given by SPEC, or -1 + if unsuccessful. 0 means no limit on line length. */ + +static ptrdiff_t +decode_line_length (char const *spec) +{ + uintmax_t val; + + /* Treat too-large values as if they were 0, which is + effectively infinity. */ + switch (xstrtoumax (spec, nullptr, 0, &val, "")) + { + case LONGINT_OK: + return val <= MIN (PTRDIFF_MAX, SIZE_MAX) ? val : 0; + + case LONGINT_OVERFLOW: + return 0; + + default: + return -1; + } +} + +/* Return true if standard output is a tty, caching the result. */ + +static bool +stdout_isatty (void) +{ + static signed char out_tty = -1; + if (out_tty < 0) + out_tty = isatty (STDOUT_FILENO); + assume (out_tty == 0 || out_tty == 1); + return out_tty; +} + +/* Set all the option flags according to the switches specified. + Return the index of the first non-option argument. */ + +static int +decode_switches (int argc, char **argv) +{ + char *time_style_option = nullptr; + + /* These variables are false or -1 unless a switch says otherwise. */ + bool kibibytes_specified = false; + int format_opt = -1; + int hide_control_chars_opt = -1; + int quoting_style_opt = -1; + int sort_opt = -1; + ptrdiff_t tabsize_opt = -1; + ptrdiff_t width_opt = -1; + + while (true) + { + int oi = -1; + int c = getopt_long (argc, argv, + "abcdfghiklmnopqrstuvw:xABCDFGHI:LNQRST:UXZ1", + long_options, &oi); + if (c == -1) + break; + + switch (c) + { + case 'a': + ignore_mode = IGNORE_MINIMAL; + break; + + case 'b': + quoting_style_opt = escape_quoting_style; + break; + + case 'c': + time_type = time_ctime; + break; + + case 'd': + immediate_dirs = true; + break; + + case 'f': + /* Same as -a -U -1 --color=none --hyperlink=none, + while disabling -s. */ + ignore_mode = IGNORE_MINIMAL; + sort_opt = sort_none; + if (format_opt == long_format) + format_opt = -1; + print_with_color = false; + print_hyperlink = false; + print_block_size = false; + break; + + case FILE_TYPE_INDICATOR_OPTION: /* --file-type */ + indicator_style = file_type; + break; + + case 'g': + format_opt = long_format; + print_owner = false; + break; + + case 'h': + file_human_output_opts = human_output_opts = + human_autoscale | human_SI | human_base_1024; + file_output_block_size = output_block_size = 1; + break; + + case 'i': + print_inode = true; + break; + + case 'k': + kibibytes_specified = true; + break; + + case 'l': + format_opt = long_format; + break; + + case 'm': + format_opt = with_commas; + break; + + case 'n': + numeric_ids = true; + format_opt = long_format; + break; + + case 'o': /* Just like -l, but don't display group info. */ + format_opt = long_format; + print_group = false; + break; + + case 'p': + indicator_style = slash; + break; + + case 'q': + hide_control_chars_opt = true; + break; + + case 'r': + sort_reverse = true; + break; + + case 's': + print_block_size = true; + break; + + case 't': + sort_opt = sort_time; + break; + + case 'u': + time_type = time_atime; + break; + + case 'v': + sort_opt = sort_version; + break; + + case 'w': + width_opt = decode_line_length (optarg); + if (width_opt < 0) + error (LS_FAILURE, 0, "%s: %s", _("invalid line width"), + quote (optarg)); + break; + + case 'x': + format_opt = horizontal; + break; + + case 'A': + ignore_mode = IGNORE_DOT_AND_DOTDOT; + break; + + case 'B': + add_ignore_pattern ("*~"); + add_ignore_pattern (".*~"); + break; + + case 'C': + format_opt = many_per_line; + break; + + case 'D': + dired = true; + break; + + case 'F': + { + int i; + if (optarg) + i = XARGMATCH ("--classify", optarg, when_args, when_types); + else + /* Using --classify with no argument is equivalent to using + --classify=always. */ + i = when_always; + + if (i == when_always || (i == when_if_tty && stdout_isatty ())) + indicator_style = classify; + break; + } + + case 'G': /* inhibit display of group info */ + print_group = false; + break; + + case 'H': + dereference = DEREF_COMMAND_LINE_ARGUMENTS; + break; + + case DEREFERENCE_COMMAND_LINE_SYMLINK_TO_DIR_OPTION: + dereference = DEREF_COMMAND_LINE_SYMLINK_TO_DIR; + break; + + case 'I': + add_ignore_pattern (optarg); + break; + + case 'L': + dereference = DEREF_ALWAYS; + break; + + case 'N': + quoting_style_opt = literal_quoting_style; + break; + + case 'Q': + quoting_style_opt = c_quoting_style; + break; + + case 'R': + recursive = true; + break; + + case 'S': + sort_opt = sort_size; + break; + + case 'T': + tabsize_opt = xnumtoumax (optarg, 0, 0, MIN (PTRDIFF_MAX, SIZE_MAX), + "", _("invalid tab size"), LS_FAILURE); + break; + + case 'U': + sort_opt = sort_none; + break; + + case 'X': + sort_opt = sort_extension; + break; + + case '1': + /* -1 has no effect after -l. */ + if (format_opt != long_format) + format_opt = one_per_line; + break; + + case AUTHOR_OPTION: + print_author = true; + break; + + case HIDE_OPTION: + { + struct ignore_pattern *hide = xmalloc (sizeof *hide); + hide->pattern = optarg; + hide->next = hide_patterns; + hide_patterns = hide; + } + break; + + case SORT_OPTION: + sort_opt = XARGMATCH ("--sort", optarg, sort_args, sort_types); + break; + + case GROUP_DIRECTORIES_FIRST_OPTION: + directories_first = true; + break; + + case TIME_OPTION: + time_type = XARGMATCH ("--time", optarg, time_args, time_types); + break; + + case FORMAT_OPTION: + format_opt = XARGMATCH ("--format", optarg, format_args, + format_types); + break; + + case FULL_TIME_OPTION: + format_opt = long_format; + time_style_option = bad_cast ("full-iso"); + break; + + case COLOR_OPTION: + { + int i; + if (optarg) + i = XARGMATCH ("--color", optarg, when_args, when_types); + else + /* Using --color with no argument is equivalent to using + --color=always. */ + i = when_always; + + print_with_color = (i == when_always + || (i == when_if_tty && stdout_isatty ())); + break; + } + + case HYPERLINK_OPTION: + { + int i; + if (optarg) + i = XARGMATCH ("--hyperlink", optarg, when_args, when_types); + else + /* Using --hyperlink with no argument is equivalent to using + --hyperlink=always. */ + i = when_always; + + print_hyperlink = (i == when_always + || (i == when_if_tty && stdout_isatty ())); + break; + } + + case INDICATOR_STYLE_OPTION: + indicator_style = XARGMATCH ("--indicator-style", optarg, + indicator_style_args, + indicator_style_types); + break; + + case QUOTING_STYLE_OPTION: + quoting_style_opt = XARGMATCH ("--quoting-style", optarg, + quoting_style_args, + quoting_style_vals); + break; + + case TIME_STYLE_OPTION: + time_style_option = optarg; + break; + + case SHOW_CONTROL_CHARS_OPTION: + hide_control_chars_opt = false; + break; + + case BLOCK_SIZE_OPTION: + { + enum strtol_error e = human_options (optarg, &human_output_opts, + &output_block_size); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, 0, long_options, optarg); + file_human_output_opts = human_output_opts; + file_output_block_size = output_block_size; + } + break; + + case SI_OPTION: + file_human_output_opts = human_output_opts = + human_autoscale | human_SI; + file_output_block_size = output_block_size = 1; + break; + + case 'Z': + print_scontext = true; + break; + + case ZERO_OPTION: + eolbyte = 0; + hide_control_chars_opt = false; + if (format_opt != long_format) + format_opt = one_per_line; + print_with_color = false; + quoting_style_opt = literal_quoting_style; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (LS_FAILURE); + } + } + + if (! output_block_size) + { + char const *ls_block_size = getenv ("LS_BLOCK_SIZE"); + human_options (ls_block_size, + &human_output_opts, &output_block_size); + if (ls_block_size || getenv ("BLOCK_SIZE")) + { + file_human_output_opts = human_output_opts; + file_output_block_size = output_block_size; + } + if (kibibytes_specified) + { + human_output_opts = 0; + output_block_size = 1024; + } + } + + format = (0 <= format_opt ? format_opt + : ls_mode == LS_LS ? (stdout_isatty () + ? many_per_line : one_per_line) + : ls_mode == LS_MULTI_COL ? many_per_line + : /* ls_mode == LS_LONG_FORMAT */ long_format); + + /* If the line length was not set by a switch but is needed to determine + output, go to the work of obtaining it from the environment. */ + ptrdiff_t linelen = width_opt; + if (format == many_per_line || format == horizontal || format == with_commas + || print_with_color) + { +#ifdef TIOCGWINSZ + if (linelen < 0) + { + /* Suppress bogus warning re comparing ws.ws_col to big integer. */ +# if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" +# endif + struct winsize ws; + if (stdout_isatty () + && 0 <= ioctl (STDOUT_FILENO, TIOCGWINSZ, &ws) + && 0 < ws.ws_col) + linelen = ws.ws_col <= MIN (PTRDIFF_MAX, SIZE_MAX) ? ws.ws_col : 0; +# if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) +# pragma GCC diagnostic pop +# endif + } +#endif + if (linelen < 0) + { + char const *p = getenv ("COLUMNS"); + if (p && *p) + { + linelen = decode_line_length (p); + if (linelen < 0) + error (0, 0, + _("ignoring invalid width" + " in environment variable COLUMNS: %s"), + quote (p)); + } + } + } + + line_length = linelen < 0 ? 80 : linelen; + + /* Determine the max possible number of display columns. */ + max_idx = line_length / MIN_COLUMN_WIDTH; + /* Account for first display column not having a separator, + or line_lengths shorter than MIN_COLUMN_WIDTH. */ + max_idx += line_length % MIN_COLUMN_WIDTH != 0; + + if (format == many_per_line || format == horizontal || format == with_commas) + { + if (0 <= tabsize_opt) + tabsize = tabsize_opt; + else + { + tabsize = 8; + char const *p = getenv ("TABSIZE"); + if (p) + { + uintmax_t tmp; + if (xstrtoumax (p, nullptr, 0, &tmp, "") == LONGINT_OK + && tmp <= SIZE_MAX) + tabsize = tmp; + else + error (0, 0, + _("ignoring invalid tab size" + " in environment variable TABSIZE: %s"), + quote (p)); + } + } + } + + qmark_funny_chars = (hide_control_chars_opt < 0 + ? ls_mode == LS_LS && stdout_isatty () + : hide_control_chars_opt); + + int qs = quoting_style_opt; + if (qs < 0) + qs = getenv_quoting_style (); + if (qs < 0) + qs = (ls_mode == LS_LS + ? (stdout_isatty () ? shell_escape_quoting_style : -1) + : escape_quoting_style); + if (0 <= qs) + set_quoting_style (nullptr, qs); + qs = get_quoting_style (nullptr); + align_variable_outer_quotes + = ((format == long_format + || ((format == many_per_line || format == horizontal) && line_length)) + && (qs == shell_quoting_style + || qs == shell_escape_quoting_style + || qs == c_maybe_quoting_style)); + filename_quoting_options = clone_quoting_options (nullptr); + if (qs == escape_quoting_style) + set_char_quoting (filename_quoting_options, ' ', 1); + if (file_type <= indicator_style) + { + char const *p; + for (p = &"*=>@|"[indicator_style - file_type]; *p; p++) + set_char_quoting (filename_quoting_options, *p, 1); + } + + dirname_quoting_options = clone_quoting_options (nullptr); + set_char_quoting (dirname_quoting_options, ':', 1); + + /* --dired is meaningful only with --format=long (-l) and sans --hyperlink. + Otherwise, ignore it. FIXME: warn about this? + Alternatively, make --dired imply --format=long? */ + dired &= (format == long_format) & !print_hyperlink; + + if (eolbyte < dired) + error (LS_FAILURE, 0, _("--dired and --zero are incompatible")); + + /* If -c or -u is specified and not -l (or any other option that implies -l), + and no sort-type was specified, then sort by the ctime (-c) or atime (-u). + The behavior of ls when using either -c or -u but with neither -l nor -t + appears to be unspecified by POSIX. So, with GNU ls, '-u' alone means + sort by atime (this is the one that's not specified by the POSIX spec), + -lu means show atime and sort by name, -lut means show atime and sort + by atime. */ + + sort_type = (0 <= sort_opt ? sort_opt + : (format != long_format + && (time_type == time_ctime || time_type == time_atime + || time_type == time_btime)) + ? sort_time : sort_name); + + if (format == long_format) + { + char *style = time_style_option; + static char const posix_prefix[] = "posix-"; + + if (! style) + if (! (style = getenv ("TIME_STYLE"))) + style = bad_cast ("locale"); + + while (STREQ_LEN (style, posix_prefix, sizeof posix_prefix - 1)) + { + if (! hard_locale (LC_TIME)) + return optind; + style += sizeof posix_prefix - 1; + } + + if (*style == '+') + { + char *p0 = style + 1; + char *p1 = strchr (p0, '\n'); + if (! p1) + p1 = p0; + else + { + if (strchr (p1 + 1, '\n')) + error (LS_FAILURE, 0, _("invalid time style format %s"), + quote (p0)); + *p1++ = '\0'; + } + long_time_format[0] = p0; + long_time_format[1] = p1; + } + else + { + ptrdiff_t res = argmatch (style, time_style_args, + (char const *) time_style_types, + sizeof (*time_style_types)); + if (res < 0) + { + /* This whole block used to be a simple use of XARGMATCH. + but that didn't print the "posix-"-prefixed variants or + the "+"-prefixed format string option upon failure. */ + argmatch_invalid ("time style", style, res); + + /* The following is a manual expansion of argmatch_valid, + but with the added "+ ..." description and the [posix-] + prefixes prepended. Note that this simplification works + only because all four existing time_style_types values + are distinct. */ + fputs (_("Valid arguments are:\n"), stderr); + char const *const *p = time_style_args; + while (*p) + fprintf (stderr, " - [posix-]%s\n", *p++); + fputs (_(" - +FORMAT (e.g., +%H:%M) for a 'date'-style" + " format\n"), stderr); + usage (LS_FAILURE); + } + switch (res) + { + case full_iso_time_style: + long_time_format[0] = long_time_format[1] = + "%Y-%m-%d %H:%M:%S.%N %z"; + break; + + case long_iso_time_style: + long_time_format[0] = long_time_format[1] = "%Y-%m-%d %H:%M"; + break; + + case iso_time_style: + long_time_format[0] = "%Y-%m-%d "; + long_time_format[1] = "%m-%d %H:%M"; + break; + + case locale_time_style: + if (hard_locale (LC_TIME)) + { + for (int i = 0; i < 2; i++) + long_time_format[i] = + dcgettext (nullptr, long_time_format[i], LC_TIME); + } + } + } + + abformat_init (); + } + + return optind; +} + +/* Parse a string as part of the LS_COLORS variable; this may involve + decoding all kinds of escape characters. If equals_end is set an + unescaped equal sign ends the string, otherwise only a : or \0 + does. Set *OUTPUT_COUNT to the number of bytes output. Return + true if successful. + + The resulting string is *not* null-terminated, but may contain + embedded nulls. + + Note that both dest and src are char **; on return they point to + the first free byte after the array and the character that ended + the input string, respectively. */ + +static bool +get_funky_string (char **dest, char const **src, bool equals_end, + size_t *output_count) +{ + char num; /* For numerical codes */ + size_t count; /* Something to count with */ + enum { + ST_GND, ST_BACKSLASH, ST_OCTAL, ST_HEX, ST_CARET, ST_END, ST_ERROR + } state; + char const *p; + char *q; + + p = *src; /* We don't want to double-indirect */ + q = *dest; /* the whole darn time. */ + + count = 0; /* No characters counted in yet. */ + num = 0; + + state = ST_GND; /* Start in ground state. */ + while (state < ST_END) + { + switch (state) + { + case ST_GND: /* Ground state (no escapes) */ + switch (*p) + { + case ':': + case '\0': + state = ST_END; /* End of string */ + break; + case '\\': + state = ST_BACKSLASH; /* Backslash escape sequence */ + ++p; + break; + case '^': + state = ST_CARET; /* Caret escape */ + ++p; + break; + case '=': + if (equals_end) + { + state = ST_END; /* End */ + break; + } + FALLTHROUGH; + default: + *(q++) = *(p++); + ++count; + break; + } + break; + + case ST_BACKSLASH: /* Backslash escaped character */ + switch (*p) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + state = ST_OCTAL; /* Octal sequence */ + num = *p - '0'; + break; + case 'x': + case 'X': + state = ST_HEX; /* Hex sequence */ + num = 0; + break; + case 'a': /* Bell */ + num = '\a'; + break; + case 'b': /* Backspace */ + num = '\b'; + break; + case 'e': /* Escape */ + num = 27; + break; + case 'f': /* Form feed */ + num = '\f'; + break; + case 'n': /* Newline */ + num = '\n'; + break; + case 'r': /* Carriage return */ + num = '\r'; + break; + case 't': /* Tab */ + num = '\t'; + break; + case 'v': /* Vtab */ + num = '\v'; + break; + case '?': /* Delete */ + num = 127; + break; + case '_': /* Space */ + num = ' '; + break; + case '\0': /* End of string */ + state = ST_ERROR; /* Error! */ + break; + default: /* Escaped character like \ ^ : = */ + num = *p; + break; + } + if (state == ST_BACKSLASH) + { + *(q++) = num; + ++count; + state = ST_GND; + } + ++p; + break; + + case ST_OCTAL: /* Octal sequence */ + if (*p < '0' || *p > '7') + { + *(q++) = num; + ++count; + state = ST_GND; + } + else + num = (num << 3) + (*(p++) - '0'); + break; + + case ST_HEX: /* Hex sequence */ + switch (*p) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + num = (num << 4) + (*(p++) - '0'); + break; + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + num = (num << 4) + (*(p++) - 'a') + 10; + break; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + num = (num << 4) + (*(p++) - 'A') + 10; + break; + default: + *(q++) = num; + ++count; + state = ST_GND; + break; + } + break; + + case ST_CARET: /* Caret escape */ + state = ST_GND; /* Should be the next state... */ + if (*p >= '@' && *p <= '~') + { + *(q++) = *(p++) & 037; + ++count; + } + else if (*p == '?') + { + *(q++) = 127; + ++count; + } + else + state = ST_ERROR; + break; + + default: + unreachable (); + } + } + + *dest = q; + *src = p; + *output_count = count; + + return state != ST_ERROR; +} + +enum parse_state + { + PS_START = 1, + PS_2, + PS_3, + PS_4, + PS_DONE, + PS_FAIL + }; + + +/* Check if the content of TERM is a valid name in dircolors. */ + +static bool +known_term_type (void) +{ + char const *term = getenv ("TERM"); + if (! term || ! *term) + return false; + + char const *line = G_line; + while (line - G_line < sizeof (G_line)) + { + if (STRNCMP_LIT (line, "TERM ") == 0) + { + if (fnmatch (line + 5, term, 0) == 0) + return true; + } + line += strlen (line) + 1; + } + + return false; +} + +static void +parse_ls_color (void) +{ + char const *p; /* Pointer to character being parsed */ + char *buf; /* color_buf buffer pointer */ + int ind_no; /* Indicator number */ + char label[3]; /* Indicator label */ + struct color_ext_type *ext; /* Extension we are working on */ + + if ((p = getenv ("LS_COLORS")) == nullptr || *p == '\0') + { + /* LS_COLORS takes precedence, but if that's not set then + honor the COLORTERM and TERM env variables so that + we only go with the internal ANSI color codes if the + former is non empty or the latter is set to a known value. */ + char const *colorterm = getenv ("COLORTERM"); + if (! (colorterm && *colorterm) && ! known_term_type ()) + print_with_color = false; + return; + } + + ext = nullptr; + strcpy (label, "??"); + + /* This is an overly conservative estimate, but any possible + LS_COLORS string will *not* generate a color_buf longer than + itself, so it is a safe way of allocating a buffer in + advance. */ + buf = color_buf = xstrdup (p); + + enum parse_state state = PS_START; + while (true) + { + switch (state) + { + case PS_START: /* First label character */ + switch (*p) + { + case ':': + ++p; + break; + + case '*': + /* Allocate new extension block and add to head of + linked list (this way a later definition will + override an earlier one, which can be useful for + having terminal-specific defs override global). */ + + ext = xmalloc (sizeof *ext); + ext->next = color_ext_list; + color_ext_list = ext; + ext->exact_match = false; + + ++p; + ext->ext.string = buf; + + state = (get_funky_string (&buf, &p, true, &ext->ext.len) + ? PS_4 : PS_FAIL); + break; + + case '\0': + state = PS_DONE; /* Done! */ + goto done; + + default: /* Assume it is file type label */ + label[0] = *(p++); + state = PS_2; + break; + } + break; + + case PS_2: /* Second label character */ + if (*p) + { + label[1] = *(p++); + state = PS_3; + } + else + state = PS_FAIL; /* Error */ + break; + + case PS_3: /* Equal sign after indicator label */ + state = PS_FAIL; /* Assume failure... */ + if (*(p++) == '=')/* It *should* be... */ + { + for (ind_no = 0; indicator_name[ind_no] != nullptr; ++ind_no) + { + if (STREQ (label, indicator_name[ind_no])) + { + color_indicator[ind_no].string = buf; + state = (get_funky_string (&buf, &p, false, + &color_indicator[ind_no].len) + ? PS_START : PS_FAIL); + break; + } + } + if (state == PS_FAIL) + error (0, 0, _("unrecognized prefix: %s"), quote (label)); + } + break; + + case PS_4: /* Equal sign after *.ext */ + if (*(p++) == '=') + { + ext->seq.string = buf; + state = (get_funky_string (&buf, &p, false, &ext->seq.len) + ? PS_START : PS_FAIL); + } + else + state = PS_FAIL; + break; + + case PS_FAIL: + goto done; + + default: + affirm (false); + } + } + done: + + if (state == PS_FAIL) + { + struct color_ext_type *e; + struct color_ext_type *e2; + + error (0, 0, + _("unparsable value for LS_COLORS environment variable")); + free (color_buf); + for (e = color_ext_list; e != nullptr; /* empty */) + { + e2 = e; + e = e->next; + free (e2); + } + print_with_color = false; + } + else + { + /* Postprocess list to set EXACT_MATCH on entries where there are + different cased extensions with separate sequences defined. + Also set ext.len to SIZE_MAX on any entries that can't + match due to precedence, to avoid redundant string compares. */ + struct color_ext_type *e1; + + for (e1 = color_ext_list; e1 != nullptr; e1 = e1->next) + { + struct color_ext_type *e2; + bool case_ignored = false; + + for (e2 = e1->next; e2 != nullptr; e2 = e2->next) + { + if (e2->ext.len < SIZE_MAX && e1->ext.len == e2->ext.len) + { + if (memcmp (e1->ext.string, e2->ext.string, e1->ext.len) == 0) + e2->ext.len = SIZE_MAX; /* Ignore */ + else if (c_strncasecmp (e1->ext.string, e2->ext.string, + e1->ext.len) == 0) + { + if (case_ignored) + { + e2->ext.len = SIZE_MAX; /* Ignore */ + } + else if (e1->seq.len == e2->seq.len + && memcmp (e1->seq.string, e2->seq.string, + e1->seq.len) == 0) + { + e2->ext.len = SIZE_MAX; /* Ignore */ + case_ignored = true; /* Ignore all subsequent */ + } + else + { + e1->exact_match = true; + e2->exact_match = true; + } + } + } + } + } + } + + if (color_indicator[C_LINK].len == 6 + && !STRNCMP_LIT (color_indicator[C_LINK].string, "target")) + color_symlink_as_referent = true; +} + +/* Return the quoting style specified by the environment variable + QUOTING_STYLE if set and valid, -1 otherwise. */ + +static int +getenv_quoting_style (void) +{ + char const *q_style = getenv ("QUOTING_STYLE"); + if (!q_style) + return -1; + int i = ARGMATCH (q_style, quoting_style_args, quoting_style_vals); + if (i < 0) + { + error (0, 0, + _("ignoring invalid value" + " of environment variable QUOTING_STYLE: %s"), + quote (q_style)); + return -1; + } + return quoting_style_vals[i]; +} + +/* Set the exit status to report a failure. If SERIOUS, it is a + serious failure; otherwise, it is merely a minor problem. */ + +static void +set_exit_status (bool serious) +{ + if (serious) + exit_status = LS_FAILURE; + else if (exit_status == EXIT_SUCCESS) + exit_status = LS_MINOR_PROBLEM; +} + +/* Assuming a failure is serious if SERIOUS, use the printf-style + MESSAGE to report the failure to access a file named FILE. Assume + errno is set appropriately for the failure. */ + +static void +file_failure (bool serious, char const *message, char const *file) +{ + error (0, errno, message, quoteaf (file)); + set_exit_status (serious); +} + +/* Request that the directory named NAME have its contents listed later. + If REALNAME is nonzero, it will be used instead of NAME when the + directory name is printed. This allows symbolic links to directories + to be treated as regular directories but still be listed under their + real names. NAME == nullptr is used to insert a marker entry for the + directory named in REALNAME. + If NAME is non-null, we use its dev/ino information to save + a call to stat -- when doing a recursive (-R) traversal. + COMMAND_LINE_ARG means this directory was mentioned on the command line. */ + +static void +queue_directory (char const *name, char const *realname, bool command_line_arg) +{ + struct pending *new = xmalloc (sizeof *new); + new->realname = realname ? xstrdup (realname) : nullptr; + new->name = name ? xstrdup (name) : nullptr; + new->command_line_arg = command_line_arg; + new->next = pending_dirs; + pending_dirs = new; +} + +/* Read directory NAME, and list the files in it. + If REALNAME is nonzero, print its name instead of NAME; + this is used for symbolic links to directories. + COMMAND_LINE_ARG means this directory was mentioned on the command line. */ + +static void +print_dir (char const *name, char const *realname, bool command_line_arg) +{ + DIR *dirp; + struct dirent *next; + uintmax_t total_blocks = 0; + static bool first = true; + + errno = 0; + dirp = opendir (name); + if (!dirp) + { + file_failure (command_line_arg, _("cannot open directory %s"), name); + return; + } + + if (LOOP_DETECT) + { + struct stat dir_stat; + int fd = dirfd (dirp); + + /* If dirfd failed, endure the overhead of stat'ing by path */ + if ((0 <= fd + ? fstat_for_ino (fd, &dir_stat) + : stat_for_ino (name, &dir_stat)) < 0) + { + file_failure (command_line_arg, + _("cannot determine device and inode of %s"), name); + closedir (dirp); + return; + } + + /* If we've already visited this dev/inode pair, warn that + we've found a loop, and do not process this directory. */ + if (visit_dir (dir_stat.st_dev, dir_stat.st_ino)) + { + error (0, 0, _("%s: not listing already-listed directory"), + quotef (name)); + closedir (dirp); + set_exit_status (true); + return; + } + + dev_ino_push (dir_stat.st_dev, dir_stat.st_ino); + } + + clear_files (); + + if (recursive || print_dir_name) + { + if (!first) + dired_outbyte ('\n'); + first = false; + dired_indent (); + + char *absolute_name = nullptr; + if (print_hyperlink) + { + absolute_name = canonicalize_filename_mode (name, CAN_MISSING); + if (! absolute_name) + file_failure (command_line_arg, + _("error canonicalizing %s"), name); + } + quote_name (realname ? realname : name, dirname_quoting_options, -1, + nullptr, true, &subdired_obstack, absolute_name); + + free (absolute_name); + + dired_outstring (":\n"); + } + + /* Read the directory entries, and insert the subfiles into the 'cwd_file' + table. */ + + while (true) + { + /* Set errno to zero so we can distinguish between a readdir failure + and when readdir simply finds that there are no more entries. */ + errno = 0; + next = readdir (dirp); + if (next) + { + if (! file_ignored (next->d_name)) + { + enum filetype type = unknown; + +#if HAVE_STRUCT_DIRENT_D_TYPE + switch (next->d_type) + { + case DT_BLK: type = blockdev; break; + case DT_CHR: type = chardev; break; + case DT_DIR: type = directory; break; + case DT_FIFO: type = fifo; break; + case DT_LNK: type = symbolic_link; break; + case DT_REG: type = normal; break; + case DT_SOCK: type = sock; break; +# ifdef DT_WHT + case DT_WHT: type = whiteout; break; +# endif + } +#endif + total_blocks += gobble_file (next->d_name, type, + RELIABLE_D_INO (next), + false, name); + + /* In this narrow case, print out each name right away, so + ls uses constant memory while processing the entries of + this directory. Useful when there are many (millions) + of entries in a directory. */ + if (format == one_per_line && sort_type == sort_none + && !print_block_size && !recursive) + { + /* We must call sort_files in spite of + "sort_type == sort_none" for its initialization + of the sorted_file vector. */ + sort_files (); + print_current_files (); + clear_files (); + } + } + } + else if (errno != 0) + { + file_failure (command_line_arg, _("reading directory %s"), name); + if (errno != EOVERFLOW) + break; + } + else + break; + + /* When processing a very large directory, and since we've inhibited + interrupts, this loop would take so long that ls would be annoyingly + uninterruptible. This ensures that it handles signals promptly. */ + process_signals (); + } + + if (closedir (dirp) != 0) + { + file_failure (command_line_arg, _("closing directory %s"), name); + /* Don't return; print whatever we got. */ + } + + /* Sort the directory contents. */ + sort_files (); + + /* If any member files are subdirectories, perhaps they should have their + contents listed rather than being mentioned here as files. */ + + if (recursive) + extract_dirs_from_files (name, false); + + if (format == long_format || print_block_size) + { + char buf[LONGEST_HUMAN_READABLE + 3]; + char *p = human_readable (total_blocks, buf + 1, human_output_opts, + ST_NBLOCKSIZE, output_block_size); + char *pend = p + strlen (p); + *--p = ' '; + *pend++ = eolbyte; + dired_indent (); + dired_outstring (_("total")); + dired_outbuf (p, pend - p); + } + + if (cwd_n_used) + print_current_files (); +} + +/* Add 'pattern' to the list of patterns for which files that match are + not listed. */ + +static void +add_ignore_pattern (char const *pattern) +{ + struct ignore_pattern *ignore; + + ignore = xmalloc (sizeof *ignore); + ignore->pattern = pattern; + /* Add it to the head of the linked list. */ + ignore->next = ignore_patterns; + ignore_patterns = ignore; +} + +/* Return true if one of the PATTERNS matches FILE. */ + +static bool +patterns_match (struct ignore_pattern const *patterns, char const *file) +{ + struct ignore_pattern const *p; + for (p = patterns; p; p = p->next) + if (fnmatch (p->pattern, file, FNM_PERIOD) == 0) + return true; + return false; +} + +/* Return true if FILE should be ignored. */ + +static bool +file_ignored (char const *name) +{ + return ((ignore_mode != IGNORE_MINIMAL + && name[0] == '.' + && (ignore_mode == IGNORE_DEFAULT || ! name[1 + (name[1] == '.')])) + || (ignore_mode == IGNORE_DEFAULT + && patterns_match (hide_patterns, name)) + || patterns_match (ignore_patterns, name)); +} + +/* POSIX requires that a file size be printed without a sign, even + when negative. Assume the typical case where negative sizes are + actually positive values that have wrapped around. */ + +static uintmax_t +unsigned_file_size (off_t size) +{ + return size + (size < 0) * ((uintmax_t) OFF_T_MAX - OFF_T_MIN + 1); +} + +#ifdef HAVE_CAP +/* Return true if NAME has a capability (see linux/capability.h) */ +static bool +has_capability (char const *name) +{ + char *result; + bool has_cap; + + cap_t cap_d = cap_get_file (name); + if (cap_d == nullptr) + return false; + + result = cap_to_text (cap_d, nullptr); + cap_free (cap_d); + if (!result) + return false; + + /* check if human-readable capability string is empty */ + has_cap = !!*result; + + cap_free (result); + return has_cap; +} +#else +static bool +has_capability (MAYBE_UNUSED char const *name) +{ + errno = ENOTSUP; + return false; +} +#endif + +/* Enter and remove entries in the table 'cwd_file'. */ + +static void +free_ent (struct fileinfo *f) +{ + free (f->name); + free (f->linkname); + free (f->absolute_name); + if (f->scontext != UNKNOWN_SECURITY_CONTEXT) + { + if (is_smack_enabled ()) + free (f->scontext); + else + freecon (f->scontext); + } +} + +/* Empty the table of files. */ +static void +clear_files (void) +{ + for (size_t i = 0; i < cwd_n_used; i++) + { + struct fileinfo *f = sorted_file[i]; + free_ent (f); + } + + cwd_n_used = 0; + cwd_some_quoted = false; + any_has_acl = false; + inode_number_width = 0; + block_size_width = 0; + nlink_width = 0; + owner_width = 0; + group_width = 0; + author_width = 0; + scontext_width = 0; + major_device_number_width = 0; + minor_device_number_width = 0; + file_size_width = 0; +} + +/* Return true if ERR implies lack-of-support failure by a + getxattr-calling function like getfilecon or file_has_acl. */ +static bool +errno_unsupported (int err) +{ + return (err == EINVAL || err == ENOSYS || is_ENOTSUP (err)); +} + +/* Cache *getfilecon failure, when it's trivial to do so. + Like getfilecon/lgetfilecon, but when F's st_dev says it's doesn't + support getting the security context, fail with ENOTSUP immediately. */ +static int +getfilecon_cache (char const *file, struct fileinfo *f, bool deref) +{ + /* st_dev of the most recently processed device for which we've + found that [l]getfilecon fails indicating lack of support. */ + static dev_t unsupported_device; + + if (f->stat.st_dev == unsupported_device) + { + errno = ENOTSUP; + return -1; + } + int r = 0; +#ifdef HAVE_SMACK + if (is_smack_enabled ()) + r = smack_new_label_from_path (file, "security.SMACK64", deref, + &f->scontext); + else +#endif + r = (deref + ? getfilecon (file, &f->scontext) + : lgetfilecon (file, &f->scontext)); + if (r < 0 && errno_unsupported (errno)) + unsupported_device = f->stat.st_dev; + return r; +} + +/* Cache file_has_acl failure, when it's trivial to do. + Like file_has_acl, but when F's st_dev says it's on a file + system lacking ACL support, return 0 with ENOTSUP immediately. */ +static int +file_has_acl_cache (char const *file, struct fileinfo *f) +{ + /* st_dev of the most recently processed device for which we've + found that file_has_acl fails indicating lack of support. */ + static dev_t unsupported_device; + + if (f->stat.st_dev == unsupported_device) + { + errno = ENOTSUP; + return 0; + } + + /* Zero errno so that we can distinguish between two 0-returning cases: + "has-ACL-support, but only a default ACL" and "no ACL support". */ + errno = 0; + int n = file_has_acl (file, &f->stat); + if (n <= 0 && errno_unsupported (errno)) + unsupported_device = f->stat.st_dev; + return n; +} + +/* Cache has_capability failure, when it's trivial to do. + Like has_capability, but when F's st_dev says it's on a file + system lacking capability support, return 0 with ENOTSUP immediately. */ +static bool +has_capability_cache (char const *file, struct fileinfo *f) +{ + /* st_dev of the most recently processed device for which we've + found that has_capability fails indicating lack of support. */ + static dev_t unsupported_device; + + if (f->stat.st_dev == unsupported_device) + { + errno = ENOTSUP; + return 0; + } + + bool b = has_capability (file); + if ( !b && errno_unsupported (errno)) + unsupported_device = f->stat.st_dev; + return b; +} + +static bool +needs_quoting (char const *name) +{ + char test[2]; + size_t len = quotearg_buffer (test, sizeof test , name, -1, + filename_quoting_options); + return *name != *test || strlen (name) != len; +} + +/* Add a file to the current table of files. + Verify that the file exists, and print an error message if it does not. + Return the number of blocks that the file occupies. */ +static uintmax_t +gobble_file (char const *name, enum filetype type, ino_t inode, + bool command_line_arg, char const *dirname) +{ + uintmax_t blocks = 0; + struct fileinfo *f; + + /* An inode value prior to gobble_file necessarily came from readdir, + which is not used for command line arguments. */ + affirm (! command_line_arg || inode == NOT_AN_INODE_NUMBER); + + if (cwd_n_used == cwd_n_alloc) + { + cwd_file = xnrealloc (cwd_file, cwd_n_alloc, 2 * sizeof *cwd_file); + cwd_n_alloc *= 2; + } + + f = &cwd_file[cwd_n_used]; + memset (f, '\0', sizeof *f); + f->stat.st_ino = inode; + f->filetype = type; + + f->quoted = -1; + if ((! cwd_some_quoted) && align_variable_outer_quotes) + { + /* Determine if any quoted for padding purposes. */ + f->quoted = needs_quoting (name); + if (f->quoted) + cwd_some_quoted = 1; + } + + if (command_line_arg + || print_hyperlink + || format_needs_stat + /* When coloring a directory (we may know the type from + direct.d_type), we have to stat it in order to indicate + sticky and/or other-writable attributes. */ + || (type == directory && print_with_color + && (is_colored (C_OTHER_WRITABLE) + || is_colored (C_STICKY) + || is_colored (C_STICKY_OTHER_WRITABLE))) + /* When dereferencing symlinks, the inode and type must come from + stat, but readdir provides the inode and type of lstat. */ + || ((print_inode || format_needs_type) + && (type == symbolic_link || type == unknown) + && (dereference == DEREF_ALWAYS + || color_symlink_as_referent || check_symlink_mode)) + /* Command line dereferences are already taken care of by the above + assertion that the inode number is not yet known. */ + || (print_inode && inode == NOT_AN_INODE_NUMBER) + || (format_needs_type + && (type == unknown || command_line_arg + /* --indicator-style=classify (aka -F) + requires that we stat each regular file + to see if it's executable. */ + || (type == normal && (indicator_style == classify + /* This is so that --color ends up + highlighting files with these mode + bits set even when options like -F are + not specified. Note we do a redundant + stat in the very unlikely case where + C_CAP is set but not the others. */ + || (print_with_color + && (is_colored (C_EXEC) + || is_colored (C_SETUID) + || is_colored (C_SETGID) + || is_colored (C_CAP))) + ))))) + + { + /* Absolute name of this file. */ + char *full_name; + bool do_deref; + int err; + + if (name[0] == '/' || dirname[0] == 0) + full_name = (char *) name; + else + { + full_name = alloca (strlen (name) + strlen (dirname) + 2); + attach (full_name, dirname, name); + } + + if (print_hyperlink) + { + f->absolute_name = canonicalize_filename_mode (full_name, + CAN_MISSING); + if (! f->absolute_name) + file_failure (command_line_arg, + _("error canonicalizing %s"), full_name); + } + + switch (dereference) + { + case DEREF_ALWAYS: + err = do_stat (full_name, &f->stat); + do_deref = true; + break; + + case DEREF_COMMAND_LINE_ARGUMENTS: + case DEREF_COMMAND_LINE_SYMLINK_TO_DIR: + if (command_line_arg) + { + bool need_lstat; + err = do_stat (full_name, &f->stat); + do_deref = true; + + if (dereference == DEREF_COMMAND_LINE_ARGUMENTS) + break; + + need_lstat = (err < 0 + ? (errno == ENOENT || errno == ELOOP) + : ! S_ISDIR (f->stat.st_mode)); + if (!need_lstat) + break; + + /* stat failed because of ENOENT || ELOOP, maybe indicating a + non-traversable symlink. Or stat succeeded, + FULL_NAME does not refer to a directory, + and --dereference-command-line-symlink-to-dir is in effect. + Fall through so that we call lstat instead. */ + } + FALLTHROUGH; + + default: /* DEREF_NEVER */ + err = do_lstat (full_name, &f->stat); + do_deref = false; + break; + } + + if (err != 0) + { + /* Failure to stat a command line argument leads to + an exit status of 2. For other files, stat failure + provokes an exit status of 1. */ + file_failure (command_line_arg, + _("cannot access %s"), full_name); + + f->scontext = UNKNOWN_SECURITY_CONTEXT; + + if (command_line_arg) + return 0; + + f->name = xstrdup (name); + cwd_n_used++; + + return 0; + } + + f->stat_ok = true; + + /* Note has_capability() adds around 30% runtime to 'ls --color' */ + if ((type == normal || S_ISREG (f->stat.st_mode)) + && print_with_color && is_colored (C_CAP)) + f->has_capability = has_capability_cache (full_name, f); + + if (format == long_format || print_scontext) + { + bool have_scontext = false; + bool have_acl = false; + int attr_len = getfilecon_cache (full_name, f, do_deref); + err = (attr_len < 0); + + if (err == 0) + { + if (is_smack_enabled ()) + have_scontext = ! STREQ ("_", f->scontext); + else + have_scontext = ! STREQ ("unlabeled", f->scontext); + } + else + { + f->scontext = UNKNOWN_SECURITY_CONTEXT; + + /* When requesting security context information, don't make + ls fail just because the file (even a command line argument) + isn't on the right type of file system. I.e., a getfilecon + failure isn't in the same class as a stat failure. */ + if (is_ENOTSUP (errno) || errno == ENODATA) + err = 0; + } + + if (err == 0 && format == long_format) + { + int n = file_has_acl_cache (full_name, f); + err = (n < 0); + have_acl = (0 < n); + } + + f->acl_type = (!have_scontext && !have_acl + ? ACL_T_NONE + : (have_scontext && !have_acl + ? ACL_T_LSM_CONTEXT_ONLY + : ACL_T_YES)); + any_has_acl |= f->acl_type != ACL_T_NONE; + + if (err) + error (0, errno, "%s", quotef (full_name)); + } + + if (S_ISLNK (f->stat.st_mode) + && (format == long_format || check_symlink_mode)) + { + struct stat linkstats; + + get_link_name (full_name, f, command_line_arg); + + /* Use the slower quoting path for this entry, though + don't update CWD_SOME_QUOTED since alignment not affected. */ + if (f->linkname && f->quoted == 0 && needs_quoting (f->linkname)) + f->quoted = -1; + + /* Avoid following symbolic links when possible, i.e., when + they won't be traced and when no indicator is needed. */ + if (f->linkname + && (file_type <= indicator_style || check_symlink_mode) + && stat_for_mode (full_name, &linkstats) == 0) + { + f->linkok = true; + f->linkmode = linkstats.st_mode; + } + } + + if (S_ISLNK (f->stat.st_mode)) + f->filetype = symbolic_link; + else if (S_ISDIR (f->stat.st_mode)) + { + if (command_line_arg && !immediate_dirs) + f->filetype = arg_directory; + else + f->filetype = directory; + } + else + f->filetype = normal; + + blocks = ST_NBLOCKS (f->stat); + if (format == long_format || print_block_size) + { + char buf[LONGEST_HUMAN_READABLE + 1]; + int len = mbswidth (human_readable (blocks, buf, human_output_opts, + ST_NBLOCKSIZE, output_block_size), + 0); + if (block_size_width < len) + block_size_width = len; + } + + if (format == long_format) + { + if (print_owner) + { + int len = format_user_width (f->stat.st_uid); + if (owner_width < len) + owner_width = len; + } + + if (print_group) + { + int len = format_group_width (f->stat.st_gid); + if (group_width < len) + group_width = len; + } + + if (print_author) + { + int len = format_user_width (f->stat.st_author); + if (author_width < len) + author_width = len; + } + } + + if (print_scontext) + { + int len = strlen (f->scontext); + if (scontext_width < len) + scontext_width = len; + } + + if (format == long_format) + { + char b[INT_BUFSIZE_BOUND (uintmax_t)]; + int b_len = strlen (umaxtostr (f->stat.st_nlink, b)); + if (nlink_width < b_len) + nlink_width = b_len; + + if (S_ISCHR (f->stat.st_mode) || S_ISBLK (f->stat.st_mode)) + { + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + int len = strlen (umaxtostr (major (f->stat.st_rdev), buf)); + if (major_device_number_width < len) + major_device_number_width = len; + len = strlen (umaxtostr (minor (f->stat.st_rdev), buf)); + if (minor_device_number_width < len) + minor_device_number_width = len; + len = major_device_number_width + 2 + minor_device_number_width; + if (file_size_width < len) + file_size_width = len; + } + else + { + char buf[LONGEST_HUMAN_READABLE + 1]; + uintmax_t size = unsigned_file_size (f->stat.st_size); + int len = mbswidth (human_readable (size, buf, + file_human_output_opts, + 1, file_output_block_size), + 0); + if (file_size_width < len) + file_size_width = len; + } + } + } + + if (print_inode) + { + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + int len = strlen (umaxtostr (f->stat.st_ino, buf)); + if (inode_number_width < len) + inode_number_width = len; + } + + f->name = xstrdup (name); + cwd_n_used++; + + return blocks; +} + +/* Return true if F refers to a directory. */ +static bool +is_directory (const struct fileinfo *f) +{ + return f->filetype == directory || f->filetype == arg_directory; +} + +/* Return true if F refers to a (symlinked) directory. */ +static bool +is_linked_directory (const struct fileinfo *f) +{ + return f->filetype == directory || f->filetype == arg_directory + || S_ISDIR (f->linkmode); +} + +/* Put the name of the file that FILENAME is a symbolic link to + into the LINKNAME field of 'f'. COMMAND_LINE_ARG indicates whether + FILENAME is a command-line argument. */ + +static void +get_link_name (char const *filename, struct fileinfo *f, bool command_line_arg) +{ + f->linkname = areadlink_with_size (filename, f->stat.st_size); + if (f->linkname == nullptr) + file_failure (command_line_arg, _("cannot read symbolic link %s"), + filename); +} + +/* Return true if the last component of NAME is '.' or '..' + This is so we don't try to recurse on '././././. ...' */ + +static bool +basename_is_dot_or_dotdot (char const *name) +{ + char const *base = last_component (name); + return dot_or_dotdot (base); +} + +/* Remove any entries from CWD_FILE that are for directories, + and queue them to be listed as directories instead. + DIRNAME is the prefix to prepend to each dirname + to make it correct relative to ls's working dir; + if it is null, no prefix is needed and "." and ".." should not be ignored. + If COMMAND_LINE_ARG is true, this directory was mentioned at the top level, + This is desirable when processing directories recursively. */ + +static void +extract_dirs_from_files (char const *dirname, bool command_line_arg) +{ + size_t i; + size_t j; + bool ignore_dot_and_dot_dot = (dirname != nullptr); + + if (dirname && LOOP_DETECT) + { + /* Insert a marker entry first. When we dequeue this marker entry, + we'll know that DIRNAME has been processed and may be removed + from the set of active directories. */ + queue_directory (nullptr, dirname, false); + } + + /* Queue the directories last one first, because queueing reverses the + order. */ + for (i = cwd_n_used; i-- != 0; ) + { + struct fileinfo *f = sorted_file[i]; + + if (is_directory (f) + && (! ignore_dot_and_dot_dot + || ! basename_is_dot_or_dotdot (f->name))) + { + if (!dirname || f->name[0] == '/') + queue_directory (f->name, f->linkname, command_line_arg); + else + { + char *name = file_name_concat (dirname, f->name, nullptr); + queue_directory (name, f->linkname, command_line_arg); + free (name); + } + if (f->filetype == arg_directory) + free_ent (f); + } + } + + /* Now delete the directories from the table, compacting all the remaining + entries. */ + + for (i = 0, j = 0; i < cwd_n_used; i++) + { + struct fileinfo *f = sorted_file[i]; + sorted_file[j] = f; + j += (f->filetype != arg_directory); + } + cwd_n_used = j; +} + +/* Use strcoll to compare strings in this locale. If an error occurs, + report an error and longjmp to failed_strcoll. */ + +static jmp_buf failed_strcoll; + +static int +xstrcoll (char const *a, char const *b) +{ + int diff; + errno = 0; + diff = strcoll (a, b); + if (errno) + { + error (0, errno, _("cannot compare file names %s and %s"), + quote_n (0, a), quote_n (1, b)); + set_exit_status (false); + longjmp (failed_strcoll, 1); + } + return diff; +} + +/* Comparison routines for sorting the files. */ + +typedef void const *V; +typedef int (*qsortFunc)(V a, V b); + +/* Used below in DEFINE_SORT_FUNCTIONS for _df_ sort function variants. */ +static int +dirfirst_check (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (V, V)) +{ + int diff = is_linked_directory (b) - is_linked_directory (a); + return diff ? diff : cmp (a, b); +} + +/* Define the 8 different sort function variants required for each sortkey. + KEY_NAME is a token describing the sort key, e.g., ctime, atime, size. + KEY_CMP_FUNC is a function to compare records based on that key, e.g., + ctime_cmp, atime_cmp, size_cmp. Append KEY_NAME to the string, + '[rev_][x]str{cmp|coll}[_df]_', to create each function name. */ +#define DEFINE_SORT_FUNCTIONS(key_name, key_cmp_func) \ + /* direct, non-dirfirst versions */ \ + static int xstrcoll_##key_name (V a, V b) \ + { return key_cmp_func (a, b, xstrcoll); } \ + ATTRIBUTE_PURE static int strcmp_##key_name (V a, V b) \ + { return key_cmp_func (a, b, strcmp); } \ + \ + /* reverse, non-dirfirst versions */ \ + static int rev_xstrcoll_##key_name (V a, V b) \ + { return key_cmp_func (b, a, xstrcoll); } \ + ATTRIBUTE_PURE static int rev_strcmp_##key_name (V a, V b) \ + { return key_cmp_func (b, a, strcmp); } \ + \ + /* direct, dirfirst versions */ \ + static int xstrcoll_df_##key_name (V a, V b) \ + { return dirfirst_check (a, b, xstrcoll_##key_name); } \ + ATTRIBUTE_PURE static int strcmp_df_##key_name (V a, V b) \ + { return dirfirst_check (a, b, strcmp_##key_name); } \ + \ + /* reverse, dirfirst versions */ \ + static int rev_xstrcoll_df_##key_name (V a, V b) \ + { return dirfirst_check (a, b, rev_xstrcoll_##key_name); } \ + ATTRIBUTE_PURE static int rev_strcmp_df_##key_name (V a, V b) \ + { return dirfirst_check (a, b, rev_strcmp_##key_name); } + +static int +cmp_ctime (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + int diff = timespec_cmp (get_stat_ctime (&b->stat), + get_stat_ctime (&a->stat)); + return diff ? diff : cmp (a->name, b->name); +} + +static int +cmp_mtime (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + int diff = timespec_cmp (get_stat_mtime (&b->stat), + get_stat_mtime (&a->stat)); + return diff ? diff : cmp (a->name, b->name); +} + +static int +cmp_atime (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + int diff = timespec_cmp (get_stat_atime (&b->stat), + get_stat_atime (&a->stat)); + return diff ? diff : cmp (a->name, b->name); +} + +static int +cmp_btime (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + int diff = timespec_cmp (get_stat_btime (&b->stat), + get_stat_btime (&a->stat)); + return diff ? diff : cmp (a->name, b->name); +} + +static int +off_cmp (off_t a, off_t b) +{ + return (a > b) - (a < b); +} + +static int +cmp_size (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + int diff = off_cmp (b->stat.st_size, a->stat.st_size); + return diff ? diff : cmp (a->name, b->name); +} + +static int +cmp_name (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + return cmp (a->name, b->name); +} + +/* Compare file extensions. Files with no extension are 'smallest'. + If extensions are the same, compare by file names instead. */ + +static int +cmp_extension (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + char const *base1 = strrchr (a->name, '.'); + char const *base2 = strrchr (b->name, '.'); + int diff = cmp (base1 ? base1 : "", base2 ? base2 : ""); + return diff ? diff : cmp (a->name, b->name); +} + +/* Return the (cached) screen width, + for the NAME associated with the passed fileinfo F. */ + +static size_t +fileinfo_name_width (struct fileinfo const *f) +{ + return f->width + ? f->width + : quote_name_width (f->name, filename_quoting_options, f->quoted); +} + +static int +cmp_width (struct fileinfo const *a, struct fileinfo const *b, + int (*cmp) (char const *, char const *)) +{ + int diff = fileinfo_name_width (a) - fileinfo_name_width (b); + return diff ? diff : cmp (a->name, b->name); +} + +DEFINE_SORT_FUNCTIONS (ctime, cmp_ctime) +DEFINE_SORT_FUNCTIONS (mtime, cmp_mtime) +DEFINE_SORT_FUNCTIONS (atime, cmp_atime) +DEFINE_SORT_FUNCTIONS (btime, cmp_btime) +DEFINE_SORT_FUNCTIONS (size, cmp_size) +DEFINE_SORT_FUNCTIONS (name, cmp_name) +DEFINE_SORT_FUNCTIONS (extension, cmp_extension) +DEFINE_SORT_FUNCTIONS (width, cmp_width) + +/* Compare file versions. + Unlike the other compare functions, cmp_version does not fail + because filevercmp and strcmp do not fail; cmp_version uses strcmp + instead of xstrcoll because filevercmp is locale-independent so + strcmp is its appropriate secondary. + + All the other sort options need xstrcoll and strcmp variants, + because they all use xstrcoll (either as the primary or secondary + sort key), and xstrcoll has the ability to do a longjmp if strcoll fails for + locale reasons. */ +static int +cmp_version (struct fileinfo const *a, struct fileinfo const *b) +{ + int diff = filevercmp (a->name, b->name); + return diff ? diff : strcmp (a->name, b->name); +} + +static int +xstrcoll_version (V a, V b) +{ + return cmp_version (a, b); +} +static int +rev_xstrcoll_version (V a, V b) +{ + return cmp_version (b, a); +} +static int +xstrcoll_df_version (V a, V b) +{ + return dirfirst_check (a, b, xstrcoll_version); +} +static int +rev_xstrcoll_df_version (V a, V b) +{ + return dirfirst_check (a, b, rev_xstrcoll_version); +} + + +/* We have 2^3 different variants for each sort-key function + (for 3 independent sort modes). + The function pointers stored in this array must be dereferenced as: + + sort_variants[sort_key][use_strcmp][reverse][dirs_first] + + Note that the order in which sort keys are listed in the function pointer + array below is defined by the order of the elements in the time_type and + sort_type enums! */ + +#define LIST_SORTFUNCTION_VARIANTS(key_name) \ + { \ + { \ + { xstrcoll_##key_name, xstrcoll_df_##key_name }, \ + { rev_xstrcoll_##key_name, rev_xstrcoll_df_##key_name }, \ + }, \ + { \ + { strcmp_##key_name, strcmp_df_##key_name }, \ + { rev_strcmp_##key_name, rev_strcmp_df_##key_name }, \ + } \ + } + +static qsortFunc const sort_functions[][2][2][2] = + { + LIST_SORTFUNCTION_VARIANTS (name), + LIST_SORTFUNCTION_VARIANTS (extension), + LIST_SORTFUNCTION_VARIANTS (width), + LIST_SORTFUNCTION_VARIANTS (size), + + { + { + { xstrcoll_version, xstrcoll_df_version }, + { rev_xstrcoll_version, rev_xstrcoll_df_version }, + }, + + /* We use nullptr for the strcmp variants of version comparison + since as explained in cmp_version definition, version comparison + does not rely on xstrcoll, so it will never longjmp, and never + need to try the strcmp fallback. */ + { + { nullptr, nullptr }, + { nullptr, nullptr }, + } + }, + + /* last are time sort functions */ + LIST_SORTFUNCTION_VARIANTS (mtime), + LIST_SORTFUNCTION_VARIANTS (ctime), + LIST_SORTFUNCTION_VARIANTS (atime), + LIST_SORTFUNCTION_VARIANTS (btime) + }; + +/* The number of sort keys is calculated as the sum of + the number of elements in the sort_type enum (i.e., sort_numtypes) + -2 because neither sort_time nor sort_none use entries themselves + the number of elements in the time_type enum (i.e., time_numtypes) + This is because when sort_type==sort_time, we have up to + time_numtypes possible sort keys. + + This line verifies at compile-time that the array of sort functions has been + initialized for all possible sort keys. */ +static_assert (ARRAY_CARDINALITY (sort_functions) + == sort_numtypes - 2 + time_numtypes); + +/* Set up SORTED_FILE to point to the in-use entries in CWD_FILE, in order. */ + +static void +initialize_ordering_vector (void) +{ + for (size_t i = 0; i < cwd_n_used; i++) + sorted_file[i] = &cwd_file[i]; +} + +/* Cache values based on attributes global to all files. */ + +static void +update_current_files_info (void) +{ + /* Cache screen width of name, if needed multiple times. */ + if (sort_type == sort_width + || (line_length && (format == many_per_line || format == horizontal))) + { + size_t i; + for (i = 0; i < cwd_n_used; i++) + { + struct fileinfo *f = sorted_file[i]; + f->width = fileinfo_name_width (f); + } + } +} + +/* Sort the files now in the table. */ + +static void +sort_files (void) +{ + bool use_strcmp; + + if (sorted_file_alloc < cwd_n_used + cwd_n_used / 2) + { + free (sorted_file); + sorted_file = xnmalloc (cwd_n_used, 3 * sizeof *sorted_file); + sorted_file_alloc = 3 * cwd_n_used; + } + + initialize_ordering_vector (); + + update_current_files_info (); + + if (sort_type == sort_none) + return; + + /* Try strcoll. If it fails, fall back on strcmp. We can't safely + ignore strcoll failures, as a failing strcoll might be a + comparison function that is not a total order, and if we ignored + the failure this might cause qsort to dump core. */ + + if (! setjmp (failed_strcoll)) + use_strcmp = false; /* strcoll() succeeded */ + else + { + use_strcmp = true; + affirm (sort_type != sort_version); + initialize_ordering_vector (); + } + + /* When sort_type == sort_time, use time_type as subindex. */ + mpsort ((void const **) sorted_file, cwd_n_used, + sort_functions[sort_type + (sort_type == sort_time ? time_type : 0)] + [use_strcmp][sort_reverse] + [directories_first]); +} + +/* List all the files now in the table. */ + +static void +print_current_files (void) +{ + size_t i; + + switch (format) + { + case one_per_line: + for (i = 0; i < cwd_n_used; i++) + { + print_file_name_and_frills (sorted_file[i], 0); + putchar (eolbyte); + } + break; + + case many_per_line: + if (! line_length) + print_with_separator (' '); + else + print_many_per_line (); + break; + + case horizontal: + if (! line_length) + print_with_separator (' '); + else + print_horizontal (); + break; + + case with_commas: + print_with_separator (','); + break; + + case long_format: + for (i = 0; i < cwd_n_used; i++) + { + set_normal_color (); + print_long_format (sorted_file[i]); + dired_outbyte (eolbyte); + } + break; + } +} + +/* Replace the first %b with precomputed aligned month names. + Note on glibc-2.7 at least, this speeds up the whole 'ls -lU' + process by around 17%, compared to letting strftime() handle the %b. */ + +static size_t +align_nstrftime (char *buf, size_t size, bool recent, struct tm const *tm, + timezone_t tz, int ns) +{ + char const *nfmt = (use_abformat + ? abformat[recent][tm->tm_mon] + : long_time_format[recent]); + return nstrftime (buf, size, nfmt, tm, tz, ns); +} + +/* Return the expected number of columns in a long-format timestamp, + or zero if it cannot be calculated. */ + +static int +long_time_expected_width (void) +{ + static int width = -1; + + if (width < 0) + { + time_t epoch = 0; + struct tm tm; + char buf[TIME_STAMP_LEN_MAXIMUM + 1]; + + /* In case you're wondering if localtime_rz can fail with an input time_t + value of 0, let's just say it's very unlikely, but not inconceivable. + The TZ environment variable would have to specify a time zone that + is 2**31-1900 years or more ahead of UTC. This could happen only on + a 64-bit system that blindly accepts e.g., TZ=UTC+20000000000000. + However, this is not possible with Solaris 10 or glibc-2.3.5, since + their implementations limit the offset to 167:59 and 24:00, resp. */ + if (localtime_rz (localtz, &epoch, &tm)) + { + size_t len = align_nstrftime (buf, sizeof buf, false, + &tm, localtz, 0); + if (len != 0) + width = mbsnwidth (buf, len, 0); + } + + if (width < 0) + width = 0; + } + + return width; +} + +/* Print the user or group name NAME, with numeric id ID, using a + print width of WIDTH columns. */ + +static void +format_user_or_group (char const *name, uintmax_t id, int width) +{ + if (name) + { + int width_gap = width - mbswidth (name, 0); + int pad = MAX (0, width_gap); + dired_outstring (name); + + do + dired_outbyte (' '); + while (pad--); + } + else + dired_pos += printf ("%*"PRIuMAX" ", width, id); +} + +/* Print the name or id of the user with id U, using a print width of + WIDTH. */ + +static void +format_user (uid_t u, int width, bool stat_ok) +{ + format_user_or_group (! stat_ok ? "?" : + (numeric_ids ? nullptr : getuser (u)), u, width); +} + +/* Likewise, for groups. */ + +static void +format_group (gid_t g, int width, bool stat_ok) +{ + format_user_or_group (! stat_ok ? "?" : + (numeric_ids ? nullptr : getgroup (g)), g, width); +} + +/* Return the number of columns that format_user_or_group will print. */ + +static int +format_user_or_group_width (char const *name, uintmax_t id) +{ + if (name) + { + int len = mbswidth (name, 0); + return MAX (0, len); + } + else + return snprintf (nullptr, 0, "%"PRIuMAX, id); +} + +/* Return the number of columns that format_user will print. */ + +static int +format_user_width (uid_t u) +{ + return format_user_or_group_width (numeric_ids ? nullptr : getuser (u), u); +} + +/* Likewise, for groups. */ + +static int +format_group_width (gid_t g) +{ + return format_user_or_group_width (numeric_ids ? nullptr : getgroup (g), g); +} + +/* Return a pointer to a formatted version of F->stat.st_ino, + possibly using buffer, which must be at least + INT_BUFSIZE_BOUND (uintmax_t) bytes. */ +static char * +format_inode (char buf[INT_BUFSIZE_BOUND (uintmax_t)], + const struct fileinfo *f) +{ + return (f->stat_ok && f->stat.st_ino != NOT_AN_INODE_NUMBER + ? umaxtostr (f->stat.st_ino, buf) + : (char *) "?"); +} + +/* Print information about F in long format. */ +static void +print_long_format (const struct fileinfo *f) +{ + char modebuf[12]; + char buf + [LONGEST_HUMAN_READABLE + 1 /* inode */ + + LONGEST_HUMAN_READABLE + 1 /* size in blocks */ + + sizeof (modebuf) - 1 + 1 /* mode string */ + + INT_BUFSIZE_BOUND (uintmax_t) /* st_nlink */ + + LONGEST_HUMAN_READABLE + 2 /* major device number */ + + LONGEST_HUMAN_READABLE + 1 /* minor device number */ + + TIME_STAMP_LEN_MAXIMUM + 1 /* max length of time/date */ + ]; + size_t s; + char *p; + struct timespec when_timespec; + struct tm when_local; + bool btime_ok = true; + + /* Compute the mode string, except remove the trailing space if no + file in this directory has an ACL or security context. */ + if (f->stat_ok) + filemodestring (&f->stat, modebuf); + else + { + modebuf[0] = filetype_letter[f->filetype]; + memset (modebuf + 1, '?', 10); + modebuf[11] = '\0'; + } + if (! any_has_acl) + modebuf[10] = '\0'; + else if (f->acl_type == ACL_T_LSM_CONTEXT_ONLY) + modebuf[10] = '.'; + else if (f->acl_type == ACL_T_YES) + modebuf[10] = '+'; + + switch (time_type) + { + case time_ctime: + when_timespec = get_stat_ctime (&f->stat); + break; + case time_mtime: + when_timespec = get_stat_mtime (&f->stat); + break; + case time_atime: + when_timespec = get_stat_atime (&f->stat); + break; + case time_btime: + when_timespec = get_stat_btime (&f->stat); + if (when_timespec.tv_sec == -1 && when_timespec.tv_nsec == -1) + btime_ok = false; + break; + default: + unreachable (); + } + + p = buf; + + if (print_inode) + { + char hbuf[INT_BUFSIZE_BOUND (uintmax_t)]; + p += sprintf (p, "%*s ", inode_number_width, format_inode (hbuf, f)); + } + + if (print_block_size) + { + char hbuf[LONGEST_HUMAN_READABLE + 1]; + char const *blocks = + (! f->stat_ok + ? "?" + : human_readable (ST_NBLOCKS (f->stat), hbuf, human_output_opts, + ST_NBLOCKSIZE, output_block_size)); + int pad; + for (pad = block_size_width - mbswidth (blocks, 0); 0 < pad; pad--) + *p++ = ' '; + while ((*p++ = *blocks++)) + continue; + p[-1] = ' '; + } + + /* The last byte of the mode string is the POSIX + "optional alternate access method flag". */ + { + char hbuf[INT_BUFSIZE_BOUND (uintmax_t)]; + p += sprintf (p, "%s %*s ", modebuf, nlink_width, + ! f->stat_ok ? "?" : umaxtostr (f->stat.st_nlink, hbuf)); + } + + dired_indent (); + + if (print_owner || print_group || print_author || print_scontext) + { + dired_outbuf (buf, p - buf); + + if (print_owner) + format_user (f->stat.st_uid, owner_width, f->stat_ok); + + if (print_group) + format_group (f->stat.st_gid, group_width, f->stat_ok); + + if (print_author) + format_user (f->stat.st_author, author_width, f->stat_ok); + + if (print_scontext) + format_user_or_group (f->scontext, 0, scontext_width); + + p = buf; + } + + if (f->stat_ok + && (S_ISCHR (f->stat.st_mode) || S_ISBLK (f->stat.st_mode))) + { + char majorbuf[INT_BUFSIZE_BOUND (uintmax_t)]; + char minorbuf[INT_BUFSIZE_BOUND (uintmax_t)]; + int blanks_width = (file_size_width + - (major_device_number_width + 2 + + minor_device_number_width)); + p += sprintf (p, "%*s, %*s ", + major_device_number_width + MAX (0, blanks_width), + umaxtostr (major (f->stat.st_rdev), majorbuf), + minor_device_number_width, + umaxtostr (minor (f->stat.st_rdev), minorbuf)); + } + else + { + char hbuf[LONGEST_HUMAN_READABLE + 1]; + char const *size = + (! f->stat_ok + ? "?" + : human_readable (unsigned_file_size (f->stat.st_size), + hbuf, file_human_output_opts, 1, + file_output_block_size)); + int pad; + for (pad = file_size_width - mbswidth (size, 0); 0 < pad; pad--) + *p++ = ' '; + while ((*p++ = *size++)) + continue; + p[-1] = ' '; + } + + s = 0; + *p = '\1'; + + if (f->stat_ok && btime_ok + && localtime_rz (localtz, &when_timespec.tv_sec, &when_local)) + { + struct timespec six_months_ago; + bool recent; + + /* If the file appears to be in the future, update the current + time, in case the file happens to have been modified since + the last time we checked the clock. */ + if (timespec_cmp (current_time, when_timespec) < 0) + gettime (¤t_time); + + /* Consider a time to be recent if it is within the past six months. + A Gregorian year has 365.2425 * 24 * 60 * 60 == 31556952 seconds + on the average. Write this value as an integer constant to + avoid floating point hassles. */ + six_months_ago.tv_sec = current_time.tv_sec - 31556952 / 2; + six_months_ago.tv_nsec = current_time.tv_nsec; + + recent = (timespec_cmp (six_months_ago, when_timespec) < 0 + && timespec_cmp (when_timespec, current_time) < 0); + + /* We assume here that all time zones are offset from UTC by a + whole number of seconds. */ + s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, recent, + &when_local, localtz, when_timespec.tv_nsec); + } + + if (s || !*p) + { + p += s; + *p++ = ' '; + } + else + { + /* The time cannot be converted using the desired format, so + print it as a huge integer number of seconds. */ + char hbuf[INT_BUFSIZE_BOUND (intmax_t)]; + p += sprintf (p, "%*s ", long_time_expected_width (), + (! f->stat_ok || ! btime_ok + ? "?" + : timetostr (when_timespec.tv_sec, hbuf))); + /* FIXME: (maybe) We discarded when_timespec.tv_nsec. */ + } + + dired_outbuf (buf, p - buf); + size_t w = print_name_with_quoting (f, false, &dired_obstack, p - buf); + + if (f->filetype == symbolic_link) + { + if (f->linkname) + { + dired_outstring (" -> "); + print_name_with_quoting (f, true, nullptr, (p - buf) + w + 4); + if (indicator_style != none) + print_type_indicator (true, f->linkmode, unknown); + } + } + else if (indicator_style != none) + print_type_indicator (f->stat_ok, f->stat.st_mode, f->filetype); +} + +/* Write to *BUF a quoted representation of the file name NAME, if non-null, + using OPTIONS to control quoting. *BUF is set to NAME if no quoting + is required. *BUF is allocated if more space required (and the original + *BUF is not deallocated). + Store the number of screen columns occupied by NAME's quoted + representation into WIDTH, if non-null. + Store into PAD whether an initial space is needed for padding. + Return the number of bytes in *BUF. */ + +static size_t +quote_name_buf (char **inbuf, size_t bufsize, char *name, + struct quoting_options const *options, + int needs_general_quoting, size_t *width, bool *pad) +{ + char *buf = *inbuf; + size_t displayed_width IF_LINT ( = 0); + size_t len = 0; + bool quoted; + + enum quoting_style qs = get_quoting_style (options); + bool needs_further_quoting = qmark_funny_chars + && (qs == shell_quoting_style + || qs == shell_always_quoting_style + || qs == literal_quoting_style); + + if (needs_general_quoting != 0) + { + len = quotearg_buffer (buf, bufsize, name, -1, options); + if (bufsize <= len) + { + buf = xmalloc (len + 1); + quotearg_buffer (buf, len + 1, name, -1, options); + } + + quoted = (*name != *buf) || strlen (name) != len; + } + else if (needs_further_quoting) + { + len = strlen (name); + if (bufsize <= len) + buf = xmalloc (len + 1); + memcpy (buf, name, len + 1); + + quoted = false; + } + else + { + len = strlen (name); + buf = name; + quoted = false; + } + + if (needs_further_quoting) + { + if (MB_CUR_MAX > 1) + { + char const *p = buf; + char const *plimit = buf + len; + char *q = buf; + displayed_width = 0; + + while (p < plimit) + switch (*p) + { + case ' ': case '!': case '"': case '#': case '%': + case '&': case '\'': case '(': case ')': case '*': + case '+': case ',': case '-': case '.': case '/': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case ':': case ';': case '<': case '=': case '>': + case '?': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case '[': case '\\': case ']': case '^': case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': case '{': case '|': case '}': case '~': + /* These characters are printable ASCII characters. */ + *q++ = *p++; + displayed_width += 1; + break; + default: + /* If we have a multibyte sequence, copy it until we + reach its end, replacing each non-printable multibyte + character with a single question mark. */ + { + mbstate_t mbstate = { 0, }; + do + { + wchar_t wc; + size_t bytes; + int w; + + bytes = mbrtowc (&wc, p, plimit - p, &mbstate); + + if (bytes == (size_t) -1) + { + /* An invalid multibyte sequence was + encountered. Skip one input byte, and + put a question mark. */ + p++; + *q++ = '?'; + displayed_width += 1; + break; + } + + if (bytes == (size_t) -2) + { + /* An incomplete multibyte character + at the end. Replace it entirely with + a question mark. */ + p = plimit; + *q++ = '?'; + displayed_width += 1; + break; + } + + if (bytes == 0) + /* A null wide character was encountered. */ + bytes = 1; + + w = wcwidth (wc); + if (w >= 0) + { + /* A printable multibyte character. + Keep it. */ + for (; bytes > 0; --bytes) + *q++ = *p++; + displayed_width += w; + } + else + { + /* An nonprintable multibyte character. + Replace it entirely with a question + mark. */ + p += bytes; + *q++ = '?'; + displayed_width += 1; + } + } + while (! mbsinit (&mbstate)); + } + break; + } + + /* The buffer may have shrunk. */ + len = q - buf; + } + else + { + char *p = buf; + char const *plimit = buf + len; + + while (p < plimit) + { + if (! isprint (to_uchar (*p))) + *p = '?'; + p++; + } + displayed_width = len; + } + } + else if (width != nullptr) + { + if (MB_CUR_MAX > 1) + displayed_width = mbsnwidth (buf, len, 0); + else + { + char const *p = buf; + char const *plimit = buf + len; + + displayed_width = 0; + while (p < plimit) + { + if (isprint (to_uchar (*p))) + displayed_width++; + p++; + } + } + } + + /* Set padding to better align quoted items, + and also give a visual indication that quotes are + not actually part of the name. */ + *pad = (align_variable_outer_quotes && cwd_some_quoted && ! quoted); + + if (width != nullptr) + *width = displayed_width; + + *inbuf = buf; + + return len; +} + +static size_t +quote_name_width (char const *name, struct quoting_options const *options, + int needs_general_quoting) +{ + char smallbuf[BUFSIZ]; + char *buf = smallbuf; + size_t width; + bool pad; + + quote_name_buf (&buf, sizeof smallbuf, (char *) name, options, + needs_general_quoting, &width, &pad); + + if (buf != smallbuf && buf != name) + free (buf); + + width += pad; + + return width; +} + +/* %XX escape any input out of range as defined in RFC3986, + and also if PATH, convert all path separators to '/'. */ +static char * +file_escape (char const *str, bool path) +{ + char *esc = xnmalloc (3, strlen (str) + 1); + char *p = esc; + while (*str) + { + if (path && ISSLASH (*str)) + { + *p++ = '/'; + str++; + } + else if (RFC3986[to_uchar (*str)]) + *p++ = *str++; + else + p += sprintf (p, "%%%02x", to_uchar (*str++)); + } + *p = '\0'; + return esc; +} + +static size_t +quote_name (char const *name, struct quoting_options const *options, + int needs_general_quoting, const struct bin_str *color, + bool allow_pad, struct obstack *stack, char const *absolute_name) +{ + char smallbuf[BUFSIZ]; + char *buf = smallbuf; + size_t len; + bool pad; + + len = quote_name_buf (&buf, sizeof smallbuf, (char *) name, options, + needs_general_quoting, nullptr, &pad); + + if (pad && allow_pad) + dired_outbyte (' '); + + if (color) + print_color_indicator (color); + + /* If we're padding, then don't include the outer quotes in + the --hyperlink, to improve the alignment of those links. */ + bool skip_quotes = false; + + if (absolute_name) + { + if (align_variable_outer_quotes && cwd_some_quoted && ! pad) + { + skip_quotes = true; + putchar (*buf); + } + char *h = file_escape (hostname, /* path= */ false); + char *n = file_escape (absolute_name, /* path= */ true); + /* TODO: It would be good to be able to define parameters + to give hints to the terminal as how best to render the URI. + For example since ls is outputting a dense block of URIs + it would be best to not underline by default, and only + do so upon hover etc. */ + printf ("\033]8;;file://%s%s%s\a", h, *n == '/' ? "" : "/", n); + free (h); + free (n); + } + + if (stack) + push_current_dired_pos (stack); + + fwrite (buf + skip_quotes, 1, len - (skip_quotes * 2), stdout); + + dired_pos += len; + + if (stack) + push_current_dired_pos (stack); + + if (absolute_name) + { + fputs ("\033]8;;\a", stdout); + if (skip_quotes) + putchar (*(buf + len - 1)); + } + + if (buf != smallbuf && buf != name) + free (buf); + + return len + pad; +} + +static size_t +print_name_with_quoting (const struct fileinfo *f, + bool symlink_target, + struct obstack *stack, + size_t start_col) +{ + char const *name = symlink_target ? f->linkname : f->name; + + const struct bin_str *color + = print_with_color ? get_color_indicator (f, symlink_target) : nullptr; + + bool used_color_this_time = (print_with_color + && (color || is_colored (C_NORM))); + + size_t len = quote_name (name, filename_quoting_options, f->quoted, + color, !symlink_target, stack, f->absolute_name); + + process_signals (); + if (used_color_this_time) + { + prep_non_filename_text (); + + /* We use the byte length rather than display width here as + an optimization to avoid accurately calculating the width, + because we only output the clear to EOL sequence if the name + _might_ wrap to the next line. This may output a sequence + unnecessarily in multi-byte locales for example, + but in that case it's inconsequential to the output. */ + if (line_length + && (start_col / line_length != (start_col + len - 1) / line_length)) + put_indicator (&color_indicator[C_CLR_TO_EOL]); + } + + return len; +} + +static void +prep_non_filename_text (void) +{ + if (color_indicator[C_END].string != nullptr) + put_indicator (&color_indicator[C_END]); + else + { + put_indicator (&color_indicator[C_LEFT]); + put_indicator (&color_indicator[C_RESET]); + put_indicator (&color_indicator[C_RIGHT]); + } +} + +/* Print the file name of 'f' with appropriate quoting. + Also print file size, inode number, and filetype indicator character, + as requested by switches. */ + +static size_t +print_file_name_and_frills (const struct fileinfo *f, size_t start_col) +{ + char buf[MAX (LONGEST_HUMAN_READABLE + 1, INT_BUFSIZE_BOUND (uintmax_t))]; + + set_normal_color (); + + if (print_inode) + printf ("%*s ", format == with_commas ? 0 : inode_number_width, + format_inode (buf, f)); + + if (print_block_size) + printf ("%*s ", format == with_commas ? 0 : block_size_width, + ! f->stat_ok ? "?" + : human_readable (ST_NBLOCKS (f->stat), buf, human_output_opts, + ST_NBLOCKSIZE, output_block_size)); + + if (print_scontext) + printf ("%*s ", format == with_commas ? 0 : scontext_width, f->scontext); + + size_t width = print_name_with_quoting (f, false, nullptr, start_col); + + if (indicator_style != none) + width += print_type_indicator (f->stat_ok, f->stat.st_mode, f->filetype); + + return width; +} + +/* Given these arguments describing a file, return the single-byte + type indicator, or 0. */ +static char +get_type_indicator (bool stat_ok, mode_t mode, enum filetype type) +{ + char c; + + if (stat_ok ? S_ISREG (mode) : type == normal) + { + if (stat_ok && indicator_style == classify && (mode & S_IXUGO)) + c = '*'; + else + c = 0; + } + else + { + if (stat_ok ? S_ISDIR (mode) : type == directory || type == arg_directory) + c = '/'; + else if (indicator_style == slash) + c = 0; + else if (stat_ok ? S_ISLNK (mode) : type == symbolic_link) + c = '@'; + else if (stat_ok ? S_ISFIFO (mode) : type == fifo) + c = '|'; + else if (stat_ok ? S_ISSOCK (mode) : type == sock) + c = '='; + else if (stat_ok && S_ISDOOR (mode)) + c = '>'; + else + c = 0; + } + return c; +} + +static bool +print_type_indicator (bool stat_ok, mode_t mode, enum filetype type) +{ + char c = get_type_indicator (stat_ok, mode, type); + if (c) + dired_outbyte (c); + return !!c; +} + +/* Returns if color sequence was printed. */ +static bool +print_color_indicator (const struct bin_str *ind) +{ + if (ind) + { + /* Need to reset so not dealing with attribute combinations */ + if (is_colored (C_NORM)) + restore_default_color (); + put_indicator (&color_indicator[C_LEFT]); + put_indicator (ind); + put_indicator (&color_indicator[C_RIGHT]); + } + + return ind != nullptr; +} + +/* Returns color indicator or nullptr if none. */ +ATTRIBUTE_PURE +static const struct bin_str* +get_color_indicator (const struct fileinfo *f, bool symlink_target) +{ + enum indicator_no type; + struct color_ext_type *ext; /* Color extension */ + size_t len; /* Length of name */ + + char const *name; + mode_t mode; + int linkok; + if (symlink_target) + { + name = f->linkname; + mode = f->linkmode; + linkok = f->linkok ? 0 : -1; + } + else + { + name = f->name; + mode = file_or_link_mode (f); + linkok = f->linkok; + } + + /* Is this a nonexistent file? If so, linkok == -1. */ + + if (linkok == -1 && is_colored (C_MISSING)) + type = C_MISSING; + else if (!f->stat_ok) + { + static enum indicator_no filetype_indicator[] = FILETYPE_INDICATORS; + type = filetype_indicator[f->filetype]; + } + else + { + if (S_ISREG (mode)) + { + type = C_FILE; + + if ((mode & S_ISUID) != 0 && is_colored (C_SETUID)) + type = C_SETUID; + else if ((mode & S_ISGID) != 0 && is_colored (C_SETGID)) + type = C_SETGID; + else if (is_colored (C_CAP) && f->has_capability) + type = C_CAP; + else if ((mode & S_IXUGO) != 0 && is_colored (C_EXEC)) + type = C_EXEC; + else if ((1 < f->stat.st_nlink) && is_colored (C_MULTIHARDLINK)) + type = C_MULTIHARDLINK; + } + else if (S_ISDIR (mode)) + { + type = C_DIR; + + if ((mode & S_ISVTX) && (mode & S_IWOTH) + && is_colored (C_STICKY_OTHER_WRITABLE)) + type = C_STICKY_OTHER_WRITABLE; + else if ((mode & S_IWOTH) != 0 && is_colored (C_OTHER_WRITABLE)) + type = C_OTHER_WRITABLE; + else if ((mode & S_ISVTX) != 0 && is_colored (C_STICKY)) + type = C_STICKY; + } + else if (S_ISLNK (mode)) + type = C_LINK; + else if (S_ISFIFO (mode)) + type = C_FIFO; + else if (S_ISSOCK (mode)) + type = C_SOCK; + else if (S_ISBLK (mode)) + type = C_BLK; + else if (S_ISCHR (mode)) + type = C_CHR; + else if (S_ISDOOR (mode)) + type = C_DOOR; + else + { + /* Classify a file of some other type as C_ORPHAN. */ + type = C_ORPHAN; + } + } + + /* Check the file's suffix only if still classified as C_FILE. */ + ext = nullptr; + if (type == C_FILE) + { + /* Test if NAME has a recognized suffix. */ + + len = strlen (name); + name += len; /* Pointer to final \0. */ + for (ext = color_ext_list; ext != nullptr; ext = ext->next) + { + if (ext->ext.len <= len) + { + if (ext->exact_match) + { + if (STREQ_LEN (name - ext->ext.len, ext->ext.string, + ext->ext.len)) + break; + } + else + { + if (c_strncasecmp (name - ext->ext.len, ext->ext.string, + ext->ext.len) == 0) + break; + } + } + } + } + + /* Adjust the color for orphaned symlinks. */ + if (type == C_LINK && !linkok) + { + if (color_symlink_as_referent || is_colored (C_ORPHAN)) + type = C_ORPHAN; + } + + const struct bin_str *const s + = ext ? &(ext->seq) : &color_indicator[type]; + + return s->string ? s : nullptr; +} + +/* Output a color indicator (which may contain nulls). */ +static void +put_indicator (const struct bin_str *ind) +{ + if (! used_color) + { + used_color = true; + + /* If the standard output is a controlling terminal, watch out + for signals, so that the colors can be restored to the + default state if "ls" is suspended or interrupted. */ + + if (0 <= tcgetpgrp (STDOUT_FILENO)) + signal_init (); + + prep_non_filename_text (); + } + + fwrite (ind->string, ind->len, 1, stdout); +} + +static size_t +length_of_file_name_and_frills (const struct fileinfo *f) +{ + size_t len = 0; + char buf[MAX (LONGEST_HUMAN_READABLE + 1, INT_BUFSIZE_BOUND (uintmax_t))]; + + if (print_inode) + len += 1 + (format == with_commas + ? strlen (umaxtostr (f->stat.st_ino, buf)) + : inode_number_width); + + if (print_block_size) + len += 1 + (format == with_commas + ? strlen (! f->stat_ok ? "?" + : human_readable (ST_NBLOCKS (f->stat), buf, + human_output_opts, ST_NBLOCKSIZE, + output_block_size)) + : block_size_width); + + if (print_scontext) + len += 1 + (format == with_commas ? strlen (f->scontext) : scontext_width); + + len += fileinfo_name_width (f); + + if (indicator_style != none) + { + char c = get_type_indicator (f->stat_ok, f->stat.st_mode, f->filetype); + len += (c != 0); + } + + return len; +} + +static void +print_many_per_line (void) +{ + size_t row; /* Current row. */ + size_t cols = calculate_columns (true); + struct column_info const *line_fmt = &column_info[cols - 1]; + + /* Calculate the number of rows that will be in each column except possibly + for a short column on the right. */ + size_t rows = cwd_n_used / cols + (cwd_n_used % cols != 0); + + for (row = 0; row < rows; row++) + { + size_t col = 0; + size_t filesno = row; + size_t pos = 0; + + /* Print the next row. */ + while (true) + { + struct fileinfo const *f = sorted_file[filesno]; + size_t name_length = length_of_file_name_and_frills (f); + size_t max_name_length = line_fmt->col_arr[col++]; + print_file_name_and_frills (f, pos); + + filesno += rows; + if (filesno >= cwd_n_used) + break; + + indent (pos + name_length, pos + max_name_length); + pos += max_name_length; + } + putchar (eolbyte); + } +} + +static void +print_horizontal (void) +{ + size_t filesno; + size_t pos = 0; + size_t cols = calculate_columns (false); + struct column_info const *line_fmt = &column_info[cols - 1]; + struct fileinfo const *f = sorted_file[0]; + size_t name_length = length_of_file_name_and_frills (f); + size_t max_name_length = line_fmt->col_arr[0]; + + /* Print first entry. */ + print_file_name_and_frills (f, 0); + + /* Now the rest. */ + for (filesno = 1; filesno < cwd_n_used; ++filesno) + { + size_t col = filesno % cols; + + if (col == 0) + { + putchar (eolbyte); + pos = 0; + } + else + { + indent (pos + name_length, pos + max_name_length); + pos += max_name_length; + } + + f = sorted_file[filesno]; + print_file_name_and_frills (f, pos); + + name_length = length_of_file_name_and_frills (f); + max_name_length = line_fmt->col_arr[col]; + } + putchar (eolbyte); +} + +/* Output name + SEP + ' '. */ + +static void +print_with_separator (char sep) +{ + size_t filesno; + size_t pos = 0; + + for (filesno = 0; filesno < cwd_n_used; filesno++) + { + struct fileinfo const *f = sorted_file[filesno]; + size_t len = line_length ? length_of_file_name_and_frills (f) : 0; + + if (filesno != 0) + { + char separator; + + if (! line_length + || ((pos + len + 2 < line_length) + && (pos <= SIZE_MAX - len - 2))) + { + pos += 2; + separator = ' '; + } + else + { + pos = 0; + separator = eolbyte; + } + + putchar (sep); + putchar (separator); + } + + print_file_name_and_frills (f, pos); + pos += len; + } + putchar (eolbyte); +} + +/* Assuming cursor is at position FROM, indent up to position TO. + Use a TAB character instead of two or more spaces whenever possible. */ + +static void +indent (size_t from, size_t to) +{ + while (from < to) + { + if (tabsize != 0 && to / tabsize > (from + 1) / tabsize) + { + putchar ('\t'); + from += tabsize - from % tabsize; + } + else + { + putchar (' '); + from++; + } + } +} + +/* Put DIRNAME/NAME into DEST, handling '.' and '/' properly. */ +/* FIXME: maybe remove this function someday. See about using a + non-malloc'ing version of file_name_concat. */ + +static void +attach (char *dest, char const *dirname, char const *name) +{ + char const *dirnamep = dirname; + + /* Copy dirname if it is not ".". */ + if (dirname[0] != '.' || dirname[1] != 0) + { + while (*dirnamep) + *dest++ = *dirnamep++; + /* Add '/' if 'dirname' doesn't already end with it. */ + if (dirnamep > dirname && dirnamep[-1] != '/') + *dest++ = '/'; + } + while (*name) + *dest++ = *name++; + *dest = 0; +} + +/* Allocate enough column info suitable for the current number of + files and display columns, and initialize the info to represent the + narrowest possible columns. */ + +static void +init_column_info (size_t max_cols) +{ + size_t i; + + /* Currently allocated columns in column_info. */ + static size_t column_info_alloc; + + if (column_info_alloc < max_cols) + { + size_t new_column_info_alloc; + size_t *p; + + if (!max_idx || max_cols < max_idx / 2) + { + /* The number of columns is far less than the display width + allows. Grow the allocation, but only so that it's + double the current requirements. If the display is + extremely wide, this avoids allocating a lot of memory + that is never needed. */ + column_info = xnrealloc (column_info, max_cols, + 2 * sizeof *column_info); + new_column_info_alloc = 2 * max_cols; + } + else + { + column_info = xnrealloc (column_info, max_idx, sizeof *column_info); + new_column_info_alloc = max_idx; + } + + /* Allocate the new size_t objects by computing the triangle + formula n * (n + 1) / 2, except that we don't need to + allocate the part of the triangle that we've already + allocated. Check for address arithmetic overflow. */ + { + size_t column_info_growth = new_column_info_alloc - column_info_alloc; + size_t s = column_info_alloc + 1 + new_column_info_alloc; + size_t t = s * column_info_growth; + if (s < new_column_info_alloc || t / column_info_growth != s) + xalloc_die (); + p = xnmalloc (t / 2, sizeof *p); + } + + /* Grow the triangle by parceling out the cells just allocated. */ + for (i = column_info_alloc; i < new_column_info_alloc; i++) + { + column_info[i].col_arr = p; + p += i + 1; + } + + column_info_alloc = new_column_info_alloc; + } + + for (i = 0; i < max_cols; ++i) + { + size_t j; + + column_info[i].valid_len = true; + column_info[i].line_len = (i + 1) * MIN_COLUMN_WIDTH; + for (j = 0; j <= i; ++j) + column_info[i].col_arr[j] = MIN_COLUMN_WIDTH; + } +} + +/* Calculate the number of columns needed to represent the current set + of files in the current display width. */ + +static size_t +calculate_columns (bool by_columns) +{ + size_t filesno; /* Index into cwd_file. */ + size_t cols; /* Number of files across. */ + + /* Normally the maximum number of columns is determined by the + screen width. But if few files are available this might limit it + as well. */ + size_t max_cols = 0 < max_idx && max_idx < cwd_n_used ? max_idx : cwd_n_used; + + init_column_info (max_cols); + + /* Compute the maximum number of possible columns. */ + for (filesno = 0; filesno < cwd_n_used; ++filesno) + { + struct fileinfo const *f = sorted_file[filesno]; + size_t name_length = length_of_file_name_and_frills (f); + + for (size_t i = 0; i < max_cols; ++i) + { + if (column_info[i].valid_len) + { + size_t idx = (by_columns + ? filesno / ((cwd_n_used + i) / (i + 1)) + : filesno % (i + 1)); + size_t real_length = name_length + (idx == i ? 0 : 2); + + if (column_info[i].col_arr[idx] < real_length) + { + column_info[i].line_len += (real_length + - column_info[i].col_arr[idx]); + column_info[i].col_arr[idx] = real_length; + column_info[i].valid_len = (column_info[i].line_len + < line_length); + } + } + } + } + + /* Find maximum allowed columns. */ + for (cols = max_cols; 1 < cols; --cols) + { + if (column_info[cols - 1].valid_len) + break; + } + + return cols; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name); + fputs (_("\ +List information about the FILEs (the current directory by default).\n\ +Sort entries alphabetically if none of -cftuvSUX nor --sort is specified.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -a, --all do not ignore entries starting with .\n\ + -A, --almost-all do not list implied . and ..\n\ + --author with -l, print the author of each file\n\ + -b, --escape print C-style escapes for nongraphic characters\n\ +"), stdout); + fputs (_("\ + --block-size=SIZE with -l, scale sizes by SIZE when printing them;\n\ + e.g., '--block-size=M'; see SIZE format below\n\ +\n\ +"), stdout); + fputs (_("\ + -B, --ignore-backups do not list implied entries ending with ~\n\ +"), stdout); + fputs (_("\ + -c with -lt: sort by, and show, ctime (time of last\n\ + change of file status information);\n\ + with -l: show ctime and sort by name;\n\ + otherwise: sort by ctime, newest first\n\ +\n\ +"), stdout); + fputs (_("\ + -C list entries by columns\n\ + --color[=WHEN] color the output WHEN; more info below\n\ + -d, --directory list directories themselves, not their contents\n\ + -D, --dired generate output designed for Emacs' dired mode\n\ +"), stdout); + fputs (_("\ + -f list all entries in directory order\n\ + -F, --classify[=WHEN] append indicator (one of */=>@|) to entries WHEN\n\ + --file-type likewise, except do not append '*'\n\ +"), stdout); + fputs (_("\ + --format=WORD across -x, commas -m, horizontal -x, long -l,\n\ + single-column -1, verbose -l, vertical -C\n\ +\n\ +"), stdout); + fputs (_("\ + --full-time like -l --time-style=full-iso\n\ +"), stdout); + fputs (_("\ + -g like -l, but do not list owner\n\ +"), stdout); + fputs (_("\ + --group-directories-first\n\ + group directories before files;\n\ + can be augmented with a --sort option, but any\n\ + use of --sort=none (-U) disables grouping\n\ +\n\ +"), stdout); + fputs (_("\ + -G, --no-group in a long listing, don't print group names\n\ +"), stdout); + fputs (_("\ + -h, --human-readable with -l and -s, print sizes like 1K 234M 2G etc.\n\ + --si likewise, but use powers of 1000 not 1024\n\ +"), stdout); + fputs (_("\ + -H, --dereference-command-line\n\ + follow symbolic links listed on the command line\n\ +"), stdout); + fputs (_("\ + --dereference-command-line-symlink-to-dir\n\ + follow each command line symbolic link\n\ + that points to a directory\n\ +\n\ +"), stdout); + fputs (_("\ + --hide=PATTERN do not list implied entries matching shell PATTERN\ +\n\ + (overridden by -a or -A)\n\ +\n\ +"), stdout); + fputs (_("\ + --hyperlink[=WHEN] hyperlink file names WHEN\n\ +"), stdout); + fputs (_("\ + --indicator-style=WORD\n\ + append indicator with style WORD to entry names:\n\ + none (default), slash (-p),\n\ + file-type (--file-type), classify (-F)\n\ +\n\ +"), stdout); + fputs (_("\ + -i, --inode print the index number of each file\n\ + -I, --ignore=PATTERN do not list implied entries matching shell PATTERN\ +\n\ +"), stdout); + fputs (_("\ + -k, --kibibytes default to 1024-byte blocks for file system usage;\ +\n\ + used only with -s and per directory totals\n\ +\n\ +"), stdout); + fputs (_("\ + -l use a long listing format\n\ +"), stdout); + fputs (_("\ + -L, --dereference when showing file information for a symbolic\n\ + link, show information for the file the link\n\ + references rather than for the link itself\n\ +\n\ +"), stdout); + fputs (_("\ + -m fill width with a comma separated list of entries\ +\n\ +"), stdout); + fputs (_("\ + -n, --numeric-uid-gid like -l, but list numeric user and group IDs\n\ + -N, --literal print entry names without quoting\n\ + -o like -l, but do not list group information\n\ + -p, --indicator-style=slash\n\ + append / indicator to directories\n\ +"), stdout); + fputs (_("\ + -q, --hide-control-chars print ? instead of nongraphic characters\n\ +"), stdout); + fputs (_("\ + --show-control-chars show nongraphic characters as-is (the default,\n\ + unless program is 'ls' and output is a terminal)\ +\n\ +\n\ +"), stdout); + fputs (_("\ + -Q, --quote-name enclose entry names in double quotes\n\ +"), stdout); + fputs (_("\ + --quoting-style=WORD use quoting style WORD for entry names:\n\ + literal, locale, shell, shell-always,\n\ + shell-escape, shell-escape-always, c, escape\n\ + (overrides QUOTING_STYLE environment variable)\n\ +\n\ +"), stdout); + fputs (_("\ + -r, --reverse reverse order while sorting\n\ + -R, --recursive list subdirectories recursively\n\ + -s, --size print the allocated size of each file, in blocks\n\ +"), stdout); + fputs (_("\ + -S sort by file size, largest first\n\ +"), stdout); + fputs (_("\ + --sort=WORD sort by WORD instead of name: none (-U), size (-S)\ +,\n\ + time (-t), version (-v), extension (-X), width\n\ +\n\ +"), stdout); + fputs (_("\ + --time=WORD select which timestamp used to display or sort;\n\ + access time (-u): atime, access, use;\n\ + metadata change time (-c): ctime, status;\n\ + modified time (default): mtime, modification;\n\ + birth time: birth, creation;\n\ + with -l, WORD determines which time to show;\n\ + with --sort=time, sort by WORD (newest first)\n\ +\n\ +"), stdout); + fputs (_("\ + --time-style=TIME_STYLE\n\ + time/date format with -l; see TIME_STYLE below\n\ +"), stdout); + fputs (_("\ + -t sort by time, newest first; see --time\n\ + -T, --tabsize=COLS assume tab stops at each COLS instead of 8\n\ +"), stdout); + fputs (_("\ + -u with -lt: sort by, and show, access time;\n\ + with -l: show access time and sort by name;\n\ + otherwise: sort by access time, newest first\n\ +\n\ +"), stdout); + fputs (_("\ + -U do not sort; list entries in directory order\n\ +"), stdout); + fputs (_("\ + -v natural sort of (version) numbers within text\n\ +"), stdout); + fputs (_("\ + -w, --width=COLS set output width to COLS. 0 means no limit\n\ + -x list entries by lines instead of by columns\n\ + -X sort alphabetically by entry extension\n\ + -Z, --context print any security context of each file\n\ + --zero end each output line with NUL, not newline\n\ + -1 list one file per line\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_size_note (); + fputs (_("\ +\n\ +The TIME_STYLE argument can be full-iso, long-iso, iso, locale, or +FORMAT.\n\ +FORMAT is interpreted like in date(1). If FORMAT is FORMAT1FORMAT2,\n\ +then FORMAT1 applies to non-recent files and FORMAT2 to recent files.\n\ +TIME_STYLE prefixed with 'posix-' takes effect only outside the POSIX locale.\n\ +Also the TIME_STYLE environment variable sets the default style to use.\n\ +"), stdout); + fputs (_("\ +\n\ +The WHEN argument defaults to 'always' and can also be 'auto' or 'never'.\n\ +"), stdout); + fputs (_("\ +\n\ +Using color to distinguish file types is disabled both by default and\n\ +with --color=never. With --color=auto, ls emits color codes only when\n\ +standard output is connected to a terminal. The LS_COLORS environment\n\ +variable can change the settings. Use the dircolors(1) command to set it.\n\ +"), stdout); + fputs (_("\ +\n\ +Exit status:\n\ + 0 if OK,\n\ + 1 if minor problems (e.g., cannot access subdirectory),\n\ + 2 if serious trouble (e.g., cannot access command-line argument).\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} diff --git a/src/ls.h b/src/ls.h new file mode 100644 index 0000000..b358211 --- /dev/null +++ b/src/ls.h @@ -0,0 +1,10 @@ +/* This is for the 'ls' program. */ +#define LS_LS 1 + +/* This is for the 'dir' program. */ +#define LS_MULTI_COL 2 + +/* This is for the 'vdir' program. */ +#define LS_LONG_FORMAT 3 + +extern int ls_mode; diff --git a/src/make-prime-list.c b/src/make-prime-list.c new file mode 100644 index 0000000..d66ee3d --- /dev/null +++ b/src/make-prime-list.c @@ -0,0 +1,240 @@ +/* Factoring of uintmax_t numbers. Generation of needed tables. + + Contributed to the GNU project by Torbjörn Granlund and Niels Möller + Contains code from GNU MP. + +Copyright 2012-2023 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see https://www.gnu.org/licenses/. */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +/* Deactivate "rpl_"-prefixed definitions of these symbols. */ +#undef fclose +#undef free +#undef malloc +#undef strerror + +/* An unsigned type that is no narrower than 32 bits and no narrower + than unsigned int. It's best to make it as wide as possible. + For GCC 4.6 and later, use a heuristic to guess whether unsigned + __int128 works on your platform. If this heuristic does not work + for you, please report a bug; in the meantime compile with, e.g., + -Dwide_uint='unsigned __int128' to override the heuristic. */ +#ifndef wide_uint +# if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) && ULONG_MAX >> 31 >> 31 >> 1 != 0 +typedef unsigned __int128 wide_uint; +# else +typedef uintmax_t wide_uint; +# endif +#endif + +struct prime +{ + unsigned p; + wide_uint pinv; /* Inverse mod b = 2^{bitsize of wide_uint} */ + wide_uint lim; /* floor ((wide_uint) -1 / p) */ +}; + +ATTRIBUTE_CONST +static wide_uint +binvert (wide_uint a) +{ + wide_uint x = 0xf5397db1 >> (4 * ((a / 2) & 0x7)); + for (;;) + { + wide_uint y = 2 * x - x * x * a; + if (y == x) + return x; + x = y; + } +} + +static void +process_prime (struct prime *info, unsigned p) +{ + wide_uint max = -1; + info->p = p; + info->pinv = binvert (p); + info->lim = max / p; +} + +static void +print_wide_uint (wide_uint n, int nesting, unsigned wide_uint_bits) +{ + /* Number of bits per integer literal. 8 is too many, because + uintmax_t is 32 bits on some machines so we cannot shift by 32 bits. + So use 7. */ + int hex_digits_per_literal = 7; + int bits_per_literal = hex_digits_per_literal * 4; + + unsigned remainder = n & ((1 << bits_per_literal) - 1); + + if (n != remainder) + { + int needs_parentheses = n >> bits_per_literal >> bits_per_literal != 0; + if (needs_parentheses) + printf ("("); + print_wide_uint (n >> bits_per_literal, nesting + 1, wide_uint_bits); + if (needs_parentheses) + printf (")\n%*s", nesting + 3, ""); + printf (" << %d | ", bits_per_literal); + } + else if (nesting) + { + printf ("(uintmax_t) "); + hex_digits_per_literal + = ((wide_uint_bits - 1) % bits_per_literal) % 4 + 1; + } + + printf ("0x%0*xU", hex_digits_per_literal, remainder); +} + +/* Work around . */ +#if 13 <= __GNUC__ +# pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value" +#endif + +static void +output_primes (const struct prime *primes, unsigned nprimes) +{ + unsigned i; + unsigned p; + int is_prime; + + /* Compute wide_uint_bits by repeated shifting, rather than by + multiplying sizeof by CHAR_BIT, as this works even if the + wide_uint representation has holes. */ + unsigned wide_uint_bits = 0; + wide_uint mask = -1; + for (wide_uint_bits = 0; mask; wide_uint_bits++) + mask >>= 1; + + puts ("/* Generated file -- DO NOT EDIT */\n"); + printf ("#define WIDE_UINT_BITS %u\n", wide_uint_bits); + + for (i = 0, p = 2; i < nprimes; i++) + { + unsigned int d8 = i + 8 < nprimes ? primes[i + 8].p - primes[i].p : 0xff; + if (255 < d8) /* this happens at 668221 */ + abort (); + printf ("P (%u, %u,\n (", primes[i].p - p, d8); + print_wide_uint (primes[i].pinv, 0, wide_uint_bits); + printf ("),\n UINTMAX_MAX / %u)\n", primes[i].p); + p = primes[i].p; + } + + printf ("\n#undef FIRST_OMITTED_PRIME\n"); + + /* Find next prime */ + do + { + p += 2; + for (i = 0, is_prime = 1; is_prime; i++) + { + if (primes[i].p * primes[i].p > p) + break; + if (p * primes[i].pinv <= primes[i].lim) + { + is_prime = 0; + break; + } + } + } + while (!is_prime); + + printf ("#define FIRST_OMITTED_PRIME %u\n", p); +} + +ATTRIBUTE_MALLOC +static void * +xalloc (size_t s) +{ + void *p = malloc (s); + if (p) + return p; + + fprintf (stderr, "Virtual memory exhausted.\n"); + exit (EXIT_FAILURE); +} + +int +main (int argc, char **argv) +{ + int limit; + + char *sieve; + size_t size, i; + + struct prime *prime_list; + unsigned nprimes; + + if (argc != 2) + { + fprintf (stderr, "Usage: %s LIMIT\n" + "Produces a list of odd primes <= LIMIT\n", argv[0]); + return EXIT_FAILURE; + } + limit = atoi (argv[1]); + if (limit < 3) + return EXIT_SUCCESS; + + /* Make limit odd */ + if ( !(limit & 1)) + limit--; + + size = (limit - 1) / 2; + /* sieve[i] represents 3+2*i */ + sieve = xalloc (size); + memset (sieve, 1, size); + + prime_list = xalloc (size * sizeof (*prime_list)); + nprimes = 0; + + for (i = 0; i < size;) + { + unsigned p = 3 + 2 * i; + unsigned j; + + process_prime (&prime_list[nprimes++], p); + + for (j = (p * p - 3) / 2; j < size; j += p) + sieve[j] = 0; + + while (++i < size && sieve[i] == 0) + ; + } + + output_primes (prime_list, nprimes); + + free (sieve); + free (prime_list); + + if (ferror (stdout) + fclose (stdout)) + { + fprintf (stderr, "write error: %s\n", strerror (errno)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/src/mkdir.c b/src/mkdir.c new file mode 100644 index 0000000..7e6a911 --- /dev/null +++ b/src/mkdir.c @@ -0,0 +1,308 @@ +/* mkdir -- make directories + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* David MacKenzie */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "mkdir-p.h" +#include "modechange.h" +#include "prog-fprintf.h" +#include "quote.h" +#include "savewd.h" +#include "selinux.h" +#include "smack.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "mkdir" + +#define AUTHORS proper_name ("David MacKenzie") + +static struct option const longopts[] = +{ + {GETOPT_SELINUX_CONTEXT_OPTION_DECL}, + {"mode", required_argument, nullptr, 'm'}, + {"parents", no_argument, nullptr, 'p'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... DIRECTORY...\n"), program_name); + fputs (_("\ +Create the DIRECTORY(ies), if they do not already exist.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -m, --mode=MODE set file mode (as in chmod), not a=rwx - umask\n\ + -p, --parents no error if existing, make parent directories as needed,\n\ + with their file modes unaffected by any -m option.\n\ + -v, --verbose print a message for each created directory\n\ +"), stdout); + fputs (_("\ + -Z set SELinux security context of each created directory\n\ + to the default type\n\ + --context[=CTX] like -Z, or if CTX is specified then set the SELinux\n\ + or SMACK security context to CTX\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Options passed to subsidiary functions. */ +struct mkdir_options +{ + /* Function to make an ancestor, or nullptr if ancestors should not be + made. */ + int (*make_ancestor_function) (char const *, char const *, void *); + + /* Umask value for when making an ancestor. */ + mode_t umask_ancestor; + + /* Umask value for when making the directory itself. */ + mode_t umask_self; + + /* Mode for directory itself. */ + mode_t mode; + + /* File mode bits affected by MODE. */ + mode_t mode_bits; + + /* Set the SELinux File Context. */ + struct selabel_handle *set_security_context; + + /* If not null, format to use when reporting newly made directories. */ + char const *created_directory_format; +}; + +/* Report that directory DIR was made, if OPTIONS requests this. */ +static void +announce_mkdir (char const *dir, void *options) +{ + struct mkdir_options const *o = options; + if (o->created_directory_format) + prog_fprintf (stdout, o->created_directory_format, quoteaf (dir)); +} + +/* Make ancestor directory DIR, whose last component is COMPONENT, + with options OPTIONS. Assume the working directory is COMPONENT's + parent. Return 0 if successful and the resulting directory is + readable, 1 if successful but the resulting directory is not + readable, -1 (setting errno) otherwise. */ +static int +make_ancestor (char const *dir, char const *component, void *options) +{ + struct mkdir_options const *o = options; + + if (o->set_security_context + && defaultcon (o->set_security_context, component, S_IFDIR) < 0 + && ! ignorable_ctx_err (errno)) + error (0, errno, _("failed to set default creation context for %s"), + quoteaf (dir)); + + if (o->umask_ancestor != o->umask_self) + umask (o->umask_ancestor); + int r = mkdir (component, S_IRWXUGO); + if (o->umask_ancestor != o->umask_self) + { + int mkdir_errno = errno; + umask (o->umask_self); + errno = mkdir_errno; + } + if (r == 0) + { + r = (o->umask_ancestor & S_IRUSR) != 0; + announce_mkdir (dir, options); + } + return r; +} + +/* Process a command-line file name. */ +static int +process_dir (char *dir, struct savewd *wd, void *options) +{ + struct mkdir_options const *o = options; + + /* If possible set context before DIR created. */ + if (o->set_security_context) + { + if (! o->make_ancestor_function + && defaultcon (o->set_security_context, dir, S_IFDIR) < 0 + && ! ignorable_ctx_err (errno)) + error (0, errno, _("failed to set default creation context for %s"), + quoteaf (dir)); + } + + int ret = (make_dir_parents (dir, wd, o->make_ancestor_function, options, + o->mode, announce_mkdir, + o->mode_bits, (uid_t) -1, (gid_t) -1, true) + ? EXIT_SUCCESS + : EXIT_FAILURE); + + /* FIXME: Due to the current structure of make_dir_parents() + we don't have the facility to call defaultcon() before the + final component of DIR is created. So for now, create the + final component with the context from previous component + and here we set the context for the final component. */ + if (ret == EXIT_SUCCESS && o->set_security_context + && o->make_ancestor_function) + { + if (! restorecon (o->set_security_context, last_component (dir), false) + && ! ignorable_ctx_err (errno)) + error (0, errno, _("failed to restore context for %s"), + quoteaf (dir)); + } + + return ret; +} + +int +main (int argc, char **argv) +{ + char const *specified_mode = nullptr; + int optc; + char const *scontext = nullptr; + struct mkdir_options options; + + options.make_ancestor_function = nullptr; + options.mode = S_IRWXUGO; + options.mode_bits = 0; + options.created_directory_format = nullptr; + options.set_security_context = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "pm:vZ", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'p': + options.make_ancestor_function = make_ancestor; + break; + case 'm': + specified_mode = optarg; + break; + case 'v': /* --verbose */ + options.created_directory_format = _("created directory %s"); + break; + case 'Z': + if (is_smack_enabled ()) + { + /* We don't yet support -Z to restore context with SMACK. */ + scontext = optarg; + } + else if (is_selinux_enabled () > 0) + { + if (optarg) + scontext = optarg; + else + { + options.set_security_context = selabel_open (SELABEL_CTX_FILE, + nullptr, 0); + if (! options.set_security_context) + error (0, errno, _("warning: ignoring --context")); + } + } + else if (optarg) + { + error (0, 0, + _("warning: ignoring --context; " + "it requires an SELinux/SMACK-enabled kernel")); + } + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (optind == argc) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + /* FIXME: This assumes mkdir() is done in the same process. + If that's not always the case we would need to call this + like we do when options.set_security_context. */ + if (scontext) + { + int ret = 0; + if (is_smack_enabled ()) + ret = smack_set_label_for_self (scontext); + else + ret = setfscreatecon (scontext); + + if (ret < 0) + error (EXIT_FAILURE, errno, + _("failed to set default file creation context to %s"), + quote (scontext)); + } + + + if (options.make_ancestor_function || specified_mode) + { + mode_t umask_value = umask (0); + options.umask_ancestor = umask_value & ~(S_IWUSR | S_IXUSR); + + if (specified_mode) + { + struct mode_change *change = mode_compile (specified_mode); + if (!change) + error (EXIT_FAILURE, 0, _("invalid mode %s"), + quote (specified_mode)); + options.mode = mode_adjust (S_IRWXUGO, true, umask_value, change, + &options.mode_bits); + options.umask_self = umask_value & ~options.mode; + free (change); + } + else + { + options.mode = S_IRWXUGO; + options.umask_self = umask_value; + } + + umask (options.umask_self); + } + + return savewd_process_files (argc - optind, argv + optind, + process_dir, &options); +} diff --git a/src/mkfifo.c b/src/mkfifo.c new file mode 100644 index 0000000..67b9fae --- /dev/null +++ b/src/mkfifo.c @@ -0,0 +1,185 @@ +/* mkfifo -- make fifo's (named pipes) + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* David MacKenzie */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "modechange.h" +#include "quote.h" +#include "selinux.h" +#include "smack.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "mkfifo" + +#define AUTHORS proper_name ("David MacKenzie") + +static struct option const longopts[] = +{ + {GETOPT_SELINUX_CONTEXT_OPTION_DECL}, + {"mode", required_argument, nullptr, 'm'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... NAME...\n"), program_name); + fputs (_("\ +Create named pipes (FIFOs) with the given NAMEs.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -m, --mode=MODE set file permission bits to MODE, not a=rw - umask\n\ +"), stdout); + fputs (_("\ + -Z set the SELinux security context to default type\n\ + --context[=CTX] like -Z, or if CTX is specified then set the SELinux\n\ + or SMACK security context to CTX\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + mode_t newmode; + char const *specified_mode = nullptr; + int exit_status = EXIT_SUCCESS; + int optc; + char const *scontext = nullptr; + struct selabel_handle *set_security_context = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "m:Z", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'm': + specified_mode = optarg; + break; + case 'Z': + if (is_smack_enabled ()) + { + /* We don't yet support -Z to restore context with SMACK. */ + scontext = optarg; + } + else if (is_selinux_enabled () > 0) + { + if (optarg) + scontext = optarg; + else + { + set_security_context = selabel_open (SELABEL_CTX_FILE, + nullptr, 0); + if (! set_security_context) + error (0, errno, _("warning: ignoring --context")); + } + } + else if (optarg) + { + error (0, 0, + _("warning: ignoring --context; " + "it requires an SELinux/SMACK-enabled kernel")); + } + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (optind == argc) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (scontext) + { + int ret = 0; + if (is_smack_enabled ()) + ret = smack_set_label_for_self (scontext); + else + ret = setfscreatecon (scontext); + + if (ret < 0) + error (EXIT_FAILURE, errno, + _("failed to set default file creation context to %s"), + quote (scontext)); + } + + newmode = MODE_RW_UGO; + if (specified_mode) + { + mode_t umask_value; + struct mode_change *change = mode_compile (specified_mode); + if (!change) + error (EXIT_FAILURE, 0, _("invalid mode")); + umask_value = umask (0); + umask (umask_value); + newmode = mode_adjust (newmode, false, umask_value, change, nullptr); + free (change); + if (newmode & ~S_IRWXUGO) + error (EXIT_FAILURE, 0, + _("mode must specify only file permission bits")); + } + + for (; optind < argc; ++optind) + { + if (set_security_context) + defaultcon (set_security_context, argv[optind], S_IFIFO); + if (mkfifo (argv[optind], newmode) != 0) + { + error (0, errno, _("cannot create fifo %s"), quoteaf (argv[optind])); + exit_status = EXIT_FAILURE; + } + else if (specified_mode && lchmod (argv[optind], newmode) != 0) + { + error (0, errno, _("cannot set permissions of %s"), + quoteaf (argv[optind])); + exit_status = EXIT_FAILURE; + } + } + + return exit_status; +} diff --git a/src/mknod.c b/src/mknod.c new file mode 100644 index 0000000..576d1b0 --- /dev/null +++ b/src/mknod.c @@ -0,0 +1,278 @@ +/* mknod -- make special files + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "modechange.h" +#include "quote.h" +#include "selinux.h" +#include "smack.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "mknod" + +#define AUTHORS proper_name ("David MacKenzie") + +static struct option const longopts[] = +{ + {GETOPT_SELINUX_CONTEXT_OPTION_DECL}, + {"mode", required_argument, nullptr, 'm'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... NAME TYPE [MAJOR MINOR]\n"), + program_name); + fputs (_("\ +Create the special file NAME of the given TYPE.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -m, --mode=MODE set file permission bits to MODE, not a=rw - umask\n\ +"), stdout); + fputs (_("\ + -Z set the SELinux security context to default type\n\ + --context[=CTX] like -Z, or if CTX is specified then set the SELinux\n\ + or SMACK security context to CTX\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Both MAJOR and MINOR must be specified when TYPE is b, c, or u, and they\n\ +must be omitted when TYPE is p. If MAJOR or MINOR begins with 0x or 0X,\n\ +it is interpreted as hexadecimal; otherwise, if it begins with 0, as octal;\n\ +otherwise, as decimal. TYPE may be:\n\ +"), stdout); + fputs (_("\ +\n\ + b create a block (buffered) special file\n\ + c, u create a character (unbuffered) special file\n\ + p create a FIFO\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + mode_t newmode; + char const *specified_mode = nullptr; + int optc; + size_t expected_operands; + mode_t node_type; + char const *scontext = nullptr; + struct selabel_handle *set_security_context = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "m:Z", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'm': + specified_mode = optarg; + break; + case 'Z': + if (is_smack_enabled ()) + { + /* We don't yet support -Z to restore context with SMACK. */ + scontext = optarg; + } + else if (is_selinux_enabled () > 0) + { + if (optarg) + scontext = optarg; + else + { + set_security_context = selabel_open (SELABEL_CTX_FILE, + nullptr, 0); + if (! set_security_context) + error (0, errno, _("warning: ignoring --context")); + } + } + else if (optarg) + { + error (0, 0, + _("warning: ignoring --context; " + "it requires an SELinux/SMACK-enabled kernel")); + } + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + newmode = MODE_RW_UGO; + if (specified_mode) + { + mode_t umask_value; + struct mode_change *change = mode_compile (specified_mode); + if (!change) + error (EXIT_FAILURE, 0, _("invalid mode")); + umask_value = umask (0); + umask (umask_value); + newmode = mode_adjust (newmode, false, umask_value, change, nullptr); + free (change); + if (newmode & ~S_IRWXUGO) + error (EXIT_FAILURE, 0, + _("mode must specify only file permission bits")); + } + + /* If the number of arguments is 0 or 1, + or (if it's 2 or more and the second one starts with 'p'), then there + must be exactly two operands. Otherwise, there must be four. */ + expected_operands = (argc <= optind + || (optind + 1 < argc && argv[optind + 1][0] == 'p') + ? 2 : 4); + + if (argc - optind < expected_operands) + { + if (argc <= optind) + error (0, 0, _("missing operand")); + else + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + if (expected_operands == 4 && argc - optind == 2) + fprintf (stderr, "%s\n", + _("Special files require major and minor device numbers.")); + usage (EXIT_FAILURE); + } + + if (expected_operands < argc - optind) + { + error (0, 0, _("extra operand %s"), + quote (argv[optind + expected_operands])); + if (expected_operands == 2 && argc - optind == 4) + fprintf (stderr, "%s\n", + _("Fifos do not have major and minor device numbers.")); + usage (EXIT_FAILURE); + } + + if (scontext) + { + int ret = 0; + if (is_smack_enabled ()) + ret = smack_set_label_for_self (scontext); + else + ret = setfscreatecon (scontext); + + if (ret < 0) + error (EXIT_FAILURE, errno, + _("failed to set default file creation context to %s"), + quote (scontext)); + } + + /* Only check the first character, to allow mnemonic usage like + 'mknod /dev/rst0 character 18 0'. */ + + switch (argv[optind + 1][0]) + { + case 'b': /* 'block' or 'buffered' */ +#ifndef S_IFBLK + error (EXIT_FAILURE, 0, _("block special files not supported")); +#else + node_type = S_IFBLK; +#endif + goto block_or_character; + + case 'c': /* 'character' */ + case 'u': /* 'unbuffered' */ +#ifndef S_IFCHR + error (EXIT_FAILURE, 0, _("character special files not supported")); +#else + node_type = S_IFCHR; +#endif + goto block_or_character; + + block_or_character: + { + char const *s_major = argv[optind + 2]; + char const *s_minor = argv[optind + 3]; + uintmax_t i_major, i_minor; + dev_t device; + + if (xstrtoumax (s_major, nullptr, 0, &i_major, "") != LONGINT_OK + || i_major != (major_t) i_major) + error (EXIT_FAILURE, 0, + _("invalid major device number %s"), quote (s_major)); + + if (xstrtoumax (s_minor, nullptr, 0, &i_minor, "") != LONGINT_OK + || i_minor != (minor_t) i_minor) + error (EXIT_FAILURE, 0, + _("invalid minor device number %s"), quote (s_minor)); + + device = makedev (i_major, i_minor); +#ifdef NODEV + if (device == NODEV) + error (EXIT_FAILURE, 0, _("invalid device %s %s"), + s_major, s_minor); +#endif + + if (set_security_context) + defaultcon (set_security_context, argv[optind], node_type); + + if (mknod (argv[optind], newmode | node_type, device) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (argv[optind])); + } + break; + + case 'p': /* 'pipe' */ + if (set_security_context) + defaultcon (set_security_context, argv[optind], S_IFIFO); + if (mkfifo (argv[optind], newmode) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (argv[optind])); + break; + + default: + error (0, 0, _("invalid device type %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + if (specified_mode && lchmod (argv[optind], newmode) != 0) + error (EXIT_FAILURE, errno, _("cannot set permissions of %s"), + quoteaf (argv[optind])); + + return EXIT_SUCCESS; +} diff --git a/src/mktemp.c b/src/mktemp.c new file mode 100644 index 0000000..992d4a3 --- /dev/null +++ b/src/mktemp.c @@ -0,0 +1,342 @@ +/* Create a temporary file or directory, safely. + Copyright (C) 2007-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering and Eric Blake. */ + +#include +#include +#include + +#include "system.h" + +#include "close-stream.h" +#include "filenamecat.h" +#include "quote.h" +#include "tempname.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "mktemp" + +#define AUTHORS \ + proper_name ("Jim Meyering"), \ + proper_name ("Eric Blake") + +static char const *default_template = "tmp.XXXXXXXXXX"; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + SUFFIX_OPTION = CHAR_MAX + 1, +}; + +static struct option const longopts[] = +{ + {"directory", no_argument, nullptr, 'd'}, + {"quiet", no_argument, nullptr, 'q'}, + {"dry-run", no_argument, nullptr, 'u'}, + {"suffix", required_argument, nullptr, SUFFIX_OPTION}, + {"tmpdir", optional_argument, nullptr, 'p'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [TEMPLATE]\n"), program_name); + fputs (_("\ +Create a temporary file or directory, safely, and print its name.\n\ +TEMPLATE must contain at least 3 consecutive 'X's in last component.\n\ +If TEMPLATE is not specified, use tmp.XXXXXXXXXX, and --tmpdir is implied.\n\ +"), stdout); + fputs (_("\ +Files are created u+rw, and directories u+rwx, minus umask restrictions.\n\ +"), stdout); + fputs ("\n", stdout); + fputs (_("\ + -d, --directory create a directory, not a file\n\ + -u, --dry-run do not create anything; merely print a name (unsafe)\n\ + -q, --quiet suppress diagnostics about file/dir-creation failure\n\ +"), stdout); + fputs (_("\ + --suffix=SUFF append SUFF to TEMPLATE; SUFF must not contain a slash.\n\ + This option is implied if TEMPLATE does not end in X\n\ +"), stdout); + fputs (_("\ + -p DIR, --tmpdir[=DIR] interpret TEMPLATE relative to DIR; if DIR is not\n\ + specified, use $TMPDIR if set, else /tmp. With\n\ + this option, TEMPLATE must not be an absolute name;\n\ + unlike with -t, TEMPLATE may contain slashes, but\n\ + mktemp creates only the final component\n\ +"), stdout); + fputs (_("\ + -t interpret TEMPLATE as a single file name component,\n\ + relative to a directory: $TMPDIR, if set; else the\n\ + directory specified via -p; else /tmp [deprecated]\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +static size_t +count_consecutive_X_s (char const *s, size_t len) +{ + size_t n = 0; + for ( ; len && s[len - 1] == 'X'; len--) + ++n; + return n; +} + +static int +mkstemp_len (char *tmpl, size_t suff_len, size_t x_len, bool dry_run) +{ + return gen_tempname_len (tmpl, suff_len, 0, dry_run ? GT_NOCREATE : GT_FILE, + x_len); +} + +static int +mkdtemp_len (char *tmpl, size_t suff_len, size_t x_len, bool dry_run) +{ + return gen_tempname_len (tmpl, suff_len, 0, dry_run ? GT_NOCREATE : GT_DIR, + x_len); +} + +/* True if we have already closed standard output. */ +static bool stdout_closed; + +/* Avoid closing stdout twice. Since we conditionally call + close_stream (stdout) in order to decide whether to clean up a + temporary file, the exit hook needs to know whether to do all of + close_stdout or just the stderr half. */ +static void +maybe_close_stdout (void) +{ + if (!stdout_closed) + close_stdout (); + else if (close_stream (stderr) != 0) + _exit (EXIT_FAILURE); +} + +int +main (int argc, char **argv) +{ + char const *dest_dir; + char const *dest_dir_arg = nullptr; + bool suppress_file_err = false; + int c; + char *template; + char *suffix = nullptr; + bool use_dest_dir = false; + bool deprecated_t_option = false; + bool create_directory = false; + bool dry_run = false; + int status = EXIT_SUCCESS; + size_t x_count; + size_t suffix_len; + char *dest_name; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (maybe_close_stdout); + + while ((c = getopt_long (argc, argv, "dp:qtuV", longopts, nullptr)) != -1) + { + switch (c) + { + case 'd': + create_directory = true; + break; + case 'p': + dest_dir_arg = optarg; + use_dest_dir = true; + break; + case 'q': + suppress_file_err = true; + break; + case 't': + use_dest_dir = true; + deprecated_t_option = true; + break; + case 'u': + dry_run = true; + break; + + case SUFFIX_OPTION: + suffix = optarg; + break; + + case_GETOPT_HELP_CHAR; + + case 'V': /* Undocumented alias, for compatibility with the original + mktemp program. */ + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + int n_args = argc - optind; + if (2 <= n_args) + { + error (0, 0, _("too many templates")); + usage (EXIT_FAILURE); + } + + if (n_args == 0) + { + use_dest_dir = true; + template = (char *) default_template; + } + else + { + template = argv[optind]; + } + + if (suffix) + { + size_t len = strlen (template); + if (!len || template[len - 1] != 'X') + { + error (EXIT_FAILURE, 0, + _("with --suffix, template %s must end in X"), + quote (template)); + } + suffix_len = strlen (suffix); + dest_name = xcharalloc (len + suffix_len + 1); + memcpy (dest_name, template, len); + memcpy (dest_name + len, suffix, suffix_len + 1); + template = dest_name; + suffix = dest_name + len; + } + else + { + template = xstrdup (template); + suffix = strrchr (template, 'X'); + if (!suffix) + suffix = strchr (template, '\0'); + else + suffix++; + suffix_len = strlen (suffix); + } + + /* At this point, template is malloc'd, and suffix points into template. */ + if (suffix_len && last_component (suffix) != suffix) + { + error (EXIT_FAILURE, 0, + _("invalid suffix %s, contains directory separator"), + quote (suffix)); + } + x_count = count_consecutive_X_s (template, suffix - template); + if (x_count < 3) + error (EXIT_FAILURE, 0, _("too few X's in template %s"), quote (template)); + + if (use_dest_dir) + { + if (deprecated_t_option) + { + char *env = getenv ("TMPDIR"); + if (env && *env) + dest_dir = env; + else if (dest_dir_arg && *dest_dir_arg) + dest_dir = dest_dir_arg; + else + dest_dir = "/tmp"; + + if (last_component (template) != template) + error (EXIT_FAILURE, 0, + _("invalid template, %s, contains directory separator"), + quote (template)); + } + else + { + if (dest_dir_arg && *dest_dir_arg) + dest_dir = dest_dir_arg; + else + { + char *env = getenv ("TMPDIR"); + dest_dir = (env && *env ? env : "/tmp"); + } + if (IS_ABSOLUTE_FILE_NAME (template)) + error (EXIT_FAILURE, 0, + _("invalid template, %s; with --tmpdir," + " it may not be absolute"), + quote (template)); + } + + dest_name = file_name_concat (dest_dir, template, nullptr); + free (template); + template = dest_name; + /* Note that suffix is now invalid. */ + } + + /* Make a copy to be used in case of diagnostic, since failing + mkstemp may leave the buffer in an undefined state. */ + dest_name = xstrdup (template); + + if (create_directory) + { + int err = mkdtemp_len (dest_name, suffix_len, x_count, dry_run); + if (err != 0) + { + if (!suppress_file_err) + error (0, errno, _("failed to create directory via template %s"), + quote (template)); + status = EXIT_FAILURE; + } + } + else + { + int fd = mkstemp_len (dest_name, suffix_len, x_count, dry_run); + if (fd < 0 || (!dry_run && close (fd) != 0)) + { + if (!suppress_file_err) + error (0, errno, _("failed to create file via template %s"), + quote (template)); + status = EXIT_FAILURE; + } + } + + if (status == EXIT_SUCCESS) + { + puts (dest_name); + /* If we created a file, but then failed to output the file + name, we should clean up the mess before failing. */ + if (!dry_run && ((stdout_closed = true), close_stream (stdout) != 0)) + { + int saved_errno = errno; + remove (dest_name); + if (!suppress_file_err) + error (0, saved_errno, _("write error")); + status = EXIT_FAILURE; + } + } + + main_exit (status); +} diff --git a/src/mv.c b/src/mv.c new file mode 100644 index 0000000..4fac6ea --- /dev/null +++ b/src/mv.c @@ -0,0 +1,556 @@ +/* mv -- move or rename files + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Mike Parker, David MacKenzie, and Jim Meyering */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "backupfile.h" +#include "copy.h" +#include "cp-hash.h" +#include "filenamecat.h" +#include "remove.h" +#include "renameatu.h" +#include "root-dev-ino.h" +#include "targetdir.h" +#include "priv-set.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "mv" + +#define AUTHORS \ + proper_name ("Mike Parker"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEBUG_OPTION = CHAR_MAX + 1, + NO_COPY_OPTION, + STRIP_TRAILING_SLASHES_OPTION +}; + +static char const *const update_type_string[] = +{ + "all", "none", "older", nullptr +}; +static enum Update_type const update_type[] = +{ + UPDATE_ALL, UPDATE_NONE, UPDATE_OLDER, +}; +ARGMATCH_VERIFY (update_type_string, update_type); + +static struct option const long_options[] = +{ + {"backup", optional_argument, nullptr, 'b'}, + {"context", no_argument, nullptr, 'Z'}, + {"debug", no_argument, nullptr, DEBUG_OPTION}, + {"force", no_argument, nullptr, 'f'}, + {"interactive", no_argument, nullptr, 'i'}, + {"no-clobber", no_argument, nullptr, 'n'}, + {"no-copy", no_argument, nullptr, NO_COPY_OPTION}, + {"no-target-directory", no_argument, nullptr, 'T'}, + {"strip-trailing-slashes", no_argument, nullptr, + STRIP_TRAILING_SLASHES_OPTION}, + {"suffix", required_argument, nullptr, 'S'}, + {"target-directory", required_argument, nullptr, 't'}, + {"update", optional_argument, nullptr, 'u'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static void +rm_option_init (struct rm_options *x) +{ + x->ignore_missing_files = false; + x->remove_empty_directories = true; + x->recursive = true; + x->one_file_system = false; + + /* Should we prompt for removal, too? No. Prompting for the 'move' + part is enough. It implies removal. */ + x->interactive = RMI_NEVER; + x->stdin_tty = false; + + x->verbose = false; + + /* Since this program may well have to process additional command + line arguments after any call to 'rm', that function must preserve + the initial working directory, in case one of those is a + '.'-relative name. */ + x->require_restore_cwd = true; + + { + static struct dev_ino dev_ino_buf; + x->root_dev_ino = get_root_dev_ino (&dev_ino_buf); + if (x->root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + } + + x->preserve_all_root = false; +} + +static void +cp_option_init (struct cp_options *x) +{ + bool selinux_enabled = (0 < is_selinux_enabled ()); + + cp_options_default (x); + x->copy_as_regular = false; /* FIXME: maybe make this an option */ + x->reflink_mode = REFLINK_AUTO; + x->dereference = DEREF_NEVER; + x->unlink_dest_before_opening = false; + x->unlink_dest_after_failed_open = false; + x->hard_link = false; + x->interactive = I_UNSPECIFIED; + x->move_mode = true; + x->install_mode = false; + x->one_file_system = false; + x->preserve_ownership = true; + x->preserve_links = true; + x->preserve_mode = true; + x->preserve_timestamps = true; + x->explicit_no_preserve_mode= false; + x->preserve_security_context = selinux_enabled; + x->set_security_context = nullptr; + x->reduce_diagnostics = false; + x->data_copy_required = true; + x->require_preserve = false; /* FIXME: maybe make this an option */ + x->require_preserve_context = false; + x->preserve_xattr = true; + x->require_preserve_xattr = false; + x->recursive = true; + x->sparse_mode = SPARSE_AUTO; /* FIXME: maybe make this an option */ + x->symbolic_link = false; + x->set_mode = false; + x->mode = 0; + x->stdin_tty = isatty (STDIN_FILENO); + + x->open_dangling_dest_symlink = false; + x->update = false; + x->verbose = false; + x->dest_info = nullptr; + x->src_info = nullptr; +} + +/* Move SOURCE onto DEST aka DEST_DIRFD+DEST_RELNAME. + Handle cross-file-system moves. + If SOURCE is a directory, DEST must not exist. + Return true if successful. */ + +static bool +do_move (char const *source, char const *dest, + int dest_dirfd, char const *dest_relname, const struct cp_options *x) +{ + bool copy_into_self; + bool rename_succeeded; + bool ok = copy (source, dest, dest_dirfd, dest_relname, 0, x, + ©_into_self, &rename_succeeded); + + if (ok) + { + char const *dir_to_remove; + if (copy_into_self) + { + /* In general, when copy returns with copy_into_self set, SOURCE is + the same as, or a parent of DEST. In this case we know it's a + parent. It doesn't make sense to move a directory into itself, and + besides in some situations doing so would give highly unintuitive + results. Run this 'mkdir b; touch a c; mv * b' in an empty + directory. Here's the result of running echo $(find b -print): + b b/a b/b b/b/a b/c. Notice that only file 'a' was copied + into b/b. Handle this by giving a diagnostic, removing the + copied-into-self directory, DEST ('b/b' in the example), + and failing. */ + + dir_to_remove = nullptr; + ok = false; + } + else if (rename_succeeded) + { + /* No need to remove anything. SOURCE was successfully + renamed to DEST. Or the user declined to rename a file. */ + dir_to_remove = nullptr; + } + else + { + /* This may mean SOURCE and DEST referred to different devices. + It may also conceivably mean that even though they referred + to the same device, rename wasn't implemented for that device. + + E.g., (from Joel N. Weber), + [...] there might someday be cases where you can't rename + but you can copy where the device name is the same, especially + on Hurd. Consider an ftpfs with a primitive ftp server that + supports uploading, downloading and deleting, but not renaming. + + Also, note that comparing device numbers is not a reliable + check for 'can-rename'. Some systems can be set up so that + files from many different physical devices all have the same + st_dev field. This is a feature of some NFS mounting + configurations. + + We reach this point if SOURCE has been successfully copied + to DEST. Now we have to remove SOURCE. + + This function used to resort to copying only when rename + failed and set errno to EXDEV. */ + + dir_to_remove = source; + } + + if (dir_to_remove != nullptr) + { + struct rm_options rm_options; + enum RM_status status; + char const *dir[2]; + + rm_option_init (&rm_options); + rm_options.verbose = x->verbose; + dir[0] = dir_to_remove; + dir[1] = nullptr; + + status = rm ((void *) dir, &rm_options); + affirm (VALID_STATUS (status)); + if (status == RM_ERROR) + ok = false; + } + } + + return ok; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [-T] SOURCE DEST\n\ + or: %s [OPTION]... SOURCE... DIRECTORY\n\ + or: %s [OPTION]... -t DIRECTORY SOURCE...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Rename SOURCE to DEST, or move SOURCE(s) to DIRECTORY.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + --backup[=CONTROL] make a backup of each existing destination file\ +\n\ + -b like --backup but does not accept an argument\n\ +"), stdout); + fputs (_("\ + --debug explain how a file is copied. Implies -v\n\ +"), stdout); + fputs (_("\ + -f, --force do not prompt before overwriting\n\ + -i, --interactive prompt before overwrite\n\ + -n, --no-clobber do not overwrite an existing file\n\ +If you specify more than one of -i, -f, -n, only the final one takes effect.\n\ +"), stdout); + fputs (_("\ + --no-copy do not copy if renaming fails\n\ + --strip-trailing-slashes remove any trailing slashes from each SOURCE\n\ + argument\n\ + -S, --suffix=SUFFIX override the usual backup suffix\n\ +"), stdout); + fputs (_("\ + -t, --target-directory=DIRECTORY move all SOURCE arguments into DIRECTORY\n\ + -T, --no-target-directory treat DEST as a normal file\n\ +"), stdout); + fputs (_("\ + --update[=UPDATE] control which existing files are updated;\n\ + UPDATE={all,none,older(default)}. See below\n\ + -u equivalent to --update[=older]\n\ +"), stdout); + fputs (_("\ + -v, --verbose explain what is being done\n\ + -Z, --context set SELinux security context of destination\n\ + file to default type\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_update_parameters_note (); + emit_backup_suffix_note (); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int c; + bool ok; + bool make_backups = false; + char const *backup_suffix = nullptr; + char *version_control_string = nullptr; + struct cp_options x; + bool remove_trailing_slashes = false; + char const *target_directory = nullptr; + bool no_target_directory = false; + int n_files; + char **file; + bool selinux_enabled = (0 < is_selinux_enabled ()); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdin); + + cp_option_init (&x); + + /* Try to disable the ability to unlink a directory. */ + priv_set_remove_linkdir (); + + while ((c = getopt_long (argc, argv, "bfint:uvS:TZ", long_options, nullptr)) + != -1) + { + switch (c) + { + case 'b': + make_backups = true; + if (optarg) + version_control_string = optarg; + break; + case 'f': + x.interactive = I_ALWAYS_YES; + break; + case 'i': + x.interactive = I_ASK_USER; + break; + case 'n': + x.interactive = I_ALWAYS_NO; + break; + case DEBUG_OPTION: + x.debug = x.verbose = true; + break; + case NO_COPY_OPTION: + x.no_copy = true; + break; + case STRIP_TRAILING_SLASHES_OPTION: + remove_trailing_slashes = true; + break; + case 't': + if (target_directory) + error (EXIT_FAILURE, 0, _("multiple target directories specified")); + target_directory = optarg; + break; + case 'T': + no_target_directory = true; + break; + case 'u': + if (optarg == nullptr) + x.update = true; + else if (x.interactive != I_ALWAYS_NO) /* -n takes precedence. */ + { + enum Update_type update_opt; + update_opt = XARGMATCH ("--update", optarg, + update_type_string, update_type); + if (update_opt == UPDATE_ALL) + { + /* Default mv operation. */ + x.update = false; + x.interactive = I_UNSPECIFIED; + } + else if (update_opt == UPDATE_NONE) + { + x.update = false; + x.interactive = I_ALWAYS_SKIP; + } + else if (update_opt == UPDATE_OLDER) + { + x.update = true; + x.interactive = I_UNSPECIFIED; + } + } + break; + case 'v': + x.verbose = true; + break; + case 'S': + make_backups = true; + backup_suffix = optarg; + break; + case 'Z': + /* As a performance enhancement, don't even bother trying + to "restorecon" when not on an selinux-enabled kernel. */ + if (selinux_enabled) + { + x.preserve_security_context = false; + x.set_security_context = selabel_open (SELABEL_CTX_FILE, + nullptr, 0); + if (! x.set_security_context) + error (0, errno, _("warning: ignoring --context")); + } + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + n_files = argc - optind; + file = argv + optind; + + if (n_files <= !target_directory) + { + if (n_files <= 0) + error (0, 0, _("missing file operand")); + else + error (0, 0, _("missing destination file operand after %s"), + quoteaf (file[0])); + usage (EXIT_FAILURE); + } + + struct stat sb; + sb.st_mode = 0; + int target_dirfd = AT_FDCWD; + if (no_target_directory) + { + if (target_directory) + error (EXIT_FAILURE, 0, + _("cannot combine --target-directory (-t) " + "and --no-target-directory (-T)")); + if (2 < n_files) + { + error (0, 0, _("extra operand %s"), quoteaf (file[2])); + usage (EXIT_FAILURE); + } + } + else if (target_directory) + { + target_dirfd = target_directory_operand (target_directory, &sb); + if (! target_dirfd_valid (target_dirfd)) + error (EXIT_FAILURE, errno, _("target directory %s"), + quoteaf (target_directory)); + } + else + { + char const *lastfile = file[n_files - 1]; + if (n_files == 2) + x.rename_errno = (renameatu (AT_FDCWD, file[0], AT_FDCWD, lastfile, + RENAME_NOREPLACE) + ? errno : 0); + if (x.rename_errno != 0) + { + int fd = target_directory_operand (lastfile, &sb); + if (target_dirfd_valid (fd)) + { + x.rename_errno = -1; + target_dirfd = fd; + target_directory = lastfile; + n_files--; + } + else + { + /* The last operand LASTFILE cannot be opened as a directory. + If there are more than two operands, report an error. + + Also, report an error if LASTFILE is known to be a directory + even though it could not be opened, which can happen if + opening failed with EACCES on a platform lacking O_PATH. + In this case use stat to test whether LASTFILE is a + directory, in case opening a non-directory with (O_SEARCH + | O_DIRECTORY) failed with EACCES not ENOTDIR. */ + int err = errno; + if (2 < n_files + || (O_PATHSEARCH == O_SEARCH && err == EACCES + && (sb.st_mode != 0 || stat (lastfile, &sb) == 0) + && S_ISDIR (sb.st_mode))) + error (EXIT_FAILURE, err, _("target %s"), quoteaf (lastfile)); + } + } + } + + /* Handle the ambiguity in the semantics of mv induced by the + varying semantics of the rename function. POSIX-compatible + systems (e.g., GNU/Linux) have a rename function that honors a + trailing slash in the source, while others (Solaris 9, FreeBSD + 7.2) have a rename function that ignores it. */ + if (remove_trailing_slashes) + for (int i = 0; i < n_files; i++) + strip_trailing_slashes (file[i]); + + if (x.interactive == I_ALWAYS_NO) + x.update = false; + + if (make_backups && x.interactive == I_ALWAYS_NO) + { + error (0, 0, + _("options --backup and --no-clobber are mutually exclusive")); + usage (EXIT_FAILURE); + } + + x.backup_type = (make_backups + ? xget_version (_("backup type"), + version_control_string) + : no_backups); + set_simple_backup_suffix (backup_suffix); + + hash_init (); + + if (target_directory) + { + /* Initialize the hash table only if we'll need it. + The problem it is used to detect can arise only if there are + two or more files to move. */ + if (2 <= n_files) + dest_info_init (&x); + + ok = true; + for (int i = 0; i < n_files; ++i) + { + x.last_file = i + 1 == n_files; + char const *source = file[i]; + char const *source_basename = last_component (source); + char *dest_relname; + char *dest = file_name_concat (target_directory, source_basename, + &dest_relname); + strip_trailing_slashes (dest_relname); + ok &= do_move (source, dest, target_dirfd, dest_relname, &x); + free (dest); + } + } + else + { + x.last_file = true; + ok = do_move (file[0], file[1], AT_FDCWD, file[1], &x); + } + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/nice.c b/src/nice.c new file mode 100644 index 0000000..e573c24 --- /dev/null +++ b/src/nice.c @@ -0,0 +1,220 @@ +/* nice -- run a program with modified niceness + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* David MacKenzie */ + +#include +#include +#include +#include + +#include "system.h" + +#if ! HAVE_NICE +/* Include this after "system.h" so we're sure to have definitions + (from time.h or sys/time.h) required for e.g. the ru_utime member. */ +# include +#endif + +#include "quote.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "nice" + +#define AUTHORS proper_name ("David MacKenzie") + +#if HAVE_NICE +# define GET_NICENESS() nice (0) +#else +# define GET_NICENESS() getpriority (PRIO_PROCESS, 0) +#endif + +#ifndef NZERO +# define NZERO 20 +#endif + +/* This is required for Darwin Kernel Version 7.7.0. */ +#if NZERO == 0 +# undef NZERO +# define NZERO 20 +#endif + +static struct option const longopts[] = +{ + {"adjustment", required_argument, nullptr, 'n'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION] [COMMAND [ARG]...]\n"), program_name); + printf (_("\ +Run COMMAND with an adjusted niceness, which affects process scheduling.\n\ +With no COMMAND, print the current niceness. Niceness values range from\n\ +%d (most favorable to the process) to %d (least favorable to the process).\n\ +"), + - NZERO, NZERO - 1); + + emit_mandatory_arg_note (); + + fputs (_("\ + -n, --adjustment=N add integer N to the niceness (default 10)\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_exec_status (PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static bool +perm_related_errno (int err) +{ + return err == EACCES || err == EPERM; +} + +int +main (int argc, char **argv) +{ + int current_niceness; + int adjustment = 10; + char const *adjustment_given = nullptr; + bool ok; + int i; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXIT_CANCELED); + atexit (close_stdout); + + for (i = 1; i < argc; /* empty */) + { + char const *s = argv[i]; + + if (s[0] == '-' && ISDIGIT (s[1 + (s[1] == '-' || s[1] == '+')])) + { + adjustment_given = s + 1; + ++i; + } + else + { + int c; + int fake_argc = argc - (i - 1); + char **fake_argv = argv + (i - 1); + + /* Ensure that any getopt diagnostics use the right name. */ + fake_argv[0] = argv[0]; + + /* Initialize getopt_long's internal state. */ + optind = 0; + + c = getopt_long (fake_argc, fake_argv, "+n:", longopts, nullptr); + i += optind - 1; + + switch (c) + { + case 'n': + adjustment_given = optarg; + break; + + case -1: + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_CANCELED); + break; + } + + if (c == -1) + break; + } + } + + if (adjustment_given) + { + /* If the requested adjustment is outside the valid range, + silently bring it to just within range; this mimics what + "setpriority" and "nice" do. */ + enum { MIN_ADJUSTMENT = 1 - 2 * NZERO, MAX_ADJUSTMENT = 2 * NZERO - 1 }; + long int tmp; + if (LONGINT_OVERFLOW < xstrtol (adjustment_given, nullptr, 10, &tmp, "")) + error (EXIT_CANCELED, 0, _("invalid adjustment %s"), + quote (adjustment_given)); + adjustment = MAX (MIN_ADJUSTMENT, MIN (tmp, MAX_ADJUSTMENT)); + } + + if (i == argc) + { + if (adjustment_given) + { + error (0, 0, _("a command must be given with an adjustment")); + usage (EXIT_CANCELED); + } + /* No command given; print the niceness. */ + errno = 0; + current_niceness = GET_NICENESS (); + if (current_niceness == -1 && errno != 0) + error (EXIT_CANCELED, errno, _("cannot get niceness")); + printf ("%d\n", current_niceness); + return EXIT_SUCCESS; + } + + errno = 0; +#if HAVE_NICE + ok = (nice (adjustment) != -1 || errno == 0); +#else + current_niceness = GET_NICENESS (); + if (current_niceness == -1 && errno != 0) + error (EXIT_CANCELED, errno, _("cannot get niceness")); + ok = (setpriority (PRIO_PROCESS, 0, current_niceness + adjustment) == 0); +#endif + if (!ok) + { + error (perm_related_errno (errno) ? 0 + : EXIT_CANCELED, errno, _("cannot set niceness")); + /* error() flushes stderr, but does not check for write failure. + Normally, we would catch this via our atexit() hook of + close_stdout, but execvp() gets in the way. If stderr + encountered a write failure, there is no need to try calling + error() again. */ + if (ferror (stderr)) + return EXIT_CANCELED; + } + + execvp (argv[i], &argv[i]); + + int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; + error (0, errno, "%s", quote (argv[i])); + return exit_status; +} diff --git a/src/nl.c b/src/nl.c new file mode 100644 index 0000000..e41e695 --- /dev/null +++ b/src/nl.c @@ -0,0 +1,619 @@ +/* nl -- number lines of files + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Scott Bartram (nancy!scott@uunet.uu.net) + Revised by David MacKenzie (djm@gnu.ai.mit.edu) */ + +#include + +#include +#include +#include +#include + +#include "system.h" + +#include + +#include "fadvise.h" +#include "linebuffer.h" +#include "quote.h" +#include "xdectoint.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "nl" + +#define AUTHORS \ + proper_name ("Scott Bartram"), \ + proper_name ("David MacKenzie") + +/* Line-number formats. They are given an int width, an intmax_t + value, and a string separator. */ + +/* Right justified, no leading zeroes. */ +static char const FORMAT_RIGHT_NOLZ[] = "%*" PRIdMAX "%s"; + +/* Right justified, leading zeroes. */ +static char const FORMAT_RIGHT_LZ[] = "%0*" PRIdMAX "%s"; + +/* Left justified, no leading zeroes. */ +static char const FORMAT_LEFT[] = "%-*" PRIdMAX "%s"; + +/* Default section delimiter characters. */ +static char DEFAULT_SECTION_DELIMITERS[] = "\\:"; + +/* Types of input lines: either one of the section delimiters, + or text to output. */ +enum section +{ + Header, Body, Footer, Text +}; + +/* Format of body lines (-b). */ +static char const *body_type = "t"; + +/* Format of header lines (-h). */ +static char const *header_type = "n"; + +/* Format of footer lines (-f). */ +static char const *footer_type = "n"; + +/* Format currently being used (body, header, or footer). */ +static char const *current_type; + +/* Regex for body lines to number (-bp). */ +static struct re_pattern_buffer body_regex; + +/* Regex for header lines to number (-hp). */ +static struct re_pattern_buffer header_regex; + +/* Regex for footer lines to number (-fp). */ +static struct re_pattern_buffer footer_regex; + +/* Fastmaps for the above. */ +static char body_fastmap[UCHAR_MAX + 1]; +static char header_fastmap[UCHAR_MAX + 1]; +static char footer_fastmap[UCHAR_MAX + 1]; + +/* Pointer to current regex, if any. */ +static struct re_pattern_buffer *current_regex = nullptr; + +/* Separator string to print after line number (-s). */ +static char const *separator_str = "\t"; + +/* Input section delimiter string (-d). */ +static char *section_del = DEFAULT_SECTION_DELIMITERS; + +/* Header delimiter string. */ +static char *header_del = nullptr; + +/* Header section delimiter length. */ +static size_t header_del_len; + +/* Body delimiter string. */ +static char *body_del = nullptr; + +/* Body section delimiter length. */ +static size_t body_del_len; + +/* Footer delimiter string. */ +static char *footer_del = nullptr; + +/* Footer section delimiter length. */ +static size_t footer_del_len; + +/* Input buffer. */ +static struct linebuffer line_buf; + +/* printf format string for unnumbered lines. */ +static char *print_no_line_fmt = nullptr; + +/* Starting line number on each page (-v). */ +static intmax_t starting_line_number = 1; + +/* Line number increment (-i). */ +static intmax_t page_incr = 1; + +/* If true, reset line number at start of each page (-p). */ +static bool reset_numbers = true; + +/* Number of blank lines to consider to be one line for numbering (-l). */ +static intmax_t blank_join = 1; + +/* Width of line numbers (-w). */ +static int lineno_width = 6; + +/* Line number format (-n). */ +static char const *lineno_format = FORMAT_RIGHT_NOLZ; + +/* Current print line number. */ +static intmax_t line_no; + +/* Whether the current line number has incremented past limits. */ +static bool line_no_overflow; + +/* True if we have ever read standard input. */ +static bool have_read_stdin; + +static struct option const longopts[] = +{ + {"header-numbering", required_argument, nullptr, 'h'}, + {"body-numbering", required_argument, nullptr, 'b'}, + {"footer-numbering", required_argument, nullptr, 'f'}, + {"starting-line-number", required_argument, nullptr, 'v'}, + {"line-increment", required_argument, nullptr, 'i'}, + {"no-renumber", no_argument, nullptr, 'p'}, + {"join-blank-lines", required_argument, nullptr, 'l'}, + {"number-separator", required_argument, nullptr, 's'}, + {"number-width", required_argument, nullptr, 'w'}, + {"number-format", required_argument, nullptr, 'n'}, + {"section-delimiter", required_argument, nullptr, 'd'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Print a usage message and quit. */ + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Write each FILE to standard output, with line numbers added.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --body-numbering=STYLE use STYLE for numbering body lines\n\ + -d, --section-delimiter=CC use CC for logical page delimiters\n\ + -f, --footer-numbering=STYLE use STYLE for numbering footer lines\n\ +"), stdout); + fputs (_("\ + -h, --header-numbering=STYLE use STYLE for numbering header lines\n\ + -i, --line-increment=NUMBER line number increment at each line\n\ + -l, --join-blank-lines=NUMBER group of NUMBER empty lines counted as one\n\ + -n, --number-format=FORMAT insert line numbers according to FORMAT\n\ + -p, --no-renumber do not reset line numbers for each section\n\ + -s, --number-separator=STRING add STRING after (possible) line number\n\ +"), stdout); + fputs (_("\ + -v, --starting-line-number=NUMBER first line number for each section\n\ + -w, --number-width=NUMBER use NUMBER columns for line numbers\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Default options are: -bt -d'\\:' -fn -hn -i1 -l1 -n'rn' -s -v1 -w6\n\ +\n\ +CC are two delimiter characters used to construct logical page delimiters;\n\ +a missing second character implies ':'. As a GNU extension one can specify\n\ +more than two characters, and also specifying the empty string (-d '')\n\ +disables section matching.\n\ +"), stdout); + fputs (_("\ +\n\ +STYLE is one of:\n\ +\n\ + a number all lines\n\ + t number only nonempty lines\n\ + n number no lines\n\ + pBRE number only lines that contain a match for the basic regular\n\ + expression, BRE\n\ +"), stdout); + fputs (_("\ +\n\ +FORMAT is one of:\n\ +\n\ + ln left justified, no leading zeros\n\ + rn right justified, no leading zeros\n\ + rz right justified, leading zeros\n\ +\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Set the command line flag TYPEP and possibly the regex pointer REGEXP, + according to 'optarg'. */ + +static bool +build_type_arg (char const **typep, + struct re_pattern_buffer *regexp, char *fastmap) +{ + char const *errmsg; + bool rval = true; + + switch (*optarg) + { + case 'a': + case 't': + case 'n': + *typep = optarg; + break; + case 'p': + *typep = optarg++; + regexp->buffer = nullptr; + regexp->allocated = 0; + regexp->fastmap = fastmap; + regexp->translate = nullptr; + re_syntax_options = + RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES; + errmsg = re_compile_pattern (optarg, strlen (optarg), regexp); + if (errmsg) + error (EXIT_FAILURE, 0, "%s", (errmsg)); + break; + default: + rval = false; + break; + } + return rval; +} + +/* Print the line number and separator; increment the line number. */ + +static void +print_lineno (void) +{ + if (line_no_overflow) + error (EXIT_FAILURE, 0, _("line number overflow")); + + printf (lineno_format, lineno_width, line_no, separator_str); + + if (ckd_add (&line_no, line_no, page_incr)) + line_no_overflow = true; +} + +static void +reset_lineno (void) +{ + if (reset_numbers) + { + line_no = starting_line_number; + line_no_overflow = false; + } +} + +/* Switch to a header section. */ + +static void +proc_header (void) +{ + current_type = header_type; + current_regex = &header_regex; + reset_lineno (); + putchar ('\n'); +} + +/* Switch to a body section. */ + +static void +proc_body (void) +{ + current_type = body_type; + current_regex = &body_regex; + reset_lineno (); + putchar ('\n'); +} + +/* Switch to a footer section. */ + +static void +proc_footer (void) +{ + current_type = footer_type; + current_regex = &footer_regex; + reset_lineno (); + putchar ('\n'); +} + +/* Process a regular text line in 'line_buf'. */ + +static void +proc_text (void) +{ + static intmax_t blank_lines = 0; /* Consecutive blank lines so far. */ + + switch (*current_type) + { + case 'a': + if (blank_join > 1) + { + if (1 < line_buf.length || ++blank_lines == blank_join) + { + print_lineno (); + blank_lines = 0; + } + else + fputs (print_no_line_fmt, stdout); + } + else + print_lineno (); + break; + case 't': + if (1 < line_buf.length) + print_lineno (); + else + fputs (print_no_line_fmt, stdout); + break; + case 'n': + fputs (print_no_line_fmt, stdout); + break; + case 'p': + switch (re_search (current_regex, line_buf.buffer, line_buf.length - 1, + 0, line_buf.length - 1, nullptr)) + { + case -2: + error (EXIT_FAILURE, errno, _("error in regular expression search")); + + case -1: + fputs (print_no_line_fmt, stdout); + break; + + default: + print_lineno (); + break; + } + } + fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout); +} + +/* Return the type of line in 'line_buf'. */ + +static enum section +check_section (void) +{ + size_t len = line_buf.length - 1; + + if (len < 2 || footer_del_len < 2 + || memcmp (line_buf.buffer, section_del, 2)) + return Text; + if (len == header_del_len + && !memcmp (line_buf.buffer, header_del, header_del_len)) + return Header; + if (len == body_del_len + && !memcmp (line_buf.buffer, body_del, body_del_len)) + return Body; + if (len == footer_del_len + && !memcmp (line_buf.buffer, footer_del, footer_del_len)) + return Footer; + return Text; +} + +/* Read and process the file pointed to by FP. */ + +static void +process_file (FILE *fp) +{ + while (readlinebuffer (&line_buf, fp)) + { + switch (check_section ()) + { + case Header: + proc_header (); + break; + case Body: + proc_body (); + break; + case Footer: + proc_footer (); + break; + case Text: + proc_text (); + break; + } + } +} + +/* Process file FILE to standard output. + Return true if successful. */ + +static bool +nl_file (char const *file) +{ + FILE *stream; + + if (STREQ (file, "-")) + { + have_read_stdin = true; + stream = stdin; + assume (stream); /* Pacify GCC bug#109613. */ + } + else + { + stream = fopen (file, "r"); + if (stream == nullptr) + { + error (0, errno, "%s", quotef (file)); + return false; + } + } + + fadvise (stream, FADVISE_SEQUENTIAL); + + process_file (stream); + + int err = errno; + if (!ferror (stream)) + err = 0; + if (STREQ (file, "-")) + clearerr (stream); /* Also clear EOF. */ + else if (fclose (stream) != 0 && !err) + err = errno; + if (err) + { + error (0, err, "%s", quotef (file)); + return false; + } + return true; +} + +int +main (int argc, char **argv) +{ + int c; + size_t len; + bool ok = true; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + have_read_stdin = false; + + while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts, + nullptr)) + != -1) + { + switch (c) + { + case 'h': + if (! build_type_arg (&header_type, &header_regex, header_fastmap)) + { + error (0, 0, _("invalid header numbering style: %s"), + quote (optarg)); + ok = false; + } + break; + case 'b': + if (! build_type_arg (&body_type, &body_regex, body_fastmap)) + { + error (0, 0, _("invalid body numbering style: %s"), + quote (optarg)); + ok = false; + } + break; + case 'f': + if (! build_type_arg (&footer_type, &footer_regex, footer_fastmap)) + { + error (0, 0, _("invalid footer numbering style: %s"), + quote (optarg)); + ok = false; + } + break; + case 'v': + starting_line_number = xdectoimax (optarg, INTMAX_MIN, INTMAX_MAX, "", + _("invalid starting line number"), + 0); + break; + case 'i': + page_incr = xdectoimax (optarg, INTMAX_MIN, INTMAX_MAX, "", + _("invalid line number increment"), 0); + break; + case 'p': + reset_numbers = false; + break; + case 'l': + blank_join = xdectoimax (optarg, 1, INTMAX_MAX, "", + _("invalid line number of blank lines"), 0); + break; + case 's': + separator_str = optarg; + break; + case 'w': + lineno_width = xdectoimax (optarg, 1, INT_MAX, "", + _("invalid line number field width"), 0); + break; + case 'n': + if (STREQ (optarg, "ln")) + lineno_format = FORMAT_LEFT; + else if (STREQ (optarg, "rn")) + lineno_format = FORMAT_RIGHT_NOLZ; + else if (STREQ (optarg, "rz")) + lineno_format = FORMAT_RIGHT_LZ; + else + { + error (0, 0, _("invalid line numbering format: %s"), + quote (optarg)); + ok = false; + } + break; + case 'd': + len = strlen (optarg); + if (len == 1 || len == 2) /* POSIX. */ + { + char *p = section_del; + while (*optarg) + *p++ = *optarg++; + } + else + section_del = optarg; /* GNU extension. */ + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + ok = false; + break; + } + } + + if (!ok) + usage (EXIT_FAILURE); + + /* Initialize the section delimiters. */ + len = strlen (section_del); + + header_del_len = len * 3; + header_del = xmalloc (header_del_len + 1); + stpcpy (stpcpy (stpcpy (header_del, section_del), section_del), section_del); + + body_del_len = len * 2; + body_del = header_del + len; + + footer_del_len = len; + footer_del = body_del + len; + + /* Initialize the input buffer. */ + initbuffer (&line_buf); + + /* Initialize the printf format for unnumbered lines. */ + len = strlen (separator_str); + print_no_line_fmt = xmalloc (lineno_width + len + 1); + memset (print_no_line_fmt, ' ', lineno_width + len); + print_no_line_fmt[lineno_width + len] = '\0'; + + line_no = starting_line_number; + current_type = body_type; + current_regex = &body_regex; + + /* Main processing. */ + + if (optind == argc) + ok = nl_file ("-"); + else + for (; optind < argc; optind++) + ok &= nl_file (argv[optind]); + + if (have_read_stdin && fclose (stdin) == EOF) + error (EXIT_FAILURE, errno, "-"); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/nohup.c b/src/nohup.c new file mode 100644 index 0000000..a6486c0 --- /dev/null +++ b/src/nohup.c @@ -0,0 +1,230 @@ +/* nohup -- run a command immune to hangups, with output to a non-tty + Copyright (C) 2003-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering */ + +#include +#include +#include +#include + +#include "system.h" + +#include "filenamecat.h" +#include "fd-reopen.h" +#include "long-options.h" +#include "unistd--.h" + +#define PROGRAM_NAME "nohup" + +#define AUTHORS proper_name ("Jim Meyering") + +/* Exit statuses. */ +enum + { + /* 'nohup' itself failed. */ + POSIX_NOHUP_FAILURE = 127 + }; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s COMMAND [ARG]...\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + + fputs (_("\ +Run COMMAND, ignoring hangup signals.\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\n\ +If standard input is a terminal, redirect it from an unreadable file.\n\ +If standard output is a terminal, append output to 'nohup.out' if possible,\n\ +'$HOME/nohup.out' otherwise.\n\ +If standard error is a terminal, redirect it to standard output.\n\ +To save output to FILE, use '%s COMMAND > FILE'.\n"), + program_name); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_exec_status (PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* GCC 13 gets confused by the dup2 calls. */ +#if 13 <= __GNUC__ +# pragma GCC diagnostic ignored "-Wanalyzer-fd-leak" +#endif + +int +main (int argc, char **argv) +{ + int out_fd = STDOUT_FILENO; + int saved_stderr_fd = STDERR_FILENO; + bool ignoring_input; + bool redirecting_stdout; + bool stdout_is_closed; + bool redirecting_stderr; + int exit_internal_failure; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* POSIX 2008 requires that internal failure give status 127; unlike + for env, exec, nice, time, and xargs where it requires internal + failure give something in the range 1-125. For consistency with + other tools, fail with EXIT_CANCELED unless POSIXLY_CORRECT. */ + exit_internal_failure = (getenv ("POSIXLY_CORRECT") + ? POSIX_NOHUP_FAILURE : EXIT_CANCELED); + initialize_exit_failure (exit_internal_failure); + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, false, usage, AUTHORS, + (char const *) nullptr); + + if (argc <= optind) + { + error (0, 0, _("missing operand")); + usage (exit_internal_failure); + } + + ignoring_input = isatty (STDIN_FILENO); + redirecting_stdout = isatty (STDOUT_FILENO); + stdout_is_closed = (!redirecting_stdout && errno == EBADF); + redirecting_stderr = isatty (STDERR_FILENO); + + /* If standard input is a tty, replace it with /dev/null if possible. + Note that it is deliberately opened for *writing*, + to ensure any read evokes an error. */ + if (ignoring_input) + { + if (fd_reopen (STDIN_FILENO, "/dev/null", O_WRONLY, 0) < 0) + error (exit_internal_failure, errno, + _("failed to render standard input unusable")); + if (!redirecting_stdout && !redirecting_stderr) + error (0, 0, _("ignoring input")); + } + + /* If standard output is a tty, redirect it (appending) to a file. + First try nohup.out, then $HOME/nohup.out. If standard error is + a tty and standard output is closed, open nohup.out or + $HOME/nohup.out without redirecting anything. */ + if (redirecting_stdout || (redirecting_stderr && stdout_is_closed)) + { + char *in_home = nullptr; + char const *file = "nohup.out"; + int flags = O_CREAT | O_WRONLY | O_APPEND; + mode_t mode = S_IRUSR | S_IWUSR; + mode_t umask_value = umask (~mode); + out_fd = (redirecting_stdout + ? fd_reopen (STDOUT_FILENO, file, flags, mode) + : open (file, flags, mode)); + + if (out_fd < 0) + { + int saved_errno = errno; + char const *home = getenv ("HOME"); + if (home) + { + in_home = file_name_concat (home, file, nullptr); + out_fd = (redirecting_stdout + ? fd_reopen (STDOUT_FILENO, in_home, flags, mode) + : open (in_home, flags, mode)); + } + if (out_fd < 0) + { + int saved_errno2 = errno; + error (0, saved_errno, _("failed to open %s"), quoteaf (file)); + if (in_home) + error (0, saved_errno2, _("failed to open %s"), + quoteaf (in_home)); + return exit_internal_failure; + } + file = in_home; + } + + umask (umask_value); + error (0, 0, + _(ignoring_input + ? N_("ignoring input and appending output to %s") + : N_("appending output to %s")), + quoteaf (file)); + free (in_home); + } + + /* If standard error is a tty, redirect it. */ + if (redirecting_stderr) + { + /* Save a copy of stderr before redirecting, so we can use the original + if execve fails. It's no big deal if this dup fails. It might + not change anything, and at worst, it'll lead to suppression of + the post-failed-execve diagnostic. */ + saved_stderr_fd = fcntl (STDERR_FILENO, F_DUPFD_CLOEXEC, + STDERR_FILENO + 1); + + if (!redirecting_stdout) + error (0, 0, + _(ignoring_input + ? N_("ignoring input and redirecting stderr to stdout") + : N_("redirecting stderr to stdout"))); + + if (dup2 (out_fd, STDERR_FILENO) < 0) + error (exit_internal_failure, errno, + _("failed to redirect standard error")); + + if (stdout_is_closed) + close (out_fd); + } + + /* error() flushes stderr, but does not check for write failure. + Normally, we would catch this via our atexit() hook of + close_stdout, but execvp() gets in the way. If stderr + encountered a write failure, there is no need to try calling + error() again, particularly since we may have just changed the + underlying fd out from under stderr. */ + if (ferror (stderr)) + return exit_internal_failure; + + signal (SIGHUP, SIG_IGN); + + char **cmd = argv + optind; + execvp (*cmd, cmd); + int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; + int saved_errno = errno; + + /* The execve failed. Output a diagnostic to stderr only if: + - stderr was initially redirected to a non-tty, or + - stderr was initially directed to a tty, and we + can dup2 it to point back to that same tty. + In other words, output the diagnostic if possible, but only if + it will go to the original stderr. */ + if (dup2 (saved_stderr_fd, STDERR_FILENO) == STDERR_FILENO) + error (0, saved_errno, _("failed to run command %s"), quoteaf (*cmd)); + + return exit_status; +} diff --git a/src/nproc.c b/src/nproc.c new file mode 100644 index 0000000..4092baa --- /dev/null +++ b/src/nproc.c @@ -0,0 +1,128 @@ +/* nproc - print the number of processors. + Copyright (C) 2009-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Giuseppe Scrivano. */ + +#include +#include +#include +#include + +#include "system.h" +#include "nproc.h" +#include "quote.h" +#include "xdectoint.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "nproc" + +#define AUTHORS proper_name ("Giuseppe Scrivano") + +enum +{ + ALL_OPTION = CHAR_MAX + 1, + IGNORE_OPTION +}; + +static struct option const longopts[] = +{ + {"all", no_argument, nullptr, ALL_OPTION}, + {"ignore", required_argument, nullptr, IGNORE_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]...\n"), program_name); + fputs (_("\ +Print the number of processing units available to the current process,\n\ +which may be less than the number of online processors\n\ +\n\ +"), stdout); + fputs (_("\ + --all print the number of installed processors\n\ + --ignore=N if possible, exclude N processing units\n\ +"), stdout); + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + unsigned long nproc, ignore = 0; + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + enum nproc_query mode = NPROC_CURRENT_OVERRIDABLE; + + while (true) + { + int c = getopt_long (argc, argv, "", longopts, nullptr); + if (c == -1) + break; + switch (c) + { + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + case ALL_OPTION: + mode = NPROC_ALL; + break; + + case IGNORE_OPTION: + ignore = xdectoumax (optarg, 0, ULONG_MAX, "", _("invalid number"),0); + break; + + default: + usage (EXIT_FAILURE); + } + } + + if (argc != optind) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + nproc = num_processors (mode); + + if (ignore < nproc) + nproc -= ignore; + else + nproc = 1; + + printf ("%lu\n", nproc); + + return EXIT_SUCCESS; +} diff --git a/src/numfmt.c b/src/numfmt.c new file mode 100644 index 0000000..ca2eaff --- /dev/null +++ b/src/numfmt.c @@ -0,0 +1,1655 @@ +/* Reformat numbers like 11505426432 to the more human-readable 11G + Copyright (C) 2012-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include +#include +#include + +#include "mbsalign.h" +#include "argmatch.h" +#include "c-ctype.h" +#include "quote.h" +#include "system.h" +#include "xstrtol.h" + +#include "set-fields.h" + +#if HAVE_FPSETPREC +# include +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "numfmt" + +#define AUTHORS proper_name ("Assaf Gordon") + +/* Exit code when some numbers fail to convert. */ +enum { EXIT_CONVERSION_WARNINGS = 2 }; + +enum +{ + FROM_OPTION = CHAR_MAX + 1, + FROM_UNIT_OPTION, + TO_OPTION, + TO_UNIT_OPTION, + ROUND_OPTION, + SUFFIX_OPTION, + GROUPING_OPTION, + PADDING_OPTION, + FIELD_OPTION, + DEBUG_OPTION, + DEV_DEBUG_OPTION, + HEADER_OPTION, + FORMAT_OPTION, + INVALID_OPTION +}; + +enum scale_type +{ + scale_none, /* the default: no scaling. */ + scale_auto, /* --from only. */ + scale_SI, + scale_IEC, + scale_IEC_I /* 'i' suffix is required. */ +}; + +static char const *const scale_from_args[] = +{ + "none", "auto", "si", "iec", "iec-i", nullptr +}; + +static enum scale_type const scale_from_types[] = +{ + scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I +}; + +static char const *const scale_to_args[] = +{ + "none", "si", "iec", "iec-i", nullptr +}; + +static enum scale_type const scale_to_types[] = +{ + scale_none, scale_SI, scale_IEC, scale_IEC_I +}; + + +enum round_type +{ + round_ceiling, + round_floor, + round_from_zero, + round_to_zero, + round_nearest, +}; + +static char const *const round_args[] = +{ + "up", "down", "from-zero", "towards-zero", "nearest", nullptr +}; + +static enum round_type const round_types[] = +{ + round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest +}; + + +enum inval_type +{ + inval_abort, + inval_fail, + inval_warn, + inval_ignore +}; + +static char const *const inval_args[] = +{ + "abort", "fail", "warn", "ignore", nullptr +}; + +static enum inval_type const inval_types[] = +{ + inval_abort, inval_fail, inval_warn, inval_ignore +}; + +static struct option const longopts[] = +{ + {"from", required_argument, nullptr, FROM_OPTION}, + {"from-unit", required_argument, nullptr, FROM_UNIT_OPTION}, + {"to", required_argument, nullptr, TO_OPTION}, + {"to-unit", required_argument, nullptr, TO_UNIT_OPTION}, + {"round", required_argument, nullptr, ROUND_OPTION}, + {"padding", required_argument, nullptr, PADDING_OPTION}, + {"suffix", required_argument, nullptr, SUFFIX_OPTION}, + {"grouping", no_argument, nullptr, GROUPING_OPTION}, + {"delimiter", required_argument, nullptr, 'd'}, + {"field", required_argument, nullptr, FIELD_OPTION}, + {"debug", no_argument, nullptr, DEBUG_OPTION}, + {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION}, + {"header", optional_argument, nullptr, HEADER_OPTION}, + {"format", required_argument, nullptr, FORMAT_OPTION}, + {"invalid", required_argument, nullptr, INVALID_OPTION}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* If delimiter has this value, blanks separate fields. */ +enum { DELIMITER_DEFAULT = CHAR_MAX + 1 }; + +/* Maximum number of digits we can safely handle + without precision loss, if scaling is 'none'. */ +enum { MAX_UNSCALED_DIGITS = LDBL_DIG }; + +/* Maximum number of digits we can work with. + This is equivalent to 999Q. + NOTE: 'long double' can handle more than that, but there's + no official suffix assigned beyond Quetta (1000^10). */ +enum { MAX_ACCEPTABLE_DIGITS = 33 }; + +static enum scale_type scale_from = scale_none; +static enum scale_type scale_to = scale_none; +static enum round_type round_style = round_from_zero; +static enum inval_type inval_style = inval_abort; +static char const *suffix = nullptr; +static uintmax_t from_unit_size = 1; +static uintmax_t to_unit_size = 1; +static int grouping = 0; +static char *padding_buffer = nullptr; +static size_t padding_buffer_size = 0; +static long int padding_width = 0; +static long int zero_padding_width = 0; +static long int user_precision = -1; +static char const *format_str = nullptr; +static char *format_str_prefix = nullptr; +static char *format_str_suffix = nullptr; + +/* By default, any conversion error will terminate the program. */ +static int conv_exit_code = EXIT_CONVERSION_WARNINGS; + + +/* auto-pad each line based on skipped whitespace. */ +static int auto_padding = 0; +static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT; + +/* field delimiter */ +static int delimiter = DELIMITER_DEFAULT; + +/* line delimiter. */ +static unsigned char line_delim = '\n'; + +/* if non-zero, the first 'header' lines from STDIN are skipped. */ +static uintmax_t header = 0; + +/* Debug for users: print warnings to STDERR about possible + error (similar to sort's debug). */ +static bool debug; + +/* will be set according to the current locale. */ +static char const *decimal_point; +static int decimal_point_length; + +/* debugging for developers. Enables devmsg(). */ +static bool dev_debug = false; + + +static inline int +default_scale_base (enum scale_type scale) +{ + switch (scale) + { + case scale_IEC: + case scale_IEC_I: + return 1024; + + case scale_none: + case scale_auto: + case scale_SI: + default: + return 1000; + } +} + +static char const zero_and_valid_suffixes[] = "0KMGTPEZYRQ"; +static char const *valid_suffixes = 1 + zero_and_valid_suffixes; + +static inline bool +valid_suffix (const char suf) +{ + return strchr (valid_suffixes, suf) != nullptr; +} + +static inline int +suffix_power (const char suf) +{ + switch (suf) + { + case 'K': /* kilo or kibi. */ + return 1; + + case 'M': /* mega or mebi. */ + return 2; + + case 'G': /* giga or gibi. */ + return 3; + + case 'T': /* tera or tebi. */ + return 4; + + case 'P': /* peta or pebi. */ + return 5; + + case 'E': /* exa or exbi. */ + return 6; + + case 'Z': /* zetta or 2**70. */ + return 7; + + case 'Y': /* yotta or 2**80. */ + return 8; + + case 'R': /* ronna or 2**90. */ + return 9; + + case 'Q': /* quetta or 2**100. */ + return 10; + + default: /* should never happen. assert? */ + return 0; + } +} + +static inline char const * +suffix_power_char (int power) +{ + switch (power) + { + case 0: + return ""; + + case 1: + return "K"; + + case 2: + return "M"; + + case 3: + return "G"; + + case 4: + return "T"; + + case 5: + return "P"; + + case 6: + return "E"; + + case 7: + return "Z"; + + case 8: + return "Y"; + + case 9: + return "R"; + + case 10: + return "Q"; + + default: + return "(error)"; + } +} + +/* Similar to 'powl(3)' but without requiring 'libm'. */ +static long double +powerld (long double base, int x) +{ + long double result = base; + if (x == 0) + return 1; /* note for test coverage: this is never + reached, as 'powerld' won't be called if + there's no suffix, hence, no "power". */ + + /* TODO: check for overflow, inf? */ + while (--x) + result *= base; + return result; +} + +/* Similar to 'fabs(3)' but without requiring 'libm'. */ +static inline long double +absld (long double val) +{ + return val < 0 ? -val : val; +} + +/* Scale down 'val', returns 'updated val' and 'x', such that + val*base^X = original val + Similar to "frexpl(3)" but without requiring 'libm', + allowing only integer scale, limited functionality and error checking. */ +static long double +expld (long double val, int base, int /*output */ *x) +{ + int power = 0; + + if (val >= -LDBL_MAX && val <= LDBL_MAX) + { + while (absld (val) >= base) + { + ++power; + val /= base; + } + } + if (x) + *x = power; + return val; +} + +/* EXTREMELY limited 'ceil' - without 'libm'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_ceiling (long double val) +{ + intmax_t intval = val; + if (intval < val) + intval++; + return intval; +} + +/* EXTREMELY limited 'floor' - without 'libm'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_floor (long double val) +{ + return -simple_round_ceiling (-val); +} + +/* EXTREMELY limited 'round away from zero'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_from_zero (long double val) +{ + return val < 0 ? simple_round_floor (val) : simple_round_ceiling (val); +} + +/* EXTREMELY limited 'round away to zero'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_to_zero (long double val) +{ + return val; +} + +/* EXTREMELY limited 'round' - without 'libm'. + Assumes values that fit in intmax_t. */ +static inline intmax_t +simple_round_nearest (long double val) +{ + return val < 0 ? val - 0.5 : val + 0.5; +} + +ATTRIBUTE_CONST +static inline long double +simple_round (long double val, enum round_type t) +{ + intmax_t rval; + intmax_t intmax_mul = val / INTMAX_MAX; + val -= (long double) INTMAX_MAX * intmax_mul; + + switch (t) + { + case round_ceiling: + rval = simple_round_ceiling (val); + break; + + case round_floor: + rval = simple_round_floor (val); + break; + + case round_from_zero: + rval = simple_round_from_zero (val); + break; + + case round_to_zero: + rval = simple_round_to_zero (val); + break; + + case round_nearest: + rval = simple_round_nearest (val); + break; + + default: + /* to silence the compiler - this should never happen. */ + return 0; + } + + return (long double) INTMAX_MAX * intmax_mul + rval; +} + +enum simple_strtod_error +{ + SSE_OK = 0, + SSE_OK_PRECISION_LOSS, + SSE_OVERFLOW, + SSE_INVALID_NUMBER, + + /* the following are returned by 'simple_strtod_human'. */ + SSE_VALID_BUT_FORBIDDEN_SUFFIX, + SSE_INVALID_SUFFIX, + SSE_MISSING_I_SUFFIX +}; + +/* Read an *integer* INPUT_STR, + but return the integer value in a 'long double' VALUE + hence, no UINTMAX_MAX limitation. + NEGATIVE is updated, and is stored separately from the VALUE + so that signbit() isn't required to determine the sign of -0.. + ENDPTR is required (unlike strtod) and is used to store a pointer + to the character after the last character used in the conversion. + + Note locale'd grouping is not supported, + nor is skipping of white-space supported. + + Returns: + SSE_OK - valid number. + SSE_OK_PRECISION_LOSS - if more than 18 digits were used. + SSE_OVERFLOW - if more than 33 digits (999Q) were used. + SSE_INVALID_NUMBER - if no digits were found. */ +static enum simple_strtod_error +simple_strtod_int (char const *input_str, + char **endptr, long double *value, bool *negative) +{ + enum simple_strtod_error e = SSE_OK; + + long double val = 0; + int digits = 0; + bool found_digit = false; + + if (*input_str == '-') + { + input_str++; + *negative = true; + } + else + *negative = false; + + *endptr = (char *) input_str; + while (c_isdigit (**endptr)) + { + int digit = (**endptr) - '0'; + + found_digit = true; + + if (val || digit) + digits++; + + if (digits > MAX_UNSCALED_DIGITS) + e = SSE_OK_PRECISION_LOSS; + + if (digits > MAX_ACCEPTABLE_DIGITS) + return SSE_OVERFLOW; + + val *= 10; + val += digit; + + ++(*endptr); + } + if (! found_digit + && ! STREQ_LEN (*endptr, decimal_point, decimal_point_length)) + return SSE_INVALID_NUMBER; + if (*negative) + val = -val; + + if (value) + *value = val; + + return e; +} + +/* Read a floating-point INPUT_STR represented as "NNNN[.NNNNN]", + and return the value in a 'long double' VALUE. + ENDPTR is required (unlike strtod) and is used to store a pointer + to the character after the last character used in the conversion. + PRECISION is optional and used to indicate fractions are present. + + Note locale'd grouping is not supported, + nor is skipping of white-space supported. + + Returns: + SSE_OK - valid number. + SSE_OK_PRECISION_LOSS - if more than 18 digits were used. + SSE_OVERFLOW - if more than 33 digits (999Q) were used. + SSE_INVALID_NUMBER - if no digits were found. */ +static enum simple_strtod_error +simple_strtod_float (char const *input_str, + char **endptr, + long double *value, + size_t *precision) +{ + bool negative; + enum simple_strtod_error e = SSE_OK; + + if (precision) + *precision = 0; + + /* TODO: accept locale'd grouped values for the integral part. */ + e = simple_strtod_int (input_str, endptr, value, &negative); + if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS) + return e; + + /* optional decimal point + fraction. */ + if (STREQ_LEN (*endptr, decimal_point, decimal_point_length)) + { + char *ptr2; + long double val_frac = 0; + bool neg_frac; + + (*endptr) += decimal_point_length; + enum simple_strtod_error e2 = + simple_strtod_int (*endptr, &ptr2, &val_frac, &neg_frac); + if (e2 != SSE_OK && e2 != SSE_OK_PRECISION_LOSS) + return e2; + if (e2 == SSE_OK_PRECISION_LOSS) + e = e2; /* propagate warning. */ + if (neg_frac) + return SSE_INVALID_NUMBER; + + /* number of digits in the fractions. */ + size_t exponent = ptr2 - *endptr; + + val_frac = ((long double) val_frac) / powerld (10, exponent); + + /* TODO: detect loss of precision (only really 18 digits + of precision across all digits (before and after '.')). */ + if (value) + { + if (negative) + *value -= val_frac; + else + *value += val_frac; + } + + if (precision) + *precision = exponent; + + *endptr = ptr2; + } + return e; +} + +/* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix", + and return the value in a 'long double' VALUE, + with the precision of the input returned in PRECISION. + ENDPTR is required (unlike strtod) and is used to store a pointer + to the character after the last character used in the conversion. + ALLOWED_SCALING determines the scaling supported. + + TODO: + support locale'd grouping + accept scientific and hex floats (probably use strtold directly) + + Returns: + SSE_OK - valid number. + SSE_OK_PRECISION_LOSS - if more than LDBL_DIG digits were used. + SSE_OVERFLOW - if more than 33 digits (999Q) were used. + SSE_INVALID_NUMBER - if no digits were found. + SSE_VALID_BUT_FORBIDDEN_SUFFIX + SSE_INVALID_SUFFIX + SSE_MISSING_I_SUFFIX */ +static enum simple_strtod_error +simple_strtod_human (char const *input_str, + char **endptr, long double *value, size_t *precision, + enum scale_type allowed_scaling) +{ + int power = 0; + /* 'scale_auto' is checked below. */ + int scale_base = default_scale_base (allowed_scaling); + + devmsg ("simple_strtod_human:\n input string: %s\n" + " locale decimal-point: %s\n" + " MAX_UNSCALED_DIGITS: %d\n", + quote_n (0, input_str), + quote_n (1, decimal_point), + MAX_UNSCALED_DIGITS); + + enum simple_strtod_error e = + simple_strtod_float (input_str, endptr, value, precision); + if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS) + return e; + + devmsg (" parsed numeric value: %Lf\n" + " input precision = %d\n", *value, (int)*precision); + + if (**endptr != '\0') + { + /* process suffix. */ + + /* Skip any blanks between the number and suffix. */ + while (isblank (to_uchar (**endptr))) + (*endptr)++; + + if (!valid_suffix (**endptr)) + return SSE_INVALID_SUFFIX; + + if (allowed_scaling == scale_none) + return SSE_VALID_BUT_FORBIDDEN_SUFFIX; + + power = suffix_power (**endptr); + (*endptr)++; /* skip first suffix character. */ + + if (allowed_scaling == scale_auto && **endptr == 'i') + { + /* auto-scaling enabled, and the first suffix character + is followed by an 'i' (e.g. Ki, Mi, Gi). */ + scale_base = 1024; + (*endptr)++; /* skip second ('i') suffix character. */ + devmsg (" Auto-scaling, found 'i', switching to base %d\n", + scale_base); + } + + *precision = 0; /* Reset, to select precision based on scale. */ + } + + if (allowed_scaling == scale_IEC_I) + { + if (**endptr == 'i') + (*endptr)++; + else + return SSE_MISSING_I_SUFFIX; + } + + long double multiplier = powerld (scale_base, power); + + devmsg (" suffix power=%d^%d = %Lf\n", scale_base, power, multiplier); + + /* TODO: detect loss of precision and overflows. */ + (*value) = (*value) * multiplier; + + devmsg (" returning value: %Lf (%LG)\n", *value, *value); + + return e; +} + + +static void +simple_strtod_fatal (enum simple_strtod_error err, char const *input_str) +{ + char const *msgid = nullptr; + + switch (err) + { + case SSE_OK_PRECISION_LOSS: + case SSE_OK: + /* should never happen - this function isn't called when OK. */ + unreachable (); + + case SSE_OVERFLOW: + msgid = N_("value too large to be converted: %s"); + break; + + case SSE_INVALID_NUMBER: + msgid = N_("invalid number: %s"); + break; + + case SSE_VALID_BUT_FORBIDDEN_SUFFIX: + msgid = N_("rejecting suffix in input: %s (consider using --from)"); + break; + + case SSE_INVALID_SUFFIX: + msgid = N_("invalid suffix in input: %s"); + break; + + case SSE_MISSING_I_SUFFIX: + msgid = N_("missing 'i' suffix in input: %s (e.g Ki/Mi/Gi)"); + break; + + } + + if (inval_style != inval_ignore) + error (conv_exit_code, 0, gettext (msgid), quote (input_str)); +} + +/* Convert VAL to a human format string in BUF. */ +static void +double_to_human (long double val, int precision, + char *buf, size_t buf_size, + enum scale_type scale, int group, enum round_type round) +{ + int num_size; + char fmt[64]; + static_assert ((INT_BUFSIZE_BOUND (zero_padding_width) + + INT_BUFSIZE_BOUND (precision) + + 10 /* for %.Lf etc. */) + < sizeof fmt); + + char *pfmt = fmt; + *pfmt++ = '%'; + + if (group) + *pfmt++ = '\''; + + if (zero_padding_width) + pfmt += snprintf (pfmt, sizeof (fmt) - 2, "0%ld", zero_padding_width); + + devmsg ("double_to_human:\n"); + + if (scale == scale_none) + { + val *= powerld (10, precision); + val = simple_round (val, round); + val /= powerld (10, precision); + + devmsg ((group) ? + " no scaling, returning (grouped) value: %'.*Lf\n" : + " no scaling, returning value: %.*Lf\n", precision, val); + + stpcpy (pfmt, ".*Lf"); + + num_size = snprintf (buf, buf_size, fmt, precision, val); + if (num_size < 0 || num_size >= (int) buf_size) + error (EXIT_FAILURE, 0, + _("failed to prepare value '%Lf' for printing"), val); + return; + } + + /* Scaling requested by user. */ + double scale_base = default_scale_base (scale); + + /* Normalize val to scale. */ + int power = 0; + val = expld (val, scale_base, &power); + devmsg (" scaled value to %Lf * %0.f ^ %d\n", val, scale_base, power); + + /* Perform rounding. */ + int power_adjust = 0; + if (user_precision != -1) + power_adjust = MIN (power * 3, user_precision); + else if (absld (val) < 10) + { + /* for values less than 10, we allow one decimal-point digit, + so adjust before rounding. */ + power_adjust = 1; + } + + val *= powerld (10, power_adjust); + val = simple_round (val, round); + val /= powerld (10, power_adjust); + + /* two special cases after rounding: + 1. a "999.99" can turn into 1000 - so scale down + 2. a "9.99" can turn into 10 - so don't display decimal-point. */ + if (absld (val) >= scale_base) + { + val /= scale_base; + power++; + } + + /* should "7.0" be printed as "7" ? + if removing the ".0" is preferred, enable the fourth condition. */ + int show_decimal_point = (val != 0) && (absld (val) < 10) && (power > 0); + /* && (absld (val) > simple_round_floor (val))) */ + + devmsg (" after rounding, value=%Lf * %0.f ^ %d\n", val, scale_base, power); + + stpcpy (pfmt, ".*Lf%s"); + + int prec = user_precision == -1 ? show_decimal_point : user_precision; + + /* buf_size - 1 used here to ensure place for possible scale_IEC_I suffix. */ + num_size = snprintf (buf, buf_size - 1, fmt, prec, val, + suffix_power_char (power)); + if (num_size < 0 || num_size >= (int) buf_size - 1) + error (EXIT_FAILURE, 0, + _("failed to prepare value '%Lf' for printing"), val); + + if (scale == scale_IEC_I && power > 0) + strncat (buf, "i", buf_size - num_size - 1); + + devmsg (" returning value: %s\n", quote (buf)); + + return; +} + +/* Convert a string of decimal digits, N_STRING, with an optional suffix + to an integral value. Suffixes are handled as with --from=auto. + Upon successful conversion, return that value. + If it cannot be converted, give a diagnostic and exit. */ +static uintmax_t +unit_to_umax (char const *n_string) +{ + strtol_error s_err; + char const *c_string = n_string; + char *t_string = nullptr; + size_t n_len = strlen (n_string); + char *end = nullptr; + uintmax_t n; + char const *suffixes = valid_suffixes; + + /* Adjust suffixes so K=1000, Ki=1024, KiB=invalid. */ + if (n_len && ! c_isdigit (n_string[n_len - 1])) + { + t_string = xmalloc (n_len + 2); + end = t_string + n_len - 1; + memcpy (t_string, n_string, n_len); + + if (*end == 'i' && 2 <= n_len && ! c_isdigit (*(end - 1))) + *end = '\0'; + else + { + *++end = 'B'; + *++end = '\0'; + suffixes = zero_and_valid_suffixes; + } + + c_string = t_string; + } + + s_err = xstrtoumax (c_string, &end, 10, &n, suffixes); + + if (s_err != LONGINT_OK || *end || n == 0) + { + free (t_string); + error (EXIT_FAILURE, 0, _("invalid unit size: %s"), quote (n_string)); + } + + free (t_string); + + return n; +} + + +static void +setup_padding_buffer (size_t min_size) +{ + if (padding_buffer_size > min_size) + return; + + padding_buffer_size = min_size + 1; + padding_buffer = xrealloc (padding_buffer, padding_buffer_size); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [NUMBER]...\n\ +"), program_name); + fputs (_("\ +Reformat NUMBER(s), or the numbers from standard input if none are specified.\n\ +"), stdout); + emit_mandatory_arg_note (); + fputs (_("\ + --debug print warnings about invalid input\n\ +"), stdout); + fputs (_("\ + -d, --delimiter=X use X instead of whitespace for field delimiter\n\ +"), stdout); + fputs (_("\ + --field=FIELDS replace the numbers in these input fields (default=1);\n\ + see FIELDS below\n\ +"), stdout); + fputs (_("\ + --format=FORMAT use printf style floating-point FORMAT;\n\ + see FORMAT below for details\n\ +"), stdout); + fputs (_("\ + --from=UNIT auto-scale input numbers to UNITs; default is 'none';\n\ + see UNIT below\n\ +"), stdout); + fputs (_("\ + --from-unit=N specify the input unit size (instead of the default 1)\n\ +"), stdout); + fputs (_("\ + --grouping use locale-defined grouping of digits, e.g. 1,000,000\n\ + (which means it has no effect in the C/POSIX locale)\n\ +"), stdout); + fputs (_("\ + --header[=N] print (without converting) the first N header lines;\n\ + N defaults to 1 if not specified\n\ +"), stdout); + fputs (_("\ + --invalid=MODE failure mode for invalid numbers: MODE can be:\n\ + abort (default), fail, warn, ignore\n\ +"), stdout); + fputs (_("\ + --padding=N pad the output to N characters; positive N will\n\ + right-align; negative N will left-align;\n\ + padding is ignored if the output is wider than N;\n\ + the default is to automatically pad if a whitespace\n\ + is found\n\ +"), stdout); + fputs (_("\ + --round=METHOD use METHOD for rounding when scaling; METHOD can be:\n\ + up, down, from-zero (default), towards-zero, nearest\n\ +"), stdout); + fputs (_("\ + --suffix=SUFFIX add SUFFIX to output numbers, and accept optional\n\ + SUFFIX in input numbers\n\ +"), stdout); + fputs (_("\ + --to=UNIT auto-scale output numbers to UNITs; see UNIT below\n\ +"), stdout); + fputs (_("\ + --to-unit=N the output unit size (instead of the default 1)\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + + fputs (_("\ +\n\ +UNIT options:\n"), stdout); + fputs (_("\ + none no auto-scaling is done; suffixes will trigger an error\n\ +"), stdout); + fputs (_("\ + auto accept optional single/two letter suffix:\n\ + 1K = 1000,\n\ + 1Ki = 1024,\n\ + 1M = 1000000,\n\ + 1Mi = 1048576,\n"), stdout); + fputs (_("\ + si accept optional single letter suffix:\n\ + 1K = 1000,\n\ + 1M = 1000000,\n\ + ...\n"), stdout); + fputs (_("\ + iec accept optional single letter suffix:\n\ + 1K = 1024,\n\ + 1M = 1048576,\n\ + ...\n"), stdout); + fputs (_("\ + iec-i accept optional two-letter suffix:\n\ + 1Ki = 1024,\n\ + 1Mi = 1048576,\n\ + ...\n"), stdout); + + fputs (_("\n\ +FIELDS supports cut(1) style field ranges:\n\ + N N'th field, counted from 1\n\ + N- from N'th field, to end of line\n\ + N-M from N'th to M'th field (inclusive)\n\ + -M from first to M'th field (inclusive)\n\ + - all fields\n\ +Multiple fields/ranges can be separated with commas\n\ +"), stdout); + + fputs (_("\n\ +FORMAT must be suitable for printing one floating-point argument '%f'.\n\ +Optional quote (%'f) will enable --grouping (if supported by current locale).\n\ +Optional width value (%10f) will pad output. Optional zero (%010f) width\n\ +will zero pad the number. Optional negative values (%-10f) will left align.\n\ +Optional precision (%.1f) will override the input determined precision.\n\ +"), stdout); + + printf (_("\n\ +Exit status is 0 if all input numbers were successfully converted.\n\ +By default, %s will stop at the first conversion error with exit status 2.\n\ +With --invalid='fail' a warning is printed for each conversion error\n\ +and the exit status is 2. With --invalid='warn' each conversion error is\n\ +diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\ +errors are not diagnosed and the exit status is 0.\n\ +"), program_name); + + printf (_("\n\ +Examples:\n\ + $ %s --to=si 1000\n\ + -> \"1.0K\"\n\ + $ %s --to=iec 2048\n\ + -> \"2.0K\"\n\ + $ %s --to=iec-i 4096\n\ + -> \"4.0Ki\"\n\ + $ echo 1K | %s --from=si\n\ + -> \"1000\"\n\ + $ echo 1K | %s --from=iec\n\ + -> \"1024\"\n\ + $ df -B1 | %s --header --field 2-4 --to=si\n\ + $ ls -l | %s --header --field 5 --to=iec\n\ + $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\ + $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n"), + program_name, program_name, program_name, + program_name, program_name, program_name, + program_name, program_name, program_name); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Given 'fmt' (a printf(3) compatible format string), extracts the following: + 1. padding (e.g. %20f) + 2. alignment (e.g. %-20f) + 3. grouping (e.g. %'f) + + Only a limited subset of printf(3) syntax is supported. + + TODO: + support %e %g etc. rather than just %f + + NOTES: + 1. This function sets the global variables: + padding_width, padding_alignment, grouping, + format_str_prefix, format_str_suffix + 2. The function aborts on any errors. */ +static void +parse_format_string (char const *fmt) +{ + size_t i; + size_t prefix_len = 0; + size_t suffix_pos; + long int pad = 0; + char *endptr = nullptr; + bool zero_padding = false; + + for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1) + { + if (!fmt[i]) + error (EXIT_FAILURE, 0, + _("format %s has no %% directive"), quote (fmt)); + prefix_len++; + } + + i++; + while (true) + { + size_t skip = strspn (fmt + i, " "); + i += skip; + if (fmt[i] == '\'') + { + grouping = 1; + i++; + } + else if (fmt[i] == '0') + { + zero_padding = true; + i++; + } + else if (! skip) + break; + } + + errno = 0; + pad = strtol (fmt + i, &endptr, 10); + if (errno == ERANGE || pad < -LONG_MAX) + error (EXIT_FAILURE, 0, + _("invalid format %s (width overflow)"), quote (fmt)); + + if (endptr != (fmt + i) && pad != 0) + { + if (debug && padding_width && !(zero_padding && pad > 0)) + error (0, 0, _("--format padding overriding --padding")); + + if (pad < 0) + { + padding_alignment = MBS_ALIGN_LEFT; + padding_width = -pad; + } + else + { + if (zero_padding) + zero_padding_width = pad; + else + padding_width = pad; + } + + } + i = endptr - fmt; + + if (fmt[i] == '\0') + error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt)); + + if (fmt[i] == '.') + { + i++; + errno = 0; + user_precision = strtol (fmt + i, &endptr, 10); + if (errno == ERANGE || user_precision < 0 || SIZE_MAX < user_precision + || isblank (fmt[i]) || fmt[i] == '+') + { + /* Note we disallow negative user_precision to be + consistent with printf(1). POSIX states that + negative precision is only supported (and ignored) + when used with '.*f'. glibc at least will malform + output when passed a direct negative precision. */ + error (EXIT_FAILURE, 0, + _("invalid precision in format %s"), quote (fmt)); + } + i = endptr - fmt; + } + + if (fmt[i] != 'f') + error (EXIT_FAILURE, 0, _("invalid format %s," + " directive must be %%[0]['][-][N][.][N]f"), + quote (fmt)); + i++; + suffix_pos = i; + + for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1) + if (fmt[i] == '%' && fmt[i + 1] != '%') + error (EXIT_FAILURE, 0, _("format %s has too many %% directives"), + quote (fmt)); + + if (prefix_len) + format_str_prefix = ximemdup0 (fmt, prefix_len); + if (fmt[suffix_pos] != '\0') + format_str_suffix = xstrdup (fmt + suffix_pos); + + devmsg ("format String:\n input: %s\n grouping: %s\n" + " padding width: %ld\n alignment: %s\n" + " prefix: %s\n suffix: %s\n", + quote_n (0, fmt), (grouping) ? "yes" : "no", + padding_width, + (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right", + quote_n (1, format_str_prefix ? format_str_prefix : ""), + quote_n (2, format_str_suffix ? format_str_suffix : "")); +} + +/* Parse a numeric value (with optional suffix) from a string. + Returns a long double value, with input precision. + + If there's an error converting the string to value - exits with + an error. + + If there are any trailing characters after the number + (besides a valid suffix) - exits with an error. */ +static enum simple_strtod_error +parse_human_number (char const *str, long double /*output */ *value, + size_t *precision) +{ + char *ptr = nullptr; + + enum simple_strtod_error e = + simple_strtod_human (str, &ptr, value, precision, scale_from); + if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS) + { + simple_strtod_fatal (e, str); + return e; + } + + if (ptr && *ptr != '\0') + { + if (inval_style != inval_ignore) + error (conv_exit_code, 0, _("invalid suffix in input %s: %s"), + quote_n (0, str), quote_n (1, ptr)); + e = SSE_INVALID_SUFFIX; + } + return e; +} + + +/* Print the given VAL, using the requested representation. + The number is printed to STDOUT, with padding and alignment. */ +static int +prepare_padded_number (const long double val, size_t precision) +{ + /* Generate Output. */ + char buf[128]; + + size_t precision_used = user_precision == -1 ? precision : user_precision; + + /* Can't reliably print too-large values without auto-scaling. */ + int x; + expld (val, 10, &x); + + if (scale_to == scale_none + && x + precision_used > MAX_UNSCALED_DIGITS) + { + if (inval_style != inval_ignore) + { + if (precision_used) + error (conv_exit_code, 0, + _("value/precision too large to be printed: '%Lg/%"PRIuMAX"'" + " (consider using --to)"), val, (uintmax_t)precision_used); + else + error (conv_exit_code, 0, + _("value too large to be printed: '%Lg'" + " (consider using --to)"), val); + } + return 0; + } + + if (x > MAX_ACCEPTABLE_DIGITS - 1) + { + if (inval_style != inval_ignore) + error (conv_exit_code, 0, _("value too large to be printed: '%Lg'" + " (cannot handle values > 999Q)"), val); + return 0; + } + + double_to_human (val, precision_used, buf, sizeof (buf), + scale_to, grouping, round_style); + if (suffix) + strncat (buf, suffix, sizeof (buf) - strlen (buf) -1); + + devmsg ("formatting output:\n value: %Lf\n humanized: %s\n", + val, quote (buf)); + + if (padding_width && strlen (buf) < padding_width) + { + size_t w = padding_width; + mbsalign (buf, padding_buffer, padding_buffer_size, &w, + padding_alignment, MBA_UNIBYTE_ONLY); + + devmsg (" After padding: %s\n", quote (padding_buffer)); + } + else + { + setup_padding_buffer (strlen (buf) + 1); + strcpy (padding_buffer, buf); + } + + return 1; +} + +static void +print_padded_number (void) +{ + if (format_str_prefix) + fputs (format_str_prefix, stdout); + + fputs (padding_buffer, stdout); + + if (format_str_suffix) + fputs (format_str_suffix, stdout); +} + +/* Converts the TEXT number string to the requested representation, + and handles automatic suffix addition. */ +static int +process_suffixed_number (char *text, long double *result, + size_t *precision, long int field) +{ + if (suffix && strlen (text) > strlen (suffix)) + { + char *possible_suffix = text + strlen (text) - strlen (suffix); + + if (STREQ (suffix, possible_suffix)) + { + /* trim suffix, ONLY if it's at the end of the text. */ + *possible_suffix = '\0'; + devmsg ("trimming suffix %s\n", quote (suffix)); + } + else + devmsg ("no valid suffix found\n"); + } + + /* Skip white space - always. */ + char *p = text; + while (*p && isblank (to_uchar (*p))) + ++p; + + /* setup auto-padding. */ + if (auto_padding) + { + if (text < p || field > 1) + { + padding_width = strlen (text); + setup_padding_buffer (padding_width); + } + else + { + padding_width = 0; + } + devmsg ("setting Auto-Padding to %ld characters\n", padding_width); + } + + long double val = 0; + enum simple_strtod_error e = parse_human_number (p, &val, precision); + if (e == SSE_OK_PRECISION_LOSS && debug) + error (0, 0, _("large input value %s: possible precision loss"), + quote (p)); + + if (from_unit_size != 1 || to_unit_size != 1) + val = (val * from_unit_size) / to_unit_size; + + *result = val; + + return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS); +} + +/* Return a pointer to the beginning of the next field in line. + The line pointer is moved to the end of the next field. */ +static char* +next_field (char **line) +{ + char *field_start = *line; + char *field_end = field_start; + + if (delimiter != DELIMITER_DEFAULT) + { + if (*field_start != delimiter) + { + while (*field_end && *field_end != delimiter) + ++field_end; + } + /* else empty field */ + } + else + { + /* keep any space prefix in the returned field */ + while (*field_end && field_sep (*field_end)) + ++field_end; + + while (*field_end && ! field_sep (*field_end)) + ++field_end; + } + + *line = field_end; + return field_start; +} + +ATTRIBUTE_PURE +static bool +include_field (uintmax_t field) +{ + struct field_range_pair *p = frp; + if (!p) + return field == 1; + + while (p->lo != UINTMAX_MAX) + { + if (p->lo <= field && p->hi >= field) + return true; + ++p; + } + return false; +} + +/* Convert and output the given field. If it is not included in the set + of fields to process just output the original */ +static bool +process_field (char *text, uintmax_t field) +{ + long double val = 0; + size_t precision = 0; + bool valid_number = true; + + if (include_field (field)) + { + valid_number = + process_suffixed_number (text, &val, &precision, field); + + if (valid_number) + valid_number = prepare_padded_number (val, precision); + + if (valid_number) + print_padded_number (); + else + fputs (text, stdout); + } + else + fputs (text, stdout); + + return valid_number; +} + +/* Convert number in a given line of text. + NEWLINE specifies whether to output a '\n' for this "line". */ +static int +process_line (char *line, bool newline) +{ + char *next; + uintmax_t field = 0; + bool valid_number = true; + + while (true) { + ++field; + next = next_field (&line); + + if (*line != '\0') + { + /* nul terminate the current field string and process */ + *line = '\0'; + + if (! process_field (next, field)) + valid_number = false; + + fputc ((delimiter == DELIMITER_DEFAULT) ? + ' ' : delimiter, stdout); + ++line; + } + else + { + /* end of the line, process the last field and finish */ + if (! process_field (next, field)) + valid_number = false; + + break; + } + } + + if (newline) + putchar (line_delim); + + return valid_number; +} + +int +main (int argc, char **argv) +{ + int valid_numbers = 1; + bool locale_ok; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + locale_ok = !!setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + +#if HAVE_FPSETPREC + /* Enabled extended precision if needed. */ + fpsetprec (FP_PE); +#endif + + decimal_point = nl_langinfo (RADIXCHAR); + if (decimal_point == nullptr || strlen (decimal_point) == 0) + decimal_point = "."; + decimal_point_length = strlen (decimal_point); + + atexit (close_stdout); + + while (true) + { + int c = getopt_long (argc, argv, "d:z", longopts, nullptr); + + if (c == -1) + break; + + switch (c) + { + case FROM_OPTION: + scale_from = XARGMATCH ("--from", optarg, + scale_from_args, scale_from_types); + break; + + case FROM_UNIT_OPTION: + from_unit_size = unit_to_umax (optarg); + break; + + case TO_OPTION: + scale_to = + XARGMATCH ("--to", optarg, scale_to_args, scale_to_types); + break; + + case TO_UNIT_OPTION: + to_unit_size = unit_to_umax (optarg); + break; + + case ROUND_OPTION: + round_style = XARGMATCH ("--round", optarg, round_args, round_types); + break; + + case GROUPING_OPTION: + grouping = 1; + break; + + case PADDING_OPTION: + if (xstrtol (optarg, nullptr, 10, &padding_width, "") != LONGINT_OK + || padding_width == 0 || padding_width < -LONG_MAX) + error (EXIT_FAILURE, 0, _("invalid padding value %s"), + quote (optarg)); + if (padding_width < 0) + { + padding_alignment = MBS_ALIGN_LEFT; + padding_width = -padding_width; + } + /* TODO: We probably want to apply a specific --padding + to --header lines too. */ + break; + + case FIELD_OPTION: + if (n_frp) + error (EXIT_FAILURE, 0, _("multiple field specifications")); + set_fields (optarg, SETFLD_ALLOW_DASH); + break; + + case 'd': + /* Interpret -d '' to mean 'use the NUL byte as the delimiter.' */ + if (optarg[0] != '\0' && optarg[1] != '\0') + error (EXIT_FAILURE, 0, + _("the delimiter must be a single character")); + delimiter = optarg[0]; + break; + + case 'z': + line_delim = '\0'; + break; + + case SUFFIX_OPTION: + suffix = optarg; + break; + + case DEBUG_OPTION: + debug = true; + break; + + case DEV_DEBUG_OPTION: + dev_debug = true; + debug = true; + break; + + case HEADER_OPTION: + if (optarg) + { + if (xstrtoumax (optarg, nullptr, 10, &header, "") != LONGINT_OK + || header == 0) + error (EXIT_FAILURE, 0, _("invalid header value %s"), + quote (optarg)); + } + else + { + header = 1; + } + break; + + case FORMAT_OPTION: + format_str = optarg; + break; + + case INVALID_OPTION: + inval_style = XARGMATCH ("--invalid", optarg, + inval_args, inval_types); + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (format_str != nullptr && grouping) + error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format")); + + if (debug && ! locale_ok) + error (0, 0, _("failed to set locale")); + + /* Warn about no-op. */ + if (debug && scale_from == scale_none && scale_to == scale_none + && !grouping && (padding_width == 0) && (format_str == nullptr)) + error (0, 0, _("no conversion option specified")); + + if (format_str) + parse_format_string (format_str); + + if (grouping) + { + if (scale_to != scale_none) + error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to")); + if (debug && (strlen (nl_langinfo (THOUSEP)) == 0)) + error (0, 0, _("grouping has no effect in this locale")); + } + + + setup_padding_buffer (padding_width); + auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT); + + if (inval_style != inval_abort) + conv_exit_code = 0; + + if (argc > optind) + { + if (debug && header) + error (0, 0, _("--header ignored with command-line input")); + + for (; optind < argc; optind++) + valid_numbers &= process_line (argv[optind], true); + } + else + { + char *line = nullptr; + size_t line_allocated = 0; + ssize_t len; + + while (header-- && getdelim (&line, &line_allocated, + line_delim, stdin) > 0) + fputs (line, stdout); + + while ((len = getdelim (&line, &line_allocated, + line_delim, stdin)) > 0) + { + bool newline = line[len - 1] == line_delim; + if (newline) + line[len - 1] = '\0'; + valid_numbers &= process_line (line, newline); + } + + if (ferror (stdin)) + error (EXIT_FAILURE, errno, _("error reading input")); + } + + if (debug && !valid_numbers) + error (0, 0, _("failed to convert some of the input numbers")); + + int exit_status = EXIT_SUCCESS; + if (!valid_numbers + && inval_style != inval_warn && inval_style != inval_ignore) + exit_status = EXIT_CONVERSION_WARNINGS; + + main_exit (exit_status); +} diff --git a/src/od.c b/src/od.c new file mode 100644 index 0000000..3ad2565 --- /dev/null +++ b/src/od.c @@ -0,0 +1,1980 @@ +/* od -- dump files in octal and other formats + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering. */ + +#include + +#include +#include +#include +#include +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "ftoastr.h" +#include "quote.h" +#include "stat-size.h" +#include "xbinary-io.h" +#include "xprintf.h" +#include "xstrtol.h" +#include "xstrtol-error.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "od" + +#define AUTHORS proper_name ("Jim Meyering") + +/* The default number of input bytes per output line. */ +#define DEFAULT_BYTES_PER_BLOCK 16 + +#if HAVE_UNSIGNED_LONG_LONG_INT +typedef unsigned long long int unsigned_long_long_int; +#else +/* This is just a place-holder to avoid a few '#if' directives. + In this case, the type isn't actually used. */ +typedef unsigned long int unsigned_long_long_int; +#endif + +enum size_spec + { + NO_SIZE, + CHAR, + SHORT, + INT, + LONG, + LONG_LONG, + /* FIXME: add INTMAX support, too */ + FLOAT_SINGLE, + FLOAT_DOUBLE, + FLOAT_LONG_DOUBLE, + N_SIZE_SPECS + }; + +enum output_format + { + SIGNED_DECIMAL, + UNSIGNED_DECIMAL, + OCTAL, + HEXADECIMAL, + FLOATING_POINT, + NAMED_CHARACTER, + CHARACTER + }; + +#define MAX_INTEGRAL_TYPE_SIZE sizeof (unsigned_long_long_int) + +/* The maximum number of bytes needed for a format string, including + the trailing nul. Each format string expects a variable amount of + padding (guaranteed to be at least 1 plus the field width), then an + element that will be formatted in the field. */ +enum + { + FMT_BYTES_ALLOCATED = + (sizeof "%*.99" + 1 + + MAX (sizeof "ld", + MAX (sizeof PRIdMAX, + MAX (sizeof PRIoMAX, + MAX (sizeof PRIuMAX, + sizeof PRIxMAX))))) + }; + +/* Ensure that our choice for FMT_BYTES_ALLOCATED is reasonable. */ +static_assert (MAX_INTEGRAL_TYPE_SIZE * CHAR_BIT / 3 <= 99); + +/* Each output format specification (from '-t spec' or from + old-style options) is represented by one of these structures. */ +struct tspec + { + enum output_format fmt; + enum size_spec size; /* Type of input object. */ + /* FIELDS is the number of fields per line, BLANK is the number of + fields to leave blank. WIDTH is width of one field, excluding + leading space, and PAD is total pad to divide among FIELDS. + PAD is at least as large as FIELDS. */ + void (*print_function) (size_t fields, size_t blank, void const *data, + char const *fmt, int width, int pad); + char fmt_string[FMT_BYTES_ALLOCATED]; /* Of the style "%*d". */ + bool hexl_mode_trailer; + int field_width; /* Minimum width of a field, excluding leading space. */ + int pad_width; /* Total padding to be divided among fields. */ + }; + +/* Convert the number of 8-bit bytes of a binary representation to + the number of characters (digits + sign if the type is signed) + required to represent the same quantity in the specified base/type. + For example, a 32-bit (4-byte) quantity may require a field width + as wide as the following for these types: + 11 unsigned octal + 11 signed decimal + 10 unsigned decimal + 8 unsigned hexadecimal */ + +static char const bytes_to_oct_digits[] = +{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43}; + +static char const bytes_to_signed_dec_digits[] = +{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40}; + +static char const bytes_to_unsigned_dec_digits[] = +{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39}; + +static char const bytes_to_hex_digits[] = +{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + +/* It'll be a while before we see integral types wider than 16 bytes, + but if/when it happens, this check will catch it. Without this check, + a wider type would provoke a buffer overrun. */ +static_assert (MAX_INTEGRAL_TYPE_SIZE + < ARRAY_CARDINALITY (bytes_to_hex_digits)); + +/* Make sure the other arrays have the same length. */ +static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_signed_dec_digits); +static_assert (sizeof bytes_to_oct_digits + == sizeof bytes_to_unsigned_dec_digits); +static_assert (sizeof bytes_to_oct_digits == sizeof bytes_to_hex_digits); + +/* Convert enum size_spec to the size of the named type. */ +static const int width_bytes[] = +{ + -1, + sizeof (char), + sizeof (short int), + sizeof (int), + sizeof (long int), + sizeof (unsigned_long_long_int), + sizeof (float), + sizeof (double), + sizeof (long double) +}; + +/* Ensure that for each member of 'enum size_spec' there is an + initializer in the width_bytes array. */ +static_assert (ARRAY_CARDINALITY (width_bytes) == N_SIZE_SPECS); + +/* Names for some non-printing characters. */ +static char const charname[33][4] = +{ + "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + "bs", "ht", "nl", "vt", "ff", "cr", "so", "si", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", "em", "sub", "esc", "fs", "gs", "rs", "us", + "sp" +}; + +/* Address base (8, 10 or 16). */ +static int address_base; + +/* The number of octal digits required to represent the largest + address value. */ +#define MAX_ADDRESS_LENGTH \ + ((sizeof (uintmax_t) * CHAR_BIT + CHAR_BIT - 1) / 3) + +/* Width of a normal address. */ +static int address_pad_len; + +/* Minimum length when detecting --strings. */ +static size_t string_min; + +/* True when in --strings mode. */ +static bool flag_dump_strings; + +/* True if we should recognize the older non-option arguments + that specified at most one file and optional arguments specifying + offset and pseudo-start address. */ +static bool traditional; + +/* True if an old-style 'pseudo-address' was specified. */ +static bool flag_pseudo_start; + +/* The difference between the old-style pseudo starting address and + the number of bytes to skip. */ +static uintmax_t pseudo_offset; + +/* Function that accepts an address and an optional following char, + and prints the address and char to stdout. */ +static void (*format_address) (uintmax_t, char); + +/* The number of input bytes to skip before formatting and writing. */ +static uintmax_t n_bytes_to_skip = 0; + +/* When false, MAX_BYTES_TO_FORMAT and END_OFFSET are ignored, and all + input is formatted. */ +static bool limit_bytes_to_format = false; + +/* The maximum number of bytes that will be formatted. */ +static uintmax_t max_bytes_to_format; + +/* The offset of the first byte after the last byte to be formatted. */ +static uintmax_t end_offset; + +/* When true and two or more consecutive blocks are equal, format + only the first block and output an asterisk alone on the following + line to indicate that identical blocks have been elided. */ +static bool abbreviate_duplicate_blocks = true; + +/* An array of specs describing how to format each input block. */ +static struct tspec *spec; + +/* The number of format specs. */ +static size_t n_specs; + +/* The allocated length of SPEC. */ +static size_t n_specs_allocated; + +/* The number of input bytes formatted per output line. It must be + a multiple of the least common multiple of the sizes associated with + the specified output types. It should be as large as possible, but + no larger than 16 -- unless specified with the -w option. */ +static size_t bytes_per_block; + +/* Human-readable representation of *file_list (for error messages). + It differs from file_list[-1] only when file_list[-1] is "-". */ +static char const *input_filename; + +/* A null-terminated list of the file-arguments from the command line. */ +static char const *const *file_list; + +/* Initializer for file_list if no file-arguments + were specified on the command line. */ +static char const *const default_file_list[] = {"-", nullptr}; + +/* The input stream associated with the current file. */ +static FILE *in_stream; + +/* If true, at least one of the files we read was standard input. */ +static bool have_read_stdin; + +/* Map the size in bytes to a type identifier. */ +static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1]; + +#define MAX_FP_TYPE_SIZE sizeof (long double) +static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1]; + +#ifndef WORDS_BIGENDIAN +# define WORDS_BIGENDIAN 0 +#endif + +/* Use native endianness by default. */ +static bool input_swap; + +static char const short_options[] = "A:aBbcDdeFfHhIij:LlN:OoS:st:vw::Xx"; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + TRADITIONAL_OPTION = CHAR_MAX + 1, + ENDIAN_OPTION, +}; + +enum endian_type +{ + endian_little, + endian_big +}; + +static char const *const endian_args[] = +{ + "little", "big", nullptr +}; + +static enum endian_type const endian_types[] = +{ + endian_little, endian_big +}; + +static struct option const long_options[] = +{ + {"skip-bytes", required_argument, nullptr, 'j'}, + {"address-radix", required_argument, nullptr, 'A'}, + {"read-bytes", required_argument, nullptr, 'N'}, + {"format", required_argument, nullptr, 't'}, + {"output-duplicates", no_argument, nullptr, 'v'}, + {"strings", optional_argument, nullptr, 'S'}, + {"traditional", no_argument, nullptr, TRADITIONAL_OPTION}, + {"width", optional_argument, nullptr, 'w'}, + {"endian", required_argument, nullptr, ENDIAN_OPTION }, + + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ + or: %s [-abcdfilosx]... [FILE] [[+]OFFSET[.][b]]\n\ + or: %s --traditional [OPTION]... [FILE] [[+]OFFSET[.][b] [+][LABEL][.][b]]\n\ +"), + program_name, program_name, program_name); + fputs (_("\n\ +Write an unambiguous representation, octal bytes by default,\n\ +of FILE to standard output. With more than one FILE argument,\n\ +concatenate them in the listed order to form the input.\n\ +"), stdout); + + emit_stdin_note (); + + fputs (_("\ +\n\ +If first and second call formats both apply, the second format is assumed\n\ +if the last operand begins with + or (if there are 2 operands) a digit.\n\ +An OFFSET operand means -j OFFSET. LABEL is the pseudo-address\n\ +at first byte printed, incremented when dump is progressing.\n\ +For OFFSET and LABEL, a 0x or 0X prefix indicates hexadecimal;\n\ +suffixes may be . for octal and b for multiply by 512.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -A, --address-radix=RADIX output format for file offsets; RADIX is one\n\ + of [doxn], for Decimal, Octal, Hex or None\n\ + --endian={big|little} swap input bytes according the specified order\n\ + -j, --skip-bytes=BYTES skip BYTES input bytes first\n\ +"), stdout); + fputs (_("\ + -N, --read-bytes=BYTES limit dump to BYTES input bytes\n\ + -S BYTES, --strings[=BYTES] show only NUL terminated strings\n\ + of at least BYTES (3) printable characters\n\ + -t, --format=TYPE select output format or formats\n\ + -v, --output-duplicates do not use * to mark line suppression\n\ + -w[BYTES], --width[=BYTES] output BYTES bytes per output line;\n\ + 32 is implied when BYTES is not specified\n\ + --traditional accept arguments in third form above\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +\n\ +Traditional format specifications may be intermixed; they accumulate:\n\ + -a same as -t a, select named characters, ignoring high-order bit\n\ + -b same as -t o1, select octal bytes\n\ + -c same as -t c, select printable characters or backslash escapes\n\ + -d same as -t u2, select unsigned decimal 2-byte units\n\ +"), stdout); + fputs (_("\ + -f same as -t fF, select floats\n\ + -i same as -t dI, select decimal ints\n\ + -l same as -t dL, select decimal longs\n\ + -o same as -t o2, select octal 2-byte units\n\ + -s same as -t d2, select decimal 2-byte units\n\ + -x same as -t x2, select hexadecimal 2-byte units\n\ +"), stdout); + fputs (_("\ +\n\ +\n\ +TYPE is made up of one or more of these specifications:\n\ + a named character, ignoring high-order bit\n\ + c printable character or backslash escape\n\ +"), stdout); + fputs (_("\ + d[SIZE] signed decimal, SIZE bytes per integer\n\ + f[SIZE] floating point, SIZE bytes per float\n\ + o[SIZE] octal, SIZE bytes per integer\n\ + u[SIZE] unsigned decimal, SIZE bytes per integer\n\ + x[SIZE] hexadecimal, SIZE bytes per integer\n\ +"), stdout); + fputs (_("\ +\n\ +SIZE is a number. For TYPE in [doux], SIZE may also be C for\n\ +sizeof(char), S for sizeof(short), I for sizeof(int) or L for\n\ +sizeof(long). If TYPE is f, SIZE may also be F for sizeof(float), D\n\ +for sizeof(double) or L for sizeof(long double).\n\ +"), stdout); + fputs (_("\ +\n\ +Adding a z suffix to any type displays printable characters at the end of\n\ +each output line.\n\ +"), stdout); + fputs (_("\ +\n\ +\n\ +BYTES is hex with 0x or 0X prefix, and may have a multiplier suffix:\n\ + b 512\n\ + KB 1000\n\ + K 1024\n\ + MB 1000*1000\n\ + M 1024*1024\n\ +and so on for G, T, P, E, Z, Y, R, Q.\n\ +Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Define the print functions. */ + +#define PRINT_FIELDS(N, T, FMT_STRING_DECL, ACTION) \ +static void \ +N (size_t fields, size_t blank, void const *block, \ + FMT_STRING_DECL, int width, int pad) \ +{ \ + T const *p = block; \ + uintmax_t i; \ + int pad_remaining = pad; \ + for (i = fields; blank < i; i--) \ + { \ + int next_pad = pad * (i - 1) / fields; \ + int adjusted_width = pad_remaining - next_pad + width; \ + T x; \ + if (input_swap && sizeof (T) > 1) \ + { \ + size_t j; \ + union { \ + T x; \ + char b[sizeof (T)]; \ + } u; \ + for (j = 0; j < sizeof (T); j++) \ + u.b[j] = ((char const *) p)[sizeof (T) - 1 - j]; \ + x = u.x; \ + } \ + else \ + x = *p; \ + p++; \ + ACTION; \ + pad_remaining = next_pad; \ + } \ +} + +#define PRINT_TYPE(N, T) \ + PRINT_FIELDS (N, T, char const *fmt_string, \ + xprintf (fmt_string, adjusted_width, x)) + +#define PRINT_FLOATTYPE(N, T, FTOASTR, BUFSIZE) \ + PRINT_FIELDS (N, T, MAYBE_UNUSED char const *fmt_string, \ + char buf[BUFSIZE]; \ + FTOASTR (buf, sizeof buf, 0, 0, x); \ + xprintf ("%*s", adjusted_width, buf)) + +PRINT_TYPE (print_s_char, signed char) +PRINT_TYPE (print_char, unsigned char) +PRINT_TYPE (print_s_short, short int) +PRINT_TYPE (print_short, unsigned short int) +PRINT_TYPE (print_int, unsigned int) +PRINT_TYPE (print_long, unsigned long int) +PRINT_TYPE (print_long_long, unsigned_long_long_int) + +PRINT_FLOATTYPE (print_float, float, ftoastr, FLT_BUFSIZE_BOUND) +PRINT_FLOATTYPE (print_double, double, dtoastr, DBL_BUFSIZE_BOUND) +PRINT_FLOATTYPE (print_long_double, long double, ldtoastr, LDBL_BUFSIZE_BOUND) + +#undef PRINT_TYPE +#undef PRINT_FLOATTYPE + +static void +dump_hexl_mode_trailer (size_t n_bytes, char const *block) +{ + fputs (" >", stdout); + for (size_t i = n_bytes; i > 0; i--) + { + unsigned char c = *block++; + unsigned char c2 = (isprint (c) ? c : '.'); + putchar (c2); + } + putchar ('<'); +} + +static void +print_named_ascii (size_t fields, size_t blank, void const *block, + MAYBE_UNUSED char const *unused_fmt_string, + int width, int pad) +{ + unsigned char const *p = block; + uintmax_t i; + int pad_remaining = pad; + for (i = fields; blank < i; i--) + { + int next_pad = pad * (i - 1) / fields; + int masked_c = *p++ & 0x7f; + char const *s; + char buf[2]; + + if (masked_c == 127) + s = "del"; + else if (masked_c <= 040) + s = charname[masked_c]; + else + { + buf[0] = masked_c; + buf[1] = 0; + s = buf; + } + + xprintf ("%*s", pad_remaining - next_pad + width, s); + pad_remaining = next_pad; + } +} + +static void +print_ascii (size_t fields, size_t blank, void const *block, + MAYBE_UNUSED char const *unused_fmt_string, int width, + int pad) +{ + unsigned char const *p = block; + uintmax_t i; + int pad_remaining = pad; + for (i = fields; blank < i; i--) + { + int next_pad = pad * (i - 1) / fields; + unsigned char c = *p++; + char const *s; + char buf[4]; + + switch (c) + { + case '\0': + s = "\\0"; + break; + + case '\a': + s = "\\a"; + break; + + case '\b': + s = "\\b"; + break; + + case '\f': + s = "\\f"; + break; + + case '\n': + s = "\\n"; + break; + + case '\r': + s = "\\r"; + break; + + case '\t': + s = "\\t"; + break; + + case '\v': + s = "\\v"; + break; + + default: + sprintf (buf, (isprint (c) ? "%c" : "%03o"), c); + s = buf; + } + + xprintf ("%*s", pad_remaining - next_pad + width, s); + pad_remaining = next_pad; + } +} + +/* Convert a null-terminated (possibly zero-length) string S to an + int value. If S points to a non-digit set *P to S, + *VAL to 0, and return true. Otherwise, accumulate the integer value of + the string of digits. If the string of digits represents a value + larger than INT_MAX, don't modify *VAL or *P and return false. + Otherwise, advance *P to the first non-digit after S, set *VAL to + the result of the conversion and return true. */ + +static bool +simple_strtoi (char const *s, char const **p, int *val) +{ + int sum; + + for (sum = 0; ISDIGIT (*s); s++) + if (ckd_mul (&sum, sum, 10) || ckd_add (&sum, sum, *s - '0')) + return false; + *p = s; + *val = sum; + return true; +} + +/* If S points to a single valid modern od format string, put + a description of that format in *TSPEC, make *NEXT point at the + character following the just-decoded format (if *NEXT is non-null), + and return true. If S is not valid, don't modify *NEXT or *TSPEC, + give a diagnostic, and return false. For example, if S were + "d4afL" *NEXT would be set to "afL" and *TSPEC would be + { + fmt = SIGNED_DECIMAL; + size = INT or LONG; (whichever integral_type_size[4] resolves to) + print_function = print_int; (assuming size == INT) + field_width = 11; + fmt_string = "%*d"; + } + pad_width is determined later, but is at least as large as the + number of fields printed per row. + S_ORIG is solely for reporting errors. It should be the full format + string argument. + */ + +static bool ATTRIBUTE_NONNULL () +decode_one_format (char const *s_orig, char const *s, char const **next, + struct tspec *tspec) +{ + enum size_spec size_spec; + int size; + enum output_format fmt; + void (*print_function) (size_t, size_t, void const *, char const *, + int, int); + char const *p; + char c; + int field_width; + + switch (*s) + { + case 'd': + case 'o': + case 'u': + case 'x': + c = *s; + ++s; + switch (*s) + { + case 'C': + ++s; + size = sizeof (char); + break; + + case 'S': + ++s; + size = sizeof (short int); + break; + + case 'I': + ++s; + size = sizeof (int); + break; + + case 'L': + ++s; + size = sizeof (long int); + break; + + default: + if (! simple_strtoi (s, &p, &size)) + { + /* The integer at P in S would overflow an int. + A digit string that long is sufficiently odd looking + that the following diagnostic is sufficient. */ + error (0, 0, _("invalid type string %s"), quote (s_orig)); + return false; + } + if (p == s) + size = sizeof (int); + else + { + if (MAX_INTEGRAL_TYPE_SIZE < size + || integral_type_size[size] == NO_SIZE) + { + error (0, 0, _("invalid type string %s;\nthis system" + " doesn't provide a %d-byte integral type"), + quote (s_orig), size); + return false; + } + s = p; + } + break; + } + +#define ISPEC_TO_FORMAT(Spec, Min_format, Long_format, Max_format) \ + ((Spec) == LONG_LONG ? (Max_format) \ + : ((Spec) == LONG ? (Long_format) \ + : (Min_format))) \ + + size_spec = integral_type_size[size]; + + switch (c) + { + case 'd': + fmt = SIGNED_DECIMAL; + field_width = bytes_to_signed_dec_digits[size]; + sprintf (tspec->fmt_string, "%%*%s", + ISPEC_TO_FORMAT (size_spec, "d", "ld", PRIdMAX)); + break; + + case 'o': + fmt = OCTAL; + sprintf (tspec->fmt_string, "%%*.%d%s", + (field_width = bytes_to_oct_digits[size]), + ISPEC_TO_FORMAT (size_spec, "o", "lo", PRIoMAX)); + break; + + case 'u': + fmt = UNSIGNED_DECIMAL; + field_width = bytes_to_unsigned_dec_digits[size]; + sprintf (tspec->fmt_string, "%%*%s", + ISPEC_TO_FORMAT (size_spec, "u", "lu", PRIuMAX)); + break; + + case 'x': + fmt = HEXADECIMAL; + sprintf (tspec->fmt_string, "%%*.%d%s", + (field_width = bytes_to_hex_digits[size]), + ISPEC_TO_FORMAT (size_spec, "x", "lx", PRIxMAX)); + break; + + default: + unreachable (); + } + + switch (size_spec) + { + case CHAR: + print_function = (fmt == SIGNED_DECIMAL + ? print_s_char + : print_char); + break; + + case SHORT: + print_function = (fmt == SIGNED_DECIMAL + ? print_s_short + : print_short); + break; + + case INT: + print_function = print_int; + break; + + case LONG: + print_function = print_long; + break; + + case LONG_LONG: + print_function = print_long_long; + break; + + default: + affirm (false); + } + break; + + case 'f': + fmt = FLOATING_POINT; + ++s; + switch (*s) + { + case 'F': + ++s; + size = sizeof (float); + break; + + case 'D': + ++s; + size = sizeof (double); + break; + + case 'L': + ++s; + size = sizeof (long double); + break; + + default: + if (! simple_strtoi (s, &p, &size)) + { + /* The integer at P in S would overflow an int. + A digit string that long is sufficiently odd looking + that the following diagnostic is sufficient. */ + error (0, 0, _("invalid type string %s"), quote (s_orig)); + return false; + } + if (p == s) + size = sizeof (double); + else + { + if (size > MAX_FP_TYPE_SIZE + || fp_type_size[size] == NO_SIZE) + { + error (0, 0, + _("invalid type string %s;\n" + "this system doesn't provide a %d-byte" + " floating point type"), + quote (s_orig), size); + return false; + } + s = p; + } + break; + } + size_spec = fp_type_size[size]; + + { + struct lconv const *locale = localeconv (); + size_t decimal_point_len = + (locale->decimal_point[0] ? strlen (locale->decimal_point) : 1); + + switch (size_spec) + { + case FLOAT_SINGLE: + print_function = print_float; + field_width = FLT_STRLEN_BOUND_L (decimal_point_len); + break; + + case FLOAT_DOUBLE: + print_function = print_double; + field_width = DBL_STRLEN_BOUND_L (decimal_point_len); + break; + + case FLOAT_LONG_DOUBLE: + print_function = print_long_double; + field_width = LDBL_STRLEN_BOUND_L (decimal_point_len); + break; + + default: + affirm (false); + } + + break; + } + + case 'a': + ++s; + fmt = NAMED_CHARACTER; + size_spec = CHAR; + print_function = print_named_ascii; + field_width = 3; + break; + + case 'c': + ++s; + fmt = CHARACTER; + size_spec = CHAR; + print_function = print_ascii; + field_width = 3; + break; + + default: + error (0, 0, _("invalid character '%c' in type string %s"), + *s, quote (s_orig)); + return false; + } + + tspec->size = size_spec; + tspec->fmt = fmt; + tspec->print_function = print_function; + + tspec->field_width = field_width; + tspec->hexl_mode_trailer = (*s == 'z'); + if (tspec->hexl_mode_trailer) + s++; + + *next = s; + return true; +} + +/* Given a list of one or more input filenames FILE_LIST, set the global + file pointer IN_STREAM and the global string INPUT_FILENAME to the + first one that can be successfully opened. Modify FILE_LIST to + reference the next filename in the list. A file name of "-" is + interpreted as standard input. If any file open fails, give an error + message and return false. */ + +static bool +open_next_file (void) +{ + bool ok = true; + + do + { + input_filename = *file_list; + if (input_filename == nullptr) + return ok; + ++file_list; + + if (STREQ (input_filename, "-")) + { + input_filename = _("standard input"); + in_stream = stdin; + have_read_stdin = true; + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + else + { + in_stream = fopen (input_filename, (O_BINARY ? "rb" : "r")); + if (in_stream == nullptr) + { + error (0, errno, "%s", quotef (input_filename)); + ok = false; + } + } + } + while (in_stream == nullptr); + + if (limit_bytes_to_format && !flag_dump_strings) + setvbuf (in_stream, nullptr, _IONBF, 0); + + return ok; +} + +/* Test whether there have been errors on in_stream, and close it if + it is not standard input. Return false if there has been an error + on in_stream or stdout; return true otherwise. This function will + report more than one error only if both a read and a write error + have occurred. IN_ERRNO, if nonzero, is the error number + corresponding to the most recent action for IN_STREAM. */ + +static bool +check_and_close (int in_errno) +{ + bool ok = true; + + if (in_stream != nullptr) + { + if (!ferror (in_stream)) + in_errno = 0; + if (STREQ (file_list[-1], "-")) + clearerr (in_stream); + else if (fclose (in_stream) != 0 && !in_errno) + in_errno = errno; + if (in_errno) + { + error (0, in_errno, "%s", quotef (input_filename)); + ok = false; + } + + in_stream = nullptr; + } + + if (ferror (stdout)) + { + error (0, 0, _("write error")); + ok = false; + } + + return ok; +} + +/* Decode the modern od format string S. Append the decoded + representation to the global array SPEC, reallocating SPEC if + necessary. Return true if S is valid. */ + +static bool ATTRIBUTE_NONNULL () +decode_format_string (char const *s) +{ + char const *s_orig = s; + + while (*s != '\0') + { + char const *next; + + if (n_specs_allocated <= n_specs) + spec = X2NREALLOC (spec, &n_specs_allocated); + + if (! decode_one_format (s_orig, s, &next, &spec[n_specs])) + return false; + + affirm (s != next); + s = next; + ++n_specs; + } + + return true; +} + +/* Given a list of one or more input filenames FILE_LIST, set the global + file pointer IN_STREAM to position N_SKIP in the concatenation of + those files. If any file operation fails or if there are fewer than + N_SKIP bytes in the combined input, give an error message and return + false. When possible, use seek rather than read operations to + advance IN_STREAM. */ + +static bool +skip (uintmax_t n_skip) +{ + bool ok = true; + int in_errno = 0; + + if (n_skip == 0) + return true; + + while (in_stream != nullptr) /* EOF. */ + { + struct stat file_stats; + + /* First try seeking. For large offsets, this extra work is + worthwhile. If the offset is below some threshold it may be + more efficient to move the pointer by reading. There are two + issues when trying to seek: + - the file must be seekable. + - before seeking to the specified position, make sure + that the new position is in the current file. + Try to do that by getting file's size using fstat. + But that will work only for regular files. */ + + if (fstat (fileno (in_stream), &file_stats) == 0) + { + bool usable_size = usable_st_size (&file_stats); + + /* The st_size field is valid for regular files. + If the number of bytes left to skip is larger than + the size of the current file, we can decrement n_skip + and go on to the next file. Skip this optimization also + when st_size is no greater than the block size, because + some kernels report nonsense small file sizes for + proc-like file systems. */ + if (usable_size && ST_BLKSIZE (file_stats) < file_stats.st_size) + { + if ((uintmax_t) file_stats.st_size < n_skip) + n_skip -= file_stats.st_size; + else + { + if (fseeko (in_stream, n_skip, SEEK_CUR) != 0) + { + in_errno = errno; + ok = false; + } + n_skip = 0; + } + } + + else if (!usable_size && fseeko (in_stream, n_skip, SEEK_CUR) == 0) + n_skip = 0; + + /* If it's not a regular file with nonnegative size, + or if it's so small that it might be in a proc-like file system, + position the file pointer by reading. */ + + else + { + char buf[BUFSIZ]; + size_t n_bytes_read, n_bytes_to_read = BUFSIZ; + + while (0 < n_skip) + { + if (n_skip < n_bytes_to_read) + n_bytes_to_read = n_skip; + n_bytes_read = fread (buf, 1, n_bytes_to_read, in_stream); + n_skip -= n_bytes_read; + if (n_bytes_read != n_bytes_to_read) + { + if (ferror (in_stream)) + { + in_errno = errno; + ok = false; + n_skip = 0; + break; + } + if (feof (in_stream)) + break; + } + } + } + + if (n_skip == 0) + break; + } + + else /* cannot fstat() file */ + { + error (0, errno, "%s", quotef (input_filename)); + ok = false; + } + + ok &= check_and_close (in_errno); + + ok &= open_next_file (); + } + + if (n_skip != 0) + error (EXIT_FAILURE, 0, _("cannot skip past end of combined input")); + + return ok; +} + +static void +format_address_none (MAYBE_UNUSED uintmax_t address, + MAYBE_UNUSED char c) +{ +} + +static void +format_address_std (uintmax_t address, char c) +{ + char buf[MAX_ADDRESS_LENGTH + 2]; + char *p = buf + sizeof buf; + char const *pbound; + + *--p = '\0'; + *--p = c; + pbound = p - address_pad_len; + + /* Use a special case of the code for each base. This is measurably + faster than generic code. */ + switch (address_base) + { + case 8: + do + *--p = '0' + (address & 7); + while ((address >>= 3) != 0); + break; + + case 10: + do + *--p = '0' + (address % 10); + while ((address /= 10) != 0); + break; + + case 16: + do + *--p = "0123456789abcdef"[address & 15]; + while ((address >>= 4) != 0); + break; + } + + while (pbound < p) + *--p = '0'; + + fputs (p, stdout); +} + +static void +format_address_paren (uintmax_t address, char c) +{ + putchar ('('); + format_address_std (address, ')'); + if (c) + putchar (c); +} + +static void +format_address_label (uintmax_t address, char c) +{ + format_address_std (address, ' '); + format_address_paren (address + pseudo_offset, c); +} + +/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each + of the N_SPEC format specs. CURRENT_OFFSET is the byte address of + CURR_BLOCK in the concatenation of input files, and it is printed + (optionally) only before the output line associated with the first + format spec. When duplicate blocks are being abbreviated, the output + for a sequence of identical input blocks is the output for the first + block followed by an asterisk alone on a line. It is valid to compare + the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK. + That condition may be false only for the last input block. */ + +static void +write_block (uintmax_t current_offset, size_t n_bytes, + char const *prev_block, char const *curr_block) +{ + static bool first = true; + static bool prev_pair_equal = false; + +#define EQUAL_BLOCKS(b1, b2) (memcmp (b1, b2, bytes_per_block) == 0) + + if (abbreviate_duplicate_blocks + && !first && n_bytes == bytes_per_block + && EQUAL_BLOCKS (prev_block, curr_block)) + { + if (prev_pair_equal) + { + /* The two preceding blocks were equal, and the current + block is the same as the last one, so print nothing. */ + } + else + { + printf ("*\n"); + prev_pair_equal = true; + } + } + else + { + prev_pair_equal = false; + for (size_t i = 0; i < n_specs; i++) + { + int datum_width = width_bytes[spec[i].size]; + int fields_per_block = bytes_per_block / datum_width; + int blank_fields = (bytes_per_block - n_bytes) / datum_width; + if (i == 0) + format_address (current_offset, '\0'); + else + printf ("%*s", address_pad_len, ""); + (*spec[i].print_function) (fields_per_block, blank_fields, + curr_block, spec[i].fmt_string, + spec[i].field_width, spec[i].pad_width); + if (spec[i].hexl_mode_trailer) + { + /* space-pad out to full line width, then dump the trailer */ + int field_width = spec[i].field_width; + int pad_width = (spec[i].pad_width * blank_fields + / fields_per_block); + printf ("%*s", blank_fields * field_width + pad_width, ""); + dump_hexl_mode_trailer (n_bytes, curr_block); + } + putchar ('\n'); + } + } + first = false; +} + +/* Read a single byte into *C from the concatenation of the input files + named in the global array FILE_LIST. On the first call to this + function, the global variable IN_STREAM is expected to be an open + stream associated with the input file INPUT_FILENAME. If IN_STREAM + is at end-of-file, close it and update the global variables IN_STREAM + and INPUT_FILENAME so they correspond to the next file in the list. + Then try to read a byte from the newly opened file. Repeat if + necessary until EOF is reached for the last file in FILE_LIST, then + set *C to EOF and return. Subsequent calls do likewise. Return + true if successful. */ + +static bool +read_char (int *c) +{ + bool ok = true; + + *c = EOF; + + while (in_stream != nullptr) /* EOF. */ + { + *c = fgetc (in_stream); + + if (*c != EOF) + break; + + ok &= check_and_close (errno); + + ok &= open_next_file (); + } + + return ok; +} + +/* Read N bytes into BLOCK from the concatenation of the input files + named in the global array FILE_LIST. On the first call to this + function, the global variable IN_STREAM is expected to be an open + stream associated with the input file INPUT_FILENAME. If all N + bytes cannot be read from IN_STREAM, close IN_STREAM and update + the global variables IN_STREAM and INPUT_FILENAME. Then try to + read the remaining bytes from the newly opened file. Repeat if + necessary until EOF is reached for the last file in FILE_LIST. + On subsequent calls, don't modify BLOCK and return true. Set + *N_BYTES_IN_BUFFER to the number of bytes read. If an error occurs, + it will be detected through ferror when the stream is about to be + closed. If there is an error, give a message but continue reading + as usual and return false. Otherwise return true. */ + +static bool +read_block (size_t n, char *block, size_t *n_bytes_in_buffer) +{ + bool ok = true; + + affirm (0 < n && n <= bytes_per_block); + + *n_bytes_in_buffer = 0; + + while (in_stream != nullptr) /* EOF. */ + { + size_t n_needed; + size_t n_read; + + n_needed = n - *n_bytes_in_buffer; + n_read = fread (block + *n_bytes_in_buffer, 1, n_needed, in_stream); + + *n_bytes_in_buffer += n_read; + + if (n_read == n_needed) + break; + + ok &= check_and_close (errno); + + ok &= open_next_file (); + } + + return ok; +} + +/* Return the least common multiple of the sizes associated + with the format specs. */ + +ATTRIBUTE_PURE +static int +get_lcm (void) +{ + int l_c_m = 1; + + for (size_t i = 0; i < n_specs; i++) + l_c_m = lcm (l_c_m, width_bytes[spec[i].size]); + return l_c_m; +} + +/* If S is a valid traditional offset specification with an optional + leading '+' return true and set *OFFSET to the offset it denotes. */ + +static bool +parse_old_offset (char const *s, uintmax_t *offset) +{ + int radix; + + if (*s == '\0') + return false; + + /* Skip over any leading '+'. */ + if (s[0] == '+') + ++s; + + /* Determine the radix we'll use to interpret S. If there is a '.', + it's decimal, otherwise, if the string begins with '0X'or '0x', + it's hexadecimal, else octal. */ + if (strchr (s, '.') != nullptr) + radix = 10; + else + { + if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) + radix = 16; + else + radix = 8; + } + + return xstrtoumax (s, nullptr, radix, offset, "Bb") == LONGINT_OK; +} + +/* Read a chunk of size BYTES_PER_BLOCK from the input files, write the + formatted block to standard output, and repeat until the specified + maximum number of bytes has been read or until all input has been + processed. If the last block read is smaller than BYTES_PER_BLOCK + and its size is not a multiple of the size associated with a format + spec, extend the input block with zero bytes until its length is a + multiple of all format spec sizes. Write the final block. Finally, + write on a line by itself the offset of the byte after the last byte + read. Accumulate return values from calls to read_block and + check_and_close, and if any was false, return false. + Otherwise, return true. */ + +static bool +dump (void) +{ + char *block[2]; + uintmax_t current_offset; + bool idx = false; + bool ok = true; + size_t n_bytes_read; + + block[0] = xnmalloc (2, bytes_per_block); + block[1] = block[0] + bytes_per_block; + + current_offset = n_bytes_to_skip; + + if (limit_bytes_to_format) + { + while (ok) + { + size_t n_needed; + if (current_offset >= end_offset) + { + n_bytes_read = 0; + break; + } + n_needed = MIN (end_offset - current_offset, + (uintmax_t) bytes_per_block); + ok &= read_block (n_needed, block[idx], &n_bytes_read); + if (n_bytes_read < bytes_per_block) + break; + affirm (n_bytes_read == bytes_per_block); + write_block (current_offset, n_bytes_read, + block[!idx], block[idx]); + if (ferror (stdout)) + ok = false; + current_offset += n_bytes_read; + idx = !idx; + } + } + else + { + while (ok) + { + ok &= read_block (bytes_per_block, block[idx], &n_bytes_read); + if (n_bytes_read < bytes_per_block) + break; + affirm (n_bytes_read == bytes_per_block); + write_block (current_offset, n_bytes_read, + block[!idx], block[idx]); + if (ferror (stdout)) + ok = false; + current_offset += n_bytes_read; + idx = !idx; + } + } + + if (n_bytes_read > 0) + { + int l_c_m; + size_t bytes_to_write; + + l_c_m = get_lcm (); + + /* Ensure zero-byte padding up to the smallest multiple of l_c_m that + is at least as large as n_bytes_read. */ + bytes_to_write = l_c_m * ((n_bytes_read + l_c_m - 1) / l_c_m); + + memset (block[idx] + n_bytes_read, 0, bytes_to_write - n_bytes_read); + write_block (current_offset, n_bytes_read, block[!idx], block[idx]); + current_offset += n_bytes_read; + } + + format_address (current_offset, '\n'); + + if (limit_bytes_to_format && current_offset >= end_offset) + ok &= check_and_close (0); + + free (block[0]); + + return ok; +} + +/* STRINGS mode. Find each "string constant" in the input. + A string constant is a run of at least 'string_min' ASCII + graphic (or formatting) characters terminated by a null. + Based on a function written by Richard Stallman for a + traditional version of od. Return true if successful. */ + +static bool +dump_strings (void) +{ + size_t bufsize = MAX (100, string_min); + char *buf = xmalloc (bufsize); + uintmax_t address = n_bytes_to_skip; + bool ok = true; + + while (true) + { + size_t i; + int c; + + /* See if the next 'string_min' chars are all printing chars. */ + tryline: + + if (limit_bytes_to_format + && (end_offset < string_min || end_offset - string_min <= address)) + break; + + for (i = 0; i < string_min; i++) + { + ok &= read_char (&c); + address++; + if (c < 0) + { + free (buf); + return ok; + } + if (! isprint (c)) + /* Found a non-printing. Try again starting with next char. */ + goto tryline; + buf[i] = c; + } + + /* We found a run of 'string_min' printable characters. + Now see if it is terminated with a null byte. */ + while (!limit_bytes_to_format || address < end_offset) + { + if (i == bufsize) + { + buf = X2REALLOC (buf, &bufsize); + } + ok &= read_char (&c); + address++; + if (c < 0) + { + free (buf); + return ok; + } + if (c == '\0') + break; /* It is; print this string. */ + if (! isprint (c)) + goto tryline; /* It isn't; give up on this string. */ + buf[i++] = c; /* String continues; store it all. */ + } + + /* If we get here, the string is all printable and null-terminated, + so print it. It is all in 'buf' and 'i' is its length. */ + buf[i] = 0; + format_address (address - i - 1, ' '); + + for (i = 0; (c = buf[i]); i++) + { + switch (c) + { + case '\a': + fputs ("\\a", stdout); + break; + + case '\b': + fputs ("\\b", stdout); + break; + + case '\f': + fputs ("\\f", stdout); + break; + + case '\n': + fputs ("\\n", stdout); + break; + + case '\r': + fputs ("\\r", stdout); + break; + + case '\t': + fputs ("\\t", stdout); + break; + + case '\v': + fputs ("\\v", stdout); + break; + + default: + putc (c, stdout); + } + } + putchar ('\n'); + } + + /* We reach this point only if we search through + (max_bytes_to_format - string_min) bytes before reaching EOF. */ + + free (buf); + + ok &= check_and_close (0); + return ok; +} + +int +main (int argc, char **argv) +{ + int n_files; + size_t i; + int l_c_m; + idx_t desired_width IF_LINT ( = 0); + bool modern = false; + bool width_specified = false; + bool ok = true; + size_t width_per_block = 0; + static char const multipliers[] = "bEGKkMmPQRTYZ0"; + + /* The old-style 'pseudo starting address' to be printed in parentheses + after any true address. */ + uintmax_t pseudo_start IF_LINT ( = 0); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + for (i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++) + integral_type_size[i] = NO_SIZE; + + integral_type_size[sizeof (char)] = CHAR; + integral_type_size[sizeof (short int)] = SHORT; + integral_type_size[sizeof (int)] = INT; + integral_type_size[sizeof (long int)] = LONG; +#if HAVE_UNSIGNED_LONG_LONG_INT + /* If 'long int' and 'long long int' have the same size, it's fine + to overwrite the entry for 'long' with this one. */ + integral_type_size[sizeof (unsigned_long_long_int)] = LONG_LONG; +#endif + + for (i = 0; i <= MAX_FP_TYPE_SIZE; i++) + fp_type_size[i] = NO_SIZE; + + fp_type_size[sizeof (float)] = FLOAT_SINGLE; + /* The array entry for 'double' is filled in after that for 'long double' + so that if they are the same size, we avoid any overhead of + long double computation in libc. */ + fp_type_size[sizeof (long double)] = FLOAT_LONG_DOUBLE; + fp_type_size[sizeof (double)] = FLOAT_DOUBLE; + + n_specs = 0; + n_specs_allocated = 0; + spec = nullptr; + + format_address = format_address_std; + address_base = 8; + address_pad_len = 7; + flag_dump_strings = false; + + while (true) + { + uintmax_t tmp; + enum strtol_error s_err; + int oi = -1; + int c = getopt_long (argc, argv, short_options, long_options, &oi); + if (c == -1) + break; + + switch (c) + { + case 'A': + modern = true; + switch (optarg[0]) + { + case 'd': + format_address = format_address_std; + address_base = 10; + address_pad_len = 7; + break; + case 'o': + format_address = format_address_std; + address_base = 8; + address_pad_len = 7; + break; + case 'x': + format_address = format_address_std; + address_base = 16; + address_pad_len = 6; + break; + case 'n': + format_address = format_address_none; + address_pad_len = 0; + break; + default: + error (EXIT_FAILURE, 0, + _("invalid output address radix '%c';" + " it must be one character from [doxn]"), + optarg[0]); + break; + } + break; + + case 'j': + modern = true; + s_err = xstrtoumax (optarg, nullptr, 0, + &n_bytes_to_skip, multipliers); + if (s_err != LONGINT_OK) + xstrtol_fatal (s_err, oi, c, long_options, optarg); + break; + + case 'N': + modern = true; + limit_bytes_to_format = true; + + s_err = xstrtoumax (optarg, nullptr, 0, &max_bytes_to_format, + multipliers); + if (s_err != LONGINT_OK) + xstrtol_fatal (s_err, oi, c, long_options, optarg); + break; + + case 'S': + modern = true; + if (optarg == nullptr) + string_min = 3; + else + { + s_err = xstrtoumax (optarg, nullptr, 0, &tmp, multipliers); + if (s_err != LONGINT_OK) + xstrtol_fatal (s_err, oi, c, long_options, optarg); + + /* The minimum string length may be no larger than SIZE_MAX, + since we may allocate a buffer of this size. */ + if (SIZE_MAX < tmp) + error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg)); + + string_min = tmp; + } + flag_dump_strings = true; + break; + + case 't': + modern = true; + ok &= decode_format_string (optarg); + break; + + case 'v': + modern = true; + abbreviate_duplicate_blocks = false; + break; + + case TRADITIONAL_OPTION: + traditional = true; + break; + + case ENDIAN_OPTION: + switch (XARGMATCH ("--endian", optarg, endian_args, endian_types)) + { + case endian_big: + input_swap = ! WORDS_BIGENDIAN; + break; + case endian_little: + input_swap = WORDS_BIGENDIAN; + break; + } + break; + + /* The next several cases map the traditional format + specification options to the corresponding modern format + specs. GNU od accepts any combination of old- and + new-style options. Format specification options accumulate. + The obsolescent and undocumented formats are compatible + with FreeBSD 4.10 od. */ + +#define CASE_OLD_ARG(old_char,new_string) \ + case old_char: \ + ok &= decode_format_string (new_string); \ + break + + CASE_OLD_ARG ('a', "a"); + CASE_OLD_ARG ('b', "o1"); + CASE_OLD_ARG ('c', "c"); + CASE_OLD_ARG ('D', "u4"); /* obsolescent and undocumented */ + CASE_OLD_ARG ('d', "u2"); + case 'F': /* obsolescent and undocumented alias */ + CASE_OLD_ARG ('e', "fD"); /* obsolescent and undocumented */ + CASE_OLD_ARG ('f', "fF"); + case 'X': /* obsolescent and undocumented alias */ + CASE_OLD_ARG ('H', "x4"); /* obsolescent and undocumented */ + CASE_OLD_ARG ('i', "dI"); + case 'I': case 'L': /* obsolescent and undocumented aliases */ + CASE_OLD_ARG ('l', "dL"); + CASE_OLD_ARG ('O', "o4"); /* obsolescent and undocumented */ + case 'B': /* obsolescent and undocumented alias */ + CASE_OLD_ARG ('o', "o2"); + CASE_OLD_ARG ('s', "d2"); + case 'h': /* obsolescent and undocumented alias */ + CASE_OLD_ARG ('x', "x2"); + +#undef CASE_OLD_ARG + + case 'w': + modern = true; + width_specified = true; + if (optarg == nullptr) + { + desired_width = 32; + } + else + { + intmax_t w_tmp; + s_err = xstrtoimax (optarg, nullptr, 10, &w_tmp, ""); + if (s_err != LONGINT_OK || w_tmp <= 0) + xstrtol_fatal (s_err, oi, c, long_options, optarg); + if (ckd_add (&desired_width, w_tmp, 0)) + error (EXIT_FAILURE, 0, _("%s is too large"), quote (optarg)); + } + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + break; + } + } + + if (!ok) + return EXIT_FAILURE; + + if (flag_dump_strings && n_specs > 0) + error (EXIT_FAILURE, 0, + _("no type may be specified when dumping strings")); + + n_files = argc - optind; + + /* If the --traditional option is used, there may be from + 0 to 3 remaining command line arguments; handle each case + separately. + od [file] [[+]offset[.][b] [[+]label[.][b]]] + The offset and label have the same syntax. + + If --traditional is not given, and if no modern options are + given, and if the offset begins with + or (if there are two + operands) a digit, accept only this form, as per POSIX: + od [file] [[+]offset[.][b]] + */ + + if (!modern || traditional) + { + uintmax_t o1; + uintmax_t o2; + + switch (n_files) + { + case 1: + if ((traditional || argv[optind][0] == '+') + && parse_old_offset (argv[optind], &o1)) + { + n_bytes_to_skip = o1; + --n_files; + ++argv; + } + break; + + case 2: + if ((traditional || argv[optind + 1][0] == '+' + || ISDIGIT (argv[optind + 1][0])) + && parse_old_offset (argv[optind + 1], &o2)) + { + if (traditional && parse_old_offset (argv[optind], &o1)) + { + n_bytes_to_skip = o1; + flag_pseudo_start = true; + pseudo_start = o2; + argv += 2; + n_files -= 2; + } + else + { + n_bytes_to_skip = o2; + --n_files; + argv[optind + 1] = argv[optind]; + ++argv; + } + } + break; + + case 3: + if (traditional + && parse_old_offset (argv[optind + 1], &o1) + && parse_old_offset (argv[optind + 2], &o2)) + { + n_bytes_to_skip = o1; + flag_pseudo_start = true; + pseudo_start = o2; + argv[optind + 2] = argv[optind]; + argv += 2; + n_files -= 2; + } + break; + } + + if (traditional && 1 < n_files) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + error (0, 0, "%s", + _("compatibility mode supports at most one file")); + usage (EXIT_FAILURE); + } + } + + if (flag_pseudo_start) + { + if (format_address == format_address_none) + { + address_base = 8; + address_pad_len = 7; + format_address = format_address_paren; + } + else + format_address = format_address_label; + } + + if (limit_bytes_to_format) + { + end_offset = n_bytes_to_skip + max_bytes_to_format; + if (end_offset < n_bytes_to_skip) + error (EXIT_FAILURE, 0, _("skip-bytes + read-bytes is too large")); + } + + if (n_specs == 0) + decode_format_string ("oS"); + + if (n_files > 0) + { + /* Set the global pointer FILE_LIST so that it + references the first file-argument on the command-line. */ + + file_list = (char const *const *) &argv[optind]; + } + else + { + /* No files were listed on the command line. + Set the global pointer FILE_LIST so that it + references the null-terminated list of one name: "-". */ + + file_list = default_file_list; + } + + /* open the first input file */ + ok = open_next_file (); + if (in_stream == nullptr) + goto cleanup; + + /* skip over any unwanted header bytes */ + ok &= skip (n_bytes_to_skip); + if (in_stream == nullptr) + goto cleanup; + + pseudo_offset = (flag_pseudo_start ? pseudo_start - n_bytes_to_skip : 0); + + /* Compute output block length. */ + l_c_m = get_lcm (); + + if (width_specified) + { + if (desired_width != 0 && desired_width % l_c_m == 0) + bytes_per_block = desired_width; + else + { + error (0, 0, _("warning: invalid width %td; using %d instead"), + desired_width, l_c_m); + bytes_per_block = l_c_m; + } + } + else + { + if (l_c_m < DEFAULT_BYTES_PER_BLOCK) + bytes_per_block = l_c_m * (DEFAULT_BYTES_PER_BLOCK / l_c_m); + else + bytes_per_block = l_c_m; + } + + /* Compute padding necessary to align output block. */ + for (i = 0; i < n_specs; i++) + { + int fields_per_block = bytes_per_block / width_bytes[spec[i].size]; + int block_width = (spec[i].field_width + 1) * fields_per_block; + if (width_per_block < block_width) + width_per_block = block_width; + } + for (i = 0; i < n_specs; i++) + { + int fields_per_block = bytes_per_block / width_bytes[spec[i].size]; + int block_width = spec[i].field_width * fields_per_block; + spec[i].pad_width = width_per_block - block_width; + } + +#ifdef DEBUG + printf ("lcm=%d, width_per_block=%"PRIuMAX"\n", l_c_m, + (uintmax_t) width_per_block); + for (i = 0; i < n_specs; i++) + { + int fields_per_block = bytes_per_block / width_bytes[spec[i].size]; + affirm (bytes_per_block % width_bytes[spec[i].size] == 0); + affirm (1 <= spec[i].pad_width / fields_per_block); + printf ("%d: fmt=\"%s\" in_width=%d out_width=%d pad=%d\n", + i, spec[i].fmt_string, width_bytes[spec[i].size], + spec[i].field_width, spec[i].pad_width); + } +#endif + + ok &= (flag_dump_strings ? dump_strings () : dump ()); + +cleanup: + + if (have_read_stdin && fclose (stdin) == EOF) + error (EXIT_FAILURE, errno, _("standard input")); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/operand2sig.c b/src/operand2sig.c new file mode 100644 index 0000000..71341d9 --- /dev/null +++ b/src/operand2sig.c @@ -0,0 +1,92 @@ +/* operand2sig.c -- common function for parsing signal specifications + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from kill.c/timeout.c by Pádraig Brady. + FIXME: Move this to gnulib/str2sig.c */ + + +/* Convert OPERAND to a signal number with printable representation SIGNAME. + Return the signal number, or -1 if unsuccessful. */ + +#include +#include +#include +#include + +#include "system.h" +#include "quote.h" +#include "sig2str.h" +#include "operand2sig.h" + +extern int +operand2sig (char const *operand, char *signame) +{ + int signum; + + if (ISDIGIT (*operand)) + { + /* Note we don't put a limit on the maximum value passed, + because we're checking shell $? values here, and ksh for + example will add 256 to the signal value, thus being wider + than the number of WEXITSTATUS bits. + We could validate that values were not above say + ((WEXITSTATUS (~0) << 1) + 1), which would cater for ksh. + But some shells may use other adjustments in future to be + (forward) compatible with systems that support + wider exit status values as discussed at + https://austingroupbugs.net/view.php?id=947 */ + + char *endp; + long int l = (errno = 0, strtol (operand, &endp, 10)); + int i = l; + signum = (operand == endp || *endp || errno || i != l ? -1 : i); + + if (signum != -1) + { + /* Note AIX uses a different bit pattern for status returned + from shell and wait(), so we can't use WTERMSIG etc. here. + Also ksh returns 0xFF + signal number. */ + signum &= signum >= 0xFF ? 0xFF : 0x7F; + } + } + else + { + /* Convert signal to upper case in the C locale, not in the + current locale. Don't assume ASCII; it might be EBCDIC. */ + char *upcased = xstrdup (operand); + char *p; + for (p = upcased; *p; p++) + if (strchr ("abcdefghijklmnopqrstuvwxyz", *p)) + *p += 'A' - 'a'; + + /* Look for the signal name, possibly prefixed by "SIG", + and possibly lowercased. */ + if (!(str2sig (upcased, &signum) == 0 + || (upcased[0] == 'S' && upcased[1] == 'I' && upcased[2] == 'G' + && str2sig (upcased + 3, &signum) == 0))) + signum = -1; + + free (upcased); + } + + if (signum < 0 || sig2str (signum, signame) != 0) + { + error (0, 0, _("%s: invalid signal"), quote (operand)); + return -1; + } + + return signum; +} diff --git a/src/operand2sig.h b/src/operand2sig.h new file mode 100644 index 0000000..1847a72 --- /dev/null +++ b/src/operand2sig.h @@ -0,0 +1,19 @@ +/* operand2sig.h -- prototype for signal specification function + + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +extern int operand2sig (char const *operand, char *signame) + _GL_ATTRIBUTE_NONNULL (); diff --git a/src/paste.c b/src/paste.c new file mode 100644 index 0000000..68f8a36 --- /dev/null +++ b/src/paste.c @@ -0,0 +1,517 @@ +/* paste - merge lines of files + Copyright (C) 1997-2023 Free Software Foundation, Inc. + Copyright (C) 1984 David M. Ihnat + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David Ihnat. */ + +/* The list of valid escape sequences has been expanded over the Unix + version, to include \b, \f, \r, and \v. + + POSIX changes, bug fixes, long-named options, and cleanup + by David MacKenzie . + + Options: + --serial + -s Paste one file at a time rather than + one line from each file. + --delimiters=delim-list + -d delim-list Consecutively use the characters in + DELIM-LIST instead of tab to separate + merged lines. When DELIM-LIST is exhausted, + start again at its beginning. + A FILE of '-' means standard input. + If no FILEs are given, standard input is used. */ + +#include + +#include +#include +#include +#include "system.h" +#include "fadvise.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "paste" + +#define AUTHORS \ + proper_name ("David M. Ihnat"), \ + proper_name ("David MacKenzie") + +/* Indicates that no delimiter should be added in the current position. */ +#define EMPTY_DELIM '\0' + +/* If nonzero, we have read standard input at some point. */ +static bool have_read_stdin; + +/* If nonzero, merge subsequent lines of each file rather than + corresponding lines from each file in parallel. */ +static bool serial_merge; + +/* The delimiters between lines of input files (used cyclically). */ +static char *delims; + +/* A pointer to the character after the end of 'delims'. */ +static char const *delim_end; + +static unsigned char line_delim = '\n'; + +static struct option const longopts[] = +{ + {"serial", no_argument, nullptr, 's'}, + {"delimiters", required_argument, nullptr, 'd'}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Set globals delims and delim_end. Copy STRPTR to DELIMS, converting + backslash representations of special characters in STRPTR to their actual + values. The set of possible backslash characters has been expanded beyond + that recognized by the Unix version. + Return 0 upon success. + If the string ends in an odd number of backslashes, ignore the + final backslash and return nonzero. */ + +static int +collapse_escapes (char const *strptr) +{ + char *strout = xstrdup (strptr); + bool backslash_at_end = false; + + delims = strout; + + while (*strptr) + { + if (*strptr != '\\') /* Is it an escape character? */ + *strout++ = *strptr++; /* No, just transfer it. */ + else + { + switch (*++strptr) + { + case '0': + *strout++ = EMPTY_DELIM; + break; + + case 'b': + *strout++ = '\b'; + break; + + case 'f': + *strout++ = '\f'; + break; + + case 'n': + *strout++ = '\n'; + break; + + case 'r': + *strout++ = '\r'; + break; + + case 't': + *strout++ = '\t'; + break; + + case 'v': + *strout++ = '\v'; + break; + + case '\\': + *strout++ = '\\'; + break; + + case '\0': + backslash_at_end = true; + goto done; + + default: + *strout++ = *strptr; + break; + } + strptr++; + } + } + + done: + + delim_end = strout; + return backslash_at_end ? 1 : 0; +} + +/* Output a single byte, reporting any write errors. */ + +static inline void +xputchar (char c) +{ + if (putchar (c) < 0) + write_error (); +} + +/* Perform column paste on the NFILES files named in FNAMPTR. + Return true if successful, false if one or more files could not be + opened or read. */ + +static bool +paste_parallel (size_t nfiles, char **fnamptr) +{ + bool ok = true; + /* If all files are just ready to be closed, or will be on this + round, the string of delimiters must be preserved. + delbuf[0] through delbuf[nfiles] + store the delimiters for closed files. */ + char *delbuf = xmalloc (nfiles + 2); + + /* Streams open to the files to process; null if the corresponding + stream is closed. */ + FILE **fileptr = xnmalloc (nfiles + 1, sizeof *fileptr); + + /* Number of files still open to process. */ + size_t files_open; + + /* True if any fopen got fd == STDIN_FILENO. */ + bool opened_stdin = false; + + /* Attempt to open all files. This could be expanded to an infinite + number of files, but at the (considerable) expense of remembering + each file and its current offset, then opening/reading/closing. */ + + for (files_open = 0; files_open < nfiles; ++files_open) + { + if (STREQ (fnamptr[files_open], "-")) + { + have_read_stdin = true; + fileptr[files_open] = stdin; + } + else + { + fileptr[files_open] = fopen (fnamptr[files_open], "r"); + if (fileptr[files_open] == nullptr) + error (EXIT_FAILURE, errno, "%s", quotef (fnamptr[files_open])); + else if (fileno (fileptr[files_open]) == STDIN_FILENO) + opened_stdin = true; + fadvise (fileptr[files_open], FADVISE_SEQUENTIAL); + } + } + + if (opened_stdin && have_read_stdin) + error (EXIT_FAILURE, 0, _("standard input is closed")); + + /* Read a line from each file and output it to stdout separated by a + delimiter, until we go through the loop without successfully + reading from any of the files. */ + + while (files_open) + { + /* Set up for the next line. */ + bool somedone = false; + char const *delimptr = delims; + size_t delims_saved = 0; /* Number of delims saved in 'delbuf'. */ + + for (size_t i = 0; i < nfiles && files_open; i++) + { + int chr; /* Input character. */ + int err; /* Input errno value. */ + bool sometodo = false; /* Input chars to process. */ + + if (fileptr[i]) + { + chr = getc (fileptr[i]); + err = errno; + if (chr != EOF && delims_saved) + { + if (fwrite (delbuf, 1, delims_saved, stdout) != delims_saved) + write_error (); + delims_saved = 0; + } + + while (chr != EOF) + { + sometodo = true; + if (chr == line_delim) + break; + xputchar (chr); + chr = getc (fileptr[i]); + err = errno; + } + } + + if (! sometodo) + { + /* EOF, read error, or closed file. + If an EOF or error, close the file. */ + if (fileptr[i]) + { + if (!ferror (fileptr[i])) + err = 0; + if (fileptr[i] == stdin) + clearerr (fileptr[i]); /* Also clear EOF. */ + else if (fclose (fileptr[i]) == EOF && !err) + err = errno; + if (err) + { + error (0, err, "%s", quotef (fnamptr[i])); + ok = false; + } + + fileptr[i] = nullptr; + files_open--; + } + + if (i + 1 == nfiles) + { + /* End of this output line. + Is this the end of the whole thing? */ + if (somedone) + { + /* No. Some files were not closed for this line. */ + if (delims_saved) + { + if (fwrite (delbuf, 1, delims_saved, stdout) + != delims_saved) + write_error (); + delims_saved = 0; + } + xputchar (line_delim); + } + continue; /* Next read of files, or exit. */ + } + else + { + /* Closed file; add delimiter to 'delbuf'. */ + if (*delimptr != EMPTY_DELIM) + delbuf[delims_saved++] = *delimptr; + if (++delimptr == delim_end) + delimptr = delims; + } + } + else + { + /* Some data read. */ + somedone = true; + + /* Except for last file, replace last newline with delim. */ + if (i + 1 != nfiles) + { + if (chr != line_delim && chr != EOF) + xputchar (chr); + if (*delimptr != EMPTY_DELIM) + xputchar (*delimptr); + if (++delimptr == delim_end) + delimptr = delims; + } + else + { + /* If the last line of the last file lacks a newline, + print one anyhow. POSIX requires this. */ + char c = (chr == EOF ? line_delim : chr); + xputchar (c); + } + } + } + } + free (fileptr); + free (delbuf); + return ok; +} + +/* Perform serial paste on the NFILES files named in FNAMPTR. + Return true if no errors, false if one or more files could not be + opened or read. */ + +static bool +paste_serial (size_t nfiles, char **fnamptr) +{ + bool ok = true; /* false if open or read errors occur. */ + int charnew, charold; /* Current and previous char read. */ + char const *delimptr; /* Current delimiter char. */ + FILE *fileptr; /* Open for reading current file. */ + + for (; nfiles; nfiles--, fnamptr++) + { + int saved_errno; + bool is_stdin = STREQ (*fnamptr, "-"); + if (is_stdin) + { + have_read_stdin = true; + fileptr = stdin; + } + else + { + fileptr = fopen (*fnamptr, "r"); + if (fileptr == nullptr) + { + error (0, errno, "%s", quotef (*fnamptr)); + ok = false; + continue; + } + fadvise (fileptr, FADVISE_SEQUENTIAL); + } + + delimptr = delims; /* Set up for delimiter string. */ + + charold = getc (fileptr); + saved_errno = errno; + if (charold != EOF) + { + /* 'charold' is set up. Hit it! + Keep reading characters, stashing them in 'charnew'; + output 'charold', converting to the appropriate delimiter + character if needed. After the EOF, output 'charold' + if it's a newline; otherwise, output it and then a newline. */ + + while ((charnew = getc (fileptr)) != EOF) + { + /* Process the old character. */ + if (charold == line_delim) + { + if (*delimptr != EMPTY_DELIM) + xputchar (*delimptr); + + if (++delimptr == delim_end) + delimptr = delims; + } + else + xputchar (charold); + + charold = charnew; + } + saved_errno = errno; + + /* Hit EOF. Process that last character. */ + xputchar (charold); + } + + if (charold != line_delim) + xputchar (line_delim); + + if (!ferror (fileptr)) + saved_errno = 0; + if (is_stdin) + clearerr (fileptr); /* Also clear EOF. */ + else if (fclose (fileptr) != 0 && !saved_errno) + saved_errno = errno; + if (saved_errno) + { + error (0, saved_errno, "%s", quotef (*fnamptr)); + ok = false; + } + } + return ok; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Write lines consisting of the sequentially corresponding lines from\n\ +each FILE, separated by TABs, to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -d, --delimiters=LIST reuse characters from LIST instead of TABs\n\ + -s, --serial paste one file at a time instead of in parallel\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + /* FIXME: add a couple of examples. */ + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int optc; + char const *delim_arg = "\t"; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + have_read_stdin = false; + serial_merge = false; + + while ((optc = getopt_long (argc, argv, "d:sz", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'd': + /* Delimiter character(s). */ + delim_arg = (optarg[0] == '\0' ? "\\0" : optarg); + break; + + case 's': + serial_merge = true; + break; + + case 'z': + line_delim = '\0'; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + int nfiles = argc - optind; + if (nfiles == 0) + { + argv[optind] = bad_cast ("-"); + nfiles++; + } + + if (collapse_escapes (delim_arg)) + { + /* Don't use the quote() quoting style, because that would double the + number of displayed backslashes, making the diagnostic look bogus. */ + error (EXIT_FAILURE, 0, + _("delimiter list ends with an unescaped backslash: %s"), + quotearg_n_style_colon (0, c_maybe_quoting_style, delim_arg)); + } + + bool ok = ((serial_merge ? paste_serial : paste_parallel) + (nfiles, &argv[optind])); + + free (delims); + + if (have_read_stdin && fclose (stdin) == EOF) + error (EXIT_FAILURE, errno, "-"); + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/pathchk.c b/src/pathchk.c new file mode 100644 index 0000000..9614294 --- /dev/null +++ b/src/pathchk.c @@ -0,0 +1,419 @@ +/* pathchk -- check whether file names are valid or portable + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include +#include + +#include "system.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "pathchk" + +#define AUTHORS \ + proper_name ("Paul Eggert"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Jim Meyering") + +#ifndef _POSIX_PATH_MAX +# define _POSIX_PATH_MAX 256 +#endif +#ifndef _POSIX_NAME_MAX +# define _POSIX_NAME_MAX 14 +#endif + +#ifdef _XOPEN_NAME_MAX +# define NAME_MAX_MINIMUM _XOPEN_NAME_MAX +#else +# define NAME_MAX_MINIMUM _POSIX_NAME_MAX +#endif +#ifdef _XOPEN_PATH_MAX +# define PATH_MAX_MINIMUM _XOPEN_PATH_MAX +#else +# define PATH_MAX_MINIMUM _POSIX_PATH_MAX +#endif + +#if ! (HAVE_PATHCONF && defined _PC_NAME_MAX && defined _PC_PATH_MAX) +# ifndef _PC_NAME_MAX +# define _PC_NAME_MAX 0 +# define _PC_PATH_MAX 1 +# endif +# ifndef pathconf +# define pathconf(file, flag) \ + (flag == _PC_NAME_MAX ? NAME_MAX_MINIMUM : PATH_MAX_MINIMUM) +# endif +#endif + +static bool validate_file_name (char *, bool, bool); + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + PORTABILITY_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + {"portability", no_argument, nullptr, PORTABILITY_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... NAME...\n"), program_name); + fputs (_("\ +Diagnose invalid or non-portable file names.\n\ +\n\ + -p check for most POSIX systems\n\ + -P check for empty names and leading \"-\"\n\ + --portability check for all POSIX systems (equivalent to -p -P)\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + bool ok = true; + bool check_basic_portability = false; + bool check_extra_portability = false; + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "+pP", longopts, nullptr)) != -1) + { + switch (optc) + { + case PORTABILITY_OPTION: + check_basic_portability = true; + check_extra_portability = true; + break; + + case 'p': + check_basic_portability = true; + break; + + case 'P': + check_extra_portability = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (optind == argc) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + for (; optind < argc; ++optind) + ok &= validate_file_name (argv[optind], + check_basic_portability, check_extra_portability); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} + +/* If FILE contains a component with a leading "-", report an error + and return false; otherwise, return true. */ + +static bool +no_leading_hyphen (char const *file) +{ + char const *p; + + for (p = file; (p = strchr (p, '-')); p++) + if (p == file || p[-1] == '/') + { + error (0, 0, _("leading '-' in a component of file name %s"), + quoteaf (file)); + return false; + } + + return true; +} + +/* If FILE (of length FILELEN) contains only portable characters, + return true, else report an error and return false. */ + +static bool +portable_chars_only (char const *file, size_t filelen) +{ + size_t validlen = strspn (file, + ("/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789._-")); + char const *invalid = file + validlen; + + if (*invalid) + { + mbstate_t mbstate = { 0, }; + size_t charlen = mbrlen (invalid, filelen - validlen, &mbstate); + error (0, 0, + _("non-portable character %s in file name %s"), + quotearg_n_style_mem (1, locale_quoting_style, invalid, + (charlen <= MB_LEN_MAX ? charlen : 1)), + quoteaf_n (0, file)); + return false; + } + + return true; +} + +/* Return the address of the start of the next file name component in F. */ + +ATTRIBUTE_PURE +static char * +component_start (char *f) +{ + while (*f == '/') + f++; + return f; +} + +/* Return the size of the file name component F. F must be nonempty. */ + +ATTRIBUTE_PURE +static size_t +component_len (char const *f) +{ + size_t len; + for (len = 1; f[len] != '/' && f[len]; len++) + continue; + return len; +} + +/* Make sure that + strlen (FILE) <= PATH_MAX + && strlen (each-existing-directory-in-FILE) <= NAME_MAX + + If CHECK_BASIC_PORTABILITY is true, compare against _POSIX_PATH_MAX and + _POSIX_NAME_MAX instead, and make sure that FILE contains no + characters not in the POSIX portable filename character set, which + consists of A-Z, a-z, 0-9, ., _, - (plus / for separators). + + If CHECK_BASIC_PORTABILITY is false, make sure that all leading directories + along FILE that exist are searchable. + + If CHECK_EXTRA_PORTABILITY is true, check that file name components do not + begin with "-". + + If either CHECK_BASIC_PORTABILITY or CHECK_EXTRA_PORTABILITY is true, + check that the file name is not empty. + + Return true if all of these tests are successful, false if any fail. */ + +static bool +validate_file_name (char *file, bool check_basic_portability, + bool check_extra_portability) +{ + idx_t filelen = strlen (file); + + /* Start of file name component being checked. */ + char *start; + + /* True if component lengths need to be checked. */ + bool check_component_lengths; + + /* True if the file is known to exist. */ + bool file_exists = false; + + if (check_extra_portability && ! no_leading_hyphen (file)) + return false; + + if ((check_basic_portability || check_extra_portability) + && filelen == 0) + { + /* Fail, since empty names are not portable. As of + 2005-01-06 POSIX does not address whether "pathchk -p ''" + should (or is allowed to) fail, so this is not a + conformance violation. */ + error (0, 0, _("empty file name")); + return false; + } + + if (check_basic_portability) + { + if (! portable_chars_only (file, filelen)) + return false; + } + else + { + /* Check whether a file name component is in a directory that + is not searchable, or has some other serious problem. + POSIX does not allow "" as a file name, but some non-POSIX + hosts do (as an alias for "."), so allow "" if lstat does. */ + + struct stat st; + if (lstat (file, &st) == 0) + file_exists = true; + else if (errno != ENOENT || filelen == 0) + { + error (0, errno, "%s", quotef (file)); + return false; + } + } + + if (check_basic_portability + || (! file_exists && PATH_MAX_MINIMUM <= filelen)) + { + idx_t maxsize; + + if (check_basic_portability) + maxsize = _POSIX_PATH_MAX; + else + { + long int size; + char const *dir = (*file == '/' ? "/" : "."); + errno = 0; + size = pathconf (dir, _PC_PATH_MAX); + if (size < 0 && errno != 0) + { + error (0, errno, + _("%s: unable to determine maximum file name length"), + dir); + return false; + } + maxsize = MIN (size, MIN (SSIZE_MAX, IDX_MAX)); + } + + if (maxsize <= filelen) + { + error (0, 0, _("limit %td exceeded by length %td of file name %s"), + maxsize - 1, filelen, quoteaf (file)); + return false; + } + } + + /* Check whether pathconf (..., _PC_NAME_MAX) can be avoided, i.e., + whether all file name components are so short that they are valid + in any file system on this platform. If CHECK_BASIC_PORTABILITY, though, + it's more convenient to check component lengths below. */ + + check_component_lengths = check_basic_portability; + if (! check_component_lengths && ! file_exists) + { + for (start = file; *(start = component_start (start)); ) + { + size_t length = component_len (start); + + if (NAME_MAX_MINIMUM < length) + { + check_component_lengths = true; + break; + } + + start += length; + } + } + + if (check_component_lengths) + { + /* The limit on file name components for the current component. + This defaults to NAME_MAX_MINIMUM, for the sake of non-POSIX + systems (NFS, say?) where pathconf fails on "." or "/" with + errno == ENOENT. */ + idx_t name_max = NAME_MAX_MINIMUM; + + /* If nonzero, the known limit on file name components. */ + idx_t known_name_max = check_basic_portability ? _POSIX_NAME_MAX : 0; + + for (start = file; *(start = component_start (start)); ) + { + idx_t length; + + if (known_name_max) + name_max = known_name_max; + else + { + long int len; + char const *dir = (start == file ? "." : file); + char c = *start; + errno = 0; + *start = '\0'; + len = pathconf (dir, _PC_NAME_MAX); + *start = c; + if (0 <= len) + name_max = MIN (len, MIN (SSIZE_MAX, IDX_MAX)); + else + switch (errno) + { + case 0: + /* There is no limit. */ + name_max = IDX_MAX; + break; + + case ENOENT: + /* DIR does not exist; use its parent's maximum. */ + known_name_max = name_max; + break; + + default: + *start = '\0'; + error (0, errno, "%s", quotef (dir)); + *start = c; + return false; + } + } + + length = component_len (start); + + if (name_max < length) + { + char c = start[length]; + start[length] = '\0'; + error (0, 0, + _("limit %td exceeded by length %td " + "of file name component %s"), + name_max, length, quote (start)); + start[length] = c; + return false; + } + + start += length; + } + } + + return true; +} diff --git a/src/pinky.c b/src/pinky.c new file mode 100644 index 0000000..3427fb1 --- /dev/null +++ b/src/pinky.c @@ -0,0 +1,604 @@ +/* GNU's pinky. + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Created by hacking who.c by Kaveh Ghazi ghazi@caip.rutgers.edu */ + +#include +#include +#include +#include +#include + +#include +#include "system.h" + +#include "canon-host.h" +#include "hard-locale.h" +#include "readutmp.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "pinky" + +#define AUTHORS \ + proper_name ("Joseph Arceneaux"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Kaveh Ghazi") + +/* If true, display the hours:minutes since each user has touched + the keyboard, or blank if within the last minute, or days followed + by a 'd' if not within the last day. */ +static bool include_idle = true; + +/* If true, display a line at the top describing each field. */ +static bool include_heading = true; + +/* if true, display the user's full name from pw_gecos. */ +static bool include_fullname = true; + +/* if true, display the user's ~/.project file when doing long format. */ +static bool include_project = true; + +/* if true, display the user's ~/.plan file when doing long format. */ +static bool include_plan = true; + +/* if true, display the user's home directory and shell + when doing long format. */ +static bool include_home_and_shell = true; + +/* if true, use the "short" output format. */ +static bool do_short_format = true; + +/* if true, display the ut_host field. */ +#if HAVE_STRUCT_XTMP_UT_HOST +static bool include_where = true; +#endif + +/* The strftime format to use for login times, and its expected + output width. */ +static char const *time_format; +static int time_format_width; + +static struct option const longopts[] = +{ + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Count and return the number of ampersands in STR. */ + +ATTRIBUTE_PURE +static size_t +count_ampersands (char const *str) +{ + size_t count = 0; + do + { + if (*str == '&') + count++; + } while (*str++); + return count; +} + +/* Create a string (via xmalloc) which contains a full name by substituting + for each ampersand in GECOS_NAME the USER_NAME string with its first + character capitalized. The caller must ensure that GECOS_NAME contains + no ','s. The caller also is responsible for free'ing the return value of + this function. */ + +static char * +create_fullname (char const *gecos_name, char const *user_name) +{ + size_t rsize = strlen (gecos_name) + 1; + char *result; + char *r; + size_t ampersands = count_ampersands (gecos_name); + + if (ampersands != 0) + { + size_t ulen = strlen (user_name); + size_t product; + if (ckd_mul (&product, ulen, ampersands - 1) + || ckd_add (&rsize, rsize, product)) + xalloc_die (); + } + + r = result = xmalloc (rsize); + + while (*gecos_name) + { + if (*gecos_name == '&') + { + char const *uname = user_name; + if (islower (to_uchar (*uname))) + *r++ = toupper (to_uchar (*uname++)); + while (*uname) + *r++ = *uname++; + } + else + { + *r++ = *gecos_name; + } + + gecos_name++; + } + *r = 0; + + return result; +} + +/* Return a string representing the time between WHEN and the time + that this function is first run. */ + +static char const * +idle_string (time_t when) +{ + static time_t now = 0; + static char buf[INT_STRLEN_BOUND (intmax_t) + sizeof "d"]; + time_t seconds_idle; + + if (now == 0) + time (&now); + + seconds_idle = now - when; + if (seconds_idle < 60) /* One minute. */ + return " "; + if (seconds_idle < (24 * 60 * 60)) /* One day. */ + { + int hours = seconds_idle / (60 * 60); + int minutes = (seconds_idle % (60 * 60)) / 60; + sprintf (buf, "%02d:%02d", hours, minutes); + } + else + { + intmax_t days = seconds_idle / (24 * 60 * 60); + sprintf (buf, "%"PRIdMAX"d", days); + } + return buf; +} + +/* Return a time string. */ +static char const * +time_string (struct gl_utmp const *utmp_ent) +{ + static char buf[INT_STRLEN_BOUND (intmax_t) + sizeof "-%m-%d %H:%M"]; + struct tm *tmp = localtime (&utmp_ent->ut_ts.tv_sec); + + if (tmp) + { + strftime (buf, sizeof buf, time_format, tmp); + return buf; + } + else + return timetostr (utmp_ent->ut_ts.tv_sec, buf); +} + +/* Display a line of information about UTMP_ENT. */ + +static void +print_entry (struct gl_utmp const *utmp_ent) +{ + struct stat stats; + time_t last_change; + char mesg; + + /* If ut_line contains a space, the device name starts after the space. */ + char *line = utmp_ent->ut_line; + char *space = strchr (line, ' '); + line = space ? space + 1 : line; + + int dirfd; + if (IS_ABSOLUTE_FILE_NAME (line)) + dirfd = AT_FDCWD; + else + { + static int dev_dirfd; + if (!dev_dirfd) + { + dev_dirfd = open ("/dev", O_PATHSEARCH | O_DIRECTORY); + if (dev_dirfd < 0) + dev_dirfd = AT_FDCWD - 1; + } + dirfd = dev_dirfd; + } + + if (AT_FDCWD <= dirfd && fstatat (dirfd, line, &stats, 0) == 0) + { + mesg = (stats.st_mode & S_IWGRP) ? ' ' : '*'; + last_change = stats.st_atime; + } + else + { + mesg = '?'; + last_change = 0; + } + + char *ut_user = utmp_ent->ut_user; + if (strnlen (ut_user, 8) < 8) + printf ("%-8s", ut_user); + else + fputs (ut_user, stdout); + + if (include_fullname) + { + struct passwd *pw = getpwnam (ut_user); + if (pw == nullptr) + /* TRANSLATORS: Real name is unknown; at most 19 characters. */ + printf (" %19s", _(" ???")); + else + { + char *const comma = strchr (pw->pw_gecos, ','); + char *result; + + if (comma) + *comma = '\0'; + + result = create_fullname (pw->pw_gecos, pw->pw_name); + printf (" %-19.19s", result); + free (result); + } + } + + fputc (' ', stdout); + fputc (mesg, stdout); + if (strnlen (utmp_ent->ut_line, 8) < 8) + printf ("%-8s", utmp_ent->ut_line); + else + fputs (utmp_ent->ut_line, stdout); + + if (include_idle) + { + if (last_change) + printf (" %-6s", idle_string (last_change)); + else + /* TRANSLATORS: Idle time is unknown; at most 5 characters. */ + printf (" %-6s", _("?????")); + } + + printf (" %s", time_string (utmp_ent)); + +#ifdef HAVE_STRUCT_XTMP_UT_HOST + if (include_where && utmp_ent->ut_host[0]) + { + char *host = nullptr; + char *display = nullptr; + char *ut_host = utmp_ent->ut_host; + + /* Look for an X display. */ + display = strchr (ut_host, ':'); + if (display) + *display++ = '\0'; + + if (*ut_host) + /* See if we can canonicalize it. */ + host = canon_host (ut_host); + if ( ! host) + host = ut_host; + + fputc (' ', stdout); + fputs (host, stdout); + if (display) + { + fputc (':', stdout); + fputs (display, stdout); + } + + if (host != ut_host) + free (host); + } +#endif + + putchar ('\n'); +} + +/* Display a verbose line of information about UTMP_ENT. */ + +static void +print_long_entry (const char name[]) +{ + struct passwd *pw; + + pw = getpwnam (name); + + printf (_("Login name: ")); + printf ("%-28s", name); + + printf (_("In real life: ")); + if (pw == nullptr) + { + /* TRANSLATORS: Real name is unknown; no hard limit. */ + printf (" %s", _("???\n")); + return; + } + else + { + char *const comma = strchr (pw->pw_gecos, ','); + char *result; + + if (comma) + *comma = '\0'; + + result = create_fullname (pw->pw_gecos, pw->pw_name); + printf (" %s", result); + free (result); + } + + putchar ('\n'); + + if (include_home_and_shell) + { + printf (_("Directory: ")); + printf ("%-29s", pw->pw_dir); + printf (_("Shell: ")); + printf (" %s", pw->pw_shell); + putchar ('\n'); + } + + if (include_project) + { + FILE *stream; + char buf[1024]; + char const *const baseproject = "/.project"; + char *const project = + xmalloc (strlen (pw->pw_dir) + strlen (baseproject) + 1); + stpcpy (stpcpy (project, pw->pw_dir), baseproject); + + stream = fopen (project, "r"); + if (stream) + { + size_t bytes; + + printf (_("Project: ")); + + while ((bytes = fread (buf, 1, sizeof (buf), stream)) > 0) + fwrite (buf, 1, bytes, stdout); + fclose (stream); + } + + free (project); + } + + if (include_plan) + { + FILE *stream; + char buf[1024]; + char const *const baseplan = "/.plan"; + char *const plan = + xmalloc (strlen (pw->pw_dir) + strlen (baseplan) + 1); + stpcpy (stpcpy (plan, pw->pw_dir), baseplan); + + stream = fopen (plan, "r"); + if (stream) + { + size_t bytes; + + printf (_("Plan:\n")); + + while ((bytes = fread (buf, 1, sizeof (buf), stream)) > 0) + fwrite (buf, 1, bytes, stdout); + fclose (stream); + } + + free (plan); + } + + putchar ('\n'); +} + +/* Print the username of each valid entry and the number of valid entries + in UTMP_BUF, which should have N elements. */ + +static void +print_heading (void) +{ + printf ("%-8s", _("Login")); + if (include_fullname) + printf (" %-19s", _("Name")); + printf (" %-9s", _(" TTY")); + if (include_idle) + printf (" %-6s", _("Idle")); + printf (" %-*s", time_format_width, _("When")); +#ifdef HAVE_STRUCT_XTMP_UT_HOST + if (include_where) + printf (" %s", _("Where")); +#endif + putchar ('\n'); +} + +/* Display UTMP_BUF, which should have N entries. */ + +static void +scan_entries (idx_t n, struct gl_utmp const *utmp_buf, + const int argc_names, char *const argv_names[]) +{ + if (hard_locale (LC_TIME)) + { + time_format = "%Y-%m-%d %H:%M"; + time_format_width = 4 + 1 + 2 + 1 + 2 + 1 + 2 + 1 + 2; + } + else + { + time_format = "%b %e %H:%M"; + time_format_width = 3 + 1 + 2 + 1 + 2 + 1 + 2; + } + + if (include_heading) + print_heading (); + + while (n--) + { + if (IS_USER_PROCESS (utmp_buf)) + { + if (argc_names) + { + for (int i = 0; i < argc_names; i++) + if (STREQ (utmp_buf->ut_user, argv_names[i])) + { + print_entry (utmp_buf); + break; + } + } + else + print_entry (utmp_buf); + } + utmp_buf++; + } +} + +/* Display a list of who is on the system, according to utmp file FILENAME. */ + +static void +short_pinky (char const *filename, + const int argc_names, char *const argv_names[]) +{ + idx_t n_users; + struct gl_utmp *utmp_buf; + if (read_utmp (filename, &n_users, &utmp_buf, READ_UTMP_USER_PROCESS) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (filename)); + + scan_entries (n_users, utmp_buf, argc_names, argv_names); + exit (EXIT_SUCCESS); +} + +static void +long_pinky (const int argc_names, char *const argv_names[]) +{ + for (int i = 0; i < argc_names; i++) + print_long_entry (argv_names[i]); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [USER]...\n"), program_name); + fputs (_("\ +\n\ + -l produce long format output for the specified USERs\n\ + -b omit the user's home directory and shell in long format\n\ + -h omit the user's project file in long format\n\ + -p omit the user's plan file in long format\n\ + -s do short format output, this is the default\n\ +"), stdout); + fputs (_("\ + -f omit the line of column headings in short format\n\ + -w omit the user's full name in short format\n\ + -i omit the user's full name and remote host in short format\n\ + -q omit the user's full name, remote host and idle time\n\ + in short format\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\ +\n\ +A lightweight 'finger' program; print user information.\n\ +The utmp file will be %s.\n\ +"), UTMP_FILE); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int optc; + int n_users; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "sfwiqbhlp", longopts, nullptr)) + != -1) + { + switch (optc) + { + case 's': + do_short_format = true; + break; + + case 'l': + do_short_format = false; + break; + + case 'f': + include_heading = false; + break; + + case 'w': + include_fullname = false; + break; + + case 'i': + include_fullname = false; +#ifdef HAVE_STRUCT_XTMP_UT_HOST + include_where = false; +#endif + break; + + case 'q': + include_fullname = false; +#ifdef HAVE_STRUCT_XTMP_UT_HOST + include_where = false; +#endif + include_idle = false; + break; + + case 'h': + include_project = false; + break; + + case 'p': + include_plan = false; + break; + + case 'b': + include_home_and_shell = false; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + n_users = argc - optind; + + if (!do_short_format && n_users == 0) + { + error (0, 0, _("no username specified; at least one must be\ + specified when using -l")); + usage (EXIT_FAILURE); + } + + if (do_short_format) + short_pinky (UTMP_FILE, n_users, argv + optind); + else + long_pinky (n_users, argv + optind); + + return EXIT_SUCCESS; +} diff --git a/src/pr.c b/src/pr.c new file mode 100644 index 0000000..419545c --- /dev/null +++ b/src/pr.c @@ -0,0 +1,2867 @@ +/* pr -- convert text files for printing. + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* By Pete TerMaat, with considerable refinement by Roland Huebner. */ + +/* Things to watch: Sys V screws up on ... + pr -n -3 -s: /usr/dict/words + pr -m -o10 -n /usr/dict/words{,,,} + pr -6 -a -n -o5 /usr/dict/words + + Ideas: + + Keep a things_to_do list of functions to call when we know we have + something to print. Cleaner than current series of checks. + + Improve the printing of control prefixes. + + Expand the file name in the centered header line to a full file name. + + + Concept: + + If the input_tab_char differs from the default value TAB + ('-e[CHAR[...]]' is used), any input text tab is expanded to the + default width of 8 spaces (compare char_to_clump). - Same as SunOS + does. + + The treatment of the number_separator (compare add_line_number): + The default value TAB of the number_separator ('-n[SEP[...]]') doesn't + be thought to be an input character. An optional '-e'-input has no + effect. + - With single column output + only one POSIX requirement has to be met: + The default n-separator should be a TAB. The consequence is a + different width between the number and the text if the output position + of the separator changes, i.e., it depends upon the left margin used. + That's not nice but easy-to-use together with the defaults of other + utilities, e.g. sort or cut. - Same as SunOS does. + - With multicolumn output + two conflicting POSIX requirements exist: + First "default n-separator is TAB", second "output text columns shall + be of equal width". Moreover POSIX specifies the number+separator a + part of the column, together with '-COLUMN' and '-a -COLUMN'. + (With -m output the number shall occupy each line only once. Exactly + the same situation as single column output exists.) + GNU pr gives priority to the 2nd requirement and observes POSIX + column definition. The n-separator TAB is expanded to the same number + of spaces in each column using the default value 8. Tabification is + only performed if it is compatible with the output position. + Consequence: The output text columns are of equal width. The layout + of a page does not change if the left margin varies. - Looks better + than the SunOS approach. + SunOS pr gives priority to the 1st requirement. n-separator TAB + width varies with each column. Only the width of text part of the + column is fixed. + Consequence: The output text columns don't have equal width. The + widths and the layout of the whole page varies with the left margin. + An overflow of the line length (without margin) over the input value + PAGE_WIDTH may occur. + + The interference of the POSIX-compliant small letter options -w and -s: + ("interference" means "setting a _separator_ with -s switches off the + column structure and the default - not generally - page_width, + acts on -w option") + options: text form / separator: equivalent new options: + -w l -s[x] + -------------------------------------------------------------------- + 1. -- -- columns / space -- + trunc. to page_width = 72 + 2. -- -s[:] full lines / TAB[:] -J --sep-string[=""|:] + no truncation + 3. -w l -- columns / space -W l + trunc. to page_width = l + 4. -w l -s[:] columns / no sep.[:] -W l --sep-string[=:] + trunc. to page_width = l + -------------------------------------------------------------------- + + + Options: + + Including version 1.22i: + Some SMALL LETTER options have been redefined with the object of a + better POSIX compliance. The output of some further cases has been + adapted to other UNIXes. A violation of downward compatibility has to + be accepted. + Some NEW CAPITAL LETTER options ( -J, -S, -W) has been introduced to + turn off unexpected interference of small letter options (-s and -w + together with the three column options). + -N option and the second argument LAST_PAGE of +FIRST_PAGE offer more + flexibility; The detailed handling of form feeds set in the input + files requires -T option. + + Capital letter options dominate small letter ones. + + Some of the option-arguments cannot be specified as separate arguments + from the preceding option letter (already stated in POSIX specification). + + Form feeds in the input cause page breaks in the output. Multiple + form feeds produce empty pages. + + +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE] + begin [stop] printing with page FIRST_[LAST_]PAGE + + -COLUMN, --columns=COLUMN + Produce output that is COLUMN columns wide and + print columns down, unless -a is used. Balance number of + lines in the columns on each page. + + -a, --across Print columns across rather than down, used + together with -COLUMN. The input + one + two + three + four + will be printed with '-a -3' as + one two three + four + + -b Balance columns on the last page. + -b is no longer an independent option. It's always used + together with -COLUMN (unless -a is used) to get a + consistent formulation with "FF set by hand" in input + files. Each formfeed found terminates the number of lines + to be read with the actual page. The situation for + printing columns down is equivalent to that on the last + page. So we need a balancing. + + Keeping -b as an underground option guarantees some + downward compatibility. Utilities using pr with -b + (a most frequently used form) still work as usual. + + -c, --show-control-chars + Print nonprintable characters as control prefixes. + Control-g is printed as ^G (use hat notation) and + octal backslash notation. + + -d, --double-space Double space the output. + + -D FORMAT, --date-format=FORMAT Use FORMAT for the header date. + + -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]] + Expand tabs to spaces on input. Optional argument CHAR + is the input TAB character. (Default is TAB). Optional + argument WIDTH is the input TAB character's width. + (Default is 8.) + + -F, -f, --form-feed Use formfeeds instead of newlines to separate + pages. A three line HEADER is used, no TRAILER with -F, + without -F both HEADER and TRAILER are made of five lines. + + -h HEADER, --header=HEADER + Replace the filename in the header with the string HEADER. + A centered header is used. + + -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]] + Replace spaces with tabs on output. Optional argument + CHAR is the output TAB character. (Default is TAB). + Optional argument WIDTH is the output TAB character's + width. (Default is 8) + + -J, --join-lines Merge lines of full length, turns off -W/-w + line truncation, no column alignment, --sep-string[=STRING] + sets separators, works with all column options + (-COLUMN | -a -COLUMN | -m). + -J has been introduced (together with -W and --sep-string) to + disentangle the old (POSIX compliant) options -w, -s + along with the 3 column options. + + -l PAGE_LENGTH, --length=PAGE_LENGTH + Set the page length to PAGE_LENGTH lines. Default is 66, + including 5 lines of HEADER and 5 lines of TRAILER + without -F, but only 3 lines of HEADER and no TRAILER + with -F (i.e the number of text lines defaults to 56 or + 63 respectively). + + -m, --merge Print files in parallel; pad_across_to align + columns; truncate lines and print separator strings; + Do it also with empty columns to get a continuous line + numbering and column marking by separators throughout + the whole merged file. + + Empty pages in some input files produce empty columns + [marked by separators] in the merged pages. Completely + empty merged pages show no column separators at all. + + The layout of a merged page is ruled by the largest form + feed distance of the single pages at that page. Shorter + columns will be filled up with empty lines. + + Together with -J option join lines of full length and + set separators when -S option is used. + + -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]] + Provide DIGITS digit line numbering (default for DIGITS + is 5). With multicolumn output the number occupies the + first DIGITS column positions of each text column or only + each line of -m output. + With single column output the number precedes each line + just as -m output. + Optional argument SEP is the character appended to the + line number to separate it from the text followed. + The default separator is a TAB. In a strict sense a TAB + is always printed with single column output only. The + TAB-width varies with the TAB-position, e.g. with the + left margin specified by -o option. + With multicolumn output priority is given to "equal width + of output columns" (a POSIX specification). The TAB-width + is fixed to the value of the 1st column and does not + change with different values of left margin. That means a + fixed number of spaces is always printed in the place of + a TAB. The tabification depends upon the output + position. + + Default counting of the line numbers starts with 1st + line of the input file (not the 1st line printed, + compare the --page option and -N option). + + -N NUMBER, --first-line-number=NUMBER + Start line counting with the number NUMBER at the 1st + line of first page printed (mostly not the 1st line of + the input file). + + -o MARGIN, --indent=MARGIN + Offset each line with a margin MARGIN spaces wide. + Total page width is the size of the margin plus the + PAGE_WIDTH set with -W/-w option. + + -r, --no-file-warnings + Omit warning when a file cannot be opened. + + -s[CHAR], --separator[=CHAR] + Separate columns by a single character CHAR, default for + CHAR is the TAB character without -w and 'no char' with -w. + Without '-s' default separator 'space' is set. + -s[CHAR] turns off line truncation of all 3 column options + (-COLUMN|-a -COLUMN|-m) except -w is set. That is a POSIX + compliant formulation. The source code translates -s into + the new options -S and -J, also -W if required. + + -S[STRING], --sep-string[=STRING] + Separate columns by any string STRING. The -S option + doesn't react upon the -W/-w option (unlike -s option + does). It defines a separator nothing else. + Without -S: Default separator TAB is used with -J and + 'space' otherwise (same as -S" "). + With -S "": No separator is used. + Quotes should be used with blanks and some shell active + characters. + -S is problematic because in its obsolete form you + cannot use -S "STRING", but in its standard form you + must use -S "STRING" if STRING is empty. Use + --sep-string to avoid the ambiguity. + + -t, --omit-header Do not print headers or footers but retain form + feeds set in the input files. + + -T, --omit-pagination + Do not print headers or footers, eliminate any pagination + by form feeds set in the input files. + + -v, --show-nonprinting + Print nonprintable characters as escape sequences. Use + octal backslash notation. Control-G becomes \007. + + -w PAGE_WIDTH, --width=PAGE_WIDTH + Set page width to PAGE_WIDTH characters for multiple + text-column output only (default for PAGE_WIDTH is 72). + -s[CHAR] turns off the default page width and any line + truncation. Lines of full length will be merged, + regardless of the column options set. A POSIX compliant + formulation. + + -W PAGE_WIDTH, --page-width=PAGE_WIDTH + Set the page width to PAGE_WIDTH characters. That's valid + with and without a column option. Text lines will be + truncated, unless -J is used. Together with one of the + column options (-COLUMN| -a -COLUMN| -m) column alignment + is always used. + Default is 72 characters. + Without -W PAGE_WIDTH + - but with one of the column options default truncation of + 72 characters is used (to keep downward compatibility + and to simplify most frequently met column tasks). + Column alignment and column separators are used. + - and without any of the column options NO line truncation + is used (to keep downward compatibility and to meet most + frequent tasks). That's equivalent to -W 72 -J . + + With/without -W PAGE_WIDTH the header line is always + truncated to avoid line overflow. + + (In pr versions newer than 1.14 -S option does no longer + affect -W option.) + +*/ + +#include + +#include +#include +#include +#include "system.h" +#include "fadvise.h" +#include "hard-locale.h" +#include "mbswidth.h" +#include "quote.h" +#include "stat-time.h" +#include "stdio--.h" +#include "strftime.h" +#include "xstrtol.h" +#include "xstrtol-error.h" +#include "xdectoint.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "pr" + +#define AUTHORS \ + proper_name ("Pete TerMaat"), \ + proper_name ("Roland Huebner") + +/* Used with start_position in the struct COLUMN described below. + If start_position == ANYWHERE, we aren't truncating columns and + can begin printing a column anywhere. Otherwise we must pad to + the horizontal position start_position. */ +#define ANYWHERE 0 + +/* Each column has one of these structures allocated for it. + If we're only dealing with one file, fp is the same for all + columns. + + The general strategy is to spend time setting up these column + structures (storing columns if necessary), after which printing + is a matter of flitting from column to column and calling + print_func. + + Parallel files, single files printing across in multiple + columns, and single files printing down in multiple columns all + fit the same printing loop. + + print_func Function used to print lines in this column. + If we're storing this column it will be + print_stored(), Otherwise it will be read_line(). + + char_func Function used to process characters in this column. + If we're storing this column it will be store_char(), + otherwise it will be print_char(). + + current_line Index of the current entry in line_vector, which + contains the index of the first character of the + current line in buff[]. + + lines_stored Number of lines in this column which are stored in + buff. + + lines_to_print If we're storing this column, lines_to_print is + the number of stored_lines which remain to be + printed. Otherwise it is the number of lines + we can print without exceeding lines_per_body. + + start_position The horizontal position we want to be in before we + print the first character in this column. + + numbered True means precede this column with a line number. */ + +/* FIXME: There are many unchecked integer overflows in this file, + that will cause this command to misbehave given large inputs or + options. Many of the "int" values below should be "size_t" or + something else like that. */ + +struct COLUMN; +struct COLUMN + { + FILE *fp; /* Input stream for this column. */ + char const *name; /* File name. */ + enum + { + OPEN, + FF_FOUND, /* used with -b option, set with \f, changed + to ON_HOLD after print_header */ + ON_HOLD, /* Hit a form feed. */ + CLOSED + } + status; /* Status of the file pointer. */ + + /* Func to print lines in this col. */ + bool (*print_func) (struct COLUMN *); + + /* Func to print/store chars in this col. */ + void (*char_func) (char); + + int current_line; /* Index of current place in line_vector. */ + int lines_stored; /* Number of lines stored in buff. */ + int lines_to_print; /* No. lines stored or space left on page. */ + int start_position; /* Horizontal position of first char. */ + bool numbered; + bool full_page_printed; /* True means printed without a FF found. */ + + /* p->full_page_printed controls a special case of "FF set by hand": + True means a full page has been printed without FF found. To avoid an + additional empty page we have to ignore a FF immediately following in + the next line. */ + }; + +typedef struct COLUMN COLUMN; + +static int char_to_clump (char c); +static bool read_line (COLUMN *p); +static bool print_page (void); +static bool print_stored (COLUMN *p); +static bool open_file (char *name, COLUMN *p); +static bool skip_to_page (uintmax_t page); +static void print_header (void); +static void pad_across_to (int position); +static void add_line_number (COLUMN *p); +static void getoptnum (char const *n_str, int min, int *num, + char const *errfmt); +static void getoptarg (char *arg, char switch_char, char *character, + int *number); +static void print_files (int number_of_files, char **av); +static void init_parameters (int number_of_files); +static void init_header (char const *filename, int desc); +static bool init_fps (int number_of_files, char **av); +static void init_funcs (void); +static void init_store_cols (void); +static void store_columns (void); +static void balance (int total_stored); +static void store_char (char c); +static void pad_down (unsigned int lines); +static void read_rest_of_line (COLUMN *p); +static void skip_read (COLUMN *p, int column_number); +static void print_char (char c); +static void cleanup (void); +static void print_sep_string (void); +static void separator_string (char const *optarg_S); + +/* All of the columns to print. */ +static COLUMN *column_vector; + +/* When printing a single file in multiple downward columns, + we store the leftmost columns contiguously in buff. + To print a line from buff, get the index of the first character + from line_vector[i], and print up to line_vector[i + 1]. */ +static char *buff; + +/* Index of the position in buff where the next character + will be stored. */ +static unsigned int buff_current; + +/* The number of characters in buff. + Used for allocation of buff and to detect overflow of buff. */ +static size_t buff_allocated; + +/* Array of indices into buff. + Each entry is an index of the first character of a line. + This is used when storing lines to facilitate shuffling when + we do column balancing on the last page. */ +static int *line_vector; + +/* Array of horizontal positions. + For each line in line_vector, end_vector[line] is the horizontal + position we are in after printing that line. We keep track of this + so that we know how much we need to pad to prepare for the next + column. */ +static int *end_vector; + +/* (-m) True means we're printing multiple files in parallel. */ +static bool parallel_files = false; + +/* (-m) True means a line starts with some empty columns (some files + already CLOSED or ON_HOLD) which we have to align. */ +static bool align_empty_cols; + +/* (-m) True means we have not yet found any printable column in a line. + align_empty_cols = true has to be maintained. */ +static bool empty_line; + +/* (-m) False means printable column output precedes a form feed found. + Column alignment is done only once. No additional action with that form + feed. + True means we found only a form feed in a column. Maybe we have to do + some column alignment with that form feed. */ +static bool FF_only; + +/* (-[0-9]+) True means we're given an option explicitly specifying + number of columns. Used to detect when this option is used with -m + and when translating old options to new/long options. */ +static bool explicit_columns = false; + +/* (-t|-T) False means we aren't printing headers and footers. */ +static bool extremities = true; + +/* (-t) True means we retain all FF set by hand in input files. + False is set with -T option. */ +static bool keep_FF = false; +static bool print_a_FF = false; + +/* True means we need to print a header as soon as we know we've got input + to print after it. */ +static bool print_a_header; + +/* (-f) True means use formfeeds instead of newlines to separate pages. */ +static bool use_form_feed = false; + +/* True means we have read the standard input. */ +static bool have_read_stdin = false; + +/* True means the -a flag has been given. */ +static bool print_across_flag = false; + +/* True means we're printing one file in multiple (>1) downward columns. */ +static bool storing_columns = true; + +/* (-b) True means balance columns on the last page as Sys V does. */ +/* That's no longer an independent option. With storing_columns = true + balance_columns = true is used too (s. function init_parameters). + We get a consistent formulation with "FF set by hand" in input files. */ +static bool balance_columns = false; + +/* (-l) Number of lines on a page, including header and footer lines. */ +static int lines_per_page = 66; + +/* Number of lines in the header and footer can be reset to 0 using + the -t flag. */ +enum { lines_per_header = 5 }; +static int lines_per_body; +enum { lines_per_footer = 5 }; + +/* (-w|-W) Width in characters of the page. Does not include the width of + the margin. */ +static int chars_per_line = 72; + +/* (-w|W) True means we truncate lines longer than chars_per_column. */ +static bool truncate_lines = false; + +/* (-J) True means we join lines without any line truncation. -J + dominates -w option. */ +static bool join_lines = false; + +/* Number of characters in a column. Based on col_sep_length and + page width. */ +static int chars_per_column; + +/* (-e) True means convert tabs to spaces on input. */ +static bool untabify_input = false; + +/* (-e) The input tab character. */ +static char input_tab_char = '\t'; + +/* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ... + where the leftmost column is 1. */ +static int chars_per_input_tab = 8; + +/* (-i) True means convert spaces to tabs on output. */ +static bool tabify_output = false; + +/* (-i) The output tab character. */ +static char output_tab_char = '\t'; + +/* (-i) The width of the output tab. */ +static int chars_per_output_tab = 8; + +/* Keeps track of pending white space. When we hit a nonspace + character after some whitespace, we print whitespace, tabbing + if necessary to get to output_position + spaces_not_printed. */ +static int spaces_not_printed; + +/* (-o) Number of spaces in the left margin (tabs used when possible). */ +static int chars_per_margin = 0; + +/* Position where the next character will fall. + Leftmost position is 0 + chars_per_margin. + Rightmost position is chars_per_margin + chars_per_line - 1. + This is important for converting spaces to tabs on output. */ +static int output_position; + +/* Horizontal position relative to the current file. + (output_position depends on where we are on the page; + input_position depends on where we are in the file.) + Important for converting tabs to spaces on input. */ +static int input_position; + +/* True if there were any failed opens so we can exit with nonzero + status. */ +static bool failed_opens = false; + +/* The number of spaces taken up if we print a tab character with width + c_ from position h_. */ +#define TAB_WIDTH(c_, h_) ((c_) - ((h_) % (c_))) + +/* The horizontal position we'll be at after printing a tab character + of width c_ from the position h_. */ +#define POS_AFTER_TAB(c_, h_) ((h_) + TAB_WIDTH (c_, h_)) + +/* (-NNN) Number of columns of text to print. */ +static int columns = 1; + +/* (+NNN:MMM) Page numbers on which to begin and stop printing. + first_page_number = 0 will be used to check input only. */ +static uintmax_t first_page_number = 0; +static uintmax_t last_page_number = UINTMAX_MAX; + +/* Number of files open (not closed, not on hold). */ +static int files_ready_to_read = 0; + +/* Current page number. Displayed in header. */ +static uintmax_t page_number; + +/* Current line number. Displayed when -n flag is specified. + + When printing files in parallel (-m flag), line numbering is as follows: + 1 foo goo moo + 2 hoo too zoo + + When printing files across (-a flag), ... + 1 foo 2 moo 3 goo + 4 hoo 5 too 6 zoo + + Otherwise, line numbering is as follows: + 1 foo 3 goo 5 too + 2 moo 4 hoo 6 zoo */ +static int line_number; + +/* (-n) True means lines should be preceded by numbers. */ +static bool numbered_lines = false; + +/* (-n) Character which follows each line number. */ +static char number_separator = '\t'; + +/* (-n) line counting starts with 1st line of input file (not with 1st + line of 1st page printed). */ +static int line_count = 1; + +/* (-n) True means counting of skipped lines starts with 1st line of + input file. False means -N option is used in addition, counting of + skipped lines not required. */ +static bool skip_count = true; + +/* (-N) Counting starts with start_line_number = NUMBER at 1st line of + first page printed, usually not 1st page of input file. */ +static int start_line_num = 1; + +/* (-n) Width in characters of a line number. */ +static int chars_per_number = 5; + +/* Used when widening the first column to accommodate numbers -- only + needed when printing files in parallel. Includes width of both the + number and the number_separator. */ +static int number_width; + +/* Buffer sprintf uses to format a line number. */ +static char *number_buff; + +/* (-v) True means nonprintable characters are printed as escape sequences. + control-g becomes \007. */ +static bool use_esc_sequence = false; + +/* (-c) True means nonprintable characters are printed as control prefixes. + control-g becomes ^G. */ +static bool use_cntrl_prefix = false; + +/* (-d) True means output is double spaced. */ +static bool double_space = false; + +/* Number of files opened initially in init_files. Should be 1 + unless we're printing multiple files in parallel. */ +static int total_files = 0; + +/* (-r) True means don't complain if we can't open a file. */ +static bool ignore_failed_opens = false; + +/* (-S) True means we separate columns with a specified string. + -S option does not affect line truncation nor column alignment. */ +static bool use_col_separator = false; + +/* String used to separate columns if the -S option has been specified. + Default without -S but together with one of the column options + -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */ +static char const *col_sep_string = ""; +static int col_sep_length = 0; +static char *column_separator = (char *) " "; +static char *line_separator = (char *) "\t"; + +/* Number of separator characters waiting to be printed as soon as we + know that we have any input remaining to be printed. */ +static int separators_not_printed; + +/* Position we need to pad to, as soon as we know that we have input + remaining to be printed. */ +static int padding_not_printed; + +/* True means we should pad the end of the page. Remains false until we + know we have a page to print. */ +static bool pad_vertically; + +/* (-h) String of characters used in place of the filename in the header. */ +static char *custom_header; + +/* (-D) Date format for the header. */ +static char const *date_format; + +/* The local time zone rules, as per the TZ environment variable. */ +static timezone_t localtz; + +/* Date and file name for the header. */ +static char *date_text; +static char const *file_text; + +/* Output columns available, not counting the date and file name. */ +static int header_width_available; + +static char *clump_buff; + +/* True means we read the line no. lines_per_body in skip_read + called by skip_to_page. That variable controls the coincidence of a + "FF set by hand" and "full_page_printed", see above the definition of + structure COLUMN. */ +static bool last_line = false; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + COLUMNS_OPTION = CHAR_MAX + 1, + PAGES_OPTION +}; + +static char const short_options[] = + "-0123456789D:FJN:S::TW:abcde::fh:i::l:mn::o:rs::tvw:"; + +static struct option const long_options[] = +{ + {"pages", required_argument, nullptr, PAGES_OPTION}, + {"columns", required_argument, nullptr, COLUMNS_OPTION}, + {"across", no_argument, nullptr, 'a'}, + {"show-control-chars", no_argument, nullptr, 'c'}, + {"double-space", no_argument, nullptr, 'd'}, + {"date-format", required_argument, nullptr, 'D'}, + {"expand-tabs", optional_argument, nullptr, 'e'}, + {"form-feed", no_argument, nullptr, 'f'}, + {"header", required_argument, nullptr, 'h'}, + {"output-tabs", optional_argument, nullptr, 'i'}, + {"join-lines", no_argument, nullptr, 'J'}, + {"length", required_argument, nullptr, 'l'}, + {"merge", no_argument, nullptr, 'm'}, + {"number-lines", optional_argument, nullptr, 'n'}, + {"first-line-number", required_argument, nullptr, 'N'}, + {"indent", required_argument, nullptr, 'o'}, + {"no-file-warnings", no_argument, nullptr, 'r'}, + {"separator", optional_argument, nullptr, 's'}, + {"sep-string", optional_argument, nullptr, 'S'}, + {"omit-header", no_argument, nullptr, 't'}, + {"omit-pagination", no_argument, nullptr, 'T'}, + {"show-nonprinting", no_argument, nullptr, 'v'}, + {"width", required_argument, nullptr, 'w'}, + {"page-width", required_argument, nullptr, 'W'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static _Noreturn void +integer_overflow (void) +{ + error (EXIT_FAILURE, 0, _("integer overflow")); +} + +/* Return the number of columns that have either an open file or + stored lines. */ + +ATTRIBUTE_PURE +static unsigned int +cols_ready_to_print (void) +{ + COLUMN *q; + unsigned int i; + unsigned int n; + + n = 0; + for (q = column_vector, i = 0; i < columns; ++q, ++i) + if (q->status == OPEN + || q->status == FF_FOUND /* With -b: To print a header only */ + || (storing_columns && q->lines_stored > 0 && q->lines_to_print > 0)) + ++n; + return n; +} + +/* Estimate first_ / last_page_number + using option +FIRST_PAGE:LAST_PAGE */ + +static bool +first_last_page (int oi, char c, char const *pages) +{ + char *p; + uintmax_t first; + uintmax_t last = UINTMAX_MAX; + strtol_error err = xstrtoumax (pages, &p, 10, &first, ""); + if (err != LONGINT_OK && err != LONGINT_INVALID_SUFFIX_CHAR) + xstrtol_fatal (err, oi, c, long_options, pages); + + if (p == pages || !first) + return false; + + if (*p == ':') + { + char const *p1 = p + 1; + err = xstrtoumax (p1, &p, 10, &last, ""); + if (err != LONGINT_OK) + xstrtol_fatal (err, oi, c, long_options, pages); + if (p1 == p || last < first) + return false; + } + + if (*p) + return false; + + first_page_number = first; + last_page_number = last; + return true; +} + +/* Parse column count string S, and if it's valid (1 or larger and + within range of the type of 'columns') set the global variables + columns and explicit_columns. Otherwise, exit with a diagnostic. */ + +static void +parse_column_count (char const *s) +{ + getoptnum (s, 1, &columns, _("invalid number of columns")); + explicit_columns = true; +} + +/* Estimate length of col_sep_string with option -S. */ + +static void +separator_string (char const *optarg_S) +{ + size_t len = strlen (optarg_S); + if (INT_MAX < len) + integer_overflow (); + col_sep_length = len; + col_sep_string = optarg_S; +} + +int +main (int argc, char **argv) +{ + unsigned int n_files; + bool old_options = false; + bool old_w = false; + bool old_s = false; + char **file_names; + + /* Accumulate the digits of old-style options like -99. */ + char *column_count_string = nullptr; + size_t n_digits = 0; + size_t n_alloc = 0; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + n_files = 0; + file_names = (argc > 1 + ? xnmalloc (argc - 1, sizeof (char *)) + : nullptr); + + while (true) + { + int oi = -1; + int c = getopt_long (argc, argv, short_options, long_options, &oi); + if (c == -1) + break; + + if (ISDIGIT (c)) + { + /* Accumulate column-count digits specified via old-style options. */ + if (n_digits + 1 >= n_alloc) + column_count_string + = X2REALLOC (column_count_string, &n_alloc); + column_count_string[n_digits++] = c; + column_count_string[n_digits] = '\0'; + continue; + } + + n_digits = 0; + + switch (c) + { + case 1: /* Non-option argument. */ + /* long option --page dominates old '+FIRST_PAGE ...'. */ + if (! (first_page_number == 0 + && *optarg == '+' && first_last_page (-2, '+', optarg + 1))) + file_names[n_files++] = optarg; + break; + + case PAGES_OPTION: /* --pages=FIRST_PAGE[:LAST_PAGE] */ + { /* dominates old opt +... */ + if (! optarg) + error (EXIT_FAILURE, 0, + _("'--pages=FIRST_PAGE[:LAST_PAGE]' missing argument")); + else if (! first_last_page (oi, 0, optarg)) + error (EXIT_FAILURE, 0, _("invalid page range %s"), + quote (optarg)); + break; + } + + case COLUMNS_OPTION: /* --columns=COLUMN */ + { + parse_column_count (optarg); + + /* If there was a prior column count specified via the + short-named option syntax, e.g., -9, ensure that this + long-name-specified value overrides it. */ + free (column_count_string); + column_count_string = nullptr; + n_alloc = 0; + break; + } + + case 'a': + print_across_flag = true; + storing_columns = false; + break; + case 'b': + balance_columns = true; + break; + case 'c': + use_cntrl_prefix = true; + break; + case 'd': + double_space = true; + break; + case 'D': + date_format = optarg; + break; + case 'e': + if (optarg) + getoptarg (optarg, 'e', &input_tab_char, + &chars_per_input_tab); + /* Could check tab width > 0. */ + untabify_input = true; + break; + case 'f': + case 'F': + use_form_feed = true; + break; + case 'h': + custom_header = optarg; + break; + case 'i': + if (optarg) + getoptarg (optarg, 'i', &output_tab_char, + &chars_per_output_tab); + /* Could check tab width > 0. */ + tabify_output = true; + break; + case 'J': + join_lines = true; + break; + case 'l': + getoptnum (optarg, 1, &lines_per_page, + _("'-l PAGE_LENGTH' invalid number of lines")); + break; + case 'm': + parallel_files = true; + storing_columns = false; + break; + case 'n': + numbered_lines = true; + if (optarg) + getoptarg (optarg, 'n', &number_separator, + &chars_per_number); + break; + case 'N': + skip_count = false; + getoptnum (optarg, INT_MIN, &start_line_num, + _("'-N NUMBER' invalid starting line number")); + break; + case 'o': + getoptnum (optarg, 0, &chars_per_margin, + _("'-o MARGIN' invalid line offset")); + break; + case 'r': + ignore_failed_opens = true; + break; + case 's': + old_options = true; + old_s = true; + if (!use_col_separator && optarg) + separator_string (optarg); + break; + case 'S': + old_s = false; + /* Reset an additional input of -s, -S dominates -s */ + col_sep_string = ""; + col_sep_length = 0; + use_col_separator = true; + if (optarg) + separator_string (optarg); + break; + case 't': + extremities = false; + keep_FF = true; + break; + case 'T': + extremities = false; + keep_FF = false; + break; + case 'v': + use_esc_sequence = true; + break; + case 'w': + old_options = true; + old_w = true; + { + int tmp_cpl; + getoptnum (optarg, 1, &tmp_cpl, + _("'-w PAGE_WIDTH' invalid number of characters")); + if (! truncate_lines) + chars_per_line = tmp_cpl; + } + break; + case 'W': + old_w = false; /* dominates -w */ + truncate_lines = true; + getoptnum (optarg, 1, &chars_per_line, + _("'-W PAGE_WIDTH' invalid number of characters")); + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + break; + } + } + + if (column_count_string) + { + parse_column_count (column_count_string); + free (column_count_string); + } + + if (! date_format) + date_format = (getenv ("POSIXLY_CORRECT") && !hard_locale (LC_TIME) + ? "%b %e %H:%M %Y" + : "%Y-%m-%d %H:%M"); + + localtz = tzalloc (getenv ("TZ")); + + /* Now we can set a reasonable initial value: */ + if (first_page_number == 0) + first_page_number = 1; + + if (parallel_files && explicit_columns) + error (EXIT_FAILURE, 0, + _("cannot specify number of columns when printing in parallel")); + + if (parallel_files && print_across_flag) + error (EXIT_FAILURE, 0, + _("cannot specify both printing across and printing in parallel")); + +/* Translate some old short options to new/long options. + To meet downward compatibility with other UNIX pr utilities + and some POSIX specifications. */ + + if (old_options) + { + if (old_w) + { + if (parallel_files || explicit_columns) + { + /* activate -W */ + truncate_lines = true; + if (old_s) + /* adapt HP-UX and SunOS: -s = no separator; + activate -S */ + use_col_separator = true; + } + else + /* old -w sets width with columns only + activate -J */ + join_lines = true; + } + else if (!use_col_separator) + { + /* No -S option read */ + if (old_s && (parallel_files || explicit_columns)) + { + if (!truncate_lines) + { + /* old -s (without -w and -W) annuls column alignment, + uses fields, activate -J */ + join_lines = true; + if (col_sep_length > 0) + /* activate -S */ + use_col_separator = true; + } + else + /* with -W */ + /* adapt HP-UX and SunOS: -s = no separator; + activate -S */ + use_col_separator = true; + } + } + } + + for (; optind < argc; optind++) + { + file_names[n_files++] = argv[optind]; + } + + if (n_files == 0) + { + /* No file arguments specified; read from standard input. */ + print_files (0, nullptr); + } + else + { + if (parallel_files) + print_files (n_files, file_names); + else + { + for (unsigned int i = 0; i < n_files; i++) + print_files (1, &file_names[i]); + } + } + + cleanup (); + + if (have_read_stdin && fclose (stdin) == EOF) + error (EXIT_FAILURE, errno, _("standard input")); + main_exit (failed_opens ? EXIT_FAILURE : EXIT_SUCCESS); +} + +/* Parse numeric arguments, ensuring MIN <= number <= INT_MAX. */ + +static void +getoptnum (char const *n_str, int min, int *num, char const *err) +{ + intmax_t tnum = xdectoimax (n_str, min, INT_MAX, "", err, 0); + *num = tnum; +} + +/* Parse options of the form -scNNN. + + Example: -nck, where 'n' is the option, c is the optional number + separator, and k is the optional width of the field used when printing + a number. */ + +static void +getoptarg (char *arg, char switch_char, char *character, int *number) +{ + if (!*arg) + { + error (0, 0, _("'-%c': Invalid argument: %s"), switch_char, quote (arg)); + usage (EXIT_FAILURE); + } + + if (!ISDIGIT (*arg)) + *character = *arg++; + if (*arg) + { + long int tmp_long; + strtol_error e = xstrtol (arg, nullptr, 10, &tmp_long, ""); + if (e == LONGINT_OK) + { + if (tmp_long <= 0) + e = LONGINT_INVALID; + else if (INT_MAX < tmp_long) + e = LONGINT_OVERFLOW; + } + if (e != LONGINT_OK) + { + error (0, e & LONGINT_OVERFLOW ? EOVERFLOW : 0, + _("'-%c' extra characters or invalid number in the argument: %s"), + switch_char, quote (arg)); + usage (EXIT_FAILURE); + } + *number = tmp_long; + } +} + +/* Set parameters related to formatting. */ + +static void +init_parameters (int number_of_files) +{ + int chars_used_by_number = 0; + + lines_per_body = lines_per_page - lines_per_header - lines_per_footer; + if (lines_per_body <= 0) + { + extremities = false; + keep_FF = true; + } + if (extremities == false) + lines_per_body = lines_per_page; + + if (double_space) + lines_per_body = MAX (1, lines_per_body / 2); + + /* If input is stdin, cannot print parallel files. BSD dumps core + on this. */ + if (number_of_files == 0) + parallel_files = false; + + if (parallel_files) + columns = number_of_files; + + /* One file, multi columns down: -b option is set to get a consistent + formulation with "FF set by hand" in input files. */ + if (storing_columns) + balance_columns = true; + + /* Tabification is assumed for multiple columns. */ + if (columns > 1) + { + if (!use_col_separator) + { + /* Use default separator */ + if (join_lines) + col_sep_string = line_separator; + else + col_sep_string = column_separator; + + col_sep_length = 1; + use_col_separator = true; + } + /* It's rather pointless to define a TAB separator with column + alignment */ + else if (!join_lines && col_sep_length == 1 && *col_sep_string == '\t') + col_sep_string = column_separator; + + truncate_lines = true; + if (! (col_sep_length == 1 && *col_sep_string == '\t')) + untabify_input = true; + tabify_output = true; + } + else + storing_columns = false; + + /* -J dominates -w in any case */ + if (join_lines) + truncate_lines = false; + + if (numbered_lines) + { + int chars_per_default_tab = 8; + + line_count = start_line_num; + + /* To allow input tab-expansion (-e sensitive) use: + if (number_separator == input_tab_char) + number_width = chars_per_number + + TAB_WIDTH (chars_per_input_tab, chars_per_number); */ + + /* Estimate chars_per_text without any margin and keep it constant. */ + if (number_separator == '\t') + number_width = (chars_per_number + + TAB_WIDTH (chars_per_default_tab, chars_per_number)); + else + number_width = chars_per_number + 1; + + /* The number is part of the column width unless we are + printing files in parallel. */ + if (parallel_files) + chars_used_by_number = number_width; + } + + int sep_chars, useful_chars; + if (ckd_mul (&sep_chars, columns - 1, col_sep_length)) + sep_chars = INT_MAX; + if (ckd_sub (&useful_chars, chars_per_line - chars_used_by_number, + sep_chars)) + useful_chars = 0; + chars_per_column = useful_chars / columns; + + if (chars_per_column < 1) + error (EXIT_FAILURE, 0, _("page width too narrow")); + + if (numbered_lines) + { + free (number_buff); + number_buff = xmalloc (MAX (chars_per_number, + INT_STRLEN_BOUND (line_number)) + 1); + } + + /* Pick the maximum between the tab width and the width of an + escape sequence. + The width of an escape sequence (4) isn't the lower limit any longer. + We've to use 8 as the lower limit, if we use chars_per_default_tab = 8 + to expand a tab which is not an input_tab-char. */ + free (clump_buff); + clump_buff = xmalloc (MAX (8, chars_per_input_tab)); +} + +/* Open the necessary files, + maintaining a COLUMN structure for each column. + + With multiple files, each column p has a different p->fp. + With single files, each column p has the same p->fp. + Return false if (number_of_files > 0) and no files can be opened, + true otherwise. + + With each column/file p, p->full_page_printed is initialized, + see also open_file. */ + +static bool +init_fps (int number_of_files, char **av) +{ + COLUMN *p; + + total_files = 0; + + free (column_vector); + column_vector = xnmalloc (columns, sizeof (COLUMN)); + + if (parallel_files) + { + int files_left = number_of_files; + for (p = column_vector; files_left--; ++p, ++av) + { + if (! open_file (*av, p)) + { + --p; + --columns; + } + } + if (columns == 0) + return false; + init_header ("", -1); + } + else + { + p = column_vector; + if (number_of_files > 0) + { + if (! open_file (*av, p)) + return false; + init_header (*av, fileno (p->fp)); + p->lines_stored = 0; + } + else + { + p->name = _("standard input"); + p->fp = stdin; + have_read_stdin = true; + p->status = OPEN; + p->full_page_printed = false; + ++total_files; + init_header ("", -1); + p->lines_stored = 0; + } + + char const *firstname = p->name; + FILE *firstfp = p->fp; + int i; + for (i = columns - 1, ++p; i; --i, ++p) + { + p->name = firstname; + p->fp = firstfp; + p->status = OPEN; + p->full_page_printed = false; + p->lines_stored = 0; + } + } + files_ready_to_read = total_files; + return true; +} + +/* Determine print_func and char_func, the functions + used by each column for printing and/or storing. + + Determine the horizontal position desired when we begin + printing a column (p->start_position). */ + +static void +init_funcs (void) +{ + int i, h, h_next; + COLUMN *p; + + h = chars_per_margin; + + if (!truncate_lines) + h_next = ANYWHERE; + else + { + /* When numbering lines of parallel files, we enlarge the + first column to accommodate the number. Looks better than + the Sys V approach. */ + if (parallel_files && numbered_lines) + h_next = h + chars_per_column + number_width; + else + h_next = h + chars_per_column; + } + + /* Enlarge p->start_position of first column to use the same form of + padding_not_printed with all columns. */ + h = h + col_sep_length; + + /* This loop takes care of all but the rightmost column. */ + + for (p = column_vector, i = 1; i < columns; ++p, ++i) + { + if (storing_columns) /* One file, multi columns down. */ + { + p->char_func = store_char; + p->print_func = print_stored; + } + else + /* One file, multi columns across; or parallel files. */ + { + p->char_func = print_char; + p->print_func = read_line; + } + + /* Number only the first column when printing files in + parallel. */ + p->numbered = numbered_lines && (!parallel_files || i == 1); + p->start_position = h; + + /* If we don't truncate lines, all start_positions are + ANYWHERE, except the first column's start_position when + using a margin. */ + + if (!truncate_lines) + { + h = ANYWHERE; + h_next = ANYWHERE; + } + else + { + h = h_next + col_sep_length; + h_next = h + chars_per_column; + } + } + + /* The rightmost column. + + Doesn't need to be stored unless we intend to balance + columns on the last page. */ + if (storing_columns && balance_columns) + { + p->char_func = store_char; + p->print_func = print_stored; + } + else + { + p->char_func = print_char; + p->print_func = read_line; + } + + p->numbered = numbered_lines && (!parallel_files || i == 1); + p->start_position = h; +} + +/* Open a file. Return true if successful. + + With each file p, p->full_page_printed is initialized, + see also init_fps. */ + +static bool +open_file (char *name, COLUMN *p) +{ + if (STREQ (name, "-")) + { + p->name = _("standard input"); + p->fp = stdin; + have_read_stdin = true; + } + else + { + p->name = name; + p->fp = fopen (name, "r"); + } + if (p->fp == nullptr) + { + failed_opens = true; + if (!ignore_failed_opens) + error (0, errno, "%s", quotef (name)); + return false; + } + fadvise (p->fp, FADVISE_SEQUENTIAL); + p->status = OPEN; + p->full_page_printed = false; + ++total_files; + return true; +} + +/* Close the file in P. + + If we aren't dealing with multiple files in parallel, we change + the status of all columns in the column list to reflect the close. */ + +static void +close_file (COLUMN *p) +{ + COLUMN *q; + int i; + + if (p->status == CLOSED) + return; + + int err = errno; + if (!ferror (p->fp)) + err = 0; + if (fileno (p->fp) == STDIN_FILENO) + clearerr (p->fp); + else if (fclose (p->fp) != 0 && !err) + err = errno; + if (err) + error (EXIT_FAILURE, err, "%s", quotef (p->name)); + + if (!parallel_files) + { + for (q = column_vector, i = columns; i; ++q, --i) + { + q->status = CLOSED; + if (q->lines_stored == 0) + { + q->lines_to_print = 0; + } + } + } + else + { + p->status = CLOSED; + p->lines_to_print = 0; + } + + --files_ready_to_read; +} + +/* Put a file on hold until we start a new page, + since we've hit a form feed. + + If we aren't dealing with parallel files, we must change the + status of all columns in the column list. */ + +static void +hold_file (COLUMN *p) +{ + COLUMN *q; + int i; + + if (!parallel_files) + for (q = column_vector, i = columns; i; ++q, --i) + { + if (storing_columns) + q->status = FF_FOUND; + else + q->status = ON_HOLD; + } + else + p->status = ON_HOLD; + + p->lines_to_print = 0; + --files_ready_to_read; +} + +/* Undo hold_file -- go through the column list and change any + ON_HOLD columns to OPEN. Used at the end of each page. */ + +static void +reset_status (void) +{ + int i = columns; + COLUMN *p; + + for (p = column_vector; i; --i, ++p) + if (p->status == ON_HOLD) + { + p->status = OPEN; + files_ready_to_read++; + } + + if (storing_columns) + { + if (column_vector->status == CLOSED) + /* We use the info to output an error message in skip_to_page. */ + files_ready_to_read = 0; + else + files_ready_to_read = 1; + } +} + +/* Print a single file, or multiple files in parallel. + + Set up the list of columns, opening the necessary files. + Allocate space for storing columns, if necessary. + Skip to first_page_number, if user has asked to skip leading pages. + Determine which functions are appropriate to store/print lines + in each column. + Print the file(s). */ + +static void +print_files (int number_of_files, char **av) +{ + init_parameters (number_of_files); + if (! init_fps (number_of_files, av)) + return; + if (storing_columns) + init_store_cols (); + + if (first_page_number > 1) + { + if (!skip_to_page (first_page_number)) + return; + else + page_number = first_page_number; + } + else + page_number = 1; + + init_funcs (); + + line_number = line_count; + while (print_page ()) + ; +} + +/* Initialize header information. + If DESC is non-negative, it is a file descriptor open to + FILENAME for reading. */ + +static void +init_header (char const *filename, int desc) +{ + char *buf = nullptr; + struct stat st; + struct timespec t; + int ns; + struct tm tm; + + /* If parallel files or standard input, use current date. */ + if (STREQ (filename, "-")) + desc = -1; + if (0 <= desc && fstat (desc, &st) == 0) + t = get_stat_mtime (&st); + else + { + static struct timespec timespec; + if (! timespec.tv_sec) + gettime (×pec); + t = timespec; + } + + ns = t.tv_nsec; + if (localtime_rz (localtz, &t.tv_sec, &tm)) + { + size_t bufsize + = nstrftime (nullptr, SIZE_MAX, date_format, &tm, localtz, ns) + 1; + buf = xmalloc (bufsize); + nstrftime (buf, bufsize, date_format, &tm, localtz, ns); + } + else + { + char secbuf[INT_BUFSIZE_BOUND (intmax_t)]; + buf = xmalloc (sizeof secbuf + MAX (10, INT_BUFSIZE_BOUND (int))); + sprintf (buf, "%s.%09d", timetostr (t.tv_sec, secbuf), ns); + } + + free (date_text); + date_text = buf; + file_text = custom_header ? custom_header : desc < 0 ? "" : filename; + header_width_available = (chars_per_line + - mbswidth (date_text, 0) + - mbswidth (file_text, 0)); +} + +/* Set things up for printing a page + + Scan through the columns ... + Determine which are ready to print + (i.e., which have lines stored or open files) + Set p->lines_to_print appropriately + (to p->lines_stored if we're storing, or lines_per_body + if we're reading straight from the file) + Keep track of this total so we know when to stop printing */ + +static void +init_page (void) +{ + int j; + COLUMN *p; + + if (storing_columns) + { + store_columns (); + for (j = columns - 1, p = column_vector; j; --j, ++p) + { + p->lines_to_print = p->lines_stored; + } + + /* Last column. */ + if (balance_columns) + { + p->lines_to_print = p->lines_stored; + } + /* Since we're not balancing columns, we don't need to store + the rightmost column. Read it straight from the file. */ + else + { + if (p->status == OPEN) + { + p->lines_to_print = lines_per_body; + } + else + p->lines_to_print = 0; + } + } + else + for (j = columns, p = column_vector; j; --j, ++p) + if (p->status == OPEN) + { + p->lines_to_print = lines_per_body; + } + else + p->lines_to_print = 0; +} + +/* Align empty columns and print separators. + Empty columns will be formed by files with status ON_HOLD or CLOSED + when printing multiple files in parallel. */ + +static void +align_column (COLUMN *p) +{ + padding_not_printed = p->start_position; + if (col_sep_length < padding_not_printed) + { + pad_across_to (padding_not_printed - col_sep_length); + padding_not_printed = ANYWHERE; + } + + if (use_col_separator) + print_sep_string (); + + if (p->numbered) + add_line_number (p); +} + +/* Print one page. + + As long as there are lines left on the page and columns ready to print, + Scan across the column list + if the column has stored lines or the file is open + pad to the appropriate spot + print the column + pad the remainder of the page with \n or \f as requested + reset the status of all files -- any files which where on hold because + of formfeeds are now put back into the lineup. */ + +static bool +print_page (void) +{ + int j; + int lines_left_on_page; + COLUMN *p; + + /* Used as an accumulator (with | operator) of successive values of + pad_vertically. The trick is to set pad_vertically + to false before each run through the inner loop, then after that + loop, it tells us whether a line was actually printed (whether a + newline needs to be output -- or two for double spacing). But those + values have to be accumulated (in pv) so we can invoke pad_down + properly after the outer loop completes. */ + bool pv; + + init_page (); + + if (cols_ready_to_print () == 0) + return false; + + if (extremities) + print_a_header = true; + + /* Don't pad unless we know a page was printed. */ + pad_vertically = false; + pv = false; + + lines_left_on_page = lines_per_body; + if (double_space) + lines_left_on_page *= 2; + + while (lines_left_on_page > 0 && cols_ready_to_print () > 0) + { + output_position = 0; + spaces_not_printed = 0; + separators_not_printed = 0; + pad_vertically = false; + align_empty_cols = false; + empty_line = true; + + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + { + input_position = 0; + if (p->lines_to_print > 0 || p->status == FF_FOUND) + { + FF_only = false; + padding_not_printed = p->start_position; + if (!(p->print_func) (p)) + read_rest_of_line (p); + pv |= pad_vertically; + + --p->lines_to_print; + if (p->lines_to_print <= 0) + { + if (cols_ready_to_print () == 0) + break; + } + + /* File p changed its status to ON_HOLD or CLOSED */ + if (parallel_files && p->status != OPEN) + { + if (empty_line) + align_empty_cols = true; + else if (p->status == CLOSED + || (p->status == ON_HOLD && FF_only)) + align_column (p); + } + } + else if (parallel_files) + { + /* File status ON_HOLD or CLOSED */ + if (empty_line) + align_empty_cols = true; + else + align_column (p); + } + + /* We need it also with an empty column */ + if (use_col_separator) + ++separators_not_printed; + } + + if (pad_vertically) + { + putchar ('\n'); + --lines_left_on_page; + } + + if (cols_ready_to_print () == 0 && !extremities) + break; + + if (double_space && pv) + { + putchar ('\n'); + --lines_left_on_page; + } + } + + if (lines_left_on_page == 0) + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + if (p->status == OPEN) + p->full_page_printed = true; + + pad_vertically = pv; + + if (pad_vertically && extremities) + pad_down (lines_left_on_page + lines_per_footer); + else if (keep_FF && print_a_FF) + { + putchar ('\f'); + print_a_FF = false; + } + + if (last_page_number < ++page_number) + return false; /* Stop printing with LAST_PAGE */ + + reset_status (); /* Change ON_HOLD to OPEN. */ + + return true; /* More pages to go. */ +} + +/* Allocate space for storing columns. + + This is necessary when printing multiple columns from a single file. + Lines are stored consecutively in buff, separated by '\0'. + + The following doesn't apply any longer - any tuning possible? + (We can't use a fixed offset since with the '-s' flag lines aren't + truncated.) + + We maintain a list (line_vector) of pointers to the beginnings + of lines in buff. We allocate one more than the number of lines + because the last entry tells us the index of the last character, + which we need to know in order to print the last line in buff. */ + +static void +init_store_cols (void) +{ + int total_lines, total_lines_1, chars_per_column_1, chars_if_truncate; + if (ckd_mul (&total_lines, lines_per_body, columns) + || ckd_add (&total_lines_1, total_lines, 1) + || ckd_add (&chars_per_column_1, chars_per_column, 1) + || ckd_mul (&chars_if_truncate, total_lines, chars_per_column_1)) + integer_overflow (); + + free (line_vector); + /* FIXME: here's where it was allocated. */ + line_vector = xnmalloc (total_lines_1, sizeof *line_vector); + + free (end_vector); + end_vector = xnmalloc (total_lines, sizeof *end_vector); + + free (buff); + buff = xnmalloc (chars_if_truncate, use_col_separator + 1); + buff_allocated = chars_if_truncate; /* Tune this. */ + buff_allocated *= use_col_separator + 1; +} + +/* Store all but the rightmost column. + (Used when printing a single file in multiple downward columns) + + For each column + set p->current_line to be the index in line_vector of the + first line in the column + For each line in the column + store the line in buff + add to line_vector the index of the line's first char + buff_start is the index in buff of the first character in the + current line. */ + +static void +store_columns (void) +{ + int i, j; + unsigned int line = 0; + unsigned int buff_start; + int last_col; /* The rightmost column which will be saved in buff */ + COLUMN *p; + + buff_current = 0; + buff_start = 0; + + if (balance_columns) + last_col = columns; + else + last_col = columns - 1; + + for (i = 1, p = column_vector; i <= last_col; ++i, ++p) + p->lines_stored = 0; + + for (i = 1, p = column_vector; i <= last_col && files_ready_to_read; + ++i, ++p) + { + p->current_line = line; + for (j = lines_per_body; j && files_ready_to_read; --j) + + if (p->status == OPEN) /* Redundant. Clean up. */ + { + input_position = 0; + + if (!read_line (p)) + read_rest_of_line (p); + + if (p->status == OPEN + || buff_start != buff_current) + { + ++p->lines_stored; + line_vector[line] = buff_start; + end_vector[line++] = input_position; + buff_start = buff_current; + } + } + } + + /* Keep track of the location of the last char in buff. */ + line_vector[line] = buff_start; + + if (balance_columns) + balance (line); +} + +static void +balance (int total_stored) +{ + COLUMN *p; + int i, lines; + int first_line = 0; + + for (i = 1, p = column_vector; i <= columns; ++i, ++p) + { + lines = total_stored / columns; + if (i <= total_stored % columns) + ++lines; + + p->lines_stored = lines; + p->current_line = first_line; + + first_line += lines; + } +} + +/* Store a character in the buffer. */ + +static void +store_char (char c) +{ + if (buff_current >= buff_allocated) + { + /* May be too generous. */ + buff = X2REALLOC (buff, &buff_allocated); + } + buff[buff_current++] = c; +} + +static void +add_line_number (COLUMN *p) +{ + int i; + char *s; + int num_width; + + /* Cutting off the higher-order digits is more informative than + lower-order cut off. */ + num_width = sprintf (number_buff, "%*d", chars_per_number, line_number); + line_number++; + s = number_buff + (num_width - chars_per_number); + for (i = chars_per_number; i > 0; i--) + (p->char_func) (*s++); + + if (columns > 1) + { + /* Tabification is assumed for multiple columns, also for n-separators, + but 'default n-separator = TAB' hasn't been given priority over + equal column_width also specified by POSIX. */ + if (number_separator == '\t') + { + i = number_width - chars_per_number; + while (i-- > 0) + (p->char_func) (' '); + } + else + (p->char_func) (number_separator); + } + else + /* To comply with POSIX, we avoid any expansion of default TAB + separator with a single column output. No column_width requirement + has to be considered. */ + { + (p->char_func) (number_separator); + if (number_separator == '\t') + output_position = POS_AFTER_TAB (chars_per_output_tab, + output_position); + } + + if (truncate_lines && !parallel_files) + input_position += number_width; +} + +/* Print (or store) padding until the current horizontal position + is position. */ + +static void +pad_across_to (int position) +{ + int h = output_position; + + if (tabify_output) + spaces_not_printed = position - output_position; + else + { + while (++h <= position) + putchar (' '); + output_position = position; + } +} + +/* Pad to the bottom of the page. + + If the user has requested a formfeed, use one. + Otherwise, use newlines. */ + +static void +pad_down (unsigned int lines) +{ + if (use_form_feed) + putchar ('\f'); + else + for (unsigned int i = lines; i; --i) + putchar ('\n'); +} + +/* Read the rest of the line. + + Read from the current column's file until an end of line is + hit. Used when we've truncated a line and we no longer need + to print or store its characters. */ + +static void +read_rest_of_line (COLUMN *p) +{ + int c; + FILE *f = p->fp; + + while ((c = getc (f)) != '\n') + { + if (c == '\f') + { + if ((c = getc (f)) != '\n') + ungetc (c, f); + if (keep_FF) + print_a_FF = true; + hold_file (p); + break; + } + else if (c == EOF) + { + close_file (p); + break; + } + } +} + +/* Read a line with skip_to_page. + + Read from the current column's file until an end of line is + hit. Used when we read full lines to skip pages. + With skip_to_page we have to check for FF-coincidence which is done + in function read_line otherwise. + Count lines of skipped pages to find the line number of 1st page + printed relative to 1st line of input file (start_line_num). */ + +static void +skip_read (COLUMN *p, int column_number) +{ + int c; + FILE *f = p->fp; + int i; + bool single_ff = false; + COLUMN *q; + + /* Read 1st character in a line or any character succeeding a FF */ + if ((c = getc (f)) == '\f' && p->full_page_printed) + /* A FF-coincidence with a previous full_page_printed. + To avoid an additional empty page, eliminate the FF */ + if ((c = getc (f)) == '\n') + c = getc (f); + + p->full_page_printed = false; + + /* 1st character a FF means a single FF without any printable + characters. Don't count it as a line with -n option. */ + if (c == '\f') + single_ff = true; + + /* Preparing for a FF-coincidence: Maybe we finish that page + without a FF found */ + if (last_line) + p->full_page_printed = true; + + while (c != '\n') + { + if (c == '\f') + { + /* No FF-coincidence possible, + no catching up of a FF-coincidence with next page */ + if (last_line) + { + if (!parallel_files) + for (q = column_vector, i = columns; i; ++q, --i) + q->full_page_printed = false; + else + p->full_page_printed = false; + } + + if ((c = getc (f)) != '\n') + ungetc (c, f); + hold_file (p); + break; + } + else if (c == EOF) + { + close_file (p); + break; + } + c = getc (f); + } + + if (skip_count) + if ((!parallel_files || column_number == 1) && !single_ff) + ++line_count; +} + +/* If we're tabifying output, + + When print_char encounters white space it keeps track + of our desired horizontal position and delays printing + until this function is called. */ + +static void +print_white_space (void) +{ + int h_new; + int h_old = output_position; + int goal = h_old + spaces_not_printed; + + while (goal - h_old > 1 + && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal) + { + putchar (output_tab_char); + h_old = h_new; + } + while (++h_old <= goal) + putchar (' '); + + output_position = goal; + spaces_not_printed = 0; +} + +/* Print column separators. + + We keep a count until we know that we'll be printing a line, + then print_sep_string() is called. */ + +static void +print_sep_string (void) +{ + char const *s = col_sep_string; + int l = col_sep_length; + + if (separators_not_printed <= 0) + { + /* We'll be starting a line with chars_per_margin, anything else? */ + if (spaces_not_printed > 0) + print_white_space (); + } + else + { + for (; separators_not_printed > 0; --separators_not_printed) + { + while (l-- > 0) + { + /* 3 types of sep_strings: spaces only, spaces and chars, + chars only */ + if (*s == ' ') + { + /* We're tabifying output; consecutive spaces in + sep_string may have to be converted to tabs */ + s++; + ++spaces_not_printed; + } + else + { + if (spaces_not_printed > 0) + print_white_space (); + putchar (*s++); + ++output_position; + } + } + /* sep_string ends with some spaces */ + if (spaces_not_printed > 0) + print_white_space (); + } + } +} + +/* Print (or store, depending on p->char_func) a clump of N + characters. */ + +static void +print_clump (COLUMN *p, int n, char *clump) +{ + while (n--) + (p->char_func) (*clump++); +} + +/* Print a character. + + Update the following comment: process-char hasn't been used any + longer. + If we're tabifying, all tabs have been converted to spaces by + process_char(). Keep a count of consecutive spaces, and when + a nonspace is encountered, call print_white_space() to print the + required number of tabs and spaces. */ + +static void +print_char (char c) +{ + if (tabify_output) + { + if (c == ' ') + { + ++spaces_not_printed; + return; + } + else if (spaces_not_printed > 0) + print_white_space (); + + /* Nonprintables are assumed to have width 0, except '\b'. */ + if (! isprint (to_uchar (c))) + { + if (c == '\b') + --output_position; + } + else + ++output_position; + } + putchar (c); +} + +/* Skip to page PAGE before printing. + PAGE may be larger than total number of pages. */ + +static bool +skip_to_page (uintmax_t page) +{ + for (uintmax_t n = 1; n < page; ++n) + { + COLUMN *p; + int j; + + for (int i = 1; i < lines_per_body; ++i) + { + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + if (p->status == OPEN) + skip_read (p, j); + } + last_line = true; + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + if (p->status == OPEN) + skip_read (p, j); + + if (storing_columns) /* change FF_FOUND to ON_HOLD */ + for (j = 1, p = column_vector; j <= columns; ++j, ++p) + if (p->status != CLOSED) + p->status = ON_HOLD; + + reset_status (); + last_line = false; + + if (files_ready_to_read < 1) + { + /* It's very helpful, normally the total number of pages is + not known in advance. */ + error (0, 0, + _("starting page number %"PRIuMAX + " exceeds page count %"PRIuMAX), + page, n); + break; + } + } + return files_ready_to_read > 0; +} + +/* Print a header. + + Formfeeds are assumed to use up two lines at the beginning of + the page. */ + +static void +print_header (void) +{ + char page_text[256 + INT_STRLEN_BOUND (page_number)]; + int available_width; + int lhs_spaces; + int rhs_spaces; + + output_position = 0; + pad_across_to (chars_per_margin); + print_white_space (); + + if (page_number == 0) + error (EXIT_FAILURE, 0, _("page number overflow")); + + /* The translator must ensure that formatting the translation of + "Page %"PRIuMAX does not generate more than (sizeof page_text - 1) + bytes. */ + sprintf (page_text, _("Page %"PRIuMAX), page_number); + available_width = header_width_available - mbswidth (page_text, 0); + available_width = MAX (0, available_width); + lhs_spaces = available_width >> 1; + rhs_spaces = available_width - lhs_spaces; + + printf ("\n\n%*s%s%*s%s%*s%s\n\n\n", + chars_per_margin, "", + date_text, lhs_spaces, " ", + file_text, rhs_spaces, " ", page_text); + + print_a_header = false; + output_position = 0; +} + +/* Print (or store, if p->char_func is store_char()) a line. + + Read a character to determine whether we have a line or not. + (We may hit EOF, \n, or \f) + + Once we know we have a line, + set pad_vertically = true, meaning it's safe + to pad down at the end of the page, since we do have a page. + print a header if needed. + pad across to padding_not_printed if needed. + print any separators which need to be printed. + print a line number if it needs to be printed. + + Print the clump which corresponds to the first character. + + Enter a loop and keep printing until an end of line condition + exists, or until we exceed chars_per_column. + + Return false if we exceed chars_per_column before reading + an end of line character, true otherwise. */ + +static bool +read_line (COLUMN *p) +{ + int c; + int chars; + int last_input_position; + int j, k; + COLUMN *q; + + /* read 1st character in each line or any character succeeding a FF: */ + c = getc (p->fp); + + last_input_position = input_position; + + if (c == '\f' && p->full_page_printed) + if ((c = getc (p->fp)) == '\n') + c = getc (p->fp); + p->full_page_printed = false; + + switch (c) + { + case '\f': + if ((c = getc (p->fp)) != '\n') + ungetc (c, p->fp); + FF_only = true; + if (print_a_header && !storing_columns) + { + pad_vertically = true; + print_header (); + } + else if (keep_FF) + print_a_FF = true; + hold_file (p); + return true; + case EOF: + close_file (p); + return true; + case '\n': + break; + default: + chars = char_to_clump (c); + } + + if (truncate_lines && input_position > chars_per_column) + { + input_position = last_input_position; + return false; + } + + if (p->char_func != store_char) + { + pad_vertically = true; + + if (print_a_header && !storing_columns) + print_header (); + + if (parallel_files && align_empty_cols) + { + /* We have to align empty columns at the beginning of a line. */ + k = separators_not_printed; + separators_not_printed = 0; + for (j = 1, q = column_vector; j <= k; ++j, ++q) + { + align_column (q); + separators_not_printed += 1; + } + padding_not_printed = p->start_position; + if (truncate_lines) + spaces_not_printed = chars_per_column; + else + spaces_not_printed = 0; + align_empty_cols = false; + } + + if (col_sep_length < padding_not_printed) + { + pad_across_to (padding_not_printed - col_sep_length); + padding_not_printed = ANYWHERE; + } + + if (use_col_separator) + print_sep_string (); + } + + if (p->numbered) + add_line_number (p); + + empty_line = false; + if (c == '\n') + return true; + + print_clump (p, chars, clump_buff); + + while (true) + { + c = getc (p->fp); + + switch (c) + { + case '\n': + return true; + case '\f': + if ((c = getc (p->fp)) != '\n') + ungetc (c, p->fp); + if (keep_FF) + print_a_FF = true; + hold_file (p); + return true; + case EOF: + close_file (p); + return true; + } + + last_input_position = input_position; + chars = char_to_clump (c); + if (truncate_lines && input_position > chars_per_column) + { + input_position = last_input_position; + return false; + } + + print_clump (p, chars, clump_buff); + } +} + +/* Print a line from buff. + + If this function has been called, we know we have "something to + print". But it remains to be seen whether we have a real text page + or an empty page (a single form feed) with/without a header only. + Therefore first we set pad_vertically to true and print a header + if necessary. + If FF_FOUND and we are using -t|-T option we omit any newline by + setting pad_vertically to false (see print_page). + Otherwise we pad across if necessary, print separators if necessary + and text of COLUMN *p. + + Return true, meaning there is no need to call read_rest_of_line. */ + +static bool +print_stored (COLUMN *p) +{ + COLUMN *q; + + int line = p->current_line++; + char *first = &buff[line_vector[line]]; + /* FIXME + UMR: Uninitialized memory read: + * This is occurring while in: + print_stored [pr.c:2239] + * Reading 4 bytes from 0x5148c in the heap. + * Address 0x5148c is 4 bytes into a malloc'd block at 0x51488 of 676 bytes + * This block was allocated from: + malloc [rtlib.o] + xmalloc [xmalloc.c:94] + init_store_cols [pr.c:1648] + */ + char *last = &buff[line_vector[line + 1]]; + + pad_vertically = true; + + if (print_a_header) + print_header (); + + if (p->status == FF_FOUND) + { + int i; + for (i = 1, q = column_vector; i <= columns; ++i, ++q) + q->status = ON_HOLD; + if (column_vector->lines_to_print <= 0) + { + if (!extremities) + pad_vertically = false; + return true; /* print a header only */ + } + } + + if (col_sep_length < padding_not_printed) + { + pad_across_to (padding_not_printed - col_sep_length); + padding_not_printed = ANYWHERE; + } + + if (use_col_separator) + print_sep_string (); + + while (first != last) + print_char (*first++); + + if (spaces_not_printed == 0) + { + output_position = p->start_position + end_vector[line]; + if (p->start_position - col_sep_length == chars_per_margin) + output_position -= col_sep_length; + } + + return true; +} + +/* Convert a character to the proper format and return the number of + characters in the resulting clump. Increment input_position by + the width of the clump. + + Tabs are converted to clumps of spaces. + Nonprintable characters may be converted to clumps of escape + sequences or control prefixes. + + Note: the width of a clump is not necessarily equal to the number of + characters in clump_buff. (e.g., the width of '\b' is -1, while the + number of characters is 1.) */ + +static int +char_to_clump (char c) +{ + unsigned char uc = c; + char *s = clump_buff; + int i; + char esc_buff[4]; + int width; + int chars; + int chars_per_c = 8; + + if (c == input_tab_char) + chars_per_c = chars_per_input_tab; + + if (c == input_tab_char || c == '\t') + { + width = TAB_WIDTH (chars_per_c, input_position); + + if (untabify_input) + { + for (i = width; i; --i) + *s++ = ' '; + chars = width; + } + else + { + *s = c; + chars = 1; + } + + } + else if (! isprint (uc)) + { + if (use_esc_sequence) + { + width = 4; + chars = 4; + *s++ = '\\'; + sprintf (esc_buff, "%03o", uc); + for (i = 0; i <= 2; ++i) + *s++ = esc_buff[i]; + } + else if (use_cntrl_prefix) + { + if (uc < 0200) + { + width = 2; + chars = 2; + *s++ = '^'; + *s = c ^ 0100; + } + else + { + width = 4; + chars = 4; + *s++ = '\\'; + sprintf (esc_buff, "%03o", uc); + for (i = 0; i <= 2; ++i) + *s++ = esc_buff[i]; + } + } + else if (c == '\b') + { + width = -1; + chars = 1; + *s = c; + } + else + { + width = 0; + chars = 1; + *s = c; + } + } + else + { + width = 1; + chars = 1; + *s = c; + } + + /* Too many backspaces must put us in position 0 -- never negative. */ + if (width < 0 && input_position == 0) + { + chars = 0; + input_position = 0; + } + else if (width < 0 && input_position <= -width) + input_position = 0; + else + input_position += width; + + return chars; +} + +/* We've just printed some files and need to clean up things before + looking for more options and printing the next batch of files. + + Free everything we've xmalloc'ed, except 'header'. */ + +static void +cleanup (void) +{ + free (number_buff); + free (clump_buff); + free (column_vector); + free (line_vector); + free (end_vector); + free (buff); +} + +/* Complain, print a usage message, and die. */ + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + + fputs (_("\ +Paginate or columnate FILE(s) for printing.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + +FIRST_PAGE[:LAST_PAGE], --pages=FIRST_PAGE[:LAST_PAGE]\n\ + begin [stop] printing with page FIRST_[LAST_]PAGE\n\ + -COLUMN, --columns=COLUMN\n\ + output COLUMN columns and print columns down,\n\ + unless -a is used. Balance number of lines in the\n\ + columns on each page\n\ +"), stdout); + fputs (_("\ + -a, --across print columns across rather than down, used together\n\ + with -COLUMN\n\ + -c, --show-control-chars\n\ + use hat notation (^G) and octal backslash notation\n\ + -d, --double-space\n\ + double space the output\n\ +"), stdout); + fputs (_("\ + -D, --date-format=FORMAT\n\ + use FORMAT for the header date\n\ + -e[CHAR[WIDTH]], --expand-tabs[=CHAR[WIDTH]]\n\ + expand input CHARs (TABs) to tab WIDTH (8)\n\ + -F, -f, --form-feed\n\ + use form feeds instead of newlines to separate pages\n\ + (by a 3-line page header with -F or a 5-line header\n\ + and trailer without -F)\n\ +"), stdout); + fputs (_("\ + -h, --header=HEADER\n\ + use a centered HEADER instead of filename in page header,\n\ + -h \"\" prints a blank line, don't use -h\"\"\n\ + -i[CHAR[WIDTH]], --output-tabs[=CHAR[WIDTH]]\n\ + replace spaces with CHARs (TABs) to tab WIDTH (8)\n\ + -J, --join-lines merge full lines, turns off -W line truncation, no column\n\ + alignment, --sep-string[=STRING] sets separators\n\ +"), stdout); + fputs (_("\ + -l, --length=PAGE_LENGTH\n\ + set the page length to PAGE_LENGTH (66) lines\n\ + (default number of lines of text 56, and with -F 63).\n\ + implies -t if PAGE_LENGTH <= 10\n\ +"), stdout); + fputs (_("\ + -m, --merge print all files in parallel, one in each column,\n\ + truncate lines, but join lines of full length with -J\n\ +"), stdout); + fputs (_("\ + -n[SEP[DIGITS]], --number-lines[=SEP[DIGITS]]\n\ + number lines, use DIGITS (5) digits, then SEP (TAB),\n\ + default counting starts with 1st line of input file\n\ + -N, --first-line-number=NUMBER\n\ + start counting with NUMBER at 1st line of first\n\ + page printed (see +FIRST_PAGE)\n\ +"), stdout); + fputs (_("\ + -o, --indent=MARGIN\n\ + offset each line with MARGIN (zero) spaces, do not\n\ + affect -w or -W, MARGIN will be added to PAGE_WIDTH\n\ + -r, --no-file-warnings\n\ + omit warning when a file cannot be opened\n\ +"), stdout); + fputs (_("\ + -s[CHAR], --separator[=CHAR]\n\ + separate columns by a single character, default for CHAR\n\ + is the character without -w and \'no char\' with -w.\ +\n\ + -s[CHAR] turns off line truncation of all 3 column\n\ + options (-COLUMN|-a -COLUMN|-m) except -w is set\n\ +"), stdout); + fputs (_("\ + -S[STRING], --sep-string[=STRING]\n\ + separate columns by STRING,\n\ + without -S: Default separator with -J and \n\ + otherwise (same as -S\" \"), no effect on column options\n\ +"), stdout); + fputs (_("\ + -t, --omit-header omit page headers and trailers;\n\ + implied if PAGE_LENGTH <= 10\n\ +"), stdout); + fputs (_("\ + -T, --omit-pagination\n\ + omit page headers and trailers, eliminate any pagination\n\ + by form feeds set in input files\n\ + -v, --show-nonprinting\n\ + use octal backslash notation\n\ + -w, --width=PAGE_WIDTH\n\ + set page width to PAGE_WIDTH (72) characters for\n\ + multiple text-column output only, -s[char] turns off (72)\n\ +"), stdout); + fputs (_("\ + -W, --page-width=PAGE_WIDTH\n\ + set page width to PAGE_WIDTH (72) characters always,\n\ + truncate lines, except -J option is set, no interference\n\ + with -S or -s\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} diff --git a/src/primes.h b/src/primes.h new file mode 100644 index 0000000..f608966 --- /dev/null +++ b/src/primes.h @@ -0,0 +1,4014 @@ +/* Generated file -- DO NOT EDIT */ + +#define WIDE_UINT_BITS 128 +P (1, 26, + (((((uintmax_t) 0xaaaaU << 28 | 0xaaaaaaaU) + << 28 | 0xaaaaaaaU) + << 28 | 0xaaaaaaaU) + << 28 | 0xaaaaaabU), + UINTMAX_MAX / 3) +P (2, 26, + (((((uintmax_t) 0xccccU << 28 | 0xcccccccU) + << 28 | 0xcccccccU) + << 28 | 0xcccccccU) + << 28 | 0xccccccdU), + UINTMAX_MAX / 5) +P (2, 30, + (((((uintmax_t) 0xb6dbU << 28 | 0x6db6db6U) + << 28 | 0xdb6db6dU) + << 28 | 0xb6db6dbU) + << 28 | 0x6db6db7U), + UINTMAX_MAX / 7) +P (4, 30, + (((((uintmax_t) 0xa2e8U << 28 | 0xba2e8baU) + << 28 | 0x2e8ba2eU) + << 28 | 0x8ba2e8bU) + << 28 | 0xa2e8ba3U), + UINTMAX_MAX / 11) +P (2, 30, + (((((uintmax_t) 0xc4ecU << 28 | 0x4ec4ec4U) + << 28 | 0xec4ec4eU) + << 28 | 0xc4ec4ecU) + << 28 | 0x4ec4ec5U), + UINTMAX_MAX / 13) +P (4, 30, + (((((uintmax_t) 0xf0f0U << 28 | 0xf0f0f0fU) + << 28 | 0x0f0f0f0U) + << 28 | 0xf0f0f0fU) + << 28 | 0x0f0f0f1U), + UINTMAX_MAX / 17) +P (2, 34, + (((((uintmax_t) 0xbca1U << 28 | 0xaf286bcU) + << 28 | 0xa1af286U) + << 28 | 0xbca1af2U) + << 28 | 0x86bca1bU), + UINTMAX_MAX / 19) +P (4, 36, + (((((uintmax_t) 0x4de9U << 28 | 0xbd37a6fU) + << 28 | 0x4de9bd3U) + << 28 | 0x7a6f4deU) + << 28 | 0x9bd37a7U), + UINTMAX_MAX / 23) +P (6, 32, + (((((uintmax_t) 0xc234U << 28 | 0xf72c234U) + << 28 | 0xf72c234U) + << 28 | 0xf72c234U) + << 28 | 0xf72c235U), + UINTMAX_MAX / 29) +P (2, 36, + (((((uintmax_t) 0xdef7U << 28 | 0xbdef7bdU) + << 28 | 0xef7bdefU) + << 28 | 0x7bdef7bU) + << 28 | 0xdef7bdfU), + UINTMAX_MAX / 31) +P (6, 34, + (((((uintmax_t) 0xc1baU << 28 | 0xcf914c1U) + << 28 | 0xbacf914U) + << 28 | 0xc1bacf9U) + << 28 | 0x14c1badU), + UINTMAX_MAX / 37) +P (4, 32, + (((((uintmax_t) 0x18f9U << 28 | 0xc18f9c1U) + << 28 | 0x8f9c18fU) + << 28 | 0x9c18f9cU) + << 28 | 0x18f9c19U), + UINTMAX_MAX / 41) +P (2, 36, + (((((uintmax_t) 0xbe82U << 28 | 0xfa0be82U) + << 28 | 0xfa0be82U) + << 28 | 0xfa0be82U) + << 28 | 0xfa0be83U), + UINTMAX_MAX / 43) +P (4, 36, + (((((uintmax_t) 0x3677U << 28 | 0xd46cefaU) + << 28 | 0x8d9df51U) + << 28 | 0xb3bea36U) + << 28 | 0x77d46cfU), + UINTMAX_MAX / 47) +P (6, 36, + (((((uintmax_t) 0x1352U << 28 | 0x1cfb2b7U) + << 28 | 0x8c13521U) + << 28 | 0xcfb2b78U) + << 28 | 0xc13521dU), + UINTMAX_MAX / 53) +P (6, 38, + (((((uintmax_t) 0x8f2fU << 28 | 0xba93868U) + << 28 | 0x22b63cbU) + << 28 | 0xeea4e1aU) + << 28 | 0x08ad8f3U), + UINTMAX_MAX / 59) +P (2, 40, + (((((uintmax_t) 0x14fbU << 28 | 0xcda3ac1U) + << 28 | 0x0c9714fU) + << 28 | 0xbcda3acU) + << 28 | 0x10c9715U), + UINTMAX_MAX / 61) +P (6, 36, + (((((uintmax_t) 0xc2ddU << 28 | 0x9ca81e9U) + << 28 | 0x131abf0U) + << 28 | 0xb7672a0U) + << 28 | 0x7a44c6bU), + UINTMAX_MAX / 67) +P (4, 36, + (((((uintmax_t) 0x4f52U << 28 | 0xedf8c9eU) + << 28 | 0xa5dbf19U) + << 28 | 0x3d4bb7eU) + << 28 | 0x327a977U), + UINTMAX_MAX / 71) +P (2, 36, + (((((uintmax_t) 0x3f1fU << 28 | 0x8fc7e3fU) + << 28 | 0x1f8fc7eU) + << 28 | 0x3f1f8fcU) + << 28 | 0x7e3f1f9U), + UINTMAX_MAX / 73) +P (6, 34, + (((((uintmax_t) 0xd5dfU << 28 | 0x984dc5aU) + << 28 | 0xbbf309bU) + << 28 | 0x8b577e6U) + << 28 | 0x13716afU), + UINTMAX_MAX / 79) +P (4, 44, + (((((uintmax_t) 0x2818U << 28 | 0xacb90f6U) + << 28 | 0xbf3a9a3U) + << 28 | 0x784a062U) + << 28 | 0xb2e43dbU), + UINTMAX_MAX / 83) +P (6, 42, + (((((uintmax_t) 0xd1faU << 28 | 0x3f47e8fU) + << 28 | 0xd1fa3f4U) + << 28 | 0x7e8fd1fU) + << 28 | 0xa3f47e9U), + UINTMAX_MAX / 89) +P (8, 40, + (((((uintmax_t) 0x5f02U << 28 | 0xa3a0fd5U) + << 28 | 0xc5f02a3U) + << 28 | 0xa0fd5c5U) + << 28 | 0xf02a3a1U), + UINTMAX_MAX / 97) +P (4, 38, + (((((uintmax_t) 0xc32bU << 28 | 0x16cfd77U) + << 28 | 0x20f353aU) + << 28 | 0x4c0a237U) + << 28 | 0xc32b16dU), + UINTMAX_MAX / 101) +P (2, 46, + (((((uintmax_t) 0xd0c6U << 28 | 0xd5bf60eU) + << 28 | 0xe9a18daU) + << 28 | 0xb7ec1ddU) + << 28 | 0x3431b57U), + UINTMAX_MAX / 103) +P (4, 44, + (((((uintmax_t) 0xa2b1U << 28 | 0x0bf66e0U) + << 28 | 0xe5aea77U) + << 28 | 0xa04c8f8U) + << 28 | 0xd28ac43U), + UINTMAX_MAX / 107) +P (2, 48, + (((((uintmax_t) 0xc096U << 28 | 0x4fda6c0U) + << 28 | 0x964fda6U) + << 28 | 0xc0964fdU) + << 28 | 0xa6c0965U), + UINTMAX_MAX / 109) +P (4, 50, + (((((uintmax_t) 0xc090U << 28 | 0xfdbc090U) + << 28 | 0xfdbc090U) + << 28 | 0xfdbc090U) + << 28 | 0xfdbc091U), + UINTMAX_MAX / 113) +P (14, 40, + (((((uintmax_t) 0xbf7eU << 28 | 0xfdfbf7eU) + << 28 | 0xfdfbf7eU) + << 28 | 0xfdfbf7eU) + << 28 | 0xfdfbf7fU), + UINTMAX_MAX / 127) +P (4, 42, + (((((uintmax_t) 0xf82eU << 28 | 0xe6986d6U) + << 28 | 0xf63aa03U) + << 28 | 0xe88cb3cU) + << 28 | 0x9484e2bU), + UINTMAX_MAX / 131) +P (6, 42, + (((((uintmax_t) 0x21a2U << 28 | 0x91c0779U) + << 28 | 0x75b8fe2U) + << 28 | 0x1a291c0U) + << 28 | 0x77975b9U), + UINTMAX_MAX / 137) +P (2, 42, + (((((uintmax_t) 0xa212U << 28 | 0x6ad1f4fU) + << 28 | 0x31ba03aU) + << 28 | 0xef6ca97U) + << 28 | 0x0586723U), + UINTMAX_MAX / 139) +P (10, 42, + (((((uintmax_t) 0x93c2U << 28 | 0x25cc74dU) + << 28 | 0x50c06dfU) + << 28 | 0x5b0f768U) + << 28 | 0xce2cabdU), + UINTMAX_MAX / 149) +P (2, 42, + (((((uintmax_t) 0x26feU << 28 | 0x4dfc9bfU) + << 28 | 0x937f26fU) + << 28 | 0xe4dfc9bU) + << 28 | 0xf937f27U), + UINTMAX_MAX / 151) +P (6, 40, + (((((uintmax_t) 0x0685U << 28 | 0xb4fe5e9U) + << 28 | 0x2c0685bU) + << 28 | 0x4fe5e92U) + << 28 | 0xc0685b5U), + UINTMAX_MAX / 157) +P (6, 36, + (((((uintmax_t) 0x8bc7U << 28 | 0x75ca99eU) + << 28 | 0xa03241fU) + << 28 | 0x693a1c4U) + << 28 | 0x51ab30bU), + UINTMAX_MAX / 163) +P (4, 44, + (((((uintmax_t) 0x513eU << 28 | 0xd9ad38bU) + << 28 | 0x7f3bc8dU) + << 28 | 0x07aa27dU) + << 28 | 0xb35a717U), + UINTMAX_MAX / 167) +P (6, 50, + (((((uintmax_t) 0x133cU << 28 | 0xaba736cU) + << 28 | 0x05eb488U) + << 28 | 0x2383b30U) + << 28 | 0xd516325U), + UINTMAX_MAX / 173) +P (6, 48, + (((((uintmax_t) 0x0e4dU << 28 | 0x3aa30a0U) + << 28 | 0x2dc3eedU) + << 28 | 0x6866f8dU) + << 28 | 0x962ae7bU), + UINTMAX_MAX / 179) +P (2, 48, + (((((uintmax_t) 0x6fbcU << 28 | 0x1c498c0U) + << 28 | 0x5a84f34U) + << 28 | 0x54dca41U) + << 28 | 0x0f8ed9dU), + UINTMAX_MAX / 181) +P (10, 42, + (((((uintmax_t) 0x7749U << 28 | 0xb79f7f5U) + << 28 | 0x470961dU) + << 28 | 0x7ca632eU) + << 28 | 0xe936f3fU), + UINTMAX_MAX / 191) +P (2, 46, + (((((uintmax_t) 0x9094U << 28 | 0x8f40feaU) + << 28 | 0xc6f6b70U) + << 28 | 0xbf01539U) + << 28 | 0x0948f41U), + UINTMAX_MAX / 193) +P (4, 44, + (((((uintmax_t) 0x0bb2U << 28 | 0x07cc053U) + << 28 | 0x2ae21c9U) + << 28 | 0x6bdb9d3U) + << 28 | 0xd137e0dU), + UINTMAX_MAX / 197) +P (2, 52, + (((((uintmax_t) 0x7a36U << 28 | 0x07b7f5bU) + << 28 | 0x5630e26U) + << 28 | 0x97cc8aeU) + << 28 | 0xf46c0f7U), + UINTMAX_MAX / 199) +P (12, 46, + (((((uintmax_t) 0x2f51U << 28 | 0x4a026d3U) + << 28 | 0x1be7bc0U) + << 28 | 0xe8f2a76U) + << 28 | 0xe68575bU), + UINTMAX_MAX / 211) +P (12, 40, + (((((uintmax_t) 0xdd8fU << 28 | 0x7f6d0eeU) + << 28 | 0xc7bfb68U) + << 28 | 0x7763dfdU) + << 28 | 0xb43bb1fU), + UINTMAX_MAX / 223) +P (4, 42, + (((((uintmax_t) 0x766aU << 28 | 0x024168eU) + << 28 | 0x18cf81bU) + << 28 | 0x10ea929U) + << 28 | 0xba144cbU), + UINTMAX_MAX / 227) +P (2, 42, + (((((uintmax_t) 0x0c4cU << 28 | 0x0478bbcU) + << 28 | 0xecfee1dU) + << 28 | 0x10c4c04U) + << 28 | 0x78bbcedU), + UINTMAX_MAX / 229) +P (4, 44, + (((((uintmax_t) 0x758fU << 28 | 0xee6bac7U) + << 28 | 0xf735d63U) + << 28 | 0xfb9aeb1U) + << 28 | 0xfdcd759U), + UINTMAX_MAX / 233) +P (6, 42, + (((((uintmax_t) 0x077fU << 28 | 0x76e538cU) + << 28 | 0x5167e64U) + << 28 | 0xafaa4f4U) + << 28 | 0x37b2e0fU), + UINTMAX_MAX / 239) +P (2, 42, + (((((uintmax_t) 0x10feU << 28 | 0xf010fefU) + << 28 | 0x010fef0U) + << 28 | 0x10fef01U) + << 28 | 0x0fef011U), + UINTMAX_MAX / 241) +P (10, 42, + (((((uintmax_t) 0xa020U << 28 | 0xa32fefaU) + << 28 | 0xe680828U) + << 28 | 0xcbfbeb9U) + << 28 | 0xa020a33U), + UINTMAX_MAX / 251) +P (6, 50, + (((((uintmax_t) 0xff00U << 28 | 0xff00ff0U) + << 28 | 0x0ff00ffU) + << 28 | 0x00ff00fU) + << 28 | 0xf00ff01U), + UINTMAX_MAX / 257) +P (6, 48, + (((((uintmax_t) 0xf836U << 28 | 0x826ef73U) + << 28 | 0xd52bcd6U) + << 28 | 0x24fd147U) + << 28 | 0x0e99cb7U), + UINTMAX_MAX / 263) +P (6, 44, + (((((uintmax_t) 0x3ce8U << 28 | 0x354b2eaU) + << 28 | 0x1c8cd8fU) + << 28 | 0xb3ddbd6U) + << 28 | 0x205b5c5U), + UINTMAX_MAX / 269) +P (2, 46, + (((((uintmax_t) 0x8715U << 28 | 0xba188f9U) + << 28 | 0x63302d5U) + << 28 | 0x7da36caU) + << 28 | 0x27acdefU), + UINTMAX_MAX / 271) +P (6, 54, + (((((uintmax_t) 0xb25eU << 28 | 0x4463cffU) + << 28 | 0x13686eeU) + << 28 | 0x70c03b2U) + << 28 | 0x5e4463dU), + UINTMAX_MAX / 277) +P (4, 56, + (((((uintmax_t) 0x6c69U << 28 | 0xae01d27U) + << 28 | 0x2ca3fc5U) + << 28 | 0xb1a6b80U) + << 28 | 0x749cb29U), + UINTMAX_MAX / 281) +P (2, 64, + (((((uintmax_t) 0xf26eU << 28 | 0x5c44bfcU) + << 28 | 0x61b2347U) + << 28 | 0x768073cU) + << 28 | 0x9b97113U), + UINTMAX_MAX / 283) +P (10, 56, + (((((uintmax_t) 0xb07dU << 28 | 0xd0d1b15U) + << 28 | 0xd7cf125U) + << 28 | 0x91e9488U) + << 28 | 0x4ce32adU), + UINTMAX_MAX / 293) +P (14, 46, + (((((uintmax_t) 0xd2f8U << 28 | 0x7ebfcaaU) + << 28 | 0x1c5a0f0U) + << 28 | 0x2806abcU) + << 28 | 0x74be1fbU), + UINTMAX_MAX / 307) +P (4, 48, + (((((uintmax_t) 0xbe25U << 28 | 0xdd6d7aaU) + << 28 | 0x646ca7eU) + << 28 | 0xc3e8f3aU) + << 28 | 0x7198487U), + UINTMAX_MAX / 311) +P (2, 54, + (((((uintmax_t) 0xbc1dU << 28 | 0x71afd8bU) + << 28 | 0xdc03458U) + << 28 | 0x550f8a3U) + << 28 | 0x9409d09U), + UINTMAX_MAX / 313) +P (4, 56, + (((((uintmax_t) 0x2ed6U << 28 | 0xd05a72aU) + << 28 | 0xcd1f7ecU) + << 28 | 0x9e48ae6U) + << 28 | 0xf71de15U), + UINTMAX_MAX / 317) +P (14, 48, + (((((uintmax_t) 0x62ffU << 28 | 0x3a018bfU) + << 28 | 0xce8062fU) + << 28 | 0xf3a018bU) + << 28 | 0xfce8063U), + UINTMAX_MAX / 331) +P (6, 46, + (((((uintmax_t) 0x3fcfU << 28 | 0x61fe7b0U) + << 28 | 0xff3d87fU) + << 28 | 0x9ec3fcfU) + << 28 | 0x61fe7b1U), + UINTMAX_MAX / 337) +P (10, 42, + (((((uintmax_t) 0x398bU << 28 | 0x6f668c2U) + << 28 | 0xc43df89U) + << 28 | 0xf5abe57U) + << 28 | 0x0e046d3U), + UINTMAX_MAX / 347) +P (2, 48, + (((((uintmax_t) 0x8c1aU << 28 | 0x682913cU) + << 28 | 0xe1ecedaU) + << 28 | 0x971b23fU) + << 28 | 0x1545af5U), + UINTMAX_MAX / 349) +P (4, 48, + (((((uintmax_t) 0x0b9aU << 28 | 0x7862a0fU) + << 28 | 0xf465879U) + << 28 | 0xd5f00b9U) + << 28 | 0xa7862a1U), + UINTMAX_MAX / 353) +P (6, 50, + (((((uintmax_t) 0xe7c1U << 28 | 0x3f77161U) + << 28 | 0xb18f54dU) + << 28 | 0xba1df32U) + << 28 | 0xa128a57U), + UINTMAX_MAX / 359) +P (8, 52, + (((((uintmax_t) 0x7318U << 28 | 0x6a06f9bU) + << 28 | 0x8d9a287U) + << 28 | 0x530217bU) + << 28 | 0x7747d8fU), + UINTMAX_MAX / 367) +P (6, 48, + (((((uintmax_t) 0x7c39U << 28 | 0xa6c708eU) + << 28 | 0xc18b530U) + << 28 | 0xbaae53bU) + << 28 | 0xb5e06ddU), + UINTMAX_MAX / 373) +P (6, 52, + (((((uintmax_t) 0x3763U << 28 | 0x4af9ebbU) + << 28 | 0xc742deeU) + << 28 | 0x70206c1U) + << 28 | 0x2e9b5b3U), + UINTMAX_MAX / 379) +P (4, 50, + (((((uintmax_t) 0x5035U << 28 | 0x78fb523U) + << 28 | 0x6cf34cdU) + << 28 | 0xde9462eU) + << 28 | 0xc9dbe7fU), + UINTMAX_MAX / 383) +P (6, 50, + (((((uintmax_t) 0xbcdfU << 28 | 0xc0d2975U) + << 28 | 0xccab1afU) + << 28 | 0xb64b05eU) + << 28 | 0xc41cf4dU), + UINTMAX_MAX / 389) +P (8, 46, + (((((uintmax_t) 0xf5aeU << 28 | 0xc02944fU) + << 28 | 0xf5aec02U) + << 28 | 0x944ff5aU) + << 28 | 0xec02945U), + UINTMAX_MAX / 397) +P (4, 48, + (((((uintmax_t) 0xc7d2U << 28 | 0x08f00a3U) + << 28 | 0x6e71a2cU) + << 28 | 0xb033128U) + << 28 | 0x382df71U), + UINTMAX_MAX / 401) +P (8, 48, + (((((uintmax_t) 0xd38fU << 28 | 0x55c0280U) + << 28 | 0xf05a21cU) + << 28 | 0xcacc0c8U) + << 28 | 0x4b1c2a9U), + UINTMAX_MAX / 409) +P (10, 42, + (((((uintmax_t) 0xca3bU << 28 | 0xe03aa76U) + << 28 | 0x87a3219U) + << 28 | 0xa93db57U) + << 28 | 0x5eb3a0bU), + UINTMAX_MAX / 419) +P (2, 42, + (((((uintmax_t) 0x6a69U << 28 | 0xce2344bU) + << 28 | 0x66c3cceU) + << 28 | 0xbeef94fU) + << 28 | 0xa86fe2dU), + UINTMAX_MAX / 421) +P (10, 36, + (((((uintmax_t) 0xfecfU << 28 | 0xe37d53bU) + << 28 | 0xfd9fc6fU) + << 28 | 0xaa77fb3U) + << 28 | 0xf8df54fU), + UINTMAX_MAX / 431) +P (2, 46, + (((((uintmax_t) 0xa58aU << 28 | 0xf00975aU) + << 28 | 0x750ff68U) + << 28 | 0xa58af00U) + << 28 | 0x975a751U), + UINTMAX_MAX / 433) +P (6, 48, + (((((uintmax_t) 0xdc6dU << 28 | 0xa187df5U) + << 28 | 0x80dfed5U) + << 28 | 0x6e36d0cU) + << 28 | 0x3efac07U), + UINTMAX_MAX / 439) +P (4, 48, + (((((uintmax_t) 0x8fe4U << 28 | 0x4308ab0U) + << 28 | 0xd4a8bd8U) + << 28 | 0xb44c47aU) + << 28 | 0x8299b73U), + UINTMAX_MAX / 443) +P (6, 50, + (((((uintmax_t) 0xf1bfU << 28 | 0x0091f5bU) + << 28 | 0xcb8bb02U) + << 28 | 0xd9ccaf9U) + << 28 | 0xba70e41U), + UINTMAX_MAX / 449) +P (8, 46, + (((((uintmax_t) 0x5e1cU << 28 | 0x023d9e8U) + << 28 | 0x78ff709U) + << 28 | 0x85e1c02U) + << 28 | 0x3d9e879U), + UINTMAX_MAX / 457) +P (4, 48, + (((((uintmax_t) 0x7880U << 28 | 0xd53da3dU) + << 28 | 0x15a842aU) + << 28 | 0x343316cU) + << 28 | 0x494d305U), + UINTMAX_MAX / 461) +P (2, 58, + (((((uintmax_t) 0x1ddbU << 28 | 0x81ef699U) + << 28 | 0xb5e8c70U) + << 28 | 0xcb7916aU) + << 28 | 0xb67652fU), + UINTMAX_MAX / 463) +P (4, 56, + (((((uintmax_t) 0xf364U << 28 | 0x5121706U) + << 28 | 0x07acad3U) + << 28 | 0x98f132fU) + << 28 | 0xb10fe5bU), + UINTMAX_MAX / 467) +P (12, 62, + (((((uintmax_t) 0xadb1U << 28 | 0xf8848afU) + << 28 | 0x4c6d06fU) + << 28 | 0x2a38a6bU) + << 28 | 0xf54fa1fU), + UINTMAX_MAX / 479) +P (8, 60, + (((((uintmax_t) 0xd9a0U << 28 | 0x541b55aU) + << 28 | 0xf0c1721U) + << 28 | 0x1df689bU) + << 28 | 0x98f81d7U), + UINTMAX_MAX / 487) +P (4, 66, + (((((uintmax_t) 0x673bU << 28 | 0xf592825U) + << 28 | 0x8a2ac0eU) + << 28 | 0x994983eU) + << 28 | 0x90f1ec3U), + UINTMAX_MAX / 491) +P (8, 64, + (((((uintmax_t) 0x0ddaU << 28 | 0x093c062U) + << 28 | 0x8041aadU) + << 28 | 0x671e44bU) + << 28 | 0xed87f3bU), + UINTMAX_MAX / 499) +P (4, 66, + (((((uintmax_t) 0xa9fcU << 28 | 0xf24229bU) + << 28 | 0xbcd1af9U) + << 28 | 0x623a051U) + << 28 | 0x6e70fc7U), + UINTMAX_MAX / 503) +P (6, 62, + (((((uintmax_t) 0xcbb1U << 28 | 0x8a4f773U) + << 28 | 0x2cc324bU) + << 28 | 0x7129be9U) + << 28 | 0xdece355U), + UINTMAX_MAX / 509) +P (12, 56, + (((((uintmax_t) 0x01f7U << 28 | 0x27cce5fU) + << 28 | 0x530a519U) + << 28 | 0x0f3b747U) + << 28 | 0x3f62c39U), + UINTMAX_MAX / 521) +P (2, 64, + (((((uintmax_t) 0x6da4U << 28 | 0xf4bdeb7U) + << 28 | 0x1121c63U) + << 28 | 0xdacc9aaU) + << 28 | 0xd46f9a3U), + UINTMAX_MAX / 523) +P (18, 52, + (((((uintmax_t) 0x4d9aU << 28 | 0xbc552cfU) + << 28 | 0x42b88c1U) + << 28 | 0x108fda2U) + << 28 | 0x4e8d035U), + UINTMAX_MAX / 541) +P (6, 52, + (((((uintmax_t) 0x141fU << 28 | 0xd312409U) + << 28 | 0x5c328b7U) + << 28 | 0x7578472U) + << 28 | 0x319bd8bU), + UINTMAX_MAX / 547) +P (10, 44, + (((((uintmax_t) 0xddfdU << 28 | 0x3e0bf32U) + << 28 | 0x18d1947U) + << 28 | 0x3d20a1cU) + << 28 | 0x7ed9da5U), + UINTMAX_MAX / 557) +P (6, 44, + (((((uintmax_t) 0xdb2bU << 28 | 0x3278f3bU) + << 28 | 0x910d2fbU) + << 28 | 0xe85af0fU) + << 28 | 0xea2c8fbU), + UINTMAX_MAX / 563) +P (6, 44, + (((((uintmax_t) 0xcb5cU << 28 | 0x3b636e3U) + << 28 | 0xa7d1358U) + << 28 | 0xa1f7e6cU) + << 28 | 0xe0f4c09U), + UINTMAX_MAX / 569) +P (2, 46, + (((((uintmax_t) 0x1bcbU << 28 | 0xfe34e75U) + << 28 | 0x76cf21aU) + << 28 | 0x00e58c5U) + << 28 | 0x44986f3U), + UINTMAX_MAX / 571) +P (6, 42, + (((((uintmax_t) 0x6b5eU << 28 | 0x80aa5efU) + << 28 | 0x23f0071U) + << 28 | 0x94a17f5U) + << 28 | 0x5a10dc1U), + UINTMAX_MAX / 577) +P (10, 44, + (((((uintmax_t) 0x9a62U << 28 | 0x8feb110U) + << 28 | 0x22e3a70U) + << 28 | 0x8494478U) + << 28 | 0x5e33763U), + UINTMAX_MAX / 587) +P (6, 48, + (((((uintmax_t) 0xbe61U << 28 | 0x909eddeU) + << 28 | 0x53c01baU) + << 28 | 0x10679bdU) + << 28 | 0x84886b1U), + UINTMAX_MAX / 593) +P (6, 44, + (((((uintmax_t) 0x4febU << 28 | 0x7c5e05fU) + << 28 | 0xbb9e8ebU) + << 28 | 0xe9c6bb3U) + << 28 | 0x1260967U), + UINTMAX_MAX / 599) +P (2, 46, + (((((uintmax_t) 0x1ff2U << 28 | 0x5e8ff92U) + << 28 | 0xf47fc97U) + << 28 | 0xa3fe4bdU) + << 28 | 0x1ff25e9U), + UINTMAX_MAX / 601) +P (6, 46, + (((((uintmax_t) 0x3014U << 28 | 0x3e6b1faU) + << 28 | 0x187616cU) + << 28 | 0x6388395U) + << 28 | 0xb84d99fU), + UINTMAX_MAX / 607) +P (6, 46, + (((((uintmax_t) 0xd491U << 28 | 0x54c6c94U) + << 28 | 0xac0f08cU) + << 28 | 0x51da6a1U) + << 28 | 0x335df6dU), + UINTMAX_MAX / 613) +P (4, 44, + (((((uintmax_t) 0x9b97U << 28 | 0x71454a4U) + << 28 | 0x4e00d46U) + << 28 | 0xf323447U) + << 28 | 0x5d5add9U), + UINTMAX_MAX / 617) +P (2, 54, + (((((uintmax_t) 0x3abaU << 28 | 0x1b4baefU) + << 28 | 0x0b2a990U) + << 28 | 0x5605ca3U) + << 28 | 0xc619a43U), + UINTMAX_MAX / 619) +P (12, 46, + (((((uintmax_t) 0xcc11U << 28 | 0xd9dd1bfU) + << 28 | 0xe608eceU) + << 28 | 0xe8dff30U) + << 28 | 0x4767747U), + UINTMAX_MAX / 631) +P (10, 42, + (((((uintmax_t) 0xff99U << 28 | 0xc27f006U) + << 28 | 0x63d80ffU) + << 28 | 0x99c27f0U) + << 28 | 0x0663d81U), + UINTMAX_MAX / 641) +P (2, 48, + (((((uintmax_t) 0x111eU << 28 | 0xa8032f6U) + << 28 | 0x0bf1aacU) + << 28 | 0xca407f6U) + << 28 | 0x71ddc2bU), + UINTMAX_MAX / 643) +P (4, 54, + (((((uintmax_t) 0xdd93U << 28 | 0x95f5b66U) + << 28 | 0x7aa88e7U) + << 28 | 0x1298bacU) + << 28 | 0x1e12337U), + UINTMAX_MAX / 647) +P (6, 56, + (((((uintmax_t) 0xa7caU << 28 | 0xaed9303U) + << 28 | 0x8740afaU) + << 28 | 0x1e94309U) + << 28 | 0xcd09045U), + UINTMAX_MAX / 653) +P (6, 60, + (((((uintmax_t) 0x2be5U << 28 | 0x958f582U) + << 28 | 0xe9db7beU) + << 28 | 0xbccb8e9U) + << 28 | 0x1496b9bU), + UINTMAX_MAX / 659) +P (2, 66, + (((((uintmax_t) 0x995eU << 28 | 0x1ca8dbfU) + << 28 | 0xb5a3d31U) + << 28 | 0x2fa30ccU) + << 28 | 0x7d7b8bdU), + UINTMAX_MAX / 661) +P (12, 60, + (((((uintmax_t) 0x9f00U << 28 | 0x6160ff9U) + << 28 | 0xe9f0061U) + << 28 | 0x60ff9e9U) + << 28 | 0xf006161U), + UINTMAX_MAX / 673) +P (4, 62, + (((((uintmax_t) 0xb33cU << 28 | 0xe15ee9bU) + << 28 | 0x097416bU) + << 28 | 0x03673b5U) + << 28 | 0xe28152dU), + UINTMAX_MAX / 677) +P (6, 60, + (((((uintmax_t) 0xfa00U << 28 | 0xbfe802fU) + << 28 | 0xfa00bfeU) + << 28 | 0x802ffa0U) + << 28 | 0x0bfe803U), + UINTMAX_MAX / 683) +P (8, 60, + (((((uintmax_t) 0x1c28U << 28 | 0x02f6bcfU) + << 28 | 0x18d26e6U) + << 28 | 0x6fe25c9U) + << 28 | 0xe907c7bU), + UINTMAX_MAX / 691) +P (10, 56, + (((((uintmax_t) 0xcf6dU << 28 | 0xec4793eU) + << 28 | 0x72aba3fU) + << 28 | 0x8b236c7U) + << 28 | 0x6528895U), + UINTMAX_MAX / 701) +P (8, 52, + (((((uintmax_t) 0x1e54U << 28 | 0x7da72d2U) + << 28 | 0x24d44f6U) + << 28 | 0xf923bf0U) + << 28 | 0x1ce2c0dU), + UINTMAX_MAX / 709) +P (10, 50, + (((((uintmax_t) 0x7746U << 28 | 0xda9d5fcU) + << 28 | 0x708306cU) + << 28 | 0x3d3d98bU) + << 28 | 0xed7c42fU), + UINTMAX_MAX / 719) +P (8, 46, + (((((uintmax_t) 0xcdffU << 28 | 0x4bb5591U) + << 28 | 0x6e37a30U) + << 28 | 0x981efcdU) + << 28 | 0x4b010e7U), + UINTMAX_MAX / 727) +P (6, 54, + (((((uintmax_t) 0x2c01U << 28 | 0x65a1b3dU) + << 28 | 0xd13356fU) + << 28 | 0x691fc81U) + << 28 | 0xebbe575U), + UINTMAX_MAX / 733) +P (6, 58, + (((((uintmax_t) 0xa802U << 28 | 0xc574bddU) + << 28 | 0x5bccbb1U) + << 28 | 0x0480ddbU) + << 28 | 0x47b52cbU), + UINTMAX_MAX / 739) +P (4, 66, + (((((uintmax_t) 0x5411U << 28 | 0xeaa350fU) + << 28 | 0x8134b74U) + << 28 | 0xcd59ed6U) + << 28 | 0x4f3f0d7U), + UINTMAX_MAX / 743) +P (8, 60, + (((((uintmax_t) 0xfceeU << 28 | 0x9d7c6bbU) + << 28 | 0x7bbd301U) + << 28 | 0x05cb813U) + << 28 | 0x16d6c0fU), + UINTMAX_MAX / 751) +P (6, 64, + (((((uintmax_t) 0x4248U << 28 | 0x5eb0874U) + << 28 | 0x553879bU) + << 28 | 0xe64c6d9U) + << 28 | 0x1c1195dU), + UINTMAX_MAX / 757) +P (4, 62, + (((((uintmax_t) 0xe060U << 28 | 0xe20f797U) + << 28 | 0x0b19e71U) + << 28 | 0xb3f945aU) + << 28 | 0x27b1f49U), + UINTMAX_MAX / 761) +P (8, 58, + (((((uintmax_t) 0x782dU << 28 | 0x463deb5U) + << 28 | 0xc369877U) + << 28 | 0xd80d50eU) + << 28 | 0x508fd01U), + UINTMAX_MAX / 769) +P (4, 56, + (((((uintmax_t) 0x4a2fU << 28 | 0x06f468aU) + << 28 | 0x6e9cfa5U) + << 28 | 0xeb778e1U) + << 28 | 0x33551cdU), + UINTMAX_MAX / 773) +P (14, 52, + (((((uintmax_t) 0xda44U << 28 | 0x4f5ea87U) + << 28 | 0xf831718U) + << 28 | 0x657d3c2U) + << 28 | 0xd8a3f1bU), + UINTMAX_MAX / 787) +P (10, 56, + (((((uintmax_t) 0xfb80U << 28 | 0xcd9225eU) + << 28 | 0x6f2302eU) + << 28 | 0x40e220cU) + << 28 | 0x34ad735U), + UINTMAX_MAX / 797) +P (12, 48, + (((((uintmax_t) 0x1719U << 28 | 0xa1b36beU) + << 28 | 0x7f357a7U) + << 28 | 0x6593c70U) + << 28 | 0xa714919U), + UINTMAX_MAX / 809) +P (2, 48, + (((((uintmax_t) 0x2867U << 28 | 0x894fdcaU) + << 28 | 0x567da1eU) + << 28 | 0xef45212U) + << 28 | 0x4eea383U), + UINTMAX_MAX / 811) +P (10, 42, + (((((uintmax_t) 0x8932U << 28 | 0xd36914eU) + << 28 | 0x43f9c38U) + << 28 | 0x206dc24U) + << 28 | 0x2ba771dU), + UINTMAX_MAX / 821) +P (2, 54, + (((((uintmax_t) 0xdeb7U << 28 | 0x8610cc0U) + << 28 | 0xdafbf4cU) + << 28 | 0xd4c3580U) + << 28 | 0x7772287U), + UINTMAX_MAX / 823) +P (4, 54, + (((((uintmax_t) 0x8fa1U << 28 | 0xe560e3dU) + << 28 | 0x4a9a283U) + << 28 | 0xde917d5U) + << 28 | 0xe69ddf3U), + UINTMAX_MAX / 827) +P (2, 54, + (((((uintmax_t) 0x6724U << 28 | 0x2159dccU) + << 28 | 0xbcfd388U) + << 28 | 0x2ef0403U) + << 28 | 0xb4a6c15U), + UINTMAX_MAX / 829) +P (10, 48, + (((((uintmax_t) 0x5e96U << 28 | 0xbb58ca9U) + << 28 | 0xa64b0f8U) + << 28 | 0xfb6c51cU) + << 28 | 0x606b677U), + UINTMAX_MAX / 839) +P (14, 54, + (((((uintmax_t) 0x2450U << 28 | 0x6e7171bU) + << 28 | 0xe930eb4U) + << 28 | 0xabaac44U) + << 28 | 0x6d3e1fdU), + UINTMAX_MAX / 853) +P (4, 54, + (((((uintmax_t) 0x3743U << 28 | 0x3611535U) + << 28 | 0x7861fa9U) + << 28 | 0xf83bbe4U) + << 28 | 0x84a14e9U), + UINTMAX_MAX / 857) +P (2, 60, + (((((uintmax_t) 0x232aU << 28 | 0x9df37baU) + << 28 | 0xdbf080bU) + << 28 | 0xebbc0d1U) + << 28 | 0xce874d3U), + UINTMAX_MAX / 859) +P (4, 66, + (((((uintmax_t) 0x569eU << 28 | 0x67d2e92U) + << 28 | 0x8a3bebdU) + << 28 | 0x418eaf0U) + << 28 | 0x473189fU), + UINTMAX_MAX / 863) +P (14, 60, + (((((uintmax_t) 0x7e1aU << 28 | 0x457923eU) + << 28 | 0x77ae444U) + << 28 | 0xe3af6f3U) + << 28 | 0x72b7e65U), + UINTMAX_MAX / 877) +P (4, 60, + (((((uintmax_t) 0x9764U << 28 | 0x3fed672U) + << 28 | 0x7cf2ec8U) + << 28 | 0x7fdace4U) + << 28 | 0xf9e5d91U), + UINTMAX_MAX / 881) +P (2, 64, + (((((uintmax_t) 0xea8bU << 28 | 0xbde5e83U) + << 28 | 0x9fbf0ecU) + << 28 | 0x93479c4U) + << 28 | 0x46bd9bbU), + UINTMAX_MAX / 883) +P (4, 66, + (((((uintmax_t) 0x3d2fU << 28 | 0x9f06a35U) + << 28 | 0xae9c6daU) + << 28 | 0xc4d592eU) + << 28 | 0x777c647U), + UINTMAX_MAX / 887) +P (20, 60, + (((((uintmax_t) 0x81d5U << 28 | 0xa9a1ba9U) + << 28 | 0x11379a6U) + << 28 | 0x3ea8c8fU) + << 28 | 0x61f0c23U), + UINTMAX_MAX / 907) +P (4, 60, + (((((uintmax_t) 0x752eU << 28 | 0x5ddb77fU) + << 28 | 0xdc07de4U) + << 28 | 0x76062eaU) + << 28 | 0x5cbbb6fU), + UINTMAX_MAX / 911) +P (8, 58, + (((((uintmax_t) 0x1abdU << 28 | 0xfafc60fU) + << 28 | 0x0add2dfU) + << 28 | 0x68761c6U) + << 28 | 0x9daac27U), + UINTMAX_MAX / 919) +P (10, 54, + (((((uintmax_t) 0xac3aU << 28 | 0x6b786c0U) + << 28 | 0x582e4b8U) + << 28 | 0x13d7376U) + << 28 | 0x37aa061U), + UINTMAX_MAX / 929) +P (8, 54, + (((((uintmax_t) 0x131fU << 28 | 0xf741d81U) + << 28 | 0xc6a01a3U) + << 28 | 0xa77aac1U) + << 28 | 0xfb15099U), + UINTMAX_MAX / 937) +P (4, 56, + (((((uintmax_t) 0xc53cU << 28 | 0xaad918cU) + << 28 | 0x1b34817U) + << 28 | 0xf0c3e07U) + << 28 | 0x12c5825U), + UINTMAX_MAX / 941) +P (6, 62, + (((((uintmax_t) 0xea1aU << 28 | 0x7df8f8bU) + << 28 | 0x37f52fdU) + << 28 | 0x912a70fU) + << 28 | 0xf30637bU), + UINTMAX_MAX / 947) +P (6, 60, + (((((uintmax_t) 0xbb3bU << 28 | 0x5dc0113U) + << 28 | 0x1288ffbU) + << 28 | 0xb3b5dc0U) + << 28 | 0x1131289U), + UINTMAX_MAX / 953) +P (14, 52, + (((((uintmax_t) 0x50beU << 28 | 0x9c31c53U) + << 28 | 0xa81b885U) + << 28 | 0x6d560a0U) + << 28 | 0xf5acdf7U), + UINTMAX_MAX / 967) +P (4, 50, + (((((uintmax_t) 0x6580U << 28 | 0xec3a008U) + << 28 | 0x6fc9296U) + << 28 | 0x472f314U) + << 28 | 0xd3f89e3U), + UINTMAX_MAX / 971) +P (6, 54, + (((((uintmax_t) 0x1108U << 28 | 0x1f71752U) + << 28 | 0x03ab1a7U) + << 28 | 0x6f5c7edU) + << 28 | 0x2253531U), + UINTMAX_MAX / 977) +P (6, 50, + (((((uintmax_t) 0xb81fU << 28 | 0x4053563U) + << 28 | 0x3908981U) + << 28 | 0x6eae7c7U) + << 28 | 0xbf69fe7U), + UINTMAX_MAX / 983) +P (8, 48, + (((((uintmax_t) 0x9c8bU << 28 | 0x7ed668eU) + << 28 | 0x14263b6U) + << 28 | 0xa2bea4cU) + << 28 | 0xfb1781fU), + UINTMAX_MAX / 991) +P (6, 52, + (((((uintmax_t) 0x0291U << 28 | 0x54fdb06U) + << 28 | 0x6b547a3U) + << 28 | 0x900c533U) + << 28 | 0x18e81edU), + UINTMAX_MAX / 997) +P (12, 42, + (((((uintmax_t) 0x2240U << 28 | 0x71aa3e6U) + << 28 | 0xa0db360U) + << 28 | 0xaa7f5d9U) + << 28 | 0xf148d11U), + UINTMAX_MAX / 1009) +P (4, 48, + (((((uintmax_t) 0x02c7U << 28 | 0xa505cffU) + << 28 | 0xbf4e16bU) + << 28 | 0xe8c0102U) + << 28 | 0xc7a505dU), + UINTMAX_MAX / 1013) +P (6, 44, + (((((uintmax_t) 0xcafdU << 28 | 0xbd2c779U) + << 28 | 0x57ad98fU) + << 28 | 0xf3f0ed2U) + << 28 | 0x8728f33U), + UINTMAX_MAX / 1019) +P (2, 48, + (((((uintmax_t) 0x513cU << 28 | 0xedb245bU) + << 28 | 0x4473568U) + << 28 | 0x0e0a87eU) + << 28 | 0x5ec7155U), + UINTMAX_MAX / 1021) +P (10, 56, + (((((uintmax_t) 0x2e6eU << 28 | 0xbe33267U) + << 28 | 0xca5ddbbU) + << 28 | 0xf70fa49U) + << 28 | 0xfe829b7U), + UINTMAX_MAX / 1031) +P (2, 58, + (((((uintmax_t) 0x007eU << 28 | 0xe2825abU) + << 28 | 0x3eb2ed6U) + << 28 | 0x9d1e7b6U) + << 28 | 0xa50ca39U), + UINTMAX_MAX / 1033) +P (6, 54, + (((((uintmax_t) 0x2f8dU << 28 | 0xacb84cdU) + << 28 | 0xfb90a1aU) + << 28 | 0x1e0f46bU) + << 28 | 0x6d26aefU), + UINTMAX_MAX / 1039) +P (10, 48, + (((((uintmax_t) 0x01f3U << 28 | 0xcc435b0U) + << 28 | 0x713c474U) + << 28 | 0x29f9a7aU) + << 28 | 0x8251829U), + UINTMAX_MAX / 1049) +P (2, 52, + (((((uintmax_t) 0x8c0eU << 28 | 0x9d59e14U) + << 28 | 0xf29a6d9U) + << 28 | 0xc2219d1U) + << 28 | 0xb863613U), + UINTMAX_MAX / 1051) +P (10, 48, + (((((uintmax_t) 0x6e81U << 28 | 0xcf42d5cU) + << 28 | 0x6932e91U) + << 28 | 0x406c182U) + << 28 | 0x0d077adU), + UINTMAX_MAX / 1061) +P (2, 54, + (((((uintmax_t) 0x9c4cU << 28 | 0x1a02688U) + << 28 | 0x4efdd52U) + << 28 | 0x1f4ec02U) + << 28 | 0xe3d2b97U), + UINTMAX_MAX / 1063) +P (6, 54, + (((((uintmax_t) 0x7bcfU << 28 | 0x2599067U) + << 28 | 0x74255bbU) + << 28 | 0x8283b63U) + << 28 | 0xdc8eba5U), + UINTMAX_MAX / 1069) +P (18, 42, + (((((uintmax_t) 0x46a7U << 28 | 0x3667275U) + << 28 | 0x48c5d43U) + << 28 | 0x1eda153U) + << 28 | 0x229ebbfU), + UINTMAX_MAX / 1087) +P (4, 60, + (((((uintmax_t) 0xe720U << 28 | 0x9daecfeU) + << 28 | 0x5b832afU) + << 28 | 0x0bf78d7U) + << 28 | 0xe01686bU), + UINTMAX_MAX / 1091) +P (2, 60, + (((((uintmax_t) 0x194bU << 28 | 0xa6ff4c1U) + << 28 | 0xeeaafa9U) + << 28 | 0xced0742U) + << 28 | 0xc086e8dU), + UINTMAX_MAX / 1093) +P (4, 66, + (((((uintmax_t) 0x777bU << 28 | 0x730c5e4U) + << 28 | 0x768c7c2U) + << 28 | 0x6458ad9U) + << 28 | 0xf632df9U), + UINTMAX_MAX / 1097) +P (6, 68, + (((((uintmax_t) 0x2aefU << 28 | 0xfc49577U) + << 28 | 0xfe24abbU) + << 28 | 0xff1255dU) + << 28 | 0xff892afU), + UINTMAX_MAX / 1103) +P (6, 72, + (((((uintmax_t) 0xf1b0U << 28 | 0x213da24U) + << 28 | 0x78f59cbU) + << 28 | 0xd49a333U) + << 28 | 0xf04d8fdU), + UINTMAX_MAX / 1109) +P (8, 70, + (((((uintmax_t) 0x8822U << 28 | 0xd60f205U) + << 28 | 0x0ac58ecU) + << 28 | 0x84ed6f9U) + << 28 | 0xcfdeff5U), + UINTMAX_MAX / 1117) +P (6, 70, + (((((uintmax_t) 0x3606U << 28 | 0xd6bd351U) + << 28 | 0xd682d97U) + << 28 | 0x980cc40U) + << 28 | 0xbda9d4bU), + UINTMAX_MAX / 1123) +P (6, 72, + (((((uintmax_t) 0x0122U << 28 | 0x3d38ea0U) + << 28 | 0x15c4977U) + << 28 | 0x7f34d52U) + << 28 | 0x4f5cbd9U), + UINTMAX_MAX / 1129) +P (22, 62, + (((((uintmax_t) 0x78feU << 28 | 0x716e8a5U) + << 28 | 0x7a1b227U) + << 28 | 0x97051d9U) + << 28 | 0x4cbbb7fU), + UINTMAX_MAX / 1151) +P (2, 64, + (((((uintmax_t) 0xd6ecU << 28 | 0xaef5908U) + << 28 | 0xa8be0eaU) + << 28 | 0x769051bU) + << 28 | 0x4f43b81U), + UINTMAX_MAX / 1153) +P (10, 60, + (((((uintmax_t) 0x7867U << 28 | 0xe595e6eU) + << 28 | 0x801c2ceU) + << 28 | 0x7910f30U) + << 28 | 0x34d4323U), + UINTMAX_MAX / 1163) +P (8, 58, + (((((uintmax_t) 0xa705U << 28 | 0xe713e4eU) + << 28 | 0x43c5692U) + << 28 | 0x791d137U) + << 28 | 0x4f5b99bU), + UINTMAX_MAX / 1171) +P (10, 50, + (((((uintmax_t) 0x92c0U << 28 | 0x0ddf7c3U) + << 28 | 0x4e40989U) + << 28 | 0xa5645ccU) + << 28 | 0x68ea1b5U), + UINTMAX_MAX / 1181) +P (6, 50, + (((((uintmax_t) 0xab06U << 28 | 0xaf8e205U) + << 28 | 0x9b7f75fU) + << 28 | 0x8aacf79U) + << 28 | 0x6c0cf0bU), + UINTMAX_MAX / 1187) +P (6, 56, + (((((uintmax_t) 0xe187U << 28 | 0x673725fU) + << 28 | 0xb4774f2U) + << 28 | 0xe90a15eU) + << 28 | 0x33edf99U), + UINTMAX_MAX / 1193) +P (8, 58, + (((((uintmax_t) 0x57d1U << 28 | 0xf5579b6U) + << 28 | 0x3f8538eU) + << 28 | 0x99e5febU) + << 28 | 0x897c451U), + UINTMAX_MAX / 1201) +P (12, 64, + (((((uintmax_t) 0x5f64U << 28 | 0xab5ec29U) + << 28 | 0x5d7e6acU) + << 28 | 0xa2eda38U) + << 28 | 0xfb91695U), + UINTMAX_MAX / 1213) +P (4, 62, + (((((uintmax_t) 0x48c8U << 28 | 0x41a1574U) + << 28 | 0xbf0035dU) + << 28 | 0x9b737beU) + << 28 | 0x5ea8b41U), + UINTMAX_MAX / 1217) +P (6, 60, + (((((uintmax_t) 0x348aU << 28 | 0x26ef0b8U) + << 28 | 0x33e964aU) + << 28 | 0xefe1db9U) + << 28 | 0x3fd7cf7U), + UINTMAX_MAX / 1223) +P (6, 60, + (((((uintmax_t) 0x5247U << 28 | 0x3d081faU) + << 28 | 0x958f1a0U) + << 28 | 0x994ef20U) + << 28 | 0xb3f8805U), + UINTMAX_MAX / 1229) +P (2, 60, + (((((uintmax_t) 0x0ec3U << 28 | 0xe6367c5U) + << 28 | 0xc55ae10U) + << 28 | 0x3890bdaU) + << 28 | 0x912822fU), + UINTMAX_MAX / 1231) +P (6, 60, + (((((uintmax_t) 0xb57fU << 28 | 0x46921bbU) + << 28 | 0xb4ab5b4U) + << 28 | 0x41659d1U) + << 28 | 0x3a9147dU), + UINTMAX_MAX / 1237) +P (12, 52, + (((((uintmax_t) 0xb2eeU << 28 | 0xfcecf03U) + << 28 | 0x7c00d1eU) + << 28 | 0x2134440U) + << 28 | 0xc4c3f21U), + UINTMAX_MAX / 1249) +P (10, 44, + (((((uintmax_t) 0xed4bU << 28 | 0x07ee1b3U) + << 28 | 0xf3ccc26U) + << 28 | 0x3a27727U) + << 28 | 0xa6883c3U), + UINTMAX_MAX / 1259) +P (18, 30, + (((((uintmax_t) 0x435bU << 28 | 0x9d5e6bdU) + << 28 | 0xa4fc978U) + << 28 | 0xe221472U) + << 28 | 0xab33855U), + UINTMAX_MAX / 1277) +P (2, 40, + (((((uintmax_t) 0x6013U << 28 | 0x370b023U) + << 28 | 0x3a3ed95U) + << 28 | 0xeac88e8U) + << 28 | 0x2e6faffU), + UINTMAX_MAX / 1279) +P (4, 38, + (((((uintmax_t) 0x3447U << 28 | 0x089473bU) + << 28 | 0xa900ff6U) + << 28 | 0x6c25831U) + << 28 | 0x7be8dabU), + UINTMAX_MAX / 1283) +P (6, 38, + (((((uintmax_t) 0x0f7dU << 28 | 0xb74fa3dU) + << 28 | 0x912de09U) + << 28 | 0xee202c7U) + << 28 | 0xcb91939U), + UINTMAX_MAX / 1289) +P (2, 70, + (((((uintmax_t) 0x5316U << 28 | 0x02c6b14U) + << 28 | 0x6caa88dU) + << 28 | 0x2fca104U) + << 28 | 0x2a09ea3U), + UINTMAX_MAX / 1291) +P (6, 70, + (((((uintmax_t) 0x2128U << 28 | 0xdb7c26aU) + << 28 | 0xfaabb82U) + << 28 | 0x779c856U) + << 28 | 0xd8b8bf1U), + UINTMAX_MAX / 1297) +P (4, 72, + (((((uintmax_t) 0xb01cU << 28 | 0x55cadf2U) + << 28 | 0x39d9d38U) + << 28 | 0x79361cbU) + << 28 | 0xa8a223dU), + UINTMAX_MAX / 1301) +P (2, 78, + (((((uintmax_t) 0x3d4cU << 28 | 0x6d3cb58U) + << 28 | 0x9b9a9f2U) + << 28 | 0x3f43639U) + << 28 | 0xc3182a7U), + UINTMAX_MAX / 1303) +P (4, 92, + (((((uintmax_t) 0x0bc0U << 28 | 0x89e42fcU) + << 28 | 0xab94aa0U) + << 28 | 0x3868fc4U) + << 28 | 0x74bcd13U), + UINTMAX_MAX / 1307) +P (12, 90, + (((((uintmax_t) 0x34fcU << 28 | 0x4ff6af1U) + << 28 | 0x0e2b165U) + << 28 | 0x1e78b8cU) + << 28 | 0x5311a97U), + UINTMAX_MAX / 1319) +P (2, 102, + (((((uintmax_t) 0x18ffU << 28 | 0xce639c0U) + << 28 | 0x0c6718fU) + << 28 | 0xfce639cU) + << 28 | 0x00c6719U), + UINTMAX_MAX / 1321) +P (6, 100, + (((((uintmax_t) 0x9b4cU << 28 | 0x33b39aeU) + << 28 | 0x96dc4f7U) + << 28 | 0xb460754U) + << 28 | 0xb0b61cfU), + UINTMAX_MAX / 1327) +P (34, 68, + (((((uintmax_t) 0xbbe8U << 28 | 0xad0c9a3U) + << 28 | 0xd51d27bU) + << 28 | 0x03f3359U) + << 28 | 0xb8e63b1U), + UINTMAX_MAX / 1361) +P (6, 66, + (((((uintmax_t) 0xa28dU << 28 | 0x33dfca1U) + << 28 | 0x0dabba5U) + << 28 | 0x5c53260U) + << 28 | 0x41eb667U), + UINTMAX_MAX / 1367) +P (6, 66, + (((((uintmax_t) 0x677bU << 28 | 0x3ed5acdU) + << 28 | 0x78a2964U) + << 28 | 0x7f88ab8U) + << 28 | 0x96a76f5U), + UINTMAX_MAX / 1373) +P (8, 66, + (((((uintmax_t) 0xf4e0U << 28 | 0xac06ac6U) + << 28 | 0x595988fU) + << 28 | 0xd971434U) + << 28 | 0xa55a46dU), + UINTMAX_MAX / 1381) +P (18, 52, + (((((uintmax_t) 0x3ba7U << 28 | 0x6f12d90U) + << 28 | 0x609e19fU) + << 28 | 0xbf96995U) + << 28 | 0x8046447U), + UINTMAX_MAX / 1399) +P (10, 44, + (((((uintmax_t) 0x3d69U << 28 | 0x32b0f71U) + << 28 | 0x8e43399U) + << 28 | 0x86feba6U) + << 28 | 0x9be3a81U), + UINTMAX_MAX / 1409) +P (14, 36, + (((((uintmax_t) 0xb7adU << 28 | 0xf701426U) + << 28 | 0x239eda6U) + << 28 | 0x68b3e6dU) + << 28 | 0x053796fU), + UINTMAX_MAX / 1423) +P (4, 44, + (((((uintmax_t) 0xd0d1U << 28 | 0x893d2caU) + << 28 | 0xb80fc97U) + << 28 | 0x694e658U) + << 28 | 0x9f4e09bU), + UINTMAX_MAX / 1427) +P (2, 52, + (((((uintmax_t) 0xc00bU << 28 | 0x7721dbcU) + << 28 | 0xffd2237U) + << 28 | 0x890c00bU) + << 28 | 0x7721dbdU), + UINTMAX_MAX / 1429) +P (4, 50, + (((((uintmax_t) 0xe9d9U << 28 | 0x0e1cf0dU) + << 28 | 0x0a8a45aU) + << 28 | 0xc094a23U) + << 28 | 0x5f37ea9U), + UINTMAX_MAX / 1433) +P (6, 48, + (((((uintmax_t) 0x8489U << 28 | 0x56fe661U) + << 28 | 0xd881831U) + << 28 | 0xcff775fU) + << 28 | 0x2d5d65fU), + UINTMAX_MAX / 1439) +P (8, 42, + (((((uintmax_t) 0xfd85U << 28 | 0xed3f28dU) + << 28 | 0xe356dddU) + << 28 | 0xad8e6b3U) + << 28 | 0x6505217U), + UINTMAX_MAX / 1447) +P (4, 42, + (((((uintmax_t) 0x0a68U << 28 | 0xcca8aacU) + << 28 | 0x8c7035aU) + << 28 | 0x27df897U) + << 28 | 0x062cd03U), + UINTMAX_MAX / 1451) +P (2, 46, + (((((uintmax_t) 0x57eaU << 28 | 0xdb877ceU) + << 28 | 0xaae6ce2U) + << 28 | 0x396fe0fU) + << 28 | 0xdb5a625U), + UINTMAX_MAX / 1453) +P (6, 52, + (((((uintmax_t) 0x1c12U << 28 | 0xf330f43U) + << 28 | 0xe76f6b3U) + << 28 | 0x52a4957U) + << 28 | 0xe82317bU), + UINTMAX_MAX / 1459) +P (12, 52, + (((((uintmax_t) 0x472dU << 28 | 0xc52d6c1U) + << 28 | 0x2cb9dd8U) + << 28 | 0xab3f2c6U) + << 28 | 0x0c2ea3fU), + UINTMAX_MAX / 1471) +P (10, 50, + (((((uintmax_t) 0xda51U << 28 | 0x3e0e2c9U) + << 28 | 0x8ce0b68U) + << 28 | 0x93f702fU) + << 28 | 0x0452479U), + UINTMAX_MAX / 1481) +P (2, 60, + (((((uintmax_t) 0x442fU << 28 | 0xa4dae2dU) + << 28 | 0x3a2c896U) + << 28 | 0x86fdc18U) + << 28 | 0x2acf7e3U), + UINTMAX_MAX / 1483) +P (4, 62, + (((((uintmax_t) 0x091fU << 28 | 0xd96fbb2U) + << 28 | 0x2f2be68U) + << 28 | 0x5403717U) + << 28 | 0x3dce12fU), + UINTMAX_MAX / 1487) +P (2, 64, + (((((uintmax_t) 0x3accU << 28 | 0x97fbdfaU) + << 28 | 0xd798d7fU) + << 28 | 0x0ded168U) + << 28 | 0x5c27331U), + UINTMAX_MAX / 1489) +P (4, 66, + (((((uintmax_t) 0x4d54U << 28 | 0xe047548U) + << 28 | 0x87cd3eeU) + << 28 | 0xda72e1fU) + << 28 | 0xe490b7dU), + UINTMAX_MAX / 1493) +P (6, 68, + (((((uintmax_t) 0x7e8cU << 28 | 0x61afbbbU) + << 28 | 0x013209eU) + << 28 | 0x7bfc959U) + << 28 | 0xa8e6e53U), + UINTMAX_MAX / 1499) +P (12, 60, + (((((uintmax_t) 0xc4b3U << 28 | 0x96f4fccU) + << 28 | 0x7ebab49U) + << 28 | 0xb314d6dU) + << 28 | 0x4753dd7U), + UINTMAX_MAX / 1511) +P (12, 56, + (((((uintmax_t) 0x9eadU << 28 | 0x21c933fU) + << 28 | 0x089292eU) + << 28 | 0x8f8c5acU) + << 28 | 0x4aa1b3bU), + UINTMAX_MAX / 1523) +P (8, 52, + (((((uintmax_t) 0x0584U << 28 | 0x992a4deU) + << 28 | 0xb99aab8U) + << 28 | 0xef72348U) + << 28 | 0x1163d33U), + UINTMAX_MAX / 1531) +P (12, 54, + (((((uintmax_t) 0x8b08U << 28 | 0x7620d9aU) + << 28 | 0xcb6806aU) + << 28 | 0x2ec96a5U) + << 28 | 0x94287b7U), + UINTMAX_MAX / 1543) +P (6, 52, + (((((uintmax_t) 0xc108U << 28 | 0x6dbce6bU) + << 28 | 0x6c94bdbU) + << 28 | 0xa41c6d1U) + << 28 | 0x3aab8c5U), + UINTMAX_MAX / 1549) +P (4, 54, + (((((uintmax_t) 0xe478U << 28 | 0xaa1e005U) + << 28 | 0x46633c2U) + << 28 | 0xadbe648U) + << 28 | 0xdc3aaf1U), + UINTMAX_MAX / 1553) +P (6, 50, + (((((uintmax_t) 0x5cf1U << 28 | 0x0e9d4faU) + << 28 | 0x40b2a87U) + << 28 | 0xa2bade5U) + << 28 | 0x65f91a7U), + UINTMAX_MAX / 1559) +P (8, 46, + (((((uintmax_t) 0x9ecbU << 28 | 0x8ef2c45U) + << 28 | 0xec11a4dU) + << 28 | 0x6fe8798U) + << 28 | 0xc01f5dfU), + UINTMAX_MAX / 1567) +P (4, 48, + (((((uintmax_t) 0xfb99U << 28 | 0xaa49543U) + << 28 | 0xf39d937U) + << 28 | 0x91310c8U) + << 28 | 0xc23d98bU), + UINTMAX_MAX / 1571) +P (8, 42, + (((((uintmax_t) 0x7abbU << 28 | 0x187b379U) + << 28 | 0xc2112f8U) + << 28 | 0x0e446b0U) + << 28 | 0x1228883U), + UINTMAX_MAX / 1579) +P (4, 44, + (((((uintmax_t) 0x3cceU << 28 | 0x5a3d212U) + << 28 | 0x6f95e9aU) + << 28 | 0xed1436fU) + << 28 | 0xbf500cfU), + UINTMAX_MAX / 1583) +P (14, 40, + (((((uintmax_t) 0xc6eeU << 28 | 0xd90c05cU) + << 28 | 0x5547a78U) + << 28 | 0x39b54ccU) + << 28 | 0x8b24115U), + UINTMAX_MAX / 1597) +P (4, 56, + (((((uintmax_t) 0x8798U << 28 | 0x627f99aU) + << 28 | 0x9f948c1U) + << 28 | 0x28c646aU) + << 28 | 0xd0309c1U), + UINTMAX_MAX / 1601) +P (6, 56, + (((((uintmax_t) 0x5233U << 28 | 0x4bab403U) + << 28 | 0x2fa1b14U) + << 28 | 0xde63162U) + << 28 | 0x4a3c377U), + UINTMAX_MAX / 1607) +P (2, 58, + (((((uintmax_t) 0x0e51U << 28 | 0xc7ad43fU) + << 28 | 0x016e93fU) + << 28 | 0x7b9fe68U) + << 28 | 0xb0ecbf9U), + UINTMAX_MAX / 1609) +P (4, 56, + (((((uintmax_t) 0x00a2U << 28 | 0x84ffd75U) + << 28 | 0xec00a28U) + << 28 | 0x4ffd75eU) + << 28 | 0xc00a285U), + UINTMAX_MAX / 1613) +P (6, 74, + (((((uintmax_t) 0xe72cU << 28 | 0xbfa4ebeU) + << 28 | 0xb20bb37U) + << 28 | 0x803cb80U) + << 28 | 0xdea2ddbU), + UINTMAX_MAX / 1619) +P (2, 76, + (((((uintmax_t) 0x22beU << 28 | 0x75d04e5U) + << 28 | 0x4f6ff86U) + << 28 | 0xb63f7c9U) + << 28 | 0xac4c6fdU), + UINTMAX_MAX / 1621) +P (6, 72, + (((((uintmax_t) 0x84f4U << 28 | 0xd419cdfU) + << 28 | 0x6dfbe8bU) + << 28 | 0x6851d1bU) + << 28 | 0xd99b9d3U), + UINTMAX_MAX / 1627) +P (10, 72, + (((((uintmax_t) 0xe83aU << 28 | 0xccdcd04U) + << 28 | 0xd90f7b6U) + << 28 | 0x2fda77cU) + << 28 | 0xa343b6dU), + UINTMAX_MAX / 1637) +P (20, 64, + (((((uintmax_t) 0x9e34U << 28 | 0x383c8ffU) + << 28 | 0xd872f1fU) + << 28 | 0x0dc009eU) + << 28 | 0x34383c9U), + UINTMAX_MAX / 1657) +P (6, 60, + (((((uintmax_t) 0x2e7dU << 28 | 0x4e5ad2eU) + << 28 | 0x55e5d49U) + << 28 | 0x6dc21ddU) + << 28 | 0xd35b97fU), + UINTMAX_MAX / 1663) +P (4, 66, + (((((uintmax_t) 0xe596U << 28 | 0x098573aU) + << 28 | 0x33e80b0U) + << 28 | 0xe96ce17U) + << 28 | 0x090f82bU), + UINTMAX_MAX / 1667) +P (2, 72, + (((((uintmax_t) 0x7181U << 28 | 0x4dc42e0U) + << 28 | 0x3fceeaaU) + << 28 | 0xdf05acdU) + << 28 | 0xd7d024dU), + UINTMAX_MAX / 1669) +P (24, 54, + (((((uintmax_t) 0xa4abU << 28 | 0x2bb32f5U) + << 28 | 0x43975cbU) + << 28 | 0x1381967U) + << 28 | 0x46eafb5U), + UINTMAX_MAX / 1693) +P (4, 56, + (((((uintmax_t) 0xa2ecU << 28 | 0x3cf1f87U) + << 28 | 0x5102434U) + << 28 | 0x7f52373U) + << 28 | 0x6755d61U), + UINTMAX_MAX / 1697) +P (2, 60, + (((((uintmax_t) 0x6ff3U << 28 | 0xf223422U) + << 28 | 0x5ab51d1U) + << 28 | 0x4a48a05U) + << 28 | 0x1f7dd0bU), + UINTMAX_MAX / 1699) +P (10, 68, + (((((uintmax_t) 0x6c00U << 28 | 0x9963e9dU) + << 28 | 0x48f3447U) + << 28 | 0x4d71b1cU) + << 28 | 0xe914d25U), + UINTMAX_MAX / 1709) +P (12, 62, + (((((uintmax_t) 0x894cU << 28 | 0x02f99a8U) + << 28 | 0xd502d38U) + << 28 | 0x6063f5eU) + << 28 | 0x28c1f89U), + UINTMAX_MAX / 1721) +P (2, 64, + (((((uintmax_t) 0xc8e0U << 28 | 0xa6684d4U) + << 28 | 0x2b6281dU) + << 28 | 0xb7325e3U) + << 28 | 0x2d04e73U), + UINTMAX_MAX / 1723) +P (10, 56, + (((((uintmax_t) 0xf8c2U << 28 | 0xfdc8c0aU) + << 28 | 0x0b85afeU) + << 28 | 0xf748d38U) + << 28 | 0x93b880dU), + UINTMAX_MAX / 1733) +P (8, 60, + (((((uintmax_t) 0xd0a7U << 28 | 0x0a25594U) + << 28 | 0x123bb2fU) + << 28 | 0x3351506U) + << 28 | 0xe935605U), + UINTMAX_MAX / 1741) +P (6, 64, + (((((uintmax_t) 0xdb5dU << 28 | 0xa31878bU) + << 28 | 0xf158a7aU) + << 28 | 0x3637fa2U) + << 28 | 0x376415bU), + UINTMAX_MAX / 1747) +P (6, 70, + (((((uintmax_t) 0x75b4U << 28 | 0x5a8abbcU) + << 28 | 0xd2e004aU) + << 28 | 0xc525d2bU) + << 28 | 0xaa21969U), + UINTMAX_MAX / 1753) +P (6, 72, + (((((uintmax_t) 0x7e53U << 28 | 0x89d2e22U) + << 28 | 0xa34af3aU) + << 28 | 0x11c16b4U) + << 28 | 0x2cd351fU), + UINTMAX_MAX / 1759) +P (18, 70, + (((((uintmax_t) 0xeaf7U << 28 | 0x801270aU) + << 28 | 0x843ff6cU) + << 28 | 0x7abde00U) + << 28 | 0x49c2a11U), + UINTMAX_MAX / 1777) +P (6, 78, + (((((uintmax_t) 0x1ad9U << 28 | 0x60a0cecU) + << 28 | 0x0ae9754U) + << 28 | 0xdad0303U) + << 28 | 0xe069ac7U), + UINTMAX_MAX / 1783) +P (4, 80, + (((((uintmax_t) 0x082aU << 28 | 0x676e737U) + << 28 | 0x70be3ebU) + << 28 | 0xf1ac9fdU) + << 28 | 0xfe91433U), + UINTMAX_MAX / 1787) +P (2, 82, + (((((uintmax_t) 0x50b4U << 28 | 0xdfcda14U) + << 28 | 0x51d9efaU) + << 28 | 0xfdda823U) + << 28 | 0x7cec655U), + UINTMAX_MAX / 1789) +P (12, 72, + (((((uintmax_t) 0x1ffbU << 28 | 0x738ffdbU) + << 28 | 0x9c7fedcU) + << 28 | 0xe3ff6e7U) + << 28 | 0x1ffb739U), + UINTMAX_MAX / 1801) +P (10, 66, + (((((uintmax_t) 0xa660U << 28 | 0xf8ca6cdU) + << 28 | 0x88f9ebeU) + << 28 | 0xd5737d6U) + << 28 | 0x286db1bU), + UINTMAX_MAX / 1811) +P (12, 56, + (((((uintmax_t) 0xed52U << 28 | 0xb6467eaU) + << 28 | 0xa7abbe4U) + << 28 | 0x79e431fU) + << 28 | 0xe08b4dfU), + UINTMAX_MAX / 1823) +P (8, 58, + (((((uintmax_t) 0xdaf2U << 28 | 0xff4d09aU) + << 28 | 0x5ae119dU) + << 28 | 0xd9b0dd7U) + << 28 | 0x742f897U), + UINTMAX_MAX / 1831) +P (16, 54, + (((((uintmax_t) 0x6054U << 28 | 0x454d33bU) + << 28 | 0x2efc88fU) + << 28 | 0x09d7402U) + << 28 | 0xc5a5e87U), + UINTMAX_MAX / 1847) +P (14, 46, + (((((uintmax_t) 0xf545U << 28 | 0x31625b1U) + << 28 | 0x0a51292U) + << 28 | 0x16d5c4dU) + << 28 | 0x958738dU), + UINTMAX_MAX / 1861) +P (6, 46, + (((((uintmax_t) 0x6df8U << 28 | 0x0c1100aU) + << 28 | 0xf82f2b3U) + << 28 | 0x139ba11U) + << 28 | 0xd34ca63U), + UINTMAX_MAX / 1867) +P (4, 60, + (((((uintmax_t) 0xaf8bU << 28 | 0xf8e2952U) + << 28 | 0x3b61d47U) + << 28 | 0xd54f7edU) + << 28 | 0x644afafU), + UINTMAX_MAX / 1871) +P (2, 60, + (((((uintmax_t) 0x4d5cU << 28 | 0x4227171U) + << 28 | 0x9491f92U) + << 28 | 0xa81d85cU) + << 28 | 0xf11a1b1U), + UINTMAX_MAX / 1873) +P (4, 72, + (((((uintmax_t) 0xf78bU << 28 | 0x4082eeaU) + << 28 | 0xdc21475U) + << 28 | 0x4b26533U) + << 28 | 0x253bdfdU), + UINTMAX_MAX / 1877) +P (2, 72, + (((((uintmax_t) 0xf354U << 28 | 0x558f76aU) + << 28 | 0xad92bbbU) + << 28 | 0xe0efc98U) + << 28 | 0x0bfd467U), + UINTMAX_MAX / 1879) +P (10, 84, + (((((uintmax_t) 0x0ab4U << 28 | 0xc91d231U) + << 28 | 0x99d11c0U) + << 28 | 0xd8d594fU) + << 28 | 0x024dca1U), + UINTMAX_MAX / 1889) +P (12, 78, + (((((uintmax_t) 0x1b56U << 28 | 0x52256feU) + << 28 | 0x84c7d82U) + << 28 | 0x38d43bcU) + << 28 | 0xaac1a65U), + UINTMAX_MAX / 1901) +P (6, 80, + (((((uintmax_t) 0xaca2U << 28 | 0xb39dbc1U) + << 28 | 0x2cb3e27U) + << 28 | 0x779c1faU) + << 28 | 0xe6175bbU), + UINTMAX_MAX / 1907) +P (6, 80, + (((((uintmax_t) 0x3856U << 28 | 0xb755c78U) + << 28 | 0x7068ea7U) + << 28 | 0x46ca9afU) + << 28 | 0x708b2c9U), + UINTMAX_MAX / 1913) +P (18, 66, + (((((uintmax_t) 0x052bU << 28 | 0x9de5385U) + << 28 | 0x8076c93U) + << 28 | 0xf3cd9f3U) + << 28 | 0x89be823U), + UINTMAX_MAX / 1931) +P (2, 66, + (((((uintmax_t) 0x820dU << 28 | 0x822f698U) + << 28 | 0xd4f545cU) + << 28 | 0xb4a4c04U) + << 28 | 0xc489345U), + UINTMAX_MAX / 1933) +P (16, 54, + (((((uintmax_t) 0xcd09U << 28 | 0x536828fU) + << 28 | 0xb23dbbfU) + << 28 | 0x6047743U) + << 28 | 0xe85b6b5U), + UINTMAX_MAX / 1949) +P (2, 60, + (((((uintmax_t) 0x8486U << 28 | 0xe386c1eU) + << 28 | 0xf778961U) + << 28 | 0xc147831U) + << 28 | 0x563545fU), + UINTMAX_MAX / 1951) +P (22, 44, + (((((uintmax_t) 0xec68U << 28 | 0x5200c74U) + << 28 | 0xc6c78edU) + << 28 | 0xb47c0aeU) + << 28 | 0x62dee9dU), + UINTMAX_MAX / 1973) +P (6, 48, + (((((uintmax_t) 0xd8acU << 28 | 0xd298624U) + << 28 | 0xff1830aU) + << 28 | 0x3824386U) + << 28 | 0x673a573U), + UINTMAX_MAX / 1979) +P (8, 42, + (((((uintmax_t) 0x03ddU << 28 | 0x78b87ecU) + << 28 | 0x6aad6a4U) + << 28 | 0xa77d19eU) + << 28 | 0x575a0ebU), + UINTMAX_MAX / 1987) +P (6, 46, + (((((uintmax_t) 0x8950U << 28 | 0x062a636U) + << 28 | 0xb8325a2U) + << 28 | 0xbee045eU) + << 28 | 0x066c279U), + UINTMAX_MAX / 1993) +P (4, 56, + (((((uintmax_t) 0xa9daU << 28 | 0xd301275U) + << 28 | 0xae369c2U) + << 28 | 0x3618de8U) + << 28 | 0xab43d05U), + UINTMAX_MAX / 1997) +P (2, 64, + (((((uintmax_t) 0xfa3cU << 28 | 0xb3cd496U) + << 28 | 0x174ec26U) + << 28 | 0x6b51521U) + << 28 | 0x6cb9f2fU), + UINTMAX_MAX / 1999) +P (4, 66, + (((((uintmax_t) 0x5c05U << 28 | 0x9fa1eedU) + << 28 | 0xfaa1ce2U) + << 28 | 0x79edd9eU) + << 28 | 0x9c2e85bU), + UINTMAX_MAX / 2003) +P (8, 70, + (((((uintmax_t) 0x8e52U << 28 | 0x3c5712bU) + << 28 | 0x68c48d0U) + << 28 | 0xc591c22U) + << 28 | 0x1dc9c53U), + UINTMAX_MAX / 2011) +P (6, 66, + (((((uintmax_t) 0x8de5U << 28 | 0xdaaf67bU) + << 28 | 0x1d10a06U) + << 28 | 0xda8ee9cU) + << 28 | 0x9ee7c21U), + UINTMAX_MAX / 2017) +P (10, 60, + (((((uintmax_t) 0xec2bU << 28 | 0xf35ed8fU) + << 28 | 0x98f179dU) + << 28 | 0xfebcaf4U) + << 28 | 0xc27e8c3U), + UINTMAX_MAX / 2027) +P (2, 60, + (((((uintmax_t) 0xe8c8U << 28 | 0xdd0cfedU) + << 28 | 0xd4d9849U) + << 28 | 0xaeff9f1U) + << 28 | 0x9dd6de5U), + UINTMAX_MAX / 2029) +P (10, 60, + (((((uintmax_t) 0x65f2U << 28 | 0xb107280U) + << 28 | 0xd0eb086U) + << 28 | 0x976a57aU) + << 28 | 0x296e9c7U), + UINTMAX_MAX / 2039) +P (14, 58, + (((((uintmax_t) 0x44b5U << 28 | 0x0ed6b9cU) + << 28 | 0xbe093a3U) + << 28 | 0xb9abf48U) + << 28 | 0x72b84cdU), + UINTMAX_MAX / 2053) +P (10, 50, + (((((uintmax_t) 0x9e96U << 28 | 0xa5899dfU) + << 28 | 0x7cf5b34U) + << 28 | 0xfca6483U) + << 28 | 0x895e6efU), + UINTMAX_MAX / 2063) +P (6, 60, + (((((uintmax_t) 0x49beU << 28 | 0x6c24212U) + << 28 | 0x8f47e34U) + << 28 | 0xb5a3339U) + << 28 | 0x88f873dU), + UINTMAX_MAX / 2069) +P (12, 50, + (((((uintmax_t) 0xd1fdU << 28 | 0xc922526U) + << 28 | 0xc0275d9U) + << 28 | 0xdd4f19bU) + << 28 | 0x5f17be1U), + UINTMAX_MAX / 2081) +P (2, 54, + (((((uintmax_t) 0xb8d7U << 28 | 0x51f95d0U) + << 28 | 0x8f8bfb9U) + << 28 | 0x35b507fU) + << 28 | 0xd0ce78bU), + UINTMAX_MAX / 2083) +P (4, 54, + (((((uintmax_t) 0x971fU << 28 | 0x47835f8U) + << 28 | 0xe2aeeb4U) + << 28 | 0x50f5540U) + << 28 | 0x660e797U), + UINTMAX_MAX / 2087) +P (2, 54, + (((((uintmax_t) 0x418fU << 28 | 0xfe0a0c7U) + << 28 | 0xff05063U) + << 28 | 0xff82831U) + << 28 | 0xffc1419U), + UINTMAX_MAX / 2089) +P (10, 54, + (((((uintmax_t) 0xd06fU << 28 | 0x3ae8760U) + << 28 | 0xf5e0889U) + << 28 | 0x92f718cU) + << 28 | 0x22a32fbU), + UINTMAX_MAX / 2099) +P (12, 50, + (((((uintmax_t) 0x16adU << 28 | 0x6a5a779U) + << 28 | 0x25f515fU) + << 28 | 0x3253ad0U) + << 28 | 0xd37e7bfU), + UINTMAX_MAX / 2111) +P (2, 66, + (((((uintmax_t) 0xfe0fU << 28 | 0xc007c0fU) + << 28 | 0xfe0fc00U) + << 28 | 0x7c0ffe0U) + << 28 | 0xfc007c1U), + UINTMAX_MAX / 2113) +P (16, 74, + (((((uintmax_t) 0x9763U << 28 | 0x3395b43U) + << 28 | 0xf020b4dU) + << 28 | 0x8ebadc0U) + << 28 | 0xc0640b1U), + UINTMAX_MAX / 2129) +P (2, 76, + (((((uintmax_t) 0x9a20U << 28 | 0xea7f195U) + << 28 | 0x90471e2U) + << 28 | 0x729af83U) + << 28 | 0x1037bdbU), + UINTMAX_MAX / 2131) +P (6, 76, + (((((uintmax_t) 0x7285U << 28 | 0xee07e80U) + << 28 | 0xa8ab8b8U) + << 28 | 0xf64bf30U) + << 28 | 0xfeebfe9U), + UINTMAX_MAX / 2137) +P (4, 80, + (((((uintmax_t) 0x3dd1U << 28 | 0x5e1a10fU) + << 28 | 0xa9e8cdaU) + << 28 | 0x93124b5U) + << 28 | 0x44c0bf5U), + UINTMAX_MAX / 2141) +P (2, 94, + (((((uintmax_t) 0x4f14U << 28 | 0xe7bff85U) + << 28 | 0xac9e29cU) + << 28 | 0xf7ff0b5U) + << 28 | 0x93c539fU), + UINTMAX_MAX / 2143) +P (10, 86, + (((((uintmax_t) 0x12e7U << 28 | 0xdccdf10U) + << 28 | 0x4a322d6U) + << 28 | 0xbd8861fU) + << 28 | 0xa0e07d9U), + UINTMAX_MAX / 2153) +P (8, 82, + (((((uintmax_t) 0xd7b8U << 28 | 0xebfac9aU) + << 28 | 0x00b5f5cU) + << 28 | 0xfe75c0bU) + << 28 | 0xd8ab891U), + UINTMAX_MAX / 2161) +P (18, 72, + (((((uintmax_t) 0xae1cU << 28 | 0xe6bd9efU) + << 28 | 0x512ea43U) + << 28 | 0xe808757U) + << 28 | 0xc2e862bU), + UINTMAX_MAX / 2179) +P (24, 64, + (((((uintmax_t) 0x459bU << 28 | 0x5dc70f3U) + << 28 | 0x90e8690U) + << 28 | 0xcaa96d5U) + << 28 | 0x95c9d93U), + UINTMAX_MAX / 2203) +P (4, 62, + (((((uintmax_t) 0x4ec2U << 28 | 0xa38d65bU) + << 28 | 0xa2bd88fU) + << 28 | 0xd550625U) + << 28 | 0xd07135fU), + UINTMAX_MAX / 2207) +P (6, 60, + (((((uintmax_t) 0x525dU << 28 | 0x3cf6a14U) + << 28 | 0x20da676U) + << 28 | 0xb010a86U) + << 28 | 0xe209f2dU), + UINTMAX_MAX / 2213) +P (8, 60, + (((((uintmax_t) 0x716bU << 28 | 0x4f6a9e5U) + << 28 | 0xf3522ecU) + << 28 | 0xc042644U) + << 28 | 0x7769b25U), + UINTMAX_MAX / 2221) +P (16, 50, + (((((uintmax_t) 0x48abU << 28 | 0x336212fU) + << 28 | 0xf32ece3U) + << 28 | 0x81339caU) + << 28 | 0xabe3295U), + UINTMAX_MAX / 2237) +P (2, 54, + (((((uintmax_t) 0xbde9U << 28 | 0xd1944b7U) + << 28 | 0x656aad1U) + << 28 | 0xb190a2dU) + << 28 | 0x0c7673fU), + UINTMAX_MAX / 2239) +P (4, 54, + (((((uintmax_t) 0xb595U << 28 | 0xdb3fccdU) + << 28 | 0xe54afc3U) + << 28 | 0xbce3cf2U) + << 28 | 0x6b0e7ebU), + UINTMAX_MAX / 2243) +P (8, 58, + (((((uintmax_t) 0x8a10U << 28 | 0x9aab45fU) + << 28 | 0x137285fU) + << 28 | 0x87e76f5U) + << 28 | 0x6c61ce3U), + UINTMAX_MAX / 2251) +P (16, 44, + (((((uintmax_t) 0x2e69U << 28 | 0x78b763bU) + << 28 | 0x65f88c0U) + << 28 | 0x6c6857aU) + << 28 | 0x124b353U), + UINTMAX_MAX / 2267) +P (2, 64, + (((((uintmax_t) 0x7e40U << 28 | 0x4f6dc75U) + << 28 | 0xca11d38U) + << 28 | 0xc040fcbU) + << 28 | 0xa630f75U), + UINTMAX_MAX / 2269) +P (4, 66, + (((((uintmax_t) 0xa706U << 28 | 0x6b72173U) + << 28 | 0x37865d0U) + << 28 | 0x78bc4fbU) + << 28 | 0xd533b21U), + UINTMAX_MAX / 2273) +P (8, 60, + (((((uintmax_t) 0x1165U << 28 | 0x5853800U) + << 28 | 0xe5d99deU) + << 28 | 0x8e15c5dU) + << 28 | 0xd354f59U), + UINTMAX_MAX / 2281) +P (6, 60, + (((((uintmax_t) 0xad0dU << 28 | 0xfdfc31bU) + << 28 | 0x33610caU) + << 28 | 0x61d53d7U) + << 28 | 0x414260fU), + UINTMAX_MAX / 2287) +P (6, 58, + (((((uintmax_t) 0x65b5U << 28 | 0x32cc4f0U) + << 28 | 0xb46abb5U) + << 28 | 0x6bf5ba8U) + << 28 | 0xeae635dU), + UINTMAX_MAX / 2293) +P (4, 60, + (((((uintmax_t) 0xcdbcU << 28 | 0x7622fecU) + << 28 | 0x6285844U) + << 28 | 0xa72cb0fU) + << 28 | 0xb6e3949U), + UINTMAX_MAX / 2297) +P (12, 62, + (((((uintmax_t) 0x37c4U << 28 | 0x92cae49U) + << 28 | 0xd6fa587U) + << 28 | 0x9839a71U) + << 28 | 0x4f45bcdU), + UINTMAX_MAX / 2309) +P (2, 66, + (((((uintmax_t) 0xc031U << 28 | 0xa083283U) + << 28 | 0x60ed802U) + << 28 | 0xa8994fdU) + << 28 | 0xe5314b7U), + UINTMAX_MAX / 2311) +P (22, 48, + (((((uintmax_t) 0xc841U << 28 | 0xd685a6aU) + << 28 | 0xe081eb9U) + << 28 | 0x71920cfU) + << 28 | 0x2b90135U), + UINTMAX_MAX / 2333) +P (6, 44, + (((((uintmax_t) 0xc4c9U << 28 | 0xd2b0364U) + << 28 | 0x9549a8aU) + << 28 | 0x8fd0b7dU) + << 28 | 0xf9a6e8bU), + UINTMAX_MAX / 2339) +P (2, 48, + (((((uintmax_t) 0xe3c9U << 28 | 0x5290213U) + << 28 | 0xe7112b3U) + << 28 | 0x1f9a84cU) + << 28 | 0x1c6eaadU), + UINTMAX_MAX / 2341) +P (6, 46, + (((((uintmax_t) 0xf02fU << 28 | 0x1ede4bbU) + << 28 | 0x2c64c92U) + << 28 | 0x293b028U) + << 28 | 0x23c6d83U), + UINTMAX_MAX / 2347) +P (4, 48, + (((((uintmax_t) 0x83f9U << 28 | 0x7773bffU) + << 28 | 0x907f2eeU) + << 28 | 0xe77ff20U) + << 28 | 0xfe5ddcfU), + UINTMAX_MAX / 2351) +P (6, 54, + (((((uintmax_t) 0xd472U << 28 | 0x42b02b7U) + << 28 | 0x1ef460eU) + << 28 | 0x1ea0f6cU) + << 28 | 0x496c11dU), + UINTMAX_MAX / 2357) +P (14, 46, + (((((uintmax_t) 0xd905U << 28 | 0xb8f4727U) + << 28 | 0x318f0fdU) + << 28 | 0xf2d3d6fU) + << 28 | 0x88ccb6bU), + UINTMAX_MAX / 2371) +P (6, 46, + (((((uintmax_t) 0xf2c0U << 28 | 0xc7e3914U) + << 28 | 0x920a1faU) + << 28 | 0x9d74a34U) + << 28 | 0x57738f9U), + UINTMAX_MAX / 2377) +P (4, 56, + (((((uintmax_t) 0x6c7cU << 28 | 0x4a67008U) + << 28 | 0x99f72efU) + << 28 | 0xc3ca3dbU) + << 28 | 0x71a5785U), + UINTMAX_MAX / 2381) +P (2, 58, + (((((uintmax_t) 0x7e55U << 28 | 0xba2c0b9U) + << 28 | 0xa289b8eU) + << 28 | 0x2071718U) + << 28 | 0xd0d6dafU), + UINTMAX_MAX / 2383) +P (6, 58, + (((((uintmax_t) 0xbf46U << 28 | 0xd4d0be4U) + << 28 | 0xff091bcU) + << 28 | 0x0fdbfebU) + << 28 | 0x6cfabfdU), + UINTMAX_MAX / 2389) +P (4, 66, + (((((uintmax_t) 0x1908U << 28 | 0x738977bU) + << 28 | 0x58af71eU) + << 28 | 0xeab613eU) + << 28 | 0x5e5aee9U), + UINTMAX_MAX / 2393) +P (6, 68, + (((((uintmax_t) 0x6a48U << 28 | 0xc6e8d7fU) + << 28 | 0xbbb472dU) + << 28 | 0x2388e90U) + << 28 | 0xe9e929fU), + UINTMAX_MAX / 2399) +P (12, 62, + (((((uintmax_t) 0x9f7bU << 28 | 0x7cc2f24U) + << 28 | 0xd82eb81U) + << 28 | 0xdbafba5U) + << 28 | 0x88ddb43U), + UINTMAX_MAX / 2411) +P (6, 60, + (((((uintmax_t) 0x57ceU << 28 | 0x01e8101U) + << 28 | 0x96b8152U) + << 28 | 0xeebc51cU) + << 28 | 0x4799791U), + UINTMAX_MAX / 2417) +P (6, 80, + (((((uintmax_t) 0x22c2U << 28 | 0x9d6cb7dU) + << 28 | 0x695651cU) + << 28 | 0x6bc4693U) + << 28 | 0xb45a047U), + UINTMAX_MAX / 2423) +P (14, 84, + (((((uintmax_t) 0x366aU << 28 | 0x190050aU) + << 28 | 0xd1e2606U) + << 28 | 0xeee0974U) + << 28 | 0x498874dU), + UINTMAX_MAX / 2437) +P (4, 90, + (((((uintmax_t) 0x7708U << 28 | 0x7eb0665U) + << 28 | 0xba929d8U) + << 28 | 0x5b7377aU) + << 28 | 0x9953cb9U), + UINTMAX_MAX / 2441) +P (6, 92, + (((((uintmax_t) 0x8f53U << 28 | 0x96f6b06U) + << 28 | 0x2c2614bU) + << 28 | 0x6df412dU) + << 28 | 0x4caf56fU), + UINTMAX_MAX / 2447) +P (12, 84, + (((((uintmax_t) 0x0c2eU << 28 | 0x394250fU) + << 28 | 0xedad56bU) + << 28 | 0x8afbbb4U) + << 28 | 0xa053493U), + UINTMAX_MAX / 2459) +P (8, 82, + (((((uintmax_t) 0x78afU << 28 | 0x29d1b7fU) + << 28 | 0xbd965ccU) + << 28 | 0x5299c96U) + << 28 | 0xac7720bU), + UINTMAX_MAX / 2467) +P (6, 78, + (((((uintmax_t) 0x1287U << 28 | 0x9bcb69bU) + << 28 | 0x11e89adU) + << 28 | 0xce84b5cU) + << 28 | 0x710aa99U), + UINTMAX_MAX / 2473) +P (4, 80, + (((((uintmax_t) 0x92c2U << 28 | 0x17c54bfU) + << 28 | 0x67de19dU) + << 28 | 0x673f5aaU) + << 28 | 0x3804225U), + UINTMAX_MAX / 2477) +P (26, 76, + (((((uintmax_t) 0xd46eU << 28 | 0x0ce30e3U) + << 28 | 0x76f2ce6U) + << 28 | 0x541268eU) + << 28 | 0xfbce7f7U), + UINTMAX_MAX / 2503) +P (18, 70, + (((((uintmax_t) 0xa49bU << 28 | 0x91ec4ccU) + << 28 | 0x5004dfcU) + << 28 | 0xf41e76cU) + << 28 | 0xf5be669U), + UINTMAX_MAX / 2521) +P (10, 62, + (((((uintmax_t) 0x6098U << 28 | 0x1f8eb77U) + << 28 | 0xa7cd05cU) + << 28 | 0x3eb5dc3U) + << 28 | 0x1c383cbU), + UINTMAX_MAX / 2531) +P (8, 70, + (((((uintmax_t) 0x62e9U << 28 | 0x505bf44U) + << 28 | 0xdd6a930U) + << 28 | 0x1832d11U) + << 28 | 0xd8ad6c3U), + UINTMAX_MAX / 2539) +P (4, 74, + (((((uintmax_t) 0xb3cbU << 28 | 0x3fecabfU) + << 28 | 0x119df2eU) + << 28 | 0x9c0942fU) + << 28 | 0x1ce450fU), + UINTMAX_MAX / 2543) +P (6, 72, + (((((uintmax_t) 0xef3aU << 28 | 0x59c92a1U) + << 28 | 0x4b05b97U) + << 28 | 0xf3f2be3U) + << 28 | 0x7a39a5dU), + UINTMAX_MAX / 2549) +P (2, 82, + (((((uintmax_t) 0xe69cU << 28 | 0x5983c36U) + << 28 | 0x30c57e8U) + << 28 | 0xb7d8a96U) + << 28 | 0x54187c7U), + UINTMAX_MAX / 2551) +P (6, 90, + (((((uintmax_t) 0x437aU << 28 | 0xa4cb09bU) + << 28 | 0x61d08b5U) + << 28 | 0xd024d7dU) + << 28 | 0xa5b1b55U), + UINTMAX_MAX / 2557) +P (22, 78, + (((((uintmax_t) 0x1b65U << 28 | 0x8bdca98U) + << 28 | 0xaabb9b8U) + << 28 | 0xba9d6e7U) + << 28 | 0xae3501bU), + UINTMAX_MAX / 2579) +P (12, 68, + (((((uintmax_t) 0x3ea4U << 28 | 0x3624f3dU) + << 28 | 0x8dfb0f5U) + << 28 | 0x0865f71U) + << 28 | 0xb90f1dfU), + UINTMAX_MAX / 2591) +P (2, 70, + (((((uintmax_t) 0x2d05U << 28 | 0x08fbf3cU) + << 28 | 0x1ffcd73U) + << 28 | 0x9c16828U) + << 28 | 0x47df9e1U), + UINTMAX_MAX / 2593) +P (16, 62, + (((((uintmax_t) 0xc716U << 28 | 0xdcc634cU) + << 28 | 0xa218ec4U) + << 28 | 0x70a4d84U) + << 28 | 0x2b90ed1U), + UINTMAX_MAX / 2609) +P (8, 60, + (((((uintmax_t) 0xe30bU << 28 | 0x71f669dU) + << 28 | 0x7e49c1fU) + << 28 | 0xb1be116U) + << 28 | 0x98cc409U), + UINTMAX_MAX / 2617) +P (4, 62, + (((((uintmax_t) 0xa624U << 28 | 0x238d871U) + << 28 | 0x4cde4d8U) + << 28 | 0xd5512a7U) + << 28 | 0xcd35d15U), + UINTMAX_MAX / 2621) +P (12, 54, + (((((uintmax_t) 0x6488U << 28 | 0x81e55c1U) + << 28 | 0x30e7ca5U) + << 28 | 0x4968217U) + << 28 | 0x23e07f9U), + UINTMAX_MAX / 2633) +P (14, 42, + (((((uintmax_t) 0x8513U << 28 | 0xd3830beU) + << 28 | 0x54ea0bcU) + << 28 | 0xc8c6d7aU) + << 28 | 0xbaa8167U), + UINTMAX_MAX / 2647) +P (10, 36, + (((((uintmax_t) 0x49b5U << 28 | 0x0a4f32fU) + << 28 | 0x800c552U) + << 28 | 0xc396c95U) + << 28 | 0xeb619a1U), + UINTMAX_MAX / 2657) +P (2, 40, + (((((uintmax_t) 0xa1f0U << 28 | 0x049f0c9U) + << 28 | 0xcbd166eU) + << 28 | 0xb7e3808U) + << 28 | 0x78ec74bU), + UINTMAX_MAX / 2659) +P (4, 44, + (((((uintmax_t) 0x25f8U << 28 | 0xe2df380U) + << 28 | 0xb892e3dU) + << 28 | 0x5513b50U) + << 28 | 0x4537157U), + UINTMAX_MAX / 2663) +P (8, 40, + (((((uintmax_t) 0x1654U << 28 | 0xeb02967U) + << 28 | 0x9b8e231U) + << 28 | 0x4391f88U) + << 28 | 0x62e948fU), + UINTMAX_MAX / 2671) +P (6, 36, + (((((uintmax_t) 0x304aU << 28 | 0xf935d6eU) + << 28 | 0x11c97dcU) + << 28 | 0x0b17cfcU) + << 28 | 0xd81f5ddU), + UINTMAX_MAX / 2677) +P (6, 36, + (((((uintmax_t) 0xef7eU << 28 | 0x3c1c9feU) + << 28 | 0xaa07d2fU) + << 28 | 0x6bea3ecU) + << 28 | 0x89044b3U), + UINTMAX_MAX / 2683) +P (4, 42, + (((((uintmax_t) 0xd02cU << 28 | 0x34f8dabU) + << 28 | 0xf7ff3ceU) + << 28 | 0x13a0586U) + << 28 | 0x9f1b57fU), + UINTMAX_MAX / 2687) +P (2, 42, + (((((uintmax_t) 0xca7fU << 28 | 0x00185f3U) + << 28 | 0x3e2ad75U) + << 28 | 0x93474e8U) + << 28 | 0xace3581U), + UINTMAX_MAX / 2689) +P (4, 48, + (((((uintmax_t) 0x613fU << 28 | 0x67e6e76U) + << 28 | 0x10ebc07U) + << 28 | 0xfc32929U) + << 28 | 0x5a05e4dU), + UINTMAX_MAX / 2693) +P (6, 50, + (((((uintmax_t) 0x91e1U << 28 | 0x1433fa4U) + << 28 | 0xf1ad7b0U) + << 28 | 0x5377cbaU) + << 28 | 0x4908d23U), + UINTMAX_MAX / 2699) +P (8, 46, + (((((uintmax_t) 0x99c5U << 28 | 0x2d7ced2U) + << 28 | 0xe3e9ae7U) + << 28 | 0xb2131a6U) + << 28 | 0x28aa39bU), + UINTMAX_MAX / 2707) +P (4, 56, + (((((uintmax_t) 0xe699U << 28 | 0x2a662c6U) + << 28 | 0x1d45f90U) + << 28 | 0x31dbed7U) + << 28 | 0xde01527U), + UINTMAX_MAX / 2711) +P (2, 64, + (((((uintmax_t) 0x86efU << 28 | 0x7ca673aU) + << 28 | 0xf9ad876U) + << 28 | 0x844b1c6U) + << 28 | 0x70aa9a9U), + UINTMAX_MAX / 2713) +P (6, 70, + (((((uintmax_t) 0xb29bU << 28 | 0x59ea585U) + << 28 | 0x098266aU) + << 28 | 0x03f4533U) + << 28 | 0xb08915fU), + UINTMAX_MAX / 2719) +P (10, 62, + (((((uintmax_t) 0x2d67U << 28 | 0x181bc45U) + << 28 | 0x6ad8b1dU) + << 28 | 0xbca579dU) + << 28 | 0xb0a3999U), + UINTMAX_MAX / 2729) +P (2, 66, + (((((uintmax_t) 0xffa0U << 28 | 0x02ffe80U) + << 28 | 0x0bffa00U) + << 28 | 0x2ffe800U) + << 28 | 0xbffa003U), + UINTMAX_MAX / 2731) +P (10, 60, + (((((uintmax_t) 0xef00U << 28 | 0x778c303U) + << 28 | 0x1503a47U) + << 28 | 0x8ab1a3eU) + << 28 | 0x936139dU), + UINTMAX_MAX / 2741) +P (8, 54, + (((((uintmax_t) 0xd453U << 28 | 0x113a63aU) + << 28 | 0x4bcdb66U) + << 28 | 0xe722bc4U) + << 28 | 0xc5cc095U), + UINTMAX_MAX / 2749) +P (4, 66, + (((((uintmax_t) 0x01c4U << 28 | 0x4cfeca8U) + << 28 | 0x7f35a7aU) + << 28 | 0x8f63c71U) + << 28 | 0x7278541U), + UINTMAX_MAX / 2753) +P (14, 66, + (((((uintmax_t) 0x3887U << 28 | 0x72a189cU) + << 28 | 0x2c09fdfU) + << 28 | 0x6eee24dU) + << 28 | 0x292bc2fU), + UINTMAX_MAX / 2767) +P (10, 60, + (((((uintmax_t) 0x835dU << 28 | 0x625cbd2U) + << 28 | 0xa50339fU) + << 28 | 0xc20d172U) + << 28 | 0x37dd569U), + UINTMAX_MAX / 2777) +P (12, 54, + (((((uintmax_t) 0x8052U << 28 | 0x3e3ba9bU) + << 28 | 0x7da8ccdU) + << 28 | 0xf993235U) + << 28 | 0x6bda2edU), + UINTMAX_MAX / 2789) +P (2, 60, + (((((uintmax_t) 0xced6U << 28 | 0x1518ac7U) + << 28 | 0x0a2e697U) + << 28 | 0xb5e332eU) + << 28 | 0x80f68d7U), + UINTMAX_MAX / 2791) +P (6, 60, + (((((uintmax_t) 0x42d0U << 28 | 0x7f67b31U) + << 28 | 0xe1cbd46U) + << 28 | 0xeee26fdU) + << 28 | 0x875e2e5U), + UINTMAX_MAX / 2797) +P (4, 60, + (((((uintmax_t) 0xa787U << 28 | 0x5b7cc16U) + << 28 | 0x4cf4935U) + << 28 | 0x48a8e65U) + << 28 | 0x157a611U), + UINTMAX_MAX / 2801) +P (2, 76, + (((((uintmax_t) 0x69abU << 28 | 0x6d816a6U) + << 28 | 0x6791ac2U) + << 28 | 0x88d03beU) + << 28 | 0x9b71e3bU), + UINTMAX_MAX / 2803) +P (16, 68, + (((((uintmax_t) 0xace8U << 28 | 0x1dc954bU) + << 28 | 0xa58d081U) + << 28 | 0x51186dbU) + << 28 | 0x38937abU), + UINTMAX_MAX / 2819) +P (14, 64, + (((((uintmax_t) 0x7c3fU << 28 | 0xfa377bbU) + << 28 | 0x52dd078U) + << 28 | 0x00b9108U) + << 28 | 0x95a45f1U), + UINTMAX_MAX / 2833) +P (4, 66, + (((((uintmax_t) 0x1f0aU << 28 | 0x8ec0eccU) + << 28 | 0x79a36aeU) + << 28 | 0xe0b0241U) + << 28 | 0x82eec3dU), + UINTMAX_MAX / 2837) +P (6, 66, + (((((uintmax_t) 0x609eU << 28 | 0x7b00a15U) + << 28 | 0xca83496U) + << 28 | 0x323eda1U) + << 28 | 0x73b5713U), + UINTMAX_MAX / 2843) +P (8, 66, + (((((uintmax_t) 0x7362U << 28 | 0x52ca08cU) + << 28 | 0xcba690eU) + << 28 | 0xd0dbd03U) + << 28 | 0xae77c8bU), + UINTMAX_MAX / 2851) +P (6, 70, + (((((uintmax_t) 0xa370U << 28 | 0x463ffa4U) + << 28 | 0x3eb91f7U) + << 28 | 0x3800b78U) + << 28 | 0x28dc119U), + UINTMAX_MAX / 2857) +P (4, 78, + (((((uintmax_t) 0x4586U << 28 | 0x7cbbe80U) + << 28 | 0x502c61bU) + << 28 | 0x61715ecU) + << 28 | 0x22b7ca5U), + UINTMAX_MAX / 2861) +P (18, 74, + (((((uintmax_t) 0x508fU << 28 | 0xb1c027dU) + << 28 | 0x607a5a8U) + << 28 | 0x533a991U) + << 28 | 0xead64bfU), + UINTMAX_MAX / 2879) +P (8, 70, + (((((uintmax_t) 0xbc40U << 28 | 0xe8adccbU) + << 28 | 0xf2e057fU) + << 28 | 0x6c7290eU) + << 28 | 0x46c2e77U), + UINTMAX_MAX / 2887) +P (10, 66, + (((((uintmax_t) 0x73d9U << 28 | 0x78cc4e1U) + << 28 | 0xdde3e63U) + << 28 | 0x25e8d90U) + << 28 | 0x7b01db1U), + UINTMAX_MAX / 2897) +P (6, 66, + (((((uintmax_t) 0x1c21U << 28 | 0x8299f86U) + << 28 | 0xa86ec28U) + << 28 | 0x909f701U) + << 28 | 0x52a1067U), + UINTMAX_MAX / 2903) +P (6, 62, + (((((uintmax_t) 0x5da2U << 28 | 0x8a842e1U) + << 28 | 0xd0a78eaU) + << 28 | 0x7077af0U) + << 28 | 0x997a0f5U), + UINTMAX_MAX / 2909) +P (8, 82, + (((((uintmax_t) 0x21f6U << 28 | 0xb281b61U) + << 28 | 0xadae07eU) + << 28 | 0x605cad1U) + << 28 | 0x0c32e6dU), + UINTMAX_MAX / 2917) +P (10, 74, + (((((uintmax_t) 0x2e9dU << 28 | 0xf4a1477U) + << 28 | 0x4c2dd47U) + << 28 | 0x1b33570U) + << 28 | 0x635b38fU), + UINTMAX_MAX / 2927) +P (12, 72, + (((((uintmax_t) 0x891aU << 28 | 0x37ebcabU) + << 28 | 0x12ba3abU) + << 28 | 0x559fa99U) + << 28 | 0x7a61bb3U), + UINTMAX_MAX / 2939) +P (14, 66, + (((((uintmax_t) 0xccadU << 28 | 0xbad1f78U) + << 28 | 0x11569adU) + << 28 | 0x4bdae56U) + << 28 | 0x2bddab9U), + UINTMAX_MAX / 2953) +P (4, 66, + (((((uintmax_t) 0xb335U << 28 | 0x6a92a82U) + << 28 | 0x08d4a05U) + << 28 | 0x5e1b2f2U) + << 28 | 0xed62f45U), + UINTMAX_MAX / 2957) +P (6, 74, + (((((uintmax_t) 0x58bbU << 28 | 0x5017802U) + << 28 | 0x12d5c03U) + << 28 | 0xcd328b1U) + << 28 | 0xa2dca9bU), + UINTMAX_MAX / 2963) +P (6, 72, + (((((uintmax_t) 0x7501U << 28 | 0xa365242U) + << 28 | 0x0c3e6d2U) + << 28 | 0x8f4e087U) + << 28 | 0x33218a9U), + UINTMAX_MAX / 2969) +P (2, 78, + (((((uintmax_t) 0x18a4U << 28 | 0xbffa7c4U) + << 28 | 0x073ceb6U) + << 28 | 0x800b077U) + << 28 | 0xf186293U), + UINTMAX_MAX / 2971) +P (28, 62, + (((((uintmax_t) 0xa633U << 28 | 0x0bdd838U) + << 28 | 0xae2356fU) + << 28 | 0xbd138c3U) + << 28 | 0xfd9c207U), + UINTMAX_MAX / 2999) +P (2, 66, + (((((uintmax_t) 0xe2ffU << 28 | 0x0fc80a3U) + << 28 | 0xc9104b1U) + << 28 | 0x17ccd12U) + << 28 | 0xae88a89U), + UINTMAX_MAX / 3001) +P (10, 68, + (((((uintmax_t) 0x1183U << 28 | 0xb2cce6eU) + << 28 | 0xb2b722fU) + << 28 | 0x1a1a044U) + << 28 | 0x046bcebU), + UINTMAX_MAX / 3011) +P (8, 64, + (((((uintmax_t) 0xbfb9U << 28 | 0x73118d8U) + << 28 | 0x666f154U) + << 28 | 0x8aba0b0U) + << 28 | 0x60541e3U), + UINTMAX_MAX / 3019) +P (4, 66, + (((((uintmax_t) 0xa152U << 28 | 0xbc81bc6U) + << 28 | 0xc0e90cfU) + << 28 | 0x4e808ceU) + << 28 | 0xa111b2fU), + UINTMAX_MAX / 3023) +P (14, 72, + (((((uintmax_t) 0xaebdU << 28 | 0xa92d6f2U) + << 28 | 0xef39bdbU) + << 28 | 0xec1b4faU) + << 28 | 0x855a475U), + UINTMAX_MAX / 3037) +P (4, 78, + (((((uintmax_t) 0x890cU << 28 | 0xb62bf18U) + << 28 | 0x542ece3U) + << 28 | 0xf794eb6U) + << 28 | 0x00d7821U), + UINTMAX_MAX / 3041) +P (8, 72, + (((((uintmax_t) 0x699fU << 28 | 0xc793db6U) + << 28 | 0x480a134U) + << 28 | 0xfae0d9aU) + << 28 | 0x11f7c59U), + UINTMAX_MAX / 3049) +P (12, 76, + (((((uintmax_t) 0x14fdU << 28 | 0xe8c0055U) + << 28 | 0xa3d62f0U) + << 28 | 0x06b0ccbU) + << 28 | 0xbac085dU), + UINTMAX_MAX / 3061) +P (6, 96, + (((((uintmax_t) 0xa99cU << 28 | 0x01006adU) + << 28 | 0x72efe3fU) + << 28 | 0x45076dcU) + << 28 | 0x3114733U), + UINTMAX_MAX / 3067) +P (12, 88, + (((((uintmax_t) 0x59e0U << 28 | 0xe778f96U) + << 28 | 0xe7f8aeeU) + << 28 | 0xf49bfa5U) + << 28 | 0x8a1a1b7U), + UINTMAX_MAX / 3079) +P (4, 86, + (((((uintmax_t) 0x6edaU << 28 | 0x627b0f3U) + << 28 | 0x2121a12U) + << 28 | 0xc4218beU) + << 28 | 0xa691fa3U), + UINTMAX_MAX / 3083) +P (6, 92, + (((((uintmax_t) 0xf88aU << 28 | 0x9107df8U) + << 28 | 0x35b3ebcU) + << 28 | 0x7504e3bU) + << 28 | 0xd5e64f1U), + UINTMAX_MAX / 3089) +P (20, 78, + (((((uintmax_t) 0xcddaU << 28 | 0x9dee60fU) + << 28 | 0xf969a4eU) + << 28 | 0xe21c292U) + << 28 | 0xbb92fadU), + UINTMAX_MAX / 3109) +P (10, 72, + (((((uintmax_t) 0x4ff1U << 28 | 0x8de982bU) + << 28 | 0xfe5bc34U) + << 28 | 0x338b732U) + << 28 | 0x7a4bacfU), + UINTMAX_MAX / 3119) +P (2, 82, + (((((uintmax_t) 0x8fdfU << 28 | 0x30a40ccU) + << 28 | 0xbc0053fU) + << 28 | 0xe5c0833U) + << 28 | 0xd6fccd1U), + UINTMAX_MAX / 3121) +P (16, 72, + (((((uintmax_t) 0x0ca6U << 28 | 0x26ae799U) + << 28 | 0x8087cb1U) + << 28 | 0xe707435U) + << 28 | 0x35203c1U), + UINTMAX_MAX / 3137) +P (26, 54, + (((((uintmax_t) 0x3a1cU << 28 | 0xa6ba507U) + << 28 | 0x340aaefU) + << 28 | 0xbb5dcdfU) + << 28 | 0xb4e43d3U), + UINTMAX_MAX / 3163) +P (4, 54, + (((((uintmax_t) 0x340eU << 28 | 0x8ccfe76U) + << 28 | 0xd34c8caU) + << 28 | 0x68467caU) + << 28 | 0x5394f9fU), + UINTMAX_MAX / 3167) +P (2, 60, + (((((uintmax_t) 0xe94cU << 28 | 0xd3010cdU) + << 28 | 0x82c978cU) + << 28 | 0x51c0814U) + << 28 | 0x08b97a1U), + UINTMAX_MAX / 3169) +P (12, 70, + (((((uintmax_t) 0x69d4U << 28 | 0x0f213ccU) + << 28 | 0x2c1a132U) + << 28 | 0x75a899dU) + << 28 | 0xfa5dd65U), + UINTMAX_MAX / 3181) +P (6, 66, + (((((uintmax_t) 0xcc45U << 28 | 0x14a4d46U) + << 28 | 0x1ff849eU) + << 28 | 0x674cb62U) + << 28 | 0xe1b78bbU), + UINTMAX_MAX / 3187) +P (4, 66, + (((((uintmax_t) 0x6351U << 28 | 0xbffadd9U) + << 28 | 0x54cc6a3U) + << 28 | 0x7ff5bb2U) + << 28 | 0xa998d47U), + UINTMAX_MAX / 3191) +P (12, 56, + (((((uintmax_t) 0x77baU << 28 | 0x4e2aae1U) + << 28 | 0x3a95c79U) + << 28 | 0x2a999dbU) + << 28 | 0x131a22bU), + UINTMAX_MAX / 3203) +P (6, 62, + (((((uintmax_t) 0x8d1fU << 28 | 0x82e96c6U) + << 28 | 0xa42da1bU) + << 28 | 0x48841bcU) + << 28 | 0x30d29b9U), + UINTMAX_MAX / 3209) +P (8, 82, + (((((uintmax_t) 0x0ef5U << 28 | 0xe4c8da5U) + << 28 | 0xc2683f0U) + << 28 | 0x6721d20U) + << 28 | 0x11d3471U), + UINTMAX_MAX / 3217) +P (4, 80, + (((((uintmax_t) 0x9ccfU << 28 | 0x98fef77U) + << 28 | 0xeed5293U) + << 28 | 0xfd2386dU) + << 28 | 0xff85ebdU), + UINTMAX_MAX / 3221) +P (8, 78, + (((((uintmax_t) 0x9c06U << 28 | 0xa8de9f5U) + << 28 | 0xb182e4cU) + << 28 | 0xe72f54cU) + << 28 | 0x07ed9b5U), + UINTMAX_MAX / 3229) +P (22, 62, + (((((uintmax_t) 0xdcf5U << 28 | 0x5e929f8U) + << 28 | 0x99148d6U) + << 28 | 0xd0fd3e7U) + << 28 | 0x1dd827bU), + UINTMAX_MAX / 3251) +P (2, 66, + (((((uintmax_t) 0xcebcU << 28 | 0x664e397U) + << 28 | 0x2d17d85U) + << 28 | 0x6405fb1U) + << 28 | 0xeed819dU), + UINTMAX_MAX / 3253) +P (4, 66, + (((((uintmax_t) 0x921eU << 28 | 0x0671f84U) + << 28 | 0xc15b18eU) + << 28 | 0xa8aceb7U) + << 28 | 0xc443989U), + UINTMAX_MAX / 3257) +P (2, 70, + (((((uintmax_t) 0x4223U << 28 | 0xfa07b2bU) + << 28 | 0x4830634U) + << 28 | 0xa13026fU) + << 28 | 0x62e5873U), + UINTMAX_MAX / 3259) +P (12, 60, + (((((uintmax_t) 0x4ceeU << 28 | 0xdc3bcb1U) + << 28 | 0x806e31eU) + << 28 | 0xea0208eU) + << 28 | 0xc0af4f7U), + UINTMAX_MAX / 3271) +P (28, 44, + (((((uintmax_t) 0x969eU << 28 | 0xc4a2f55U) + << 28 | 0xe703563U) + << 28 | 0x679853cU) + << 28 | 0xea598cbU), + UINTMAX_MAX / 3299) +P (2, 46, + (((((uintmax_t) 0xd886U << 28 | 0xa176bb8U) + << 28 | 0x577a9c3U) + << 28 | 0x0b3ebd6U) + << 28 | 0x1f2d0edU), + UINTMAX_MAX / 3301) +P (6, 52, + (((((uintmax_t) 0xaaecU << 28 | 0xb97a633U) + << 28 | 0xdda117eU) + << 28 | 0xb9037bcU) + << 28 | 0x7f43bc3U), + UINTMAX_MAX / 3307) +P (6, 48, + (((((uintmax_t) 0x1a59U << 28 | 0x7af0505U) + << 28 | 0xcb9c2a5U) + << 28 | 0x83e6f6cU) + << 28 | 0xe016411U), + UINTMAX_MAX / 3313) +P (6, 52, + (((((uintmax_t) 0x76c8U << 28 | 0x6358785U) + << 28 | 0x34d5cf1U) + << 28 | 0x938d895U) + << 28 | 0xf1a74c7U), + UINTMAX_MAX / 3319) +P (4, 50, + (((((uintmax_t) 0xb781U << 28 | 0xa8058bfU) + << 28 | 0xac2e880U) + << 28 | 0xcf1491cU) + << 28 | 0x1e81e33U), + UINTMAX_MAX / 3323) +P (6, 60, + (((((uintmax_t) 0xc604U << 28 | 0x75cf8d9U) + << 28 | 0x2a5f33cU) + << 28 | 0x0f12886U) + << 28 | 0xba8f301U), + UINTMAX_MAX / 3329) +P (2, 60, + (((((uintmax_t) 0x9d2aU << 28 | 0x8009d65U) + << 28 | 0x861c20eU) + << 28 | 0x4b786e0U) + << 28 | 0xdfcc5abU), + UINTMAX_MAX / 3331) +P (12, 64, + (((((uintmax_t) 0x4053U << 28 | 0x511894dU) + << 28 | 0xe137367U) + << 28 | 0x2684c93U) + << 28 | 0xf2d41efU), + UINTMAX_MAX / 3343) +P (4, 66, + (((((uintmax_t) 0xcbfdU << 28 | 0x3f19edcU) + << 28 | 0xbd615e0U) + << 28 | 0x0757badU) + << 28 | 0xb35c51bU), + UINTMAX_MAX / 3347) +P (12, 74, + (((((uintmax_t) 0x303eU << 28 | 0x309fbe2U) + << 28 | 0x6de63d6U) + << 28 | 0xd84afe6U) + << 28 | 0x6472edfU), + UINTMAX_MAX / 3359) +P (2, 88, + (((((uintmax_t) 0x1123U << 28 | 0x440491fU) + << 28 | 0x00137fbU) + << 28 | 0xbc0eedcU) + << 28 | 0xbbfb6e1U), + UINTMAX_MAX / 3361) +P (10, 86, + (((((uintmax_t) 0x5ae7U << 28 | 0x03df7f3U) + << 28 | 0x3de4825U) + << 28 | 0x0f43aa0U) + << 28 | 0x8a84983U), + UINTMAX_MAX / 3371) +P (2, 88, + (((((uintmax_t) 0x11fcU << 28 | 0xcff5122U) + << 28 | 0x3abe804U) + << 28 | 0x400e927U) + << 28 | 0xb1acaa5U), + UINTMAX_MAX / 3373) +P (16, 74, + (((((uintmax_t) 0x80cbU << 28 | 0x0c29652U) + << 28 | 0x5643d56U) + << 28 | 0x572be34U) + << 28 | 0xb9d3215U), + UINTMAX_MAX / 3389) +P (2, 76, + (((((uintmax_t) 0xc57dU << 28 | 0xffd958dU) + << 28 | 0xb3c0487U) + << 28 | 0x964ef77U) + << 28 | 0x81c62bfU), + UINTMAX_MAX / 3391) +P (16, 62, + (((((uintmax_t) 0x9c4aU << 28 | 0x3cdce8eU) + << 28 | 0xea48e29U) + << 28 | 0xed84051U) + << 28 | 0xc06e9afU), + UINTMAX_MAX / 3407) +P (6, 78, + (((((uintmax_t) 0x0cf9U << 28 | 0xeca5ea8U) + << 28 | 0xc4381b0U) + << 28 | 0x0acd11eU) + << 28 | 0xd3f87fdU), + UINTMAX_MAX / 3413) +P (20, 66, + (((((uintmax_t) 0xfe48U << 28 | 0xee074edU) + << 28 | 0x223a506U) + << 28 | 0x3078817U) + << 28 | 0x44152d9U), + UINTMAX_MAX / 3433) +P (16, 62, + (((((uintmax_t) 0xa409U << 28 | 0x342e04eU) + << 28 | 0x6187e7aU) + << 28 | 0x786459fU) + << 28 | 0x5c1ccc9U), + UINTMAX_MAX / 3449) +P (8, 60, + (((((uintmax_t) 0xe4e5U << 28 | 0x902e357U) + << 28 | 0x74c7f13U) + << 28 | 0x08125d7U) + << 28 | 0x4563281U), + UINTMAX_MAX / 3457) +P (4, 66, + (((((uintmax_t) 0x7588U << 28 | 0x9dfe5f6U) + << 28 | 0xae1e539U) + << 28 | 0x5310a48U) + << 28 | 0x0b3e34dU), + UINTMAX_MAX / 3461) +P (2, 66, + (((((uintmax_t) 0x3784U << 28 | 0x6603fdeU) + << 28 | 0xe1c3d35U) + << 28 | 0x985baa8U) + << 28 | 0xb202837U), + UINTMAX_MAX / 3463) +P (4, 66, + (((((uintmax_t) 0xb450U << 28 | 0xa1daeecU) + << 28 | 0xba5ea96U) + << 28 | 0x304a6e0U) + << 28 | 0x52b3223U), + UINTMAX_MAX / 3467) +P (2, 70, + (((((uintmax_t) 0xfbf0U << 28 | 0xf20d6e5U) + << 28 | 0x363d8bdU) + << 28 | 0x8265fc9U) + << 28 | 0xaf8fd45U), + UINTMAX_MAX / 3469) +P (22, 50, + (((((uintmax_t) 0xeeb1U << 28 | 0x9bd44b6U) + << 28 | 0x27bee1bU) + << 28 | 0x6d0b383U) + << 28 | 0xec58e0bU), + UINTMAX_MAX / 3491) +P (8, 48, + (((((uintmax_t) 0x7386U << 28 | 0x8c53fdfU) + << 28 | 0x38fe9c2U) + << 28 | 0x1a7c3b6U) + << 28 | 0x8b28503U), + UINTMAX_MAX / 3499) +P (12, 46, + (((((uintmax_t) 0xba13U << 28 | 0x65219cfU) + << 28 | 0xbb2b623U) + << 28 | 0x6fa180fU) + << 28 | 0xbfd6007U), + UINTMAX_MAX / 3511) +P (6, 42, + (((((uintmax_t) 0xe16dU << 28 | 0xb1887adU) + << 28 | 0xe4c6dc4U) + << 28 | 0x2accd44U) + << 28 | 0x0ed9595U), + UINTMAX_MAX / 3517) +P (10, 44, + (((((uintmax_t) 0x4cf0U << 28 | 0x1ab5e49U) + << 28 | 0x04b7c7aU) + << 28 | 0xcf71282U) + << 28 | 0x36ba3f7U), + UINTMAX_MAX / 3527) +P (2, 52, + (((((uintmax_t) 0x6374U << 28 | 0x6df92e5U) + << 28 | 0xaad5ff9U) + << 28 | 0x09367a9U) + << 28 | 0x87b9c79U), + UINTMAX_MAX / 3529) +P (4, 50, + (((((uintmax_t) 0x3fc3U << 28 | 0xb6abbabU) + << 28 | 0xa82dcb6U) + << 28 | 0x4efb252U) + << 28 | 0xbfba705U), + UINTMAX_MAX / 3533) +P (6, 54, + (((((uintmax_t) 0x82b6U << 28 | 0x6ef6f53U) + << 28 | 0x8c8ce98U) + << 28 | 0x0d4f5a7U) + << 28 | 0xe4cd25bU), + UINTMAX_MAX / 3539) +P (2, 66, + (((((uintmax_t) 0x20c0U << 28 | 0x04a07f3U) + << 28 | 0xdab1fe1U) + << 28 | 0xecc4ef2U) + << 28 | 0x7b0c37dU), + UINTMAX_MAX / 3541) +P (6, 66, + (((((uintmax_t) 0xfb2aU << 28 | 0x13c68cbU) + << 28 | 0xd185291U) + << 28 | 0x11aebb8U) + << 28 | 0x1d72653U), + UINTMAX_MAX / 3547) +P (10, 60, + (((((uintmax_t) 0x8908U << 28 | 0x46d1b90U) + << 28 | 0x96d9c89U) + << 28 | 0x51f985cU) + << 28 | 0xb2c67edU), + UINTMAX_MAX / 3557) +P (2, 64, + (((((uintmax_t) 0xf7baU << 28 | 0x5f17856U) + << 28 | 0xe44e8c4U) + << 28 | 0x39d4fc5U) + << 28 | 0x4e0b5d7U), + UINTMAX_MAX / 3559) +P (12, 60, + (((((uintmax_t) 0x811cU << 28 | 0x75db26eU) + << 28 | 0xd4a0de8U) + << 28 | 0x57bf318U) + << 28 | 0x96d533bU), + UINTMAX_MAX / 3571) +P (10, 56, + (((((uintmax_t) 0x6fbcU << 28 | 0x83d31afU) + << 28 | 0x37d51b6U) + << 28 | 0x14bb4cbU) + << 28 | 0x5023755U), + UINTMAX_MAX / 3581) +P (2, 60, + (((((uintmax_t) 0xdf7dU << 28 | 0xad8c657U) + << 28 | 0x4f61193U) + << 28 | 0x8a89e54U) + << 28 | 0x73bf1ffU), + UINTMAX_MAX / 3583) +P (10, 66, + (((((uintmax_t) 0x48beU << 28 | 0xf2f618aU) + << 28 | 0x70259eaU) + << 28 | 0xc481acaU) + << 28 | 0x34de039U), + UINTMAX_MAX / 3593) +P (14, 64, + (((((uintmax_t) 0x5c8cU << 28 | 0x86d951dU) + << 28 | 0x4fd8414U) + << 28 | 0xb961badU) + << 28 | 0xf4809a7U), + UINTMAX_MAX / 3607) +P (6, 60, + (((((uintmax_t) 0x3e35U << 28 | 0xfddfd4eU) + << 28 | 0xb85d876U) + << 28 | 0x784fecbU) + << 28 | 0xa352435U), + UINTMAX_MAX / 3613) +P (4, 60, + (((((uintmax_t) 0x3f46U << 28 | 0x480d05dU) + << 28 | 0xfde06efU) + << 28 | 0xa689bb5U) + << 28 | 0x8aef5e1U), + UINTMAX_MAX / 3617) +P (6, 68, + (((((uintmax_t) 0xa7f5U << 28 | 0x427da20U) + << 28 | 0x5cb49b2U) + << 28 | 0xb2c4db9U) + << 28 | 0xc3a8197U), + UINTMAX_MAX / 3623) +P (8, 66, + (((((uintmax_t) 0x1756U << 28 | 0x39f44bdU) + << 28 | 0xcbf7d25U) + << 28 | 0x03bc992U) + << 28 | 0x279f8cfU), + UINTMAX_MAX / 3631) +P (6, 64, + (((((uintmax_t) 0xf7b1U << 28 | 0xba9905dU) + << 28 | 0x798f3d2U) + << 28 | 0xab9aec5U) + << 28 | 0xca1541dU), + UINTMAX_MAX / 3637) +P (6, 66, + (((((uintmax_t) 0x0ec1U << 28 | 0xcf3b3d3U) + << 28 | 0x4ea253eU) + << 28 | 0x78ba146U) + << 28 | 0x0f99af3U), + UINTMAX_MAX / 3643) +P (16, 60, + (((((uintmax_t) 0x694bU << 28 | 0xe954ddeU) + << 28 | 0xd63b30aU) + << 28 | 0x0142657U) + << 28 | 0x2cfcb63U), + UINTMAX_MAX / 3659) +P (12, 56, + (((((uintmax_t) 0xd628U << 28 | 0x9612455U) + << 28 | 0x13dfebeU) + << 28 | 0xa857968U) + << 28 | 0xf3cbd67U), + UINTMAX_MAX / 3671) +P (2, 60, + (((((uintmax_t) 0x63bcU << 28 | 0xcfb30dbU) + << 28 | 0xaffca78U) + << 28 | 0xdb213eeU) + << 28 | 0xfe659e9U), + UINTMAX_MAX / 3673) +P (4, 62, + (((((uintmax_t) 0x7cf8U << 28 | 0xb08fb32U) + << 28 | 0x328ba96U) + << 28 | 0x3e8541aU) + << 28 | 0x74d35f5U), + UINTMAX_MAX / 3677) +P (14, 70, + (((((uintmax_t) 0x99e7U << 28 | 0xb98849cU) + << 28 | 0xbfb489eU) + << 28 | 0x22d1527U) + << 28 | 0x76f2e43U), + UINTMAX_MAX / 3691) +P (6, 70, + (((((uintmax_t) 0x1767U << 28 | 0xa90721dU) + << 28 | 0xc686c05U) + << 28 | 0xd10d39dU) + << 28 | 0x1e1f291U), + UINTMAX_MAX / 3697) +P (4, 68, + (((((uintmax_t) 0x817cU << 28 | 0xb6e3047U) + << 28 | 0xeff3d37U) + << 28 | 0x4468dccU) + << 28 | 0xaced1ddU), + UINTMAX_MAX / 3701) +P (8, 70, + (((((uintmax_t) 0x916dU << 28 | 0x896be15U) + << 28 | 0xac3548dU) + << 28 | 0x145c7d1U) + << 28 | 0x10c5ad5U), + UINTMAX_MAX / 3709) +P (10, 74, + (((((uintmax_t) 0x50e1U << 28 | 0xc7f7bd5U) + << 28 | 0xdf5f332U) + << 28 | 0x51a39f5U) + << 28 | 0xacb5737U), + UINTMAX_MAX / 3719) +P (8, 70, + (((((uintmax_t) 0xc1e7U << 28 | 0xf58f36eU) + << 28 | 0x1b567a6U) + << 28 | 0x6e50171U) + << 28 | 0x443506fU), + UINTMAX_MAX / 3727) +P (6, 70, + (((((uintmax_t) 0xe72cU << 28 | 0xc7f8de3U) + << 28 | 0x0f6e112U) + << 28 | 0x4f69ad9U) + << 28 | 0x1dd4cbdU), + UINTMAX_MAX / 3733) +P (6, 82, + (((((uintmax_t) 0x81e2U << 28 | 0x02e029aU) + << 28 | 0x0d485ecU) + << 28 | 0x24f8f2aU) + << 28 | 0x61a2793U), + UINTMAX_MAX / 3739) +P (22, 62, + (((((uintmax_t) 0x66a5U << 28 | 0x216bc00U) + << 28 | 0x45b35b4U) + << 28 | 0x72148e6U) + << 28 | 0x56b7a51U), + UINTMAX_MAX / 3761) +P (6, 66, + (((((uintmax_t) 0x3442U << 28 | 0x9973536U) + << 28 | 0x29ba00aU) + << 28 | 0xdf9570eU) + << 28 | 0x1142f07U), + UINTMAX_MAX / 3767) +P (2, 78, + (((((uintmax_t) 0xc952U << 28 | 0x869f58aU) + << 28 | 0x38eb489U) + << 28 | 0xbf33b06U) + << 28 | 0x5119789U), + UINTMAX_MAX / 3769) +P (10, 72, + (((((uintmax_t) 0xc462U << 28 | 0xe78b7b7U) + << 28 | 0xebf2b8fU) + << 28 | 0x0149803U) + << 28 | 0xcb291ebU), + UINTMAX_MAX / 3779) +P (14, 60, + (((((uintmax_t) 0xa7b8U << 28 | 0x300e09dU) + << 28 | 0xa9be883U) + << 28 | 0x34b63afU) + << 28 | 0xd190a31U), + UINTMAX_MAX / 3793) +P (4, 66, + (((((uintmax_t) 0x678fU << 28 | 0x45607afU) + << 28 | 0xa226292U) + << 28 | 0x0908d50U) + << 28 | 0xd6aba7dU), + UINTMAX_MAX / 3797) +P (6, 74, + (((((uintmax_t) 0x3066U << 28 | 0x51b882dU) + << 28 | 0xc63e557U) + << 28 | 0xd8b018cU) + << 28 | 0x5a33d53U), + UINTMAX_MAX / 3803) +P (18, 60, + (((((uintmax_t) 0x03f3U << 28 | 0xf0b9737U) + << 28 | 0x01682eaU) + << 28 | 0x1773092U) + << 28 | 0xdc27ee5U), + UINTMAX_MAX / 3821) +P (2, 66, + (((((uintmax_t) 0x824fU << 28 | 0x6b12f35U) + << 28 | 0x80e76caU) + << 28 | 0xe5f38b7U) + << 28 | 0xbf2e00fU), + UINTMAX_MAX / 3823) +P (10, 74, + (((((uintmax_t) 0xba8aU << 28 | 0x4084821U) + << 28 | 0xa94f02bU) + << 28 | 0xd02df34U) + << 28 | 0xf695349U), + UINTMAX_MAX / 3833) +P (14, 64, + (((((uintmax_t) 0x1f9bU << 28 | 0xea70762U) + << 28 | 0xf3f48ddU) + << 28 | 0xfecd5beU) + << 28 | 0x62e2eb7U), + UINTMAX_MAX / 3847) +P (4, 66, + (((((uintmax_t) 0xb7acU << 28 | 0x817ee73U) + << 28 | 0x45119dbU) + << 28 | 0xf849ebeU) + << 28 | 0xc96c4a3U), + UINTMAX_MAX / 3851) +P (2, 66, + (((((uintmax_t) 0xf8c2U << 28 | 0x0286585U) + << 28 | 0xe14dcdaU) + << 28 | 0x31d4d01U) + << 28 | 0x87357c5U), + UINTMAX_MAX / 3853) +P (10, 60, + (((((uintmax_t) 0x7727U << 28 | 0x2a58ab3U) + << 28 | 0xdb276e3U) + << 28 | 0x4e21cc2U) + << 28 | 0xd5418a7U), + UINTMAX_MAX / 3863) +P (14, 52, + (((((uintmax_t) 0x61caU << 28 | 0x83edc68U) + << 28 | 0xdb38968U) + << 28 | 0xca5137aU) + << 28 | 0x9e574adU), + UINTMAX_MAX / 3877) +P (4, 50, + (((((uintmax_t) 0x74f3U << 28 | 0x8879e60U) + << 28 | 0x2c53a3eU) + << 28 | 0xaa0d0f8U) + << 28 | 0x04bfd19U), + UINTMAX_MAX / 3881) +P (8, 54, + (((((uintmax_t) 0x1c6fU << 28 | 0xe7c6996U) + << 28 | 0x04df055U) + << 28 | 0x4fb753cU) + << 28 | 0xc20e9d1U), + UINTMAX_MAX / 3889) +P (18, 40, + (((((uintmax_t) 0x374dU << 28 | 0x408a62aU) + << 28 | 0xda31679U) + << 28 | 0x7afcca1U) + << 28 | 0x300756bU), + UINTMAX_MAX / 3907) +P (4, 56, + (((((uintmax_t) 0xc8e2U << 28 | 0xbdb1524U) + << 28 | 0x758f48bU) + << 28 | 0x8d950b5U) + << 28 | 0x2eeea77U), + UINTMAX_MAX / 3911) +P (6, 72, + (((((uintmax_t) 0xbfc1U << 28 | 0x421336fU) + << 28 | 0x6ea5dfbU) + << 28 | 0x6cd166aU) + << 28 | 0xcabc185U), + UINTMAX_MAX / 3917) +P (2, 82, + (((((uintmax_t) 0x7daeU << 28 | 0x58b5560U) + << 28 | 0x7b5454eU) + << 28 | 0xb6c5ed9U) + << 28 | 0x437a7afU), + UINTMAX_MAX / 3919) +P (4, 80, + (((((uintmax_t) 0xf1f8U << 28 | 0x4cbdc3dU) + << 28 | 0x573f5d1U) + << 28 | 0xeddbd91U) + << 28 | 0xb790cdbU), + UINTMAX_MAX / 3923) +P (6, 78, + (((((uintmax_t) 0xa6abU << 28 | 0x9f4ec63U) + << 28 | 0x4c6db93U) + << 28 | 0xd714ea4U) + << 28 | 0xd8948e9U), + UINTMAX_MAX / 3929) +P (2, 82, + (((((uintmax_t) 0x8198U << 28 | 0x742e1b7U) + << 28 | 0xb68a73cU) + << 28 | 0xa13ed81U) + << 28 | 0x45188d3U), + UINTMAX_MAX / 3931) +P (12, 76, + (((((uintmax_t) 0x5ab3U << 28 | 0x52c7947U) + << 28 | 0xbe09382U) + << 28 | 0x9086016U) + << 28 | 0xda89c57U), + UINTMAX_MAX / 3943) +P (4, 74, + (((((uintmax_t) 0xec69U << 28 | 0x9751239U) + << 28 | 0xb9900d7U) + << 28 | 0xda1f432U) + << 28 | 0x124a543U), + UINTMAX_MAX / 3947) +P (20, 60, + (((((uintmax_t) 0xa4e1U << 28 | 0x58dc715U) + << 28 | 0x1a22b7eU) + << 28 | 0xad55816U) + << 28 | 0x32fb07fU), + UINTMAX_MAX / 3967) +P (22, 60, + (((((uintmax_t) 0x4cd1U << 28 | 0xba8fa08U) + << 28 | 0x1613a35U) + << 28 | 0x443837fU) + << 28 | 0x63ec3bdU), + UINTMAX_MAX / 3989) +P (12, 50, + (((((uintmax_t) 0x48afU << 28 | 0x92759a4U) + << 28 | 0x3f37589U) + << 28 | 0xe2b200eU) + << 28 | 0x5519461U), + UINTMAX_MAX / 4001) +P (2, 54, + (((((uintmax_t) 0x9293U << 28 | 0xfc29b25U) + << 28 | 0xcbafee9U) + << 28 | 0xae44f0bU) + << 28 | 0x7289c0bU), + UINTMAX_MAX / 4003) +P (4, 66, + (((((uintmax_t) 0xc02cU << 28 | 0xfa2fa91U) + << 28 | 0xcaf9094U) + << 28 | 0x387a277U) + << 28 | 0xb9fa817U), + UINTMAX_MAX / 4007) +P (6, 66, + (((((uintmax_t) 0x15c0U << 28 | 0xd8627efU) + << 28 | 0x28a2cc8U) + << 28 | 0x4f1a58aU) + << 28 | 0xbfc2c25U), + UINTMAX_MAX / 4013) +P (6, 72, + (((((uintmax_t) 0x1143U << 28 | 0x12ca6e3U) + << 28 | 0x2522b71U) + << 28 | 0x101d8e3U) + << 28 | 0xc83377bU), + UINTMAX_MAX / 4019) +P (2, 72, + (((((uintmax_t) 0xcfadU << 28 | 0x7d3b04aU) + << 28 | 0x5c91ec0U) + << 28 | 0x24abe5cU) + << 28 | 0x50ba69dU), + UINTMAX_MAX / 4021) +P (6, 72, + (((((uintmax_t) 0x9d46U << 28 | 0x3eef687U) + << 28 | 0x26d7815U) + << 28 | 0xde4eb36U) + << 28 | 0x5a65d73U), + UINTMAX_MAX / 4027) +P (22, 62, + (((((uintmax_t) 0xe98eU << 28 | 0x1152e37U) + << 28 | 0xc3cf309U) + << 28 | 0xed28a76U) + << 28 | 0xbcca931U), + UINTMAX_MAX / 4049) +P (2, 76, + (((((uintmax_t) 0xa002U << 28 | 0x05affefU) + << 28 | 0xd280081U) + << 28 | 0x6bffbf4U) + << 28 | 0xa00205bU), + UINTMAX_MAX / 4051) +P (6, 72, + (((((uintmax_t) 0x1d87U << 28 | 0xfb74ed0U) + << 28 | 0x1b4271fU) + << 28 | 0x5c71543U) + << 28 | 0xd558069U), + UINTMAX_MAX / 4057) +P (16, 60, + (((((uintmax_t) 0x7051U << 28 | 0x751852fU) + << 28 | 0x74370f2U) + << 28 | 0x5c64d0eU) + << 28 | 0xc53b859U), + UINTMAX_MAX / 4073) +P (6, 60, + (((((uintmax_t) 0x88e1U << 28 | 0x6f867eeU) + << 28 | 0x6d54296U) + << 28 | 0xc02c2efU) + << 28 | 0x1e0ff0fU), + UINTMAX_MAX / 4079) +P (12, 62, + (((((uintmax_t) 0xe8e8U << 28 | 0xc8bebb9U) + << 28 | 0xaa05219U) + << 28 | 0xa804816U) + << 28 | 0x870a333U), + UINTMAX_MAX / 4091) +P (2, 64, + (((((uintmax_t) 0xc605U << 28 | 0x20f62e2U) + << 28 | 0x8a79f6dU) + << 28 | 0xe49add0U) + << 28 | 0x971c555U), + UINTMAX_MAX / 4093) +P (6, 60, + (((((uintmax_t) 0x46c2U << 28 | 0xbb7cd89U) + << 28 | 0x7639d52U) + << 28 | 0x8087e68U) + << 28 | 0x4c71aabU), + UINTMAX_MAX / 4099) +P (12, 66, + (((((uintmax_t) 0xfc73U << 28 | 0x53e15cbU) + << 28 | 0x9127ea9U) + << 28 | 0x4152c26U) + << 28 | 0x9bcdeefU), + UINTMAX_MAX / 4111) +P (16, 74, + (((((uintmax_t) 0x3d78U << 28 | 0xe5c2d68U) + << 28 | 0x0673803U) + << 28 | 0x79450a3U) + << 28 | 0xc2b6bdfU), + UINTMAX_MAX / 4127) +P (2, 82, + (((((uintmax_t) 0x4a66U << 28 | 0x8c7e3baU) + << 28 | 0x4fbb8d2U) + << 28 | 0xcd38bafU) + << 28 | 0xe5373e1U), + UINTMAX_MAX / 4129) +P (4, 84, + (((((uintmax_t) 0x616eU << 28 | 0xb008eb5U) + << 28 | 0xfb2b2c2U) + << 28 | 0x9df2beaU) + << 28 | 0x71d8badU), + UINTMAX_MAX / 4133) +P (6, 80, + (((((uintmax_t) 0x12bdU << 28 | 0xa25ba9aU) + << 28 | 0x80c5ec1U) + << 28 | 0x5862775U) + << 28 | 0xf302e83U), + UINTMAX_MAX / 4139) +P (14, 76, + (((((uintmax_t) 0x98dfU << 28 | 0x642b264U) + << 28 | 0x7a0d310U) + << 28 | 0x16af2feU) + << 28 | 0x55ede09U), + UINTMAX_MAX / 4153) +P (4, 74, + (((((uintmax_t) 0xcc45U << 28 | 0x381a1c7U) + << 28 | 0x3878b3dU) + << 28 | 0x26dbd9dU) + << 28 | 0x1910715U), + UINTMAX_MAX / 4157) +P (2, 82, + (((((uintmax_t) 0x1344U << 28 | 0x23b36d8U) + << 28 | 0x0d4ba62U) + << 28 | 0x1dab2dfU) + << 28 | 0xaf3dfbfU), + UINTMAX_MAX / 4159) +P (18, 66, + (((((uintmax_t) 0xd614U << 28 | 0x399c587U) + << 28 | 0xff827b6U) + << 28 | 0xf1d7ac2U) + << 28 | 0x87338b1U), + UINTMAX_MAX / 4177) +P (24, 52, + (((((uintmax_t) 0x5c04U << 28 | 0x24ce751U) + << 28 | 0xf620c8dU) + << 28 | 0x9e9f0c3U) + << 28 | 0xf9e7fd9U), + UINTMAX_MAX / 4201) +P (10, 48, + (((((uintmax_t) 0xa4cfU << 28 | 0x6d1fac5U) + << 28 | 0x93e8e60U) + << 28 | 0xa93f876U) + << 28 | 0x2e914bbU), + UINTMAX_MAX / 4211) +P (6, 44, + (((((uintmax_t) 0x16b4U << 28 | 0x4c7d8a9U) + << 28 | 0x7e358b1U) + << 28 | 0x4371f24U) + << 28 | 0x7c159c9U), + UINTMAX_MAX / 4217) +P (2, 52, + (((((uintmax_t) 0x7d2dU << 28 | 0xb0c132cU) + << 28 | 0x9926a6dU) + << 28 | 0xd3b4844U) + << 28 | 0x71d4eb3U), + UINTMAX_MAX / 4219) +P (10, 44, + (((((uintmax_t) 0xc12aU << 28 | 0x5044c45U) + << 28 | 0xfa4f4cdU) + << 28 | 0x172f470U) + << 28 | 0x1c1684dU), + UINTMAX_MAX / 4229) +P (2, 52, + (((((uintmax_t) 0x3b6aU << 28 | 0xabf51beU) + << 28 | 0x4a6c103U) + << 28 | 0x72e686eU) + << 28 | 0xd8bb537U), + UINTMAX_MAX / 4231) +P (10, 48, + (((((uintmax_t) 0x0b0bU << 28 | 0xe43ba38U) + << 28 | 0x61105bcU) + << 28 | 0x07f7ca6U) + << 28 | 0x5c5b071U), + UINTMAX_MAX / 4241) +P (2, 54, + (((((uintmax_t) 0x1841U << 28 | 0x2954499U) + << 28 | 0xbb949abU) + << 28 | 0x2b6170cU) + << 28 | 0x3f78d9bU), + UINTMAX_MAX / 4243) +P (10, 74, + (((((uintmax_t) 0x67e4U << 28 | 0x8d552c3U) + << 28 | 0xde0d1f3U) + << 28 | 0xd74f461U) + << 28 | 0xfe6f5b5U), + UINTMAX_MAX / 4253) +P (6, 78, + (((((uintmax_t) 0xa030U << 28 | 0x161ea7bU) + << 28 | 0x38ae8dbU) + << 28 | 0xc13f4b3U) + << 28 | 0x1f3230bU), + UINTMAX_MAX / 4259) +P (2, 78, + (((((uintmax_t) 0xf2a9U << 28 | 0x8b90bb7U) + << 28 | 0x2eec1d1U) + << 28 | 0x420716eU) + << 28 | 0x3f1572dU), + UINTMAX_MAX / 4261) +P (10, 78, + (((((uintmax_t) 0xa0c1U << 28 | 0xb926e68U) + << 28 | 0x69f8ed5U) + << 28 | 0xbe2fd4dU) + << 28 | 0x805464fU), + UINTMAX_MAX / 4271) +P (2, 84, + (((((uintmax_t) 0xc4edU << 28 | 0x7ccb753U) + << 28 | 0xef76ec6U) + << 28 | 0x8b97c13U) + << 28 | 0x6943851U), + UINTMAX_MAX / 4273) +P (10, 80, + (((((uintmax_t) 0x5305U << 28 | 0xada2a32U) + << 28 | 0xce35e9eU) + << 28 | 0x27918afU) + << 28 | 0x7cfb473U), + UINTMAX_MAX / 4283) +P (6, 84, + (((((uintmax_t) 0x0b38U << 28 | 0xa4bcd9fU) + << 28 | 0xaa0cc5eU) + << 28 | 0xc8ab6c3U) + << 28 | 0x6ac7f41U), + UINTMAX_MAX / 4289) +P (8, 94, + (((((uintmax_t) 0xc8f3U << 28 | 0x8c6bf3dU) + << 28 | 0x8adf696U) + << 28 | 0x4076331U) + << 28 | 0xdd90979U), + UINTMAX_MAX / 4297) +P (30, 70, + (((((uintmax_t) 0x3ed4U << 28 | 0xdeb0e60U) + << 28 | 0x6fb3530U) + << 28 | 0x198eff7U) + << 28 | 0x7b002d7U), + UINTMAX_MAX / 4327) +P (10, 72, + (((((uintmax_t) 0xe304U << 28 | 0x8b8a2eaU) + << 28 | 0x19da93aU) + << 28 | 0xf7cb958U) + << 28 | 0x3ece011U), + UINTMAX_MAX / 4337) +P (2, 82, + (((((uintmax_t) 0x63b5U << 28 | 0xa908ca7U) + << 28 | 0xcb9bb34U) + << 28 | 0xce06f64U) + << 28 | 0x3d9883bU), + UINTMAX_MAX / 4339) +P (10, 74, + (((((uintmax_t) 0xd58fU << 28 | 0x1940b11U) + << 28 | 0x0300879U) + << 28 | 0xf767e52U) + << 28 | 0x8708c55U), + UINTMAX_MAX / 4349) +P (8, 84, + (((((uintmax_t) 0xa973U << 28 | 0xcee1454U) + << 28 | 0x5fa7a18U) + << 28 | 0x5332d2eU) + << 28 | 0xf2313cdU), + UINTMAX_MAX / 4357) +P (6, 84, + (((((uintmax_t) 0xc544U << 28 | 0x1f37189U) + << 28 | 0x5bd3a43U) + << 28 | 0xb611b84U) + << 28 | 0xc8332a3U), + UINTMAX_MAX / 4363) +P (10, 78, + (((((uintmax_t) 0xc201U << 28 | 0x49b4038U) + << 28 | 0x330c3c2U) + << 28 | 0xe215e4fU) + << 28 | 0x43bb63dU), + UINTMAX_MAX / 4373) +P (18, 66, + (((((uintmax_t) 0xfcf7U << 28 | 0xe56a2a8U) + << 28 | 0xf4dd4f9U) + << 28 | 0x4b9dd22U) + << 28 | 0xce44e97U), + UINTMAX_MAX / 4391) +P (6, 66, + (((((uintmax_t) 0xc364U << 28 | 0x3300862U) + << 28 | 0x47258d8U) + << 28 | 0x95834a1U) + << 28 | 0xdb166a5U), + UINTMAX_MAX / 4397) +P (12, 72, + (((((uintmax_t) 0xa5f1U << 28 | 0xb76bd2bU) + << 28 | 0x5f83834U) + << 28 | 0x7d2f16dU) + << 28 | 0x19b8d09U), + UINTMAX_MAX / 4409) +P (12, 62, + (((((uintmax_t) 0x9b97U << 28 | 0x89df750U) + << 28 | 0x6e4081bU) + << 28 | 0x54d4dc4U) + << 28 | 0x5b7d98dU), + UINTMAX_MAX / 4421) +P (2, 70, + (((((uintmax_t) 0x612dU << 28 | 0xe5f44efU) + << 28 | 0x2839e11U) + << 28 | 0x7ac30d9U) + << 28 | 0xa044877U), + UINTMAX_MAX / 4423) +P (18, 66, + (((((uintmax_t) 0x9811U << 28 | 0x1015369U) + << 28 | 0x6e9ec0eU) + << 28 | 0x10b78a6U) + << 28 | 0x7a526e9U), + UINTMAX_MAX / 4441) +P (6, 66, + (((((uintmax_t) 0xa197U << 28 | 0x1cf4c64U) + << 28 | 0x2a99792U) + << 28 | 0xda68a81U) + << 28 | 0x8688a9fU), + UINTMAX_MAX / 4447) +P (4, 66, + (((((uintmax_t) 0x0f02U << 28 | 0xeeeb01cU) + << 28 | 0x870bacfU) + << 28 | 0x2b6c87fU) + << 28 | 0x741f84bU), + UINTMAX_MAX / 4451) +P (6, 62, + (((((uintmax_t) 0x8d2eU << 28 | 0x94fe559U) + << 28 | 0x50d09d2U) + << 28 | 0x64f9bd4U) + << 28 | 0x1e18ed9U), + UINTMAX_MAX / 4457) +P (6, 60, + (((((uintmax_t) 0xa84bU << 28 | 0xb74450fU) + << 28 | 0xe38c973U) + << 28 | 0x3cbeaa9U) + << 28 | 0x7166d8fU), + UINTMAX_MAX / 4463) +P (18, 66, + (((((uintmax_t) 0x495aU << 28 | 0xe4dcfaaU) + << 28 | 0xfd8b1c9U) + << 28 | 0xf475b02U) + << 28 | 0x1d22e81U), + UINTMAX_MAX / 4481) +P (2, 66, + (((((uintmax_t) 0x6837U << 28 | 0x46fb256U) + << 28 | 0x74d6073U) + << 28 | 0x1f76f2eU) + << 28 | 0xc4c852bU), + UINTMAX_MAX / 4483) +P (10, 68, + (((((uintmax_t) 0xf6ffU << 28 | 0x5f8d222U) + << 28 | 0x12931daU) + << 28 | 0xf6f0c97U) + << 28 | 0x8f69945U), + UINTMAX_MAX / 4493) +P (14, 60, + (((((uintmax_t) 0xd49aU << 28 | 0xb982b2bU) + << 28 | 0x1c92174U) + << 28 | 0x9c8ad20U) + << 28 | 0xc61ec93U), + UINTMAX_MAX / 4507) +P (6, 70, + (((((uintmax_t) 0x2f4fU << 28 | 0x04983ffU) + << 28 | 0xc5e9e09U) + << 28 | 0x307ff8bU) + << 28 | 0xd3c1261U), + UINTMAX_MAX / 4513) +P (4, 74, + (((((uintmax_t) 0xadefU << 28 | 0x566dd5fU) + << 28 | 0x282eb33U) + << 28 | 0x4a69fb5U) + << 28 | 0xa486e2dU), + UINTMAX_MAX / 4517) +P (2, 78, + (((((uintmax_t) 0xd118U << 28 | 0x137ccc9U) + << 28 | 0xe647f1fU) + << 28 | 0x36c7bf3U) + << 28 | 0x1578617U), + UINTMAX_MAX / 4519) +P (4, 80, + (((((uintmax_t) 0x01cfU << 28 | 0xa9f7f67U) + << 28 | 0xdc3aa31U) + << 28 | 0xebbcc27U) + << 28 | 0x9ea6103U), + UINTMAX_MAX / 4523) +P (24, 74, + (((((uintmax_t) 0x9c1fU << 28 | 0x4da38ddU) + << 28 | 0x2657442U) + << 28 | 0xe2aad11U) + << 28 | 0x9f466ebU), + UINTMAX_MAX / 4547) +P (2, 88, + (((((uintmax_t) 0x41acU << 28 | 0x994bcdcU) + << 28 | 0xd3d2c10U) + << 28 | 0x6ec05a0U) + << 28 | 0xab1450dU), + UINTMAX_MAX / 4549) +P (12, 78, + (((((uintmax_t) 0x556dU << 28 | 0x480324aU) + << 28 | 0x6d002b1U) + << 28 | 0xb38db92U) + << 28 | 0xa99e731U), + UINTMAX_MAX / 4561) +P (6, 76, + (((((uintmax_t) 0x9c39U << 28 | 0x2ce6456U) + << 28 | 0x52d9278U) + << 28 | 0x4ae377eU) + << 28 | 0x67071e7U), + UINTMAX_MAX / 4567) +P (16, 66, + (((((uintmax_t) 0xcdc8U << 28 | 0x79fec56U) + << 28 | 0x781893eU) + << 28 | 0x9e1471bU) + << 28 | 0xa6671d7U), + UINTMAX_MAX / 4583) +P (8, 60, + (((((uintmax_t) 0x375eU << 28 | 0xf621586U) + << 28 | 0x1b19982U) + << 28 | 0xc29b59dU) + << 28 | 0x4d73d0fU), + UINTMAX_MAX / 4591) +P (6, 60, + (((((uintmax_t) 0x75c7U << 28 | 0xfa35597U) + << 28 | 0xdcce0c2U) + << 28 | 0x3dd0712U) + << 28 | 0x8b5525dU), + UINTMAX_MAX / 4597) +P (6, 60, + (((((uintmax_t) 0x4083U << 28 | 0xb2ce1ccU) + << 28 | 0xf1d164dU) + << 28 | 0x4e5ce0eU) + << 28 | 0x9245133U), + UINTMAX_MAX / 4603) +P (18, 52, + (((((uintmax_t) 0x9d9cU << 28 | 0x64622aeU) + << 28 | 0x10824c8U) + << 28 | 0xfd1057cU) + << 28 | 0x09f8cc5U), + UINTMAX_MAX / 4621) +P (16, 42, + (((((uintmax_t) 0x02b4U << 28 | 0x87cfdbcU) + << 28 | 0x89230eaU) + << 28 | 0x1516e94U) + << 28 | 0xf394035U), + UINTMAX_MAX / 4637) +P (2, 52, + (((((uintmax_t) 0x32e1U << 28 | 0x4328c7fU) + << 28 | 0xce8e0b5U) + << 28 | 0xe3319c5U) + << 28 | 0x64ee9dfU), + UINTMAX_MAX / 4639) +P (4, 60, + (((((uintmax_t) 0xf929U << 28 | 0xbd10602U) + << 28 | 0x894a612U) + << 28 | 0x6a69f90U) + << 28 | 0xd822d8bU), + UINTMAX_MAX / 4643) +P (6, 72, + (((((uintmax_t) 0xa0bcU << 28 | 0x8b6d15cU) + << 28 | 0x03be950U) + << 28 | 0x1ed6348U) + << 28 | 0x857aa19U), + UINTMAX_MAX / 4649) +P (2, 72, + (((((uintmax_t) 0xf169U << 28 | 0xf4a94f1U) + << 28 | 0x86231deU) + << 28 | 0x344a324U) + << 28 | 0xeee1c83U), + UINTMAX_MAX / 4651) +P (6, 72, + (((((uintmax_t) 0xafdaU << 28 | 0x2e10d23U) + << 28 | 0x58ab11dU) + << 28 | 0xd9690cbU) + << 28 | 0x2c406d1U), + UINTMAX_MAX / 4657) +P (6, 70, + (((((uintmax_t) 0x70eeU << 28 | 0x0c3017bU) + << 28 | 0x7881908U) + << 28 | 0xd6c5178U) + << 28 | 0xd5e4387U), + UINTMAX_MAX / 4663) +P (10, 78, + (((((uintmax_t) 0x2b47U << 28 | 0x45bd0e3U) + << 28 | 0x051844cU) + << 28 | 0xea4050aU) + << 28 | 0x3e8fdc1U), + UINTMAX_MAX / 4673) +P (6, 80, + (((((uintmax_t) 0x5aa8U << 28 | 0x9fc2b8dU) + << 28 | 0x1a891c1U) + << 28 | 0x14a06acU) + << 28 | 0xc83f777U), + UINTMAX_MAX / 4679) +P (12, 92, + (((((uintmax_t) 0x834dU << 28 | 0x385f9c7U) + << 28 | 0x5a89320U) + << 28 | 0xb060ebcU) + << 28 | 0x0ea01dbU), + UINTMAX_MAX / 4691) +P (12, 84, + (((((uintmax_t) 0xcbb0U << 28 | 0x86fea3aU) + << 28 | 0x06a40feU) + << 28 | 0x50045acU) + << 28 | 0xb78c99fU), + UINTMAX_MAX / 4703) +P (18, 68, + (((((uintmax_t) 0x4bceU << 28 | 0xc35242bU) + << 28 | 0x29eaa29U) + << 28 | 0x1a68705U) + << 28 | 0xb196e91U), + UINTMAX_MAX / 4721) +P (2, 70, + (((((uintmax_t) 0x1cf1U << 28 | 0xbea1a20U) + << 28 | 0x324cdc1U) + << 28 | 0x042c724U) + << 28 | 0x273e2bbU), + UINTMAX_MAX / 4723) +P (6, 70, + (((((uintmax_t) 0x530aU << 28 | 0xaa16d83U) + << 28 | 0x622522cU) + << 28 | 0xee680bbU) + << 28 | 0x165b7c9U), + UINTMAX_MAX / 4729) +P (4, 68, + (((((uintmax_t) 0x6dbeU << 28 | 0xc4fd598U) + << 28 | 0x42343fdU) + << 28 | 0x2ff9f12U) + << 28 | 0xe0776d5U), + UINTMAX_MAX / 4733) +P (18, 62, + (((((uintmax_t) 0x9327U << 28 | 0xd1e0357U) + << 28 | 0x3cba016U) + << 28 | 0x6a5da63U) + << 28 | 0xaf2cc6fU), + UINTMAX_MAX / 4751) +P (8, 58, + (((((uintmax_t) 0xfe7eU << 28 | 0x69c1b53U) + << 28 | 0xa5d7dedU) + << 28 | 0xd16a593U) + << 28 | 0x0408d27U), + UINTMAX_MAX / 4759) +P (24, 48, + (((((uintmax_t) 0xdba8U << 28 | 0x6fc17c3U) + << 28 | 0xa04d12aU) + << 28 | 0xdf30c26U) + << 28 | 0x528844fU), + UINTMAX_MAX / 4783) +P (4, 74, + (((((uintmax_t) 0x4928U << 28 | 0x7ba43b4U) + << 28 | 0x0f9d99aU) + << 28 | 0x48d6572U) + << 28 | 0xb5eec7bU), + UINTMAX_MAX / 4787) +P (2, 82, + (((((uintmax_t) 0xfd7cU << 28 | 0xd1c2bd5U) + << 28 | 0x72fbc6eU) + << 28 | 0x8bf2877U) + << 28 | 0x503cb9dU), + UINTMAX_MAX / 4789) +P (4, 84, + (((((uintmax_t) 0x1951U << 28 | 0x21b3d5eU) + << 28 | 0x975e0eaU) + << 28 | 0x27a191aU) + << 28 | 0x7045389U), + UINTMAX_MAX / 4793) +P (6, 90, + (((((uintmax_t) 0xced1U << 28 | 0x00e827bU) + << 28 | 0x0325b6eU) + << 28 | 0xb091f34U) + << 28 | 0xdd45d3fU), + UINTMAX_MAX / 4799) +P (2, 102, + (((((uintmax_t) 0xe394U << 28 | 0x4a02e12U) + << 28 | 0x05dd8dcU) + << 28 | 0x8a6cabbU) + << 28 | 0x2937d41U), + UINTMAX_MAX / 4801) +P (12, 96, + (((((uintmax_t) 0x3e2dU << 28 | 0xa2eb33fU) + << 28 | 0x746e6bcU) + << 28 | 0x2f04f25U) + << 28 | 0x4922a05U), + UINTMAX_MAX / 4813) +P (4, 102, + (((((uintmax_t) 0xf205U << 28 | 0xd890fadU) + << 28 | 0x84cf441U) + << 28 | 0x431f4d6U) + << 28 | 0xeb38631U), + UINTMAX_MAX / 4817) +P (14, 100, + (((((uintmax_t) 0x7974U << 28 | 0xa2271b8U) + << 28 | 0x09c017bU) + << 28 | 0xd717435U) + << 28 | 0xa08291fU), + UINTMAX_MAX / 4831) +P (30, 72, + (((((uintmax_t) 0xf434U << 28 | 0x0837312U) + << 28 | 0x2b4a342U) + << 28 | 0x32df9c9U) + << 28 | 0x1fc1a55U), + UINTMAX_MAX / 4861) +P (10, 66, + (((((uintmax_t) 0x4c78U << 28 | 0x09ab985U) + << 28 | 0xc13f8a4U) + << 28 | 0x651e1d5U) + << 28 | 0x382eab7U), + UINTMAX_MAX / 4871) +P (6, 66, + (((((uintmax_t) 0x9273U << 28 | 0x60376e4U) + << 28 | 0x8c0bf7cU) + << 28 | 0xfb5409dU) + << 28 | 0xe4cf3c5U), + UINTMAX_MAX / 4877) +P (12, 62, + (((((uintmax_t) 0x47a1U << 28 | 0xbf627e6U) + << 28 | 0x7276dcdU) + << 28 | 0xd636fb0U) + << 28 | 0x68b9929U), + UINTMAX_MAX / 4889) +P (14, 54, + (((((uintmax_t) 0x3f55U << 28 | 0x93b5db8U) + << 28 | 0xe2d01eeU) + << 28 | 0x8f95e74U) + << 28 | 0x0462c97U), + UINTMAX_MAX / 4903) +P (6, 58, + (((((uintmax_t) 0x29aaU << 28 | 0xc9d12b8U) + << 28 | 0xb650349U) + << 28 | 0x0f97b3aU) + << 28 | 0x758b4a5U), + UINTMAX_MAX / 4909) +P (10, 50, + (((((uintmax_t) 0x3c51U << 28 | 0x65394caU) + << 28 | 0x8d3eb64U) + << 28 | 0x1431563U) + << 28 | 0xc441287U), + UINTMAX_MAX / 4919) +P (12, 42, + (((((uintmax_t) 0xf258U << 28 | 0x91c808bU) + << 28 | 0x8d292b7U) + << 28 | 0x43dad3eU) + << 28 | 0xc45916bU), + UINTMAX_MAX / 4931) +P (2, 54, + (((((uintmax_t) 0x708fU << 28 | 0xa57e92aU) + << 28 | 0x8098c7bU) + << 28 | 0x188be8fU) + << 28 | 0x55c878dU), + UINTMAX_MAX / 4933) +P (4, 56, + (((((uintmax_t) 0x983dU << 28 | 0xcf2775dU) + << 28 | 0xcd7ead8U) + << 28 | 0x05648b2U) + << 28 | 0xca54ef9U), + UINTMAX_MAX / 4937) +P (6, 56, + (((((uintmax_t) 0x729cU << 28 | 0xb7c09bcU) + << 28 | 0x91a2776U) + << 28 | 0xdbe6eefU) + << 28 | 0x60123afU), + UINTMAX_MAX / 4943) +P (8, 255, + (((((uintmax_t) 0xe8f0U << 28 | 0x5536727U) + << 28 | 0xa8b8137U) + << 28 | 0x11525e6U) + << 28 | 0xa9e8867U), + UINTMAX_MAX / 4951) +P (6, 255, + (((((uintmax_t) 0xbdf2U << 28 | 0x781fd01U) + << 28 | 0x3014a85U) + << 28 | 0xc2215cbU) + << 28 | 0x383d8f5U), + UINTMAX_MAX / 4957) +P (10, 255, + (((((uintmax_t) 0x0439U << 28 | 0xee5f8e3U) + << 28 | 0x30656e5U) + << 28 | 0x8f554c8U) + << 28 | 0x9825857U), + UINTMAX_MAX / 4967) +P (2, 255, + (((((uintmax_t) 0x77adU << 28 | 0xfb283c9U) + << 28 | 0x63b0a8fU) + << 28 | 0xbd3b17cU) + << 28 | 0x01dacd9U), + UINTMAX_MAX / 4969) +P (4, 255, + (((((uintmax_t) 0x5d7bU << 28 | 0xe851f3fU) + << 28 | 0x443554cU) + << 28 | 0x8c39dc7U) + << 28 | 0xaedee65U), + UINTMAX_MAX / 4973) +P (14, 255, + (((((uintmax_t) 0x373cU << 28 | 0x1c8a99bU) + << 28 | 0x1412465U) + << 28 | 0x3ac6ddaU) + << 28 | 0x86cd3b3U), + UINTMAX_MAX / 4987) +P (6, 255, + (((((uintmax_t) 0x5b50U << 28 | 0xa687decU) + << 28 | 0x6a07b0dU) + << 28 | 0x61c6791U) + << 28 | 0xa9c2c81U), + UINTMAX_MAX / 4993) +P (6, 255, + (((((uintmax_t) 0x0b44U << 28 | 0x292c4bfU) + << 28 | 0xef9cdb6U) + << 28 | 0x27a3009U) + << 28 | 0x0354237U), + UINTMAX_MAX / 4999) + +#undef FIRST_OMITTED_PRIME +#define FIRST_OMITTED_PRIME 5003 diff --git a/src/printenv.c b/src/printenv.c new file mode 100644 index 0000000..54d52e9 --- /dev/null +++ b/src/printenv.c @@ -0,0 +1,154 @@ +/* printenv -- print all or part of environment + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Usage: printenv [variable...] + + If no arguments are given, print the entire environment. + If one or more variable names are given, print the value of + each one that is set, and nothing for ones that are not set. + + Exit status: + 0 if all variables specified were found + 1 if not + 2 if some other error occurred + + David MacKenzie and Richard Mlynarik */ + +#include +#include +#include +#include + +#include "system.h" + +/* Exit status for syntax errors, etc. */ +enum { PRINTENV_FAILURE = 2 }; + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "printenv" + +#define AUTHORS \ + proper_name ("David MacKenzie"), \ + proper_name ("Richard Mlynarik") + +static struct option const longopts[] = +{ + {"null", no_argument, nullptr, '0'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [VARIABLE]...\n\ +Print the values of the specified environment VARIABLE(s).\n\ +If no VARIABLE is specified, print name and value pairs for them all.\n\ +\n\ +"), + program_name); + fputs (_("\ + -0, --null end each output line with NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + char **env; + char *ep, *ap; + int i; + bool ok; + int optc; + bool opt_nul_terminate_output = false; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (PRINTENV_FAILURE); + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "+iu:0", longopts, nullptr)) != -1) + { + switch (optc) + { + case '0': + opt_nul_terminate_output = true; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (PRINTENV_FAILURE); + } + } + + if (optind >= argc) + { + for (env = environ; *env != nullptr; ++env) + printf ("%s%c", *env, opt_nul_terminate_output ? '\0' : '\n'); + ok = true; + } + else + { + int matches = 0; + + for (i = optind; i < argc; ++i) + { + bool matched = false; + + /* 'printenv a=b' is silent, even if 'a=b=c' is in environ. */ + if (strchr (argv[i], '=')) + continue; + + for (env = environ; *env; ++env) + { + ep = *env; + ap = argv[i]; + while (*ep != '\0' && *ap != '\0' && *ep++ == *ap++) + { + if (*ep == '=' && *ap == '\0') + { + printf ("%s%c", ep + 1, + opt_nul_terminate_output ? '\0' : '\n'); + matched = true; + break; + } + } + } + + matches += matched; + } + + ok = (matches == argc - optind); + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/printf.c b/src/printf.c new file mode 100644 index 0000000..063f077 --- /dev/null +++ b/src/printf.c @@ -0,0 +1,725 @@ +/* printf - format and print data + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Usage: printf format [argument...] + + A front end to the printf function that lets it be used from the shell. + + Backslash escapes: + + \" = double quote + \\ = backslash + \a = alert (bell) + \b = backspace + \c = produce no further output + \e = escape + \f = form feed + \n = new line + \r = carriage return + \t = horizontal tab + \v = vertical tab + \ooo = octal number (ooo is 1 to 3 digits) + \xhh = hexadecimal number (hhh is 1 to 2 digits) + \uhhhh = 16-bit Unicode character (hhhh is 4 digits) + \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits) + + Additional directive: + + %b = print an argument string, interpreting backslash escapes, + except that octal escapes are of the form \0 or \0ooo. + + %q = print an argument string in a format that can be + reused as shell input. Escaped characters used the proposed + POSIX $'' syntax supported by most shells. + + The 'format' argument is re-used as many times as necessary + to convert all of the given arguments. + + David MacKenzie */ + +#include +#include +#include +#include + +#include "system.h" +#include "cl-strtod.h" +#include "quote.h" +#include "unicodeio.h" +#include "xprintf.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "printf" + +#define AUTHORS proper_name ("David MacKenzie") + +#define isodigit(c) ((c) >= '0' && (c) <= '7') +#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \ + (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0') +#define octtobin(c) ((c) - '0') + +/* The value to return to the calling program. */ +static int exit_status; + +/* True if the POSIXLY_CORRECT environment variable is set. */ +static bool posixly_correct; + +/* This message appears in N_() here rather than just in _() below because + the sole use would have been in a #define. */ +static char const *const cfcc_msg = + N_("warning: %s: character(s) following character constant have been ignored"); + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s FORMAT [ARGUMENT]...\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + fputs (_("\ +Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +FORMAT controls the output as in C printf. Interpreted sequences are:\n\ +\n\ + \\\" double quote\n\ +"), stdout); + fputs (_("\ + \\\\ backslash\n\ + \\a alert (BEL)\n\ + \\b backspace\n\ + \\c produce no further output\n\ + \\e escape\n\ + \\f form feed\n\ + \\n new line\n\ + \\r carriage return\n\ + \\t horizontal tab\n\ + \\v vertical tab\n\ +"), stdout); + fputs (_("\ + \\NNN byte with octal value NNN (1 to 3 digits)\n\ + \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\ + \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\ + \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\ +"), stdout); + fputs (_("\ + %% a single %\n\ + %b ARGUMENT as a string with '\\' escapes interpreted,\n\ + except that octal escapes are of the form \\0 or \\0NNN\n\ + %q ARGUMENT is printed in a format that can be reused as shell input,\n\ + escaping non-printable characters with the proposed POSIX $'' syntax.\ +\n\n\ +and all C format specifications ending with one of diouxXfeEgGcs, with\n\ +ARGUMENTs converted to proper type first. Variable widths are handled.\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +verify_numeric (char const *s, char const *end) +{ + if (errno) + { + error (0, errno, "%s", quote (s)); + exit_status = EXIT_FAILURE; + } + else if (*end) + { + if (s == end) + error (0, 0, _("%s: expected a numeric value"), quote (s)); + else + error (0, 0, _("%s: value not completely converted"), quote (s)); + exit_status = EXIT_FAILURE; + } +} + +#define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \ +static TYPE \ +FUNC_NAME (char const *s) \ +{ \ + char *end; \ + TYPE val; \ + \ + if ((*s == '\"' || *s == '\'') && *(s + 1)) \ + { \ + unsigned char ch = *++s; \ + val = ch; \ + \ + if (MB_CUR_MAX > 1 && *(s + 1)) \ + { \ + mbstate_t mbstate = { 0, }; \ + wchar_t wc; \ + size_t slen = strlen (s); \ + ssize_t bytes; \ + bytes = mbrtowc (&wc, s, slen, &mbstate); \ + if (0 < bytes) \ + { \ + val = wc; \ + s += bytes - 1; \ + } \ + } \ + \ + /* If POSIXLY_CORRECT is not set, then give a warning that there \ + are characters following the character constant and that GNU \ + printf is ignoring those characters. If POSIXLY_CORRECT *is* \ + set, then don't give the warning. */ \ + if (*++s != 0 && !posixly_correct) \ + error (0, 0, _(cfcc_msg), s); \ + } \ + else \ + { \ + errno = 0; \ + val = (LIB_FUNC_EXPR); \ + verify_numeric (s, end); \ + } \ + return val; \ +} \ + +STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0)) +STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0)) +STRTOX (long double, vstrtold, cl_strtold (s, &end)) + +/* Output a single-character \ escape. */ + +static void +print_esc_char (char c) +{ + switch (c) + { + case 'a': /* Alert. */ + putchar ('\a'); + break; + case 'b': /* Backspace. */ + putchar ('\b'); + break; + case 'c': /* Cancel the rest of the output. */ + exit (EXIT_SUCCESS); + break; + case 'e': /* Escape. */ + putchar ('\x1B'); + break; + case 'f': /* Form feed. */ + putchar ('\f'); + break; + case 'n': /* New line. */ + putchar ('\n'); + break; + case 'r': /* Carriage return. */ + putchar ('\r'); + break; + case 't': /* Horizontal tab. */ + putchar ('\t'); + break; + case 'v': /* Vertical tab. */ + putchar ('\v'); + break; + default: + putchar (c); + break; + } +} + +/* Print a \ escape sequence starting at ESCSTART. + Return the number of characters in the escape sequence + besides the backslash. + If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o + is an octal digit; otherwise they are of the form \ooo. */ + +static int +print_esc (char const *escstart, bool octal_0) +{ + char const *p = escstart + 1; + int esc_value = 0; /* Value of \nnn escape. */ + int esc_length; /* Length of \nnn escape. */ + + if (*p == 'x') + { + /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. */ + for (esc_length = 0, ++p; + esc_length < 2 && isxdigit (to_uchar (*p)); + ++esc_length, ++p) + esc_value = esc_value * 16 + hextobin (*p); + if (esc_length == 0) + error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape")); + putchar (esc_value); + } + else if (isodigit (*p)) + { + /* Parse \0ooo (if octal_0 && *p == '0') or \ooo (otherwise). + Allow \ooo if octal_0 && *p != '0'; this is an undocumented + extension to POSIX that is compatible with Bash 2.05b. */ + for (esc_length = 0, p += octal_0 && *p == '0'; + esc_length < 3 && isodigit (*p); + ++esc_length, ++p) + esc_value = esc_value * 8 + octtobin (*p); + putchar (esc_value); + } + else if (*p && strchr ("\"\\abcefnrtv", *p)) + print_esc_char (*p++); + else if (*p == 'u' || *p == 'U') + { + char esc_char = *p; + unsigned int uni_value; + + uni_value = 0; + for (esc_length = (esc_char == 'u' ? 4 : 8), ++p; + esc_length > 0; + --esc_length, ++p) + { + if (! isxdigit (to_uchar (*p))) + error (EXIT_FAILURE, 0, _("missing hexadecimal number in escape")); + uni_value = uni_value * 16 + hextobin (*p); + } + + /* Error for invalid code points 0000D800 through 0000DFFF inclusive. + Note print_unicode_char() would print the literal \u.. in this case. */ + if (uni_value >= 0xd800 && uni_value <= 0xdfff) + error (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"), + esc_char, (esc_char == 'u' ? 4 : 8), uni_value); + + print_unicode_char (stdout, uni_value, 0); + } + else + { + putchar ('\\'); + if (*p) + { + putchar (*p); + p++; + } + } + return p - escstart - 1; +} + +/* Print string STR, evaluating \ escapes. */ + +static void +print_esc_string (char const *str) +{ + for (; *str; str++) + if (*str == '\\') + str += print_esc (str, true); + else + putchar (*str); +} + +/* Evaluate a printf conversion specification. START is the start of + the directive, LENGTH is its length, and CONVERSION specifies the + type of conversion. LENGTH does not include any length modifier or + the conversion specifier itself. FIELD_WIDTH and PRECISION are the + field width and precision for '*' values, if HAVE_FIELD_WIDTH and + HAVE_PRECISION are true, respectively. ARGUMENT is the argument to + be formatted. */ + +static void +print_direc (char const *start, size_t length, char conversion, + bool have_field_width, int field_width, + bool have_precision, int precision, + char const *argument) +{ + char *p; /* Null-terminated copy of % directive. */ + + /* Create a null-terminated copy of the % directive, with an + intmax_t-wide length modifier substituted for any existing + integer length modifier. */ + { + char *q; + char const *length_modifier; + size_t length_modifier_len; + + switch (conversion) + { + case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': + length_modifier = PRIdMAX; + length_modifier_len = sizeof PRIdMAX - 2; + break; + + case 'a': case 'e': case 'f': case 'g': + case 'A': case 'E': case 'F': case 'G': + length_modifier = "L"; + length_modifier_len = 1; + break; + + default: + length_modifier = start; /* Any valid pointer will do. */ + length_modifier_len = 0; + break; + } + + p = xmalloc (length + length_modifier_len + 2); + q = mempcpy (p, start, length); + q = mempcpy (q, length_modifier, length_modifier_len); + *q++ = conversion; + *q = '\0'; + } + + switch (conversion) + { + case 'd': + case 'i': + { + intmax_t arg = vstrtoimax (argument); + if (!have_field_width) + { + if (!have_precision) + xprintf (p, arg); + else + xprintf (p, precision, arg); + } + else + { + if (!have_precision) + xprintf (p, field_width, arg); + else + xprintf (p, field_width, precision, arg); + } + } + break; + + case 'o': + case 'u': + case 'x': + case 'X': + { + uintmax_t arg = vstrtoumax (argument); + if (!have_field_width) + { + if (!have_precision) + xprintf (p, arg); + else + xprintf (p, precision, arg); + } + else + { + if (!have_precision) + xprintf (p, field_width, arg); + else + xprintf (p, field_width, precision, arg); + } + } + break; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + { + long double arg = vstrtold (argument); + if (!have_field_width) + { + if (!have_precision) + xprintf (p, arg); + else + xprintf (p, precision, arg); + } + else + { + if (!have_precision) + xprintf (p, field_width, arg); + else + xprintf (p, field_width, precision, arg); + } + } + break; + + case 'c': + if (!have_field_width) + xprintf (p, *argument); + else + xprintf (p, field_width, *argument); + break; + + case 's': + if (!have_field_width) + { + if (!have_precision) + xprintf (p, argument); + else + xprintf (p, precision, argument); + } + else + { + if (!have_precision) + xprintf (p, field_width, argument); + else + xprintf (p, field_width, precision, argument); + } + break; + } + + free (p); +} + +/* Print the text in FORMAT, using ARGV (with ARGC elements) for + arguments to any '%' directives. + Return the number of elements of ARGV used. */ + +static int +print_formatted (char const *format, int argc, char **argv) +{ + int save_argc = argc; /* Preserve original value. */ + char const *f; /* Pointer into 'format'. */ + char const *direc_start; /* Start of % directive. */ + size_t direc_length; /* Length of % directive. */ + bool have_field_width; /* True if FIELD_WIDTH is valid. */ + int field_width = 0; /* Arg to first '*'. */ + bool have_precision; /* True if PRECISION is valid. */ + int precision = 0; /* Arg to second '*'. */ + char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */ + + for (f = format; *f; ++f) + { + switch (*f) + { + case '%': + direc_start = f++; + direc_length = 1; + have_field_width = have_precision = false; + if (*f == '%') + { + putchar ('%'); + break; + } + if (*f == 'b') + { + /* FIXME: Field width and precision are not supported + for %b, even though POSIX requires it. */ + if (argc > 0) + { + print_esc_string (*argv); + ++argv; + --argc; + } + break; + } + + if (*f == 'q') + { + if (argc > 0) + { + fputs (quotearg_style (shell_escape_quoting_style, *argv), + stdout); + ++argv; + --argc; + } + break; + } + + memset (ok, 0, sizeof ok); + ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] = + ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] = + ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1; + + for (;; f++, direc_length++) + switch (*f) + { +#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__ + case 'I': +#endif + case '\'': + ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = + ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0; + break; + case '-': case '+': case ' ': + break; + case '#': + ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0; + break; + case '0': + ok['c'] = ok['s'] = 0; + break; + default: + goto no_more_flag_characters; + } + no_more_flag_characters: + + if (*f == '*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t width = vstrtoimax (*argv); + if (INT_MIN <= width && width <= INT_MAX) + field_width = width; + else + error (EXIT_FAILURE, 0, _("invalid field width: %s"), + quote (*argv)); + ++argv; + --argc; + } + else + field_width = 0; + have_field_width = true; + } + else + while (ISDIGIT (*f)) + { + ++f; + ++direc_length; + } + if (*f == '.') + { + ++f; + ++direc_length; + ok['c'] = 0; + if (*f == '*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t prec = vstrtoimax (*argv); + if (prec < 0) + { + /* A negative precision is taken as if the + precision were omitted, so -1 is safe + here even if prec < INT_MIN. */ + precision = -1; + } + else if (INT_MAX < prec) + error (EXIT_FAILURE, 0, _("invalid precision: %s"), + quote (*argv)); + else + precision = prec; + ++argv; + --argc; + } + else + precision = 0; + have_precision = true; + } + else + while (ISDIGIT (*f)) + { + ++f; + ++direc_length; + } + } + + while (*f == 'l' || *f == 'L' || *f == 'h' + || *f == 'j' || *f == 't' || *f == 'z') + ++f; + + { + unsigned char conversion = *f; + int speclen = MIN (f + 1 - direc_start, INT_MAX); + if (! ok[conversion]) + error (EXIT_FAILURE, 0, + _("%.*s: invalid conversion specification"), + speclen, direc_start); + } + + print_direc (direc_start, direc_length, *f, + have_field_width, field_width, + have_precision, precision, + (argc <= 0 ? "" : (argc--, *argv++))); + break; + + case '\\': + f += print_esc (f, false); + break; + + default: + putchar (*f); + } + } + + return save_argc - argc; +} + +int +main (int argc, char **argv) +{ + char *format; + int args_used; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + exit_status = EXIT_SUCCESS; + + posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr); + + /* We directly parse options, rather than use parse_long_options, in + order to avoid accepting abbreviations. */ + if (argc == 2) + { + if (STREQ (argv[1], "--help")) + usage (EXIT_SUCCESS); + + if (STREQ (argv[1], "--version")) + { + version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS, + (char *) nullptr); + return EXIT_SUCCESS; + } + } + + /* The above handles --help and --version. + Since there is no other invocation of getopt, handle '--' here. */ + if (1 < argc && STREQ (argv[1], "--")) + { + --argc; + ++argv; + } + + if (argc <= 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + format = argv[1]; + argc -= 2; + argv += 2; + + do + { + args_used = print_formatted (format, argc, argv); + argc -= args_used; + argv += args_used; + } + while (args_used > 0 && argc > 0); + + if (argc > 0) + error (0, 0, + _("warning: ignoring excess arguments, starting with %s"), + quote (argv[0])); + + return exit_status; +} diff --git a/src/prog-fprintf.c b/src/prog-fprintf.c new file mode 100644 index 0000000..6c620c7 --- /dev/null +++ b/src/prog-fprintf.c @@ -0,0 +1,37 @@ +/* prog-fprintf.c - common formatting output functions and definitions + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include + +#include "system.h" + +#include "prog-fprintf.h" + +/* Display program name followed by variable list. + Used for e.g. verbose output */ +void +prog_fprintf (FILE *fp, char const *fmt, ...) +{ + va_list ap; + fputs (program_name, fp); + fputs (": ", fp); + va_start (ap, fmt); + vfprintf (fp, fmt, ap); + va_end (ap); + fputc ('\n', fp); +} diff --git a/src/prog-fprintf.h b/src/prog-fprintf.h new file mode 100644 index 0000000..9d764d4 --- /dev/null +++ b/src/prog-fprintf.h @@ -0,0 +1,25 @@ +/* prog-fprintf.h - common formatting output functions and definitions + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef PROG_FPRINTF_H +# define PROG_FPRINTF_H + +# include + +extern void prog_fprintf (FILE *fp, char const *fmt, ...) + _GL_ATTRIBUTE_FORMAT ((__printf__, 2, 3)) _GL_ATTRIBUTE_NONNULL ((1, 2)); + +#endif diff --git a/src/ptx.c b/src/ptx.c new file mode 100644 index 0000000..62b95e8 --- /dev/null +++ b/src/ptx.c @@ -0,0 +1,2049 @@ +/* Permuted index for GNU, with keywords in their context. + Copyright (C) 1990-2023 Free Software Foundation, Inc. + François Pinard , 1988. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + François Pinard */ + +#include + +#include +#include +#include "system.h" +#include +#include "argmatch.h" +#include "fadvise.h" +#include "quote.h" +#include "read-file.h" +#include "stdio--.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "ptx" + +/* TRANSLATORS: Please translate "F. Pinard" to "François Pinard" + if "ç" (c-with-cedilla) is available in the translation's character + set and encoding. */ +#define AUTHORS proper_name_lite ("F. Pinard", "Fran\xc3\xa7ois Pinard") + +/* Number of possible characters in a byte. */ +#define CHAR_SET_SIZE 256 + +#define ISODIGIT(C) ((C) >= '0' && (C) <= '7') +#define HEXTOBIN(C) ((C) >= 'a' && (C) <= 'f' ? (C)-'a'+10 \ + : (C) >= 'A' && (C) <= 'F' ? (C)-'A'+10 : (C)-'0') +#define OCTTOBIN(C) ((C) - '0') + +/* Debugging the memory allocator. */ + +#if WITH_DMALLOC +# define MALLOC_FUNC_CHECK 1 +# include +#endif + +/* Global definitions. */ + +/* FIXME: There are many unchecked integer overflows in this file, + and in theory they could cause this command to have undefined + behavior given large inputs or options. This command should + diagnose any such overflow and exit. */ + +/* Program options. */ + +enum Format +{ + UNKNOWN_FORMAT, /* output format still unknown */ + DUMB_FORMAT, /* output for a dumb terminal */ + ROFF_FORMAT, /* output for 'troff' or 'nroff' */ + TEX_FORMAT /* output for 'TeX' or 'LaTeX' */ +}; + +static bool gnu_extensions = true; /* trigger all GNU extensions */ +static bool auto_reference = false; /* refs are 'file_name:line_number:' */ +static bool input_reference = false; /* refs at beginning of input lines */ +static bool right_reference = false; /* output refs after right context */ +static ptrdiff_t line_width = 72; /* output line width in characters */ +static ptrdiff_t gap_size = 3; /* number of spaces between output fields */ +static char const *truncation_string = "/"; + /* string used to mark line truncations */ +static char const *macro_name = "xx"; /* macro name for roff or TeX output */ +static enum Format output_format = UNKNOWN_FORMAT; + /* output format */ + +static bool ignore_case = false; /* fold lower to upper for sorting */ +static char const *break_file = nullptr; /* name of the 'Break chars' file */ +static char const *only_file = nullptr; /* name of the 'Only words' file */ +static char const *ignore_file = nullptr; /* name of the 'Ignore words' file */ + +/* Options that use regular expressions. */ +struct regex_data +{ + /* The original regular expression, as a string. */ + char const *string; + + /* The compiled regular expression, and its fastmap. */ + struct re_pattern_buffer pattern; + char fastmap[UCHAR_MAX + 1]; +}; + +static struct regex_data context_regex; /* end of context */ +static struct regex_data word_regex; /* keyword */ + +/* A BLOCK delimit a region in memory of arbitrary size, like the copy of a + whole file. A WORD is similar, except it is intended for smaller regions. + A WORD_TABLE may contain several WORDs. */ + +typedef struct + { + char *start; /* pointer to beginning of region */ + char *end; /* pointer to end + 1 of region */ + } +BLOCK; + +typedef struct + { + char *start; /* pointer to beginning of region */ + ptrdiff_t size; /* length of the region */ + } +WORD; + +typedef struct + { + WORD *start; /* array of WORDs */ + size_t alloc; /* allocated length */ + ptrdiff_t length; /* number of used entries */ + } +WORD_TABLE; + +/* Pattern description tables. */ + +/* For each character, provide its folded equivalent. */ +static unsigned char folded_chars[CHAR_SET_SIZE]; + +/* End of context pattern register indices. */ +static struct re_registers context_regs; + +/* Keyword pattern register indices. */ +static struct re_registers word_regs; + +/* A word characters fastmap is used only when no word regexp has been + provided. A word is then made up of a sequence of one or more characters + allowed by the fastmap. Contains !0 if character allowed in word. Not + only this is faster in most cases, but it simplifies the implementation + of the Break files. */ +static char word_fastmap[CHAR_SET_SIZE]; + +/* Maximum length of any word read. */ +static ptrdiff_t maximum_word_length; + +/* Maximum width of any reference used. */ +static ptrdiff_t reference_max_width; + +/* Ignore and Only word tables. */ + +static WORD_TABLE ignore_table; /* table of words to ignore */ +static WORD_TABLE only_table; /* table of words to select */ + +/* Source text table, and scanning macros. */ + +static int number_input_files; /* number of text input files */ +static intmax_t total_line_count; /* total number of lines seen so far */ +static char const **input_file_name; /* array of text input file names */ +static intmax_t *file_line_count; /* array of line count values at end */ + +static BLOCK *text_buffers; /* files to study */ + +/* SKIP_NON_WHITE used only for getting or skipping the reference. */ + +#define SKIP_NON_WHITE(cursor, limit) \ + while (cursor < limit && ! isspace (to_uchar (*cursor))) \ + cursor++ + +#define SKIP_WHITE(cursor, limit) \ + while (cursor < limit && isspace (to_uchar (*cursor))) \ + cursor++ + +#define SKIP_WHITE_BACKWARDS(cursor, start) \ + while (cursor > start && isspace (to_uchar (cursor[-1]))) \ + cursor-- + +#define SKIP_SOMETHING(cursor, limit) \ + if (word_regex.string) \ + { \ + regoff_t count; \ + count = re_match (&word_regex.pattern, cursor, limit - cursor, \ + 0, nullptr); \ + if (count == -2) \ + matcher_error (); \ + cursor += count == -1 ? 1 : count; \ + } \ + else if (word_fastmap[to_uchar (*cursor)]) \ + while (cursor < limit && word_fastmap[to_uchar (*cursor)]) \ + cursor++; \ + else \ + cursor++ + +/* Occurrences table. + + The 'keyword' pointer provides the central word, which is surrounded + by a left context and a right context. The 'keyword' and 'length' + field allow full 8-bit characters keys, even including NULs. At other + places in this program, the name 'keyafter' refers to the keyword + followed by its right context. + + The left context does not extend, towards the beginning of the file, + further than a distance given by the 'left' value. This value is + relative to the keyword beginning, it is usually negative. This + insures that, except for white space, we will never have to backward + scan the source text, when it is time to generate the final output + lines. + + The right context, indirectly attainable through the keyword end, does + not extend, towards the end of the file, further than a distance given + by the 'right' value. This value is relative to the keyword + beginning, it is usually positive. + + When automatic references are used, the 'reference' value is the + overall line number in all input files read so far, in this case, it + is of type intmax_t. When input references are used, the 'reference' + value indicates the distance between the keyword beginning and the + start of the reference field, and it fits in ptrdiff_t and is usually + negative. */ + +typedef struct + { + WORD key; /* description of the keyword */ + ptrdiff_t left; /* distance to left context start */ + ptrdiff_t right; /* distance to right context end */ + intmax_t reference; /* reference descriptor */ + int file_index; /* corresponding file */ + } +OCCURS; + +/* The various OCCURS tables are indexed by the language. But the time + being, there is no such multiple language support. */ + +static OCCURS *occurs_table[1]; /* all words retained from the read text */ +static size_t occurs_alloc[1]; /* allocated size of occurs_table */ +static ptrdiff_t number_of_occurs[1]; /* number of used slots in occurs_table */ + + +/* Communication among output routines. */ + +/* Indicate if special output processing is requested for each character. */ +static char edited_flag[CHAR_SET_SIZE]; + +/* Half of line width, reference excluded. */ +static ptrdiff_t half_line_width; + +/* Maximum width of before field. */ +static ptrdiff_t before_max_width; + +/* Maximum width of keyword-and-after field. */ +static ptrdiff_t keyafter_max_width; + +/* Length of string that flags truncation. */ +static ptrdiff_t truncation_string_length; + +/* When context is limited by lines, wraparound may happen on final output: + the 'head' pointer gives access to some supplementary left context which + will be seen at the end of the output line, the 'tail' pointer gives + access to some supplementary right context which will be seen at the + beginning of the output line. */ + +static BLOCK tail; /* tail field */ +static bool tail_truncation; /* flag truncation after the tail field */ + +static BLOCK before; /* before field */ +static bool before_truncation; /* flag truncation before the before field */ + +static BLOCK keyafter; /* keyword-and-after field */ +static bool keyafter_truncation; /* flag truncation after the keyafter field */ + +static BLOCK head; /* head field */ +static bool head_truncation; /* flag truncation before the head field */ + +static BLOCK reference; /* reference field for input reference mode */ + +/* Miscellaneous routines. */ + +/* Diagnose an error in the regular expression matcher. Then exit. */ + +static void +matcher_error (void) +{ + error (EXIT_FAILURE, errno, _("error in regular expression matcher")); +} + +/* Unescape STRING in-place. */ + +static void +unescape_string (char *string) +{ + char *cursor; /* cursor in result */ + int value; /* value of \nnn escape */ + int length; /* length of \nnn escape */ + + cursor = string; + + while (*string) + { + if (*string == '\\') + { + string++; + switch (*string) + { + case 'x': /* \xhhh escape, 3 chars maximum */ + value = 0; + for (length = 0, string++; + length < 3 && isxdigit (to_uchar (*string)); + length++, string++) + value = value * 16 + HEXTOBIN (*string); + if (length == 0) + { + *cursor++ = '\\'; + *cursor++ = 'x'; + } + else + *cursor++ = value; + break; + + case '0': /* \0ooo escape, 3 chars maximum */ + value = 0; + for (length = 0, string++; + length < 3 && ISODIGIT (*string); + length++, string++) + value = value * 8 + OCTTOBIN (*string); + *cursor++ = value; + break; + + case 'a': /* alert */ +#if __STDC__ + *cursor++ = '\a'; +#else + *cursor++ = 7; +#endif + string++; + break; + + case 'b': /* backspace */ + *cursor++ = '\b'; + string++; + break; + + case 'c': /* cancel the rest of the output */ + while (*string) + string++; + break; + + case 'f': /* form feed */ + *cursor++ = '\f'; + string++; + break; + + case 'n': /* new line */ + *cursor++ = '\n'; + string++; + break; + + case 'r': /* carriage return */ + *cursor++ = '\r'; + string++; + break; + + case 't': /* horizontal tab */ + *cursor++ = '\t'; + string++; + break; + + case 'v': /* vertical tab */ +#if __STDC__ + *cursor++ = '\v'; +#else + *cursor++ = 11; +#endif + string++; + break; + + case '\0': /* lone backslash at end of string */ + /* ignore it */ + break; + + default: + *cursor++ = '\\'; + *cursor++ = *string++; + break; + } + } + else + *cursor++ = *string++; + } + + *cursor = '\0'; +} + +/*--------------------------------------------------------------------------. +| Compile the regex represented by REGEX, diagnose and abort if any error. | +`--------------------------------------------------------------------------*/ + +static void +compile_regex (struct regex_data *regex) +{ + struct re_pattern_buffer *pattern = ®ex->pattern; + char const *string = regex->string; + char const *message; + + pattern->buffer = nullptr; + pattern->allocated = 0; + pattern->fastmap = regex->fastmap; + pattern->translate = ignore_case ? folded_chars : nullptr; + + message = re_compile_pattern (string, strlen (string), pattern); + if (message) + error (EXIT_FAILURE, 0, _("%s (for regexp %s)"), message, quote (string)); + + /* The fastmap should be compiled before 're_match'. The following + call is not mandatory, because 're_search' is always called sooner, + and it compiles the fastmap if this has not been done yet. */ + + re_compile_fastmap (pattern); +} + +/*------------------------------------------------------------------------. +| This will initialize various tables for pattern match and compiles some | +| regexps. | +`------------------------------------------------------------------------*/ + +static void +initialize_regex (void) +{ + int character; /* character value */ + + /* Initialize the case folding table. */ + + if (ignore_case) + for (character = 0; character < CHAR_SET_SIZE; character++) + folded_chars[character] = toupper (character); + + /* Unless the user already provided a description of the end of line or + end of sentence sequence, select an end of line sequence to compile. + If the user provided an empty definition, thus disabling end of line + or sentence feature, make it null to speed up tests. If GNU + extensions are enabled, use end of sentence like in GNU emacs. If + disabled, use end of lines. */ + + if (context_regex.string) + { + if (!*context_regex.string) + context_regex.string = nullptr; + } + else if (gnu_extensions && !input_reference) + context_regex.string = "[.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*"; + else + context_regex.string = "\n"; + + if (context_regex.string) + compile_regex (&context_regex); + + /* If the user has already provided a non-empty regexp to describe + words, compile it. Else, unless this has already been done through + a user provided Break character file, construct a fastmap of + characters that may appear in a word. If GNU extensions enabled, + include only letters of the underlying character set. If disabled, + include almost everything, even punctuation; stop only on white + space. */ + + if (word_regex.string) + compile_regex (&word_regex); + else if (!break_file) + { + if (gnu_extensions) + { + + /* Simulate \w+. */ + + for (character = 0; character < CHAR_SET_SIZE; character++) + word_fastmap[character] = !! isalpha (character); + } + else + { + + /* Simulate [^ \t\n]+. */ + + memset (word_fastmap, 1, CHAR_SET_SIZE); + word_fastmap[' '] = 0; + word_fastmap['\t'] = 0; + word_fastmap['\n'] = 0; + } + } +} + +/*------------------------------------------------------------------------. +| This routine will attempt to swallow a whole file name FILE_NAME into a | +| contiguous region of memory and return a description of it into BLOCK. | +| Standard input is assumed whenever FILE_NAME is null, empty or "-". | +| | +| Previously, in some cases, white space compression was attempted while | +| inputting text. This was defeating some regexps like default end of | +| sentence, which checks for two consecutive spaces. If white space | +| compression is ever reinstated, it should be in output routines. | +`------------------------------------------------------------------------*/ + +static void +swallow_file_in_memory (char const *file_name, BLOCK *block) +{ + size_t used_length; /* used length in memory buffer */ + + /* As special cases, a file name which is null or "-" indicates standard + input, which is already opened. In all other cases, open the file from + its name. */ + bool using_stdin = !file_name || !*file_name || STREQ (file_name, "-"); + if (using_stdin) + block->start = fread_file (stdin, 0, &used_length); + else + block->start = read_file (file_name, 0, &used_length); + + if (!block->start) + error (EXIT_FAILURE, errno, "%s", quotef (using_stdin ? "-" : file_name)); + + if (using_stdin) + clearerr (stdin); + + block->end = block->start + used_length; +} + +/* Sort and search routines. */ + +/*--------------------------------------------------------------------------. +| Compare two words, FIRST and SECOND, and return 0 if they are identical. | +| Return less than 0 if the first word goes before the second; return | +| greater than 0 if the first word goes after the second. | +| | +| If a word is indeed a prefix of the other, the shorter should go first. | +`--------------------------------------------------------------------------*/ + +static int +compare_words (const void *void_first, const void *void_second) +{ +#define first ((const WORD *) void_first) +#define second ((const WORD *) void_second) + ptrdiff_t length; /* minimum of two lengths */ + ptrdiff_t counter; /* cursor in words */ + int value; /* value of comparison */ + + length = first->size < second->size ? first->size : second->size; + + if (ignore_case) + { + for (counter = 0; counter < length; counter++) + { + value = (folded_chars [to_uchar (first->start[counter])] + - folded_chars [to_uchar (second->start[counter])]); + if (value != 0) + return value; + } + } + else + { + for (counter = 0; counter < length; counter++) + { + value = (to_uchar (first->start[counter]) + - to_uchar (second->start[counter])); + if (value != 0) + return value; + } + } + + return (first->size > second->size) - (first->size < second->size); +#undef first +#undef second +} + +/*-----------------------------------------------------------------------. +| Decides which of two OCCURS, FIRST or SECOND, should lexicographically | +| go first. In case of a tie, preserve the original order through a | +| pointer comparison. | +`-----------------------------------------------------------------------*/ + +static int +compare_occurs (const void *void_first, const void *void_second) +{ +#define first ((const OCCURS *) void_first) +#define second ((const OCCURS *) void_second) + int value; + + value = compare_words (&first->key, &second->key); + return (value ? value + : ((first->key.start > second->key.start) + - (first->key.start < second->key.start))); +#undef first +#undef second +} + +/* True if WORD appears in TABLE. Uses a binary search. */ + +ATTRIBUTE_PURE +static bool +search_table (WORD *word, WORD_TABLE *table) +{ + ptrdiff_t lowest; /* current lowest possible index */ + ptrdiff_t highest; /* current highest possible index */ + ptrdiff_t middle; /* current middle index */ + int value; /* value from last comparison */ + + lowest = 0; + highest = table->length - 1; + while (lowest <= highest) + { + middle = (lowest + highest) / 2; + value = compare_words (word, table->start + middle); + if (value < 0) + highest = middle - 1; + else if (value > 0) + lowest = middle + 1; + else + return true; + } + return false; +} + +/*---------------------------------------------------------------------. +| Sort the whole occurs table in memory. Presumably, 'qsort' does not | +| take intermediate copies or table elements, so the sort will be | +| stabilized throughout the comparison routine. | +`---------------------------------------------------------------------*/ + +static void +sort_found_occurs (void) +{ + + /* Only one language for the time being. */ + if (number_of_occurs[0]) + qsort (occurs_table[0], number_of_occurs[0], sizeof **occurs_table, + compare_occurs); +} + +/* Parameter files reading routines. */ + +/*----------------------------------------------------------------------. +| Read a file named FILE_NAME, containing a set of break characters. | +| Build a content to the array word_fastmap in which all characters are | +| allowed except those found in the file. Characters may be repeated. | +`----------------------------------------------------------------------*/ + +static void +digest_break_file (char const *file_name) +{ + BLOCK file_contents; /* to receive a copy of the file */ + char *cursor; /* cursor in file copy */ + + swallow_file_in_memory (file_name, &file_contents); + + /* Make the fastmap and record the file contents in it. */ + + memset (word_fastmap, 1, CHAR_SET_SIZE); + for (cursor = file_contents.start; cursor < file_contents.end; cursor++) + word_fastmap[to_uchar (*cursor)] = 0; + + if (!gnu_extensions) + { + + /* If GNU extensions are enabled, the only way to avoid newline as + a break character is to write all the break characters in the + file with no newline at all, not even at the end of the file. + If disabled, spaces, tabs and newlines are always considered as + break characters even if not included in the break file. */ + + word_fastmap[' '] = 0; + word_fastmap['\t'] = 0; + word_fastmap['\n'] = 0; + } + + /* Return the space of the file, which is no more required. */ + + free (file_contents.start); +} + +/*-----------------------------------------------------------------------. +| Read a file named FILE_NAME, containing one word per line, then | +| construct in TABLE a table of WORD descriptors for them. The routine | +| swallows the whole file in memory; this is at the expense of space | +| needed for newlines, which are useless; however, the reading is fast. | +`-----------------------------------------------------------------------*/ + +static void +digest_word_file (char const *file_name, WORD_TABLE *table) +{ + BLOCK file_contents; /* to receive a copy of the file */ + char *cursor; /* cursor in file copy */ + char *word_start; /* start of the current word */ + + swallow_file_in_memory (file_name, &file_contents); + + table->start = nullptr; + table->alloc = 0; + table->length = 0; + + /* Read the whole file. */ + + cursor = file_contents.start; + while (cursor < file_contents.end) + { + + /* Read one line, and save the word in contains. */ + + word_start = cursor; + while (cursor < file_contents.end && *cursor != '\n') + cursor++; + + /* Record the word in table if it is not empty. */ + + if (cursor > word_start) + { + if (table->length == table->alloc) + table->start = x2nrealloc (table->start, &table->alloc, + sizeof *table->start); + table->start[table->length].start = word_start; + table->start[table->length].size = cursor - word_start; + table->length++; + } + + /* This test allows for an incomplete line at end of file. */ + + if (cursor < file_contents.end) + cursor++; + } + + /* Finally, sort all the words read. */ + + qsort (table->start, table->length, sizeof table->start[0], compare_words); +} + +/* Keyword recognition and selection. */ + +/*----------------------------------------------------------------------. +| For each keyword in the source text, constructs an OCCURS structure. | +`----------------------------------------------------------------------*/ + +static void +find_occurs_in_text (int file_index) +{ + char *cursor; /* for scanning the source text */ + char *scan; /* for scanning the source text also */ + char *line_start; /* start of the current input line */ + char *line_scan; /* newlines scanned until this point */ + ptrdiff_t reference_length; /* length of reference in input mode */ + WORD possible_key; /* possible key, to ease searches */ + OCCURS *occurs_cursor; /* current OCCURS under construction */ + + char *context_start; /* start of left context */ + char *context_end; /* end of right context */ + char *word_start; /* start of word */ + char *word_end; /* end of word */ + char *next_context_start; /* next start of left context */ + + const BLOCK *text_buffer = &text_buffers[file_index]; + + /* reference_length is always used within 'if (input_reference)'. + However, GNU C diagnoses that it may be used uninitialized. The + following assignment is merely to shut it up. */ + + reference_length = 0; + + /* Tracking where lines start is helpful for reference processing. In + auto reference mode, this allows counting lines. In input reference + mode, this permits finding the beginning of the references. + + The first line begins with the file, skip immediately this very first + reference in input reference mode, to help further rejection any word + found inside it. Also, unconditionally assigning these variable has + the happy effect of shutting up lint. */ + + line_start = text_buffer->start; + line_scan = line_start; + if (input_reference) + { + SKIP_NON_WHITE (line_scan, text_buffer->end); + reference_length = line_scan - line_start; + SKIP_WHITE (line_scan, text_buffer->end); + } + + /* Process the whole buffer, one line or one sentence at a time. */ + + for (cursor = text_buffer->start; + cursor < text_buffer->end; + cursor = next_context_start) + { + + /* 'context_start' gets initialized before the processing of each + line, or once for the whole buffer if no end of line or sentence + sequence separator. */ + + context_start = cursor; + + /* If an end of line or end of sentence sequence is defined and + non-empty, 'next_context_start' will be recomputed to be the end of + each line or sentence, before each one is processed. If no such + sequence, then 'next_context_start' is set at the end of the whole + buffer, which is then considered to be a single line or sentence. + This test also accounts for the case of an incomplete line or + sentence at the end of the buffer. */ + + next_context_start = text_buffer->end; + if (context_regex.string) + switch (re_search (&context_regex.pattern, cursor, + text_buffer->end - cursor, + 0, text_buffer->end - cursor, &context_regs)) + { + case -2: + matcher_error (); + + case -1: + break; + + case 0: + error (EXIT_FAILURE, 0, + _("error: regular expression has a match of length zero:" + " %s"), + quote (context_regex.string)); + + default: + next_context_start = cursor + context_regs.end[0]; + break; + } + + /* Include the separator into the right context, but not any suffix + white space in this separator; this insures it will be seen in + output and will not take more space than necessary. */ + + context_end = next_context_start; + SKIP_WHITE_BACKWARDS (context_end, context_start); + + /* Read and process a single input line or sentence, one word at a + time. */ + + while (true) + { + if (word_regex.string) + + /* If a word regexp has been compiled, use it to skip at the + beginning of the next word. If there is no such word, exit + the loop. */ + + { + regoff_t r = re_search (&word_regex.pattern, cursor, + context_end - cursor, + 0, context_end - cursor, &word_regs); + if (r == -2) + matcher_error (); + if (r == -1) + break; + word_start = cursor + word_regs.start[0]; + word_end = cursor + word_regs.end[0]; + } + else + + /* Avoid re_search and use the fastmap to skip to the + beginning of the next word. If there is no more word in + the buffer, exit the loop. */ + + { + scan = cursor; + while (scan < context_end + && !word_fastmap[to_uchar (*scan)]) + scan++; + + if (scan == context_end) + break; + + word_start = scan; + + while (scan < context_end + && word_fastmap[to_uchar (*scan)]) + scan++; + + word_end = scan; + } + + /* Skip right to the beginning of the found word. */ + + cursor = word_start; + + /* Skip any zero length word. Just advance a single position, + then go fetch the next word. */ + + if (word_end == word_start) + { + cursor++; + continue; + } + + /* This is a genuine, non empty word, so save it as a possible + key. Then skip over it. Also, maintain the maximum length of + all words read so far. It is mandatory to take the maximum + length of all words in the file, without considering if they + are actually kept or rejected, because backward jumps at output + generation time may fall in *any* word. */ + + possible_key.start = cursor; + possible_key.size = word_end - word_start; + cursor += possible_key.size; + + if (possible_key.size > maximum_word_length) + maximum_word_length = possible_key.size; + + /* In input reference mode, update 'line_start' from its previous + value. Count the lines just in case auto reference mode is + also selected. If it happens that the word just matched is + indeed part of a reference; just ignore it. */ + + if (input_reference) + { + while (line_scan < possible_key.start) + if (*line_scan == '\n') + { + total_line_count++; + line_scan++; + line_start = line_scan; + SKIP_NON_WHITE (line_scan, text_buffer->end); + reference_length = line_scan - line_start; + } + else + line_scan++; + if (line_scan > possible_key.start) + continue; + } + + /* Ignore the word if an 'Ignore words' table exists and if it is + part of it. Also ignore the word if an 'Only words' table and + if it is *not* part of it. + + It is allowed that both tables be used at once, even if this + may look strange for now. Just ignore a word that would appear + in both. If regexps are eventually implemented for these + tables, the Ignore table could then reject words that would + have been previously accepted by the Only table. */ + + if (ignore_file && search_table (&possible_key, &ignore_table)) + continue; + if (only_file && !search_table (&possible_key, &only_table)) + continue; + + /* A non-empty word has been found. First of all, insure + proper allocation of the next OCCURS, and make a pointer to + where it will be constructed. */ + + if (number_of_occurs[0] == occurs_alloc[0]) + occurs_table[0] = x2nrealloc (occurs_table[0], + &occurs_alloc[0], + sizeof *occurs_table[0]); + occurs_cursor = occurs_table[0] + number_of_occurs[0]; + + /* Define the reference field, if any. */ + + if (auto_reference) + { + + /* While auto referencing, update 'line_start' from its + previous value, counting lines as we go. If input + referencing at the same time, 'line_start' has been + advanced earlier, and the following loop is never really + executed. */ + + while (line_scan < possible_key.start) + if (*line_scan == '\n') + { + total_line_count++; + line_scan++; + line_start = line_scan; + SKIP_NON_WHITE (line_scan, text_buffer->end); + } + else + line_scan++; + + occurs_cursor->reference = total_line_count; + } + else if (input_reference) + { + + /* If only input referencing, 'line_start' has been computed + earlier to detect the case the word matched would be part + of the reference. The reference position is simply the + value of 'line_start'. */ + + occurs_cursor->reference = line_start - possible_key.start; + if (reference_length > reference_max_width) + reference_max_width = reference_length; + } + + /* Exclude the reference from the context in simple cases. */ + + if (input_reference && line_start == context_start) + { + SKIP_NON_WHITE (context_start, context_end); + SKIP_WHITE (context_start, context_end); + } + + /* Completes the OCCURS structure. */ + + occurs_cursor->key = possible_key; + occurs_cursor->left = context_start - possible_key.start; + occurs_cursor->right = context_end - possible_key.start; + occurs_cursor->file_index = file_index; + + number_of_occurs[0]++; + } + } +} + +/* Formatting and actual output - service routines. */ + +/*-----------------------------------------. +| Prints some NUMBER of spaces on stdout. | +`-----------------------------------------*/ + +static void +print_spaces (ptrdiff_t number) +{ + for (ptrdiff_t counter = number; counter > 0; counter--) + putchar (' '); +} + +/*-------------------------------------. +| Prints the field provided by FIELD. | +`-------------------------------------*/ + +static void +print_field (BLOCK field) +{ + char *cursor; /* Cursor in field to print */ + + /* Whitespace is not really compressed. Instead, each white space + character (tab, vt, ht etc.) is printed as one single space. */ + + for (cursor = field.start; cursor < field.end; cursor++) + { + unsigned char character = *cursor; + if (edited_flag[character]) + { + /* Handle cases which are specific to 'roff' or TeX. All + white space processing is done as the default case of + this switch. */ + + switch (character) + { + case '"': + /* In roff output format, double any quote. */ + putchar ('"'); + putchar ('"'); + break; + + case '$': + case '%': + case '&': + case '#': + case '_': + /* In TeX output format, precede these with a backslash. */ + putchar ('\\'); + putchar (character); + break; + + case '{': + case '}': + /* In TeX output format, precede these with a backslash and + force mathematical mode. */ + printf ("$\\%c$", character); + break; + + case '\\': + /* In TeX output mode, request production of a backslash. */ + fputs ("\\backslash{}", stdout); + break; + + default: + /* Any other flagged character produces a single space. */ + putchar (' '); + } + } + else + putchar (*cursor); + } +} + +/* Formatting and actual output - planning routines. */ + +/*--------------------------------------------------------------------. +| From information collected from command line options and input file | +| readings, compute and fix some output parameter values. | +`--------------------------------------------------------------------*/ + +static void +fix_output_parameters (void) +{ + size_t file_index; /* index in text input file arrays */ + intmax_t line_ordinal; /* line ordinal value for reference */ + ptrdiff_t reference_width; /* width for the whole reference */ + int character; /* character ordinal */ + char const *cursor; /* cursor in some constant strings */ + + /* In auto reference mode, the maximum width of this field is + precomputed and subtracted from the overall line width. Add one for + the column which separate the file name from the line number. */ + + if (auto_reference) + { + reference_max_width = 0; + for (file_index = 0; file_index < number_input_files; file_index++) + { + line_ordinal = file_line_count[file_index] + 1; + if (file_index > 0) + line_ordinal -= file_line_count[file_index - 1]; + char ordinal_string[INT_BUFSIZE_BOUND (intmax_t)]; + reference_width = sprintf (ordinal_string, "%"PRIdMAX, line_ordinal); + if (input_file_name[file_index]) + reference_width += strlen (input_file_name[file_index]); + if (reference_width > reference_max_width) + reference_max_width = reference_width; + } + reference_max_width++; + reference.start = xmalloc (reference_max_width + 1); + } + + /* If the reference appears to the left of the output line, reserve some + space for it right away, including one gap size. */ + + if ((auto_reference || input_reference) && !right_reference) + line_width -= reference_max_width + gap_size; + if (line_width < 0) + line_width = 0; + + /* The output lines, minimally, will contain from left to right a left + context, a gap, and a keyword followed by the right context with no + special intervening gap. Half of the line width is dedicated to the + left context and the gap, the other half is dedicated to the keyword + and the right context; these values are computed once and for all here. + There also are tail and head wrap around fields, used when the keyword + is near the beginning or the end of the line, or when some long word + cannot fit in, but leave place from wrapped around shorter words. The + maximum width of these fields are recomputed separately for each line, + on a case by case basis. It is worth noting that it cannot happen that + both the tail and head fields are used at once. */ + + half_line_width = line_width / 2; + before_max_width = half_line_width - gap_size; + keyafter_max_width = half_line_width; + + /* If truncation_string is the empty string, make it null to speed up + tests. In this case, truncation_string_length will never get used, so + there is no need to set it. */ + + if (truncation_string && *truncation_string) + truncation_string_length = strlen (truncation_string); + else + truncation_string = nullptr; + + if (gnu_extensions) + { + + /* When flagging truncation at the left of the keyword, the + truncation mark goes at the beginning of the before field, + unless there is a head field, in which case the mark goes at the + left of the head field. When flagging truncation at the right + of the keyword, the mark goes at the end of the keyafter field, + unless there is a tail field, in which case the mark goes at the + end of the tail field. Only eight combination cases could arise + for truncation marks: + + . None. + . One beginning the before field. + . One beginning the head field. + . One ending the keyafter field. + . One ending the tail field. + . One beginning the before field, another ending the keyafter field. + . One ending the tail field, another beginning the before field. + . One ending the keyafter field, another beginning the head field. + + So, there is at most two truncation marks, which could appear both + on the left side of the center of the output line, both on the + right side, or one on either side. */ + + before_max_width -= 2 * truncation_string_length; + if (before_max_width < 0) + before_max_width = 0; + keyafter_max_width -= 2 * truncation_string_length; + } + else + { + + /* I never figured out exactly how UNIX' ptx plans the output width + of its various fields. If GNU extensions are disabled, do not + try computing the field widths correctly; instead, use the + following formula, which does not completely imitate UNIX' ptx, + but almost. */ + + keyafter_max_width -= 2 * truncation_string_length + 1; + } + + /* Compute which characters need special output processing. Initialize + by flagging any white space character. Some systems do not consider + form feed as a space character, but we do. */ + + for (character = 0; character < CHAR_SET_SIZE; character++) + edited_flag[character] = !! isspace (character); + edited_flag['\f'] = 1; + + /* Complete the special character flagging according to selected output + format. */ + + switch (output_format) + { + case UNKNOWN_FORMAT: + /* Should never happen. */ + + case DUMB_FORMAT: + break; + + case ROFF_FORMAT: + + /* 'Quote' characters should be doubled. */ + + edited_flag['"'] = 1; + break; + + case TEX_FORMAT: + + /* Various characters need special processing. */ + + for (cursor = "$%&#_{}\\"; *cursor; cursor++) + edited_flag[to_uchar (*cursor)] = 1; + + break; + } +} + +/*------------------------------------------------------------------. +| Compute the position and length of all the output fields, given a | +| pointer to some OCCURS. | +`------------------------------------------------------------------*/ + +static void +define_all_fields (OCCURS *occurs) +{ + ptrdiff_t tail_max_width; /* allowable width of tail field */ + ptrdiff_t head_max_width; /* allowable width of head field */ + char *cursor; /* running cursor in source text */ + char *left_context_start; /* start of left context */ + char *right_context_end; /* end of right context */ + char *left_field_start; /* conservative start for 'head'/'before' */ + char const *file_name; /* file name for reference */ + intmax_t line_ordinal; /* line ordinal for reference */ + char const *buffer_start; /* start of buffered file for this occurs */ + char const *buffer_end; /* end of buffered file for this occurs */ + + /* Define 'keyafter', start of left context and end of right context. + 'keyafter' starts at the saved position for keyword and extend to the + right from the end of the keyword, eating separators or full words, but + not beyond maximum allowed width for 'keyafter' field or limit for the + right context. Suffix spaces will be removed afterwards. */ + + keyafter.start = occurs->key.start; + keyafter.end = keyafter.start + occurs->key.size; + left_context_start = keyafter.start + occurs->left; + right_context_end = keyafter.start + occurs->right; + + buffer_start = text_buffers[occurs->file_index].start; + buffer_end = text_buffers[occurs->file_index].end; + + cursor = keyafter.end; + while (cursor < right_context_end + && cursor <= keyafter.start + keyafter_max_width) + { + keyafter.end = cursor; + SKIP_SOMETHING (cursor, right_context_end); + } + if (cursor <= keyafter.start + keyafter_max_width) + keyafter.end = cursor; + + keyafter_truncation = truncation_string && keyafter.end < right_context_end; + + SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start); + + /* When the left context is wide, it might take some time to catch up from + the left context boundary to the beginning of the 'head' or 'before' + fields. So, in this case, to speed the catchup, we jump back from the + keyword, using some secure distance, possibly falling in the middle of + a word. A secure backward jump would be at least half the maximum + width of a line, plus the size of the longest word met in the whole + input. We conclude this backward jump by a skip forward of at least + one word. In this manner, we should not inadvertently accept only part + of a word. From the reached point, when it will be time to fix the + beginning of 'head' or 'before' fields, we will skip forward words or + delimiters until we get sufficiently near. */ + + if (-occurs->left > half_line_width + maximum_word_length) + { + left_field_start + = keyafter.start - (half_line_width + maximum_word_length); + SKIP_SOMETHING (left_field_start, keyafter.start); + } + else + left_field_start = keyafter.start + occurs->left; + + /* 'before' certainly ends at the keyword, but not including separating + spaces. It starts after than the saved value for the left context, by + advancing it until it falls inside the maximum allowed width for the + before field. There will be no prefix spaces either. 'before' only + advances by skipping single separators or whole words. */ + + before.start = left_field_start; + before.end = keyafter.start; + SKIP_WHITE_BACKWARDS (before.end, before.start); + + while (before.start + before_max_width < before.end) + SKIP_SOMETHING (before.start, before.end); + + if (truncation_string) + { + cursor = before.start; + SKIP_WHITE_BACKWARDS (cursor, buffer_start); + before_truncation = cursor > left_context_start; + } + else + before_truncation = false; + + SKIP_WHITE (before.start, buffer_end); + + /* The tail could not take more columns than what has been left in the + left context field, and a gap is mandatory. It starts after the + right context, and does not contain prefixed spaces. It ends at + the end of line, the end of buffer or when the tail field is full, + whichever comes first. It cannot contain only part of a word, and + has no suffixed spaces. */ + + tail_max_width + = before_max_width - (before.end - before.start) - gap_size; + + if (tail_max_width > 0) + { + tail.start = keyafter.end; + SKIP_WHITE (tail.start, buffer_end); + + tail.end = tail.start; + cursor = tail.end; + while (cursor < right_context_end + && cursor < tail.start + tail_max_width) + { + tail.end = cursor; + SKIP_SOMETHING (cursor, right_context_end); + } + + if (cursor < tail.start + tail_max_width) + tail.end = cursor; + + if (tail.end > tail.start) + { + keyafter_truncation = false; + tail_truncation = truncation_string && tail.end < right_context_end; + } + else + tail_truncation = false; + + SKIP_WHITE_BACKWARDS (tail.end, tail.start); + } + else + { + + /* No place left for a tail field. */ + + tail.start = nullptr; + tail.end = nullptr; + tail_truncation = false; + } + + /* 'head' could not take more columns than what has been left in the right + context field, and a gap is mandatory. It ends before the left + context, and does not contain suffixed spaces. Its pointer is advanced + until the head field has shrunk to its allowed width. It cannot + contain only part of a word, and has no suffixed spaces. */ + + head_max_width + = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size; + + if (head_max_width > 0) + { + head.end = before.start; + SKIP_WHITE_BACKWARDS (head.end, buffer_start); + + head.start = left_field_start; + while (head.start + head_max_width < head.end) + SKIP_SOMETHING (head.start, head.end); + + if (head.end > head.start) + { + before_truncation = false; + head_truncation = (truncation_string + && head.start > left_context_start); + } + else + head_truncation = false; + + SKIP_WHITE (head.start, head.end); + } + else + { + + /* No place left for a head field. */ + + head.start = nullptr; + head.end = nullptr; + head_truncation = false; + } + + if (auto_reference) + { + + /* Construct the reference text in preallocated space from the file + name and the line number. Standard input yields an empty file name. + Ensure line numbers are 1 based, even if they are computed 0 based. */ + + file_name = input_file_name[occurs->file_index]; + if (!file_name) + file_name = ""; + + line_ordinal = occurs->reference + 1; + if (occurs->file_index > 0) + line_ordinal -= file_line_count[occurs->file_index - 1]; + + char *file_end = stpcpy (reference.start, file_name); + reference.end = file_end + sprintf (file_end, ":%"PRIdMAX, line_ordinal); + } + else if (input_reference) + { + + /* Reference starts at saved position for reference and extends right + until some white space is met. */ + + reference.start = keyafter.start + occurs->reference; + reference.end = reference.start; + SKIP_NON_WHITE (reference.end, right_context_end); + } +} + +/* Formatting and actual output - control routines. */ + +/*----------------------------------------------------------------------. +| Output the current output fields as one line for 'troff' or 'nroff'. | +`----------------------------------------------------------------------*/ + +static void +output_one_roff_line (void) +{ + /* Output the 'tail' field. */ + + printf (".%s \"", macro_name); + print_field (tail); + if (tail_truncation) + fputs (truncation_string, stdout); + putchar ('"'); + + /* Output the 'before' field. */ + + fputs (" \"", stdout); + if (before_truncation) + fputs (truncation_string, stdout); + print_field (before); + putchar ('"'); + + /* Output the 'keyafter' field. */ + + fputs (" \"", stdout); + print_field (keyafter); + if (keyafter_truncation) + fputs (truncation_string, stdout); + putchar ('"'); + + /* Output the 'head' field. */ + + fputs (" \"", stdout); + if (head_truncation) + fputs (truncation_string, stdout); + print_field (head); + putchar ('"'); + + /* Conditionally output the 'reference' field. */ + + if (auto_reference || input_reference) + { + fputs (" \"", stdout); + print_field (reference); + putchar ('"'); + } + + putchar ('\n'); +} + +/*---------------------------------------------------------. +| Output the current output fields as one line for 'TeX'. | +`---------------------------------------------------------*/ + +static void +output_one_tex_line (void) +{ + BLOCK key; /* key field, isolated */ + BLOCK after; /* after field, isolated */ + char *cursor; /* running cursor in source text */ + + printf ("\\%s ", macro_name); + putchar ('{'); + print_field (tail); + fputs ("}{", stdout); + print_field (before); + fputs ("}{", stdout); + key.start = keyafter.start; + after.end = keyafter.end; + cursor = keyafter.start; + SKIP_SOMETHING (cursor, keyafter.end); + key.end = cursor; + after.start = cursor; + print_field (key); + fputs ("}{", stdout); + print_field (after); + fputs ("}{", stdout); + print_field (head); + putchar ('}'); + if (auto_reference || input_reference) + { + putchar ('{'); + print_field (reference); + putchar ('}'); + } + putchar ('\n'); +} + +/*-------------------------------------------------------------------. +| Output the current output fields as one line for a dumb terminal. | +`-------------------------------------------------------------------*/ + +static void +output_one_dumb_line (void) +{ + if (!right_reference) + { + if (auto_reference) + { + + /* Output the 'reference' field, in such a way that GNU emacs + next-error will handle it. The ending colon is taken from the + gap which follows. */ + + print_field (reference); + putchar (':'); + print_spaces (reference_max_width + + gap_size + - (reference.end - reference.start) + - 1); + } + else + { + + /* Output the 'reference' field and its following gap. */ + + print_field (reference); + print_spaces (reference_max_width + + gap_size + - (reference.end - reference.start)); + } + } + + if (tail.start < tail.end) + { + /* Output the 'tail' field. */ + + print_field (tail); + if (tail_truncation) + fputs (truncation_string, stdout); + + print_spaces (half_line_width - gap_size + - (before.end - before.start) + - (before_truncation ? truncation_string_length : 0) + - (tail.end - tail.start) + - (tail_truncation ? truncation_string_length : 0)); + } + else + print_spaces (half_line_width - gap_size + - (before.end - before.start) + - (before_truncation ? truncation_string_length : 0)); + + /* Output the 'before' field. */ + + if (before_truncation) + fputs (truncation_string, stdout); + print_field (before); + + print_spaces (gap_size); + + /* Output the 'keyafter' field. */ + + print_field (keyafter); + if (keyafter_truncation) + fputs (truncation_string, stdout); + + if (head.start < head.end) + { + /* Output the 'head' field. */ + + print_spaces (half_line_width + - (keyafter.end - keyafter.start) + - (keyafter_truncation ? truncation_string_length : 0) + - (head.end - head.start) + - (head_truncation ? truncation_string_length : 0)); + if (head_truncation) + fputs (truncation_string, stdout); + print_field (head); + } + else + + if ((auto_reference || input_reference) && right_reference) + print_spaces (half_line_width + - (keyafter.end - keyafter.start) + - (keyafter_truncation ? truncation_string_length : 0)); + + if ((auto_reference || input_reference) && right_reference) + { + /* Output the 'reference' field. */ + + print_spaces (gap_size); + print_field (reference); + } + + putchar ('\n'); +} + +/*------------------------------------------------------------------------. +| Scan the whole occurs table and, for each entry, output one line in the | +| appropriate format. | +`------------------------------------------------------------------------*/ + +static void +generate_all_output (void) +{ + ptrdiff_t occurs_index; /* index of keyword entry being processed */ + OCCURS *occurs_cursor; /* current keyword entry being processed */ + + /* The following assignments are useful to provide default values in case + line contexts or references are not used, in which case these variables + would never be computed. */ + + tail.start = nullptr; + tail.end = nullptr; + tail_truncation = false; + + head.start = nullptr; + head.end = nullptr; + head_truncation = false; + + /* Loop over all keyword occurrences. */ + + occurs_cursor = occurs_table[0]; + + for (occurs_index = 0; occurs_index < number_of_occurs[0]; occurs_index++) + { + /* Compute the exact size of every field and whenever truncation flags + are present or not. */ + + define_all_fields (occurs_cursor); + + /* Produce one output line according to selected format. */ + + switch (output_format) + { + case UNKNOWN_FORMAT: + /* Should never happen. */ + + case DUMB_FORMAT: + output_one_dumb_line (); + break; + + case ROFF_FORMAT: + output_one_roff_line (); + break; + + case TEX_FORMAT: + output_one_tex_line (); + break; + } + + /* Advance the cursor into the occurs table. */ + + occurs_cursor++; + } +} + +/* Option decoding and main program. */ + +/*------------------------------------------------------. +| Print program identification and options, then exit. | +`------------------------------------------------------*/ + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [INPUT]... (without -G)\n\ + or: %s -G [OPTION]... [INPUT [OUTPUT]]\n"), + program_name, program_name); + fputs (_("\ +Output a permuted index, including context, of the words in the input files.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -A, --auto-reference output automatically generated references\n\ + -G, --traditional behave more like System V 'ptx'\n\ +"), stdout); + fputs (_("\ + -F, --flag-truncation=STRING use STRING for flagging line truncations.\n\ + The default is '/'\n\ +"), stdout); + fputs (_("\ + -M, --macro-name=STRING macro name to use instead of 'xx'\n\ + -O, --format=roff generate output as roff directives\n\ + -R, --right-side-refs put references at right, not counted in -w\n\ + -S, --sentence-regexp=REGEXP for end of lines or end of sentences\n\ + -T, --format=tex generate output as TeX directives\n\ +"), stdout); + fputs (_("\ + -W, --word-regexp=REGEXP use REGEXP to match each keyword\n\ + -b, --break-file=FILE word break characters in this FILE\n\ + -f, --ignore-case fold lower case to upper case for sorting\n\ + -g, --gap-size=NUMBER gap size in columns between output fields\n\ + -i, --ignore-file=FILE read ignore word list from FILE\n\ + -o, --only-file=FILE read only word list from this FILE\n\ +"), stdout); + fputs (_("\ + -r, --references first field of each line is a reference\n\ + -t, --typeset-mode - not implemented -\n\ + -w, --width=NUMBER output width in columns, reference excluded\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/*----------------------------------------------------------------------. +| Main program. Decode ARGC arguments passed through the ARGV array of | +| strings, then launch execution. | +`----------------------------------------------------------------------*/ + +/* Long options equivalences. */ +static struct option const long_options[] = +{ + {"auto-reference", no_argument, nullptr, 'A'}, + {"break-file", required_argument, nullptr, 'b'}, + {"flag-truncation", required_argument, nullptr, 'F'}, + {"ignore-case", no_argument, nullptr, 'f'}, + {"gap-size", required_argument, nullptr, 'g'}, + {"ignore-file", required_argument, nullptr, 'i'}, + {"macro-name", required_argument, nullptr, 'M'}, + {"only-file", required_argument, nullptr, 'o'}, + {"references", no_argument, nullptr, 'r'}, + {"right-side-refs", no_argument, nullptr, 'R'}, + {"format", required_argument, nullptr, 10}, + {"sentence-regexp", required_argument, nullptr, 'S'}, + {"traditional", no_argument, nullptr, 'G'}, + {"typeset-mode", no_argument, nullptr, 't'}, + {"width", required_argument, nullptr, 'w'}, + {"word-regexp", required_argument, nullptr, 'W'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0}, +}; + +static char const *const format_args[] = +{ + "roff", "tex", nullptr +}; + +static enum Format const format_vals[] = +{ + ROFF_FORMAT, TEX_FORMAT +}; + +int +main (int argc, char **argv) +{ + int optchar; /* argument character */ + int file_index; /* index in text input file arrays */ + + /* Decode program options. */ + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + +#if HAVE_SETCHRCLASS + setchrclass (nullptr); +#endif + + while (optchar = getopt_long (argc, argv, "AF:GM:ORS:TW:b:i:fg:o:trw:", + long_options, nullptr), + optchar != EOF) + { + switch (optchar) + { + default: + usage (EXIT_FAILURE); + + case 'G': + gnu_extensions = false; + break; + + case 'b': + break_file = optarg; + break; + + case 'f': + ignore_case = true; + break; + + case 'g': + { + intmax_t tmp; + if (! (xstrtoimax (optarg, nullptr, 0, &tmp, "") == LONGINT_OK + && 0 < tmp && tmp <= PTRDIFF_MAX)) + error (EXIT_FAILURE, 0, _("invalid gap width: %s"), + quote (optarg)); + gap_size = tmp; + break; + } + + case 'i': + ignore_file = optarg; + break; + + case 'o': + only_file = optarg; + break; + + case 'r': + input_reference = true; + break; + + case 't': + /* Yet to understand... */ + break; + + case 'w': + { + intmax_t tmp; + if (! (xstrtoimax (optarg, nullptr, 0, &tmp, "") == LONGINT_OK + && 0 < tmp && tmp <= PTRDIFF_MAX)) + error (EXIT_FAILURE, 0, _("invalid line width: %s"), + quote (optarg)); + line_width = tmp; + break; + } + + case 'A': + auto_reference = true; + break; + + case 'F': + truncation_string = optarg; + unescape_string (optarg); + break; + + case 'M': + macro_name = optarg; + break; + + case 'O': + output_format = ROFF_FORMAT; + break; + + case 'R': + right_reference = true; + break; + + case 'S': + context_regex.string = optarg; + unescape_string (optarg); + break; + + case 'T': + output_format = TEX_FORMAT; + break; + + case 'W': + word_regex.string = optarg; + unescape_string (optarg); + if (!*word_regex.string) + word_regex.string = nullptr; + break; + + case 10: + output_format = XARGMATCH ("--format", optarg, + format_args, format_vals); + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + } + } + + /* Process remaining arguments. If GNU extensions are enabled, process + all arguments as input parameters. If disabled, accept at most two + arguments, the second of which is an output parameter. */ + + if (optind == argc) + { + + /* No more argument simply means: read standard input. */ + + input_file_name = xmalloc (sizeof *input_file_name); + file_line_count = xmalloc (sizeof *file_line_count); + text_buffers = xmalloc (sizeof *text_buffers); + number_input_files = 1; + input_file_name[0] = nullptr; + } + else if (gnu_extensions) + { + number_input_files = argc - optind; + input_file_name = xnmalloc (number_input_files, sizeof *input_file_name); + file_line_count = xnmalloc (number_input_files, sizeof *file_line_count); + text_buffers = xnmalloc (number_input_files, sizeof *text_buffers); + + for (file_index = 0; file_index < number_input_files; file_index++) + { + if (!*argv[optind] || STREQ (argv[optind], "-")) + input_file_name[file_index] = nullptr; + else + input_file_name[file_index] = argv[optind]; + optind++; + } + } + else + { + + /* There is one necessary input file. */ + + number_input_files = 1; + input_file_name = xmalloc (sizeof *input_file_name); + file_line_count = xmalloc (sizeof *file_line_count); + text_buffers = xmalloc (sizeof *text_buffers); + if (!*argv[optind] || STREQ (argv[optind], "-")) + input_file_name[0] = nullptr; + else + input_file_name[0] = argv[optind]; + optind++; + + /* Redirect standard output, only if requested. */ + + if (optind < argc) + { + if (! freopen (argv[optind], "w", stdout)) + error (EXIT_FAILURE, errno, "%s", quotef (argv[optind])); + optind++; + } + + /* Diagnose any other argument as an error. */ + + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + } + + /* If the output format has not been explicitly selected, choose dumb + terminal format if GNU extensions are enabled, else 'roff' format. */ + + if (output_format == UNKNOWN_FORMAT) + output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT; + + /* Initialize the main tables. */ + + initialize_regex (); + + /* Read 'Break character' file, if any. */ + + if (break_file) + digest_break_file (break_file); + + /* Read 'Ignore words' file and 'Only words' files, if any. If any of + these files is empty, reset the name of the file to null, to avoid + unnecessary calls to search_table. */ + + if (ignore_file) + { + digest_word_file (ignore_file, &ignore_table); + if (ignore_table.length == 0) + ignore_file = nullptr; + } + + if (only_file) + { + digest_word_file (only_file, &only_table); + if (only_table.length == 0) + only_file = nullptr; + } + + /* Prepare to study all the input files. */ + + number_of_occurs[0] = 0; + total_line_count = 0; + maximum_word_length = 0; + reference_max_width = 0; + + for (file_index = 0; file_index < number_input_files; file_index++) + { + BLOCK *text_buffer = text_buffers + file_index; + + /* Read the file contents into memory, then study it. */ + + swallow_file_in_memory (input_file_name[file_index], text_buffer); + find_occurs_in_text (file_index); + + /* Maintain for each file how many lines has been read so far when its + end is reached. Incrementing the count first is a simple kludge to + handle a possible incomplete line at end of file. */ + + total_line_count++; + file_line_count[file_index] = total_line_count; + } + + /* Do the output process phase. */ + + sort_found_occurs (); + fix_output_parameters (); + generate_all_output (); + + /* All done. */ + + return EXIT_SUCCESS; +} diff --git a/src/pwd.c b/src/pwd.c new file mode 100644 index 0000000..7b2e7c7 --- /dev/null +++ b/src/pwd.c @@ -0,0 +1,392 @@ +/* pwd - print current directory + Copyright (C) 1994-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include + +#include "system.h" +#include "quote.h" +#include "root-dev-ino.h" +#include "xgetcwd.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "pwd" + +#define AUTHORS proper_name ("Jim Meyering") + +struct file_name +{ + char *buf; + size_t n_alloc; + char *start; +}; + +static struct option const longopts[] = +{ + {"logical", no_argument, nullptr, 'L'}, + {"physical", no_argument, nullptr, 'P'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]...\n"), program_name); + fputs (_("\ +Print the full filename of the current working directory.\n\ +\n\ +"), stdout); + fputs (_("\ + -L, --logical use PWD from environment, even if it contains symlinks\n\ + -P, --physical avoid all symlinks\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\n\ +If no option is specified, -P is assumed.\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +file_name_free (struct file_name *p) +{ + free (p->buf); + free (p); +} + +static struct file_name * +file_name_init (void) +{ + struct file_name *p = xmalloc (sizeof *p); + + /* Start with a buffer larger than PATH_MAX, but beware of systems + on which PATH_MAX is very large -- e.g., INT_MAX. */ + p->n_alloc = MIN (2 * PATH_MAX, 32 * 1024); + + p->buf = xmalloc (p->n_alloc); + p->start = p->buf + (p->n_alloc - 1); + p->start[0] = '\0'; + return p; +} + +/* Prepend the name S of length S_LEN, to the growing file_name, P. */ +static void +file_name_prepend (struct file_name *p, char const *s, size_t s_len) +{ + size_t n_free = p->start - p->buf; + if (n_free < 1 + s_len) + { + size_t half = p->n_alloc + 1 + s_len; + /* Use xnmalloc+free rather than xnrealloc, since with the latter + we'd end up copying the data twice: once via realloc, then again + to align it with the end of the new buffer. With xnmalloc, we + copy it only once. */ + char *q = xnmalloc (2, half); + size_t n_used = p->n_alloc - n_free; + p->start = q + 2 * half - n_used; + memcpy (p->start, p->buf + n_free, n_used); + free (p->buf); + p->buf = q; + p->n_alloc = 2 * half; + } + + p->start -= 1 + s_len; + p->start[0] = '/'; + memcpy (p->start + 1, s, s_len); +} + +/* Return a string (malloc'd) consisting of N '/'-separated ".." components. */ +static char * +nth_parent (size_t n) +{ + char *buf = xnmalloc (3, n); + char *p = buf; + + for (size_t i = 0; i < n; i++) + { + memcpy (p, "../", 3); + p += 3; + } + p[-1] = '\0'; + return buf; +} + +/* Determine the basename of the current directory, where DOT_SB is the + result of lstat'ing "." and prepend that to the file name in *FILE_NAME. + Find the directory entry in '..' that matches the dev/i-node of DOT_SB. + Upon success, update *DOT_SB with stat information of '..', chdir to '..', + and prepend "/basename" to FILE_NAME. + Otherwise, exit with a diagnostic. + PARENT_HEIGHT is the number of levels '..' is above the starting directory. + The first time this function is called (from the initial directory), + PARENT_HEIGHT is 1. This is solely for diagnostics. + Exit nonzero upon error. */ + +static void +find_dir_entry (struct stat *dot_sb, struct file_name *file_name, + size_t parent_height) +{ + DIR *dirp; + int fd; + struct stat parent_sb; + bool use_lstat; + bool found; + + dirp = opendir (".."); + if (dirp == nullptr) + error (EXIT_FAILURE, errno, _("cannot open directory %s"), + quote (nth_parent (parent_height))); + + fd = dirfd (dirp); + if ((0 <= fd ? fchdir (fd) : chdir ("..")) < 0) + error (EXIT_FAILURE, errno, _("failed to chdir to %s"), + quote (nth_parent (parent_height))); + + if ((0 <= fd ? fstat (fd, &parent_sb) : stat (".", &parent_sb)) < 0) + error (EXIT_FAILURE, errno, _("failed to stat %s"), + quote (nth_parent (parent_height))); + + /* If parent and child directory are on different devices, then we + can't rely on d_ino for useful i-node numbers; use lstat instead. */ + use_lstat = (parent_sb.st_dev != dot_sb->st_dev); + + found = false; + while (true) + { + struct dirent const *dp; + struct stat ent_sb; + ino_t ino; + + errno = 0; + if ((dp = readdir_ignoring_dot_and_dotdot (dirp)) == nullptr) + { + if (errno) + { + /* Save/restore errno across closedir call. */ + int e = errno; + closedir (dirp); + errno = e; + + /* Arrange to give a diagnostic after exiting this loop. */ + dirp = nullptr; + } + break; + } + + ino = D_INO (dp); + + if (ino == NOT_AN_INODE_NUMBER || use_lstat) + { + if (lstat (dp->d_name, &ent_sb) < 0) + { + /* Skip any entry we can't stat. */ + continue; + } + ino = ent_sb.st_ino; + } + + if (ino != dot_sb->st_ino) + continue; + + /* If we're not crossing a device boundary, then a simple i-node + match is enough. */ + if ( ! use_lstat || ent_sb.st_dev == dot_sb->st_dev) + { + file_name_prepend (file_name, dp->d_name, _D_EXACT_NAMLEN (dp)); + found = true; + break; + } + } + + if (dirp == nullptr || closedir (dirp) != 0) + { + /* Note that this diagnostic serves for both readdir + and closedir failures. */ + error (EXIT_FAILURE, errno, _("reading directory %s"), + quote (nth_parent (parent_height))); + } + + if ( ! found) + error (EXIT_FAILURE, 0, + _("couldn't find directory entry in %s with matching i-node"), + quote (nth_parent (parent_height))); + + *dot_sb = parent_sb; +} + +/* Construct the full, absolute name of the current working + directory and store it in *FILE_NAME. + The getcwd function performs nearly the same task, but is typically + unable to handle names longer than PATH_MAX. This function has + no such limitation. However, this function *can* fail due to + permission problems or a lack of memory, while GNU/Linux's getcwd + function works regardless of restricted permissions on parent + directories. Upon failure, give a diagnostic and exit nonzero. + + Note: although this function is similar to getcwd, it has a fundamental + difference in that it gives a diagnostic and exits upon failure. + I would have liked a function that did not exit, and that could be + used as a getcwd replacement. Unfortunately, considering all of + the information the caller would require in order to produce good + diagnostics, it doesn't seem worth the added complexity. + In any case, any getcwd replacement must *not* exceed the PATH_MAX + limitation. Otherwise, functions like 'chdir' would fail with + ENAMETOOLONG. + + FIXME-maybe: if find_dir_entry fails due to permissions, try getcwd, + in case the unreadable directory is close enough to the root that + getcwd works from there. */ + +static void +robust_getcwd (struct file_name *file_name) +{ + size_t height = 1; + struct dev_ino dev_ino_buf; + struct dev_ino *root_dev_ino = get_root_dev_ino (&dev_ino_buf); + struct stat dot_sb; + + if (root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + + if (stat (".", &dot_sb) < 0) + error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (".")); + + while (true) + { + /* If we've reached the root, we're done. */ + if (SAME_INODE (dot_sb, *root_dev_ino)) + break; + + find_dir_entry (&dot_sb, file_name, height++); + } + + /* See if a leading slash is needed; file_name_prepend adds one. */ + if (file_name->start[0] == '\0') + file_name_prepend (file_name, "", 0); +} + + +/* Return PWD from the environment if it is acceptable for 'pwd -L' + output, otherwise nullptr. */ +static char * +logical_getcwd (void) +{ + struct stat st1; + struct stat st2; + char *wd = getenv ("PWD"); + char *p; + + /* Textual validation first. */ + if (!wd || wd[0] != '/') + return nullptr; + p = wd; + while ((p = strstr (p, "/."))) + { + if (!p[2] || p[2] == '/' + || (p[2] == '.' && (!p[3] || p[3] == '/'))) + return nullptr; + p++; + } + + /* System call validation. */ + if (stat (wd, &st1) == 0 && stat (".", &st2) == 0 && SAME_INODE (st1, st2)) + return wd; + return nullptr; +} + + +int +main (int argc, char **argv) +{ + char *wd; + /* POSIX requires a default of -L, but most scripts expect -P. + Currently shells default to -L, while stand-alone + pwd implementations default to -P. */ + bool logical = (getenv ("POSIXLY_CORRECT") != nullptr); + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while (true) + { + int c = getopt_long (argc, argv, "LP", longopts, nullptr); + if (c == -1) + break; + switch (c) + { + case 'L': + logical = true; + break; + case 'P': + logical = false; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (optind < argc) + error (0, 0, _("ignoring non-option arguments")); + + if (logical) + { + wd = logical_getcwd (); + if (wd) + { + puts (wd); + return EXIT_SUCCESS; + } + } + + wd = xgetcwd (); + if (wd != nullptr) + { + puts (wd); + free (wd); + } + else + { + struct file_name *file_name = file_name_init (); + robust_getcwd (file_name); + puts (file_name->start); + file_name_free (file_name); + } + + return EXIT_SUCCESS; +} diff --git a/src/readlink.c b/src/readlink.c new file mode 100644 index 0000000..aa33991 --- /dev/null +++ b/src/readlink.c @@ -0,0 +1,177 @@ +/* readlink -- display value of a symbolic link. + Copyright (C) 2002-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Dmitry V. Levin */ + +#include +#include +#include +#include + +#include "system.h" +#include "canonicalize.h" +#include "areadlink.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "readlink" + +#define AUTHORS proper_name ("Dmitry V. Levin") + +/* If true, do not output the trailing newline. */ +static bool no_newline; + +/* If true, report error messages. */ +static bool verbose; + +static struct option const longopts[] = +{ + {"canonicalize", no_argument, nullptr, 'f'}, + {"canonicalize-existing", no_argument, nullptr, 'e'}, + {"canonicalize-missing", no_argument, nullptr, 'm'}, + {"no-newline", no_argument, nullptr, 'n'}, + {"quiet", no_argument, nullptr, 'q'}, + {"silent", no_argument, nullptr, 's'}, + {"verbose", no_argument, nullptr, 'v'}, + {"zero", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... FILE...\n"), program_name); + fputs (_("Print value of a symbolic link or canonical file name\n\n"), + stdout); + fputs (_("\ + -f, --canonicalize canonicalize by following every symlink in\n\ + every component of the given name recursively;\ +\n\ + all but the last component must exist\n\ + -e, --canonicalize-existing canonicalize by following every symlink in\n\ + every component of the given name recursively,\ +\n\ + all components must exist\n\ +"), stdout); + fputs (_("\ + -m, --canonicalize-missing canonicalize by following every symlink in\n\ + every component of the given name recursively,\ +\n\ + without requirements on components existence\n\ + -n, --no-newline do not output the trailing delimiter\n\ + -q, --quiet\n\ + -s, --silent suppress most error messages (on by default)\n\ + -v, --verbose report error messages\n\ + -z, --zero end each output line with NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + /* If not -1, use this method to canonicalize. */ + int can_mode = -1; + int status = EXIT_SUCCESS; + int optc; + bool use_nuls = false; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "efmnqsvz", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'e': + can_mode = CAN_EXISTING; + break; + case 'f': + can_mode = CAN_ALL_BUT_LAST; + break; + case 'm': + can_mode = CAN_MISSING; + break; + case 'n': + no_newline = true; + break; + case 'q': + case 's': + verbose = false; + break; + case 'v': + verbose = true; + break; + case 'z': + use_nuls = true; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (optind >= argc) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (argc - optind > 1) + { + if (no_newline) + error (0, 0, _("ignoring --no-newline with multiple arguments")); + no_newline = false; + } + + for (; optind < argc; ++optind) + { + char const *fname = argv[optind]; + char *value = (can_mode != -1 + ? canonicalize_filename_mode (fname, can_mode) + : areadlink_with_size (fname, 63)); + if (value) + { + fputs (value, stdout); + if (! no_newline) + putchar (use_nuls ? '\0' : '\n'); + free (value); + } + else + { + status = EXIT_FAILURE; + if (verbose) + error (0, errno, "%s", quotef (fname)); + } + } + + return status; +} diff --git a/src/realpath.c b/src/realpath.c new file mode 100644 index 0000000..564b836 --- /dev/null +++ b/src/realpath.c @@ -0,0 +1,276 @@ +/* realpath - print the resolved path + Copyright (C) 2011-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady. */ + +#include +#include +#include +#include + +#include "system.h" +#include "canonicalize.h" +#include "relpath.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "realpath" + +#define AUTHORS proper_name_lite ("Padraig Brady", "P\303\241draig Brady") + +enum +{ + RELATIVE_TO_OPTION = CHAR_MAX + 1, + RELATIVE_BASE_OPTION +}; + +static bool verbose = true; +static bool logical; +static bool use_nuls; +static char const *can_relative_to; +static char const *can_relative_base; + +static struct option const longopts[] = +{ + {"canonicalize-existing", no_argument, nullptr, 'e'}, + {"canonicalize-missing", no_argument, nullptr, 'm'}, + {"relative-to", required_argument, nullptr, RELATIVE_TO_OPTION}, + {"relative-base", required_argument, nullptr, RELATIVE_BASE_OPTION}, + {"quiet", no_argument, nullptr, 'q'}, + {"strip", no_argument, nullptr, 's'}, + {"no-symlinks", no_argument, nullptr, 's'}, + {"zero", no_argument, nullptr, 'z'}, + {"logical", no_argument, nullptr, 'L'}, + {"physical", no_argument, nullptr, 'P'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... FILE...\n"), program_name); + fputs (_("\ +Print the resolved absolute file name;\n\ +all but the last component must exist\n\ +\n\ +"), stdout); + fputs (_("\ + -e, --canonicalize-existing all components of the path must exist\n\ + -m, --canonicalize-missing no path components need exist or be a directory\ +\n\ + -L, --logical resolve '..' components before symlinks\n\ + -P, --physical resolve symlinks as encountered (default)\n\ + -q, --quiet suppress most error messages\n\ + --relative-to=DIR print the resolved path relative to DIR\n\ + --relative-base=DIR print absolute paths unless paths below DIR\n\ + -s, --strip, --no-symlinks don't expand symlinks\n\ + -z, --zero end each output line with NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* A wrapper around canonicalize_filename_mode(), + to call it twice when in LOGICAL mode. */ +static char * +realpath_canon (char const *fname, int can_mode) +{ + char *can_fname = canonicalize_filename_mode (fname, can_mode); + if (logical && can_fname) /* canonicalize again to resolve symlinks. */ + { + can_mode &= ~CAN_NOLINKS; + char *can_fname2 = canonicalize_filename_mode (can_fname, can_mode); + free (can_fname); + return can_fname2; + } + return can_fname; +} + +/* Test whether canonical prefix is parent or match of path. */ +ATTRIBUTE_PURE +static bool +path_prefix (char const *prefix, char const *path) +{ + /* We already know prefix[0] and path[0] are '/'. */ + prefix++; + path++; + + /* '/' is the prefix of everything except '//' (since we know '//' + is only present after canonicalization if it is distinct). */ + if (!*prefix) + return *path != '/'; + + /* Likewise, '//' is a prefix of any double-slash path. */ + if (*prefix == '/' && !prefix[1]) + return *path == '/'; + + /* Any other prefix has a non-slash portion. */ + while (*prefix && *path) + { + if (*prefix != *path) + break; + prefix++; + path++; + } + return (!*prefix && (*path == '/' || !*path)); +} + +static bool +isdir (char const *path) +{ + struct stat sb; + if (stat (path, &sb) != 0) + error (EXIT_FAILURE, errno, _("cannot stat %s"), quoteaf (path)); + return S_ISDIR (sb.st_mode); +} + +static bool +process_path (char const *fname, int can_mode) +{ + char *can_fname = realpath_canon (fname, can_mode); + if (!can_fname) + { + if (verbose) + error (0, errno, "%s", quotef (fname)); + return false; + } + + if (!can_relative_to + || (can_relative_base && !path_prefix (can_relative_base, can_fname)) + || (can_relative_to && !relpath (can_fname, can_relative_to, nullptr, 0))) + fputs (can_fname, stdout); + + putchar (use_nuls ? '\0' : '\n'); + + free (can_fname); + + return true; +} + +int +main (int argc, char **argv) +{ + bool ok = true; + int can_mode = CAN_ALL_BUT_LAST; + char const *relative_to = nullptr; + char const *relative_base = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while (true) + { + int c = getopt_long (argc, argv, "eLmPqsz", longopts, nullptr); + if (c == -1) + break; + switch (c) + { + case 'e': + can_mode &= ~CAN_MODE_MASK; + can_mode |= CAN_EXISTING; + break; + case 'm': + can_mode &= ~CAN_MODE_MASK; + can_mode |= CAN_MISSING; + break; + case 'L': + can_mode |= CAN_NOLINKS; + logical = true; + break; + case 's': + can_mode |= CAN_NOLINKS; + logical = false; + break; + case 'P': + can_mode &= ~CAN_NOLINKS; + logical = false; + break; + case 'q': + verbose = false; + break; + case 'z': + use_nuls = true; + break; + case RELATIVE_TO_OPTION: + relative_to = optarg; + break; + case RELATIVE_BASE_OPTION: + relative_base = optarg; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (optind >= argc) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (relative_base && !relative_to) + relative_to = relative_base; + + bool need_dir = (can_mode & CAN_MODE_MASK) == CAN_EXISTING; + if (relative_to) + { + can_relative_to = realpath_canon (relative_to, can_mode); + if (!can_relative_to) + error (EXIT_FAILURE, errno, "%s", quotef (relative_to)); + if (need_dir && !isdir (can_relative_to)) + error (EXIT_FAILURE, ENOTDIR, "%s", quotef (relative_to)); + } + if (relative_base == relative_to) + can_relative_base = can_relative_to; + else if (relative_base) + { + char *base = realpath_canon (relative_base, can_mode); + if (!base) + error (EXIT_FAILURE, errno, "%s", quotef (relative_base)); + if (need_dir && !isdir (base)) + error (EXIT_FAILURE, ENOTDIR, "%s", quotef (relative_base)); + /* --relative-to is a no-op if it does not have --relative-base + as a prefix */ + if (path_prefix (base, can_relative_to)) + can_relative_base = base; + else + { + free (base); + can_relative_base = can_relative_to; + can_relative_to = nullptr; + } + } + + for (; optind < argc; ++optind) + ok &= process_path (argv[optind], can_mode); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/relpath.c b/src/relpath.c new file mode 100644 index 0000000..2edbb4b --- /dev/null +++ b/src/relpath.c @@ -0,0 +1,133 @@ +/* relpath - print the relative path + Copyright (C) 2012-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady. */ + +#include + +#include "system.h" +#include "relpath.h" + + +/* Return the length of the longest common prefix + of canonical PATH1 and PATH2, ensuring only full path components + are matched. Return 0 on no match. */ +ATTRIBUTE_PURE +static int +path_common_prefix (char const *path1, char const *path2) +{ + int i = 0; + int ret = 0; + + /* We already know path1[0] and path2[0] are '/'. Special case + '//', which is only present in a canonical name on platforms + where it is distinct. */ + if ((path1[1] == '/') != (path2[1] == '/')) + return 0; + + while (*path1 && *path2) + { + if (*path1 != *path2) + break; + if (*path1 == '/') + ret = i + 1; + path1++; + path2++; + i++; + } + + if ((!*path1 && !*path2) + || (!*path1 && *path2 == '/') + || (!*path2 && *path1 == '/')) + ret = i; + + return ret; +} + +/* Either output STR to stdout or + if *PBUF is not null then append STR to *PBUF + and update *PBUF to point to the end of the buffer + and adjust *PLEN to reflect the remaining space. + Return TRUE on failure. */ +static bool +buffer_or_output (char const *str, char **pbuf, size_t *plen) +{ + if (*pbuf) + { + size_t slen = strlen (str); + if (slen >= *plen) + return true; + memcpy (*pbuf, str, slen + 1); + *pbuf += slen; + *plen -= slen; + } + else + { + fputs (str, stdout); + } + + return false; +} + +/* Output the relative representation if possible. + If BUF is non-null, write to that buffer rather than to stdout. */ +bool +relpath (char const *can_fname, char const *can_reldir, char *buf, size_t len) +{ + bool buf_err = false; + + /* Skip the prefix common to --relative-to and path. */ + int common_index = path_common_prefix (can_reldir, can_fname); + if (!common_index) + return false; + + char const *relto_suffix = can_reldir + common_index; + char const *fname_suffix = can_fname + common_index; + + /* Skip over extraneous '/'. */ + if (*relto_suffix == '/') + relto_suffix++; + if (*fname_suffix == '/') + fname_suffix++; + + /* Replace remaining components of --relative-to with '..', to get + to a common directory. Then output the remainder of fname. */ + if (*relto_suffix) + { + buf_err |= buffer_or_output ("..", &buf, &len); + for (; *relto_suffix; ++relto_suffix) + { + if (*relto_suffix == '/') + buf_err |= buffer_or_output ("/..", &buf, &len); + } + + if (*fname_suffix) + { + buf_err |= buffer_or_output ("/", &buf, &len); + buf_err |= buffer_or_output (fname_suffix, &buf, &len); + } + } + else + { + buf_err |= buffer_or_output (*fname_suffix ? fname_suffix : ".", + &buf, &len); + } + + if (buf_err) + error (0, ENAMETOOLONG, "%s", _("generating relative path")); + + return !buf_err; +} diff --git a/src/relpath.h b/src/relpath.h new file mode 100644 index 0000000..351e7f1 --- /dev/null +++ b/src/relpath.h @@ -0,0 +1,25 @@ +/* relpath - print the relative path + Copyright (C) 2012-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady. */ + +#ifndef _RELPATH_H +# define _RELPATH_H + +extern bool +relpath (char const *can_fname, char const *can_reldir, char *buf, size_t len); + +#endif diff --git a/src/remove.c b/src/remove.c new file mode 100644 index 0000000..d569e8b --- /dev/null +++ b/src/remove.c @@ -0,0 +1,648 @@ +/* remove.c -- core functions for removing files and directories + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from rm.c, librarified, then rewritten twice by Jim Meyering. */ + +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "file-type.h" +#include "filenamecat.h" +#include "ignore-value.h" +#include "remove.h" +#include "root-dev-ino.h" +#include "stat-time.h" +#include "write-any-file.h" +#include "xfts.h" +#include "yesno.h" + +/* The prompt function may be called twice for a given directory. + The first time, we ask whether to descend into it, and the + second time, we ask whether to remove it. */ +enum Prompt_action + { + PA_DESCEND_INTO_DIR = 2, + PA_REMOVE_DIR + }; + +/* D_TYPE(D) is the type of directory entry D if known, DT_UNKNOWN + otherwise. */ +#if ! HAVE_STRUCT_DIRENT_D_TYPE +/* Any int values will do here, so long as they're distinct. + Undef any existing macros out of the way. */ +# undef DT_UNKNOWN +# undef DT_DIR +# undef DT_LNK +# define DT_UNKNOWN 0 +# define DT_DIR 1 +# define DT_LNK 2 +#endif + +/* Like fstatat, but cache on POSIX-compatible systems. */ +static int +cache_fstatat (int fd, char const *file, struct stat *st, int flag) +{ +#if HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC + /* If ST->st_atim.tv_nsec is -1, the status has not been gotten yet. + If less than -1, fstatat failed with errno == ST->st_ino. + Otherwise, the status has already been gotten, so return 0. */ + if (0 <= st->st_atim.tv_nsec) + return 0; + if (st->st_atim.tv_nsec == -1) + { + if (fstatat (fd, file, st, flag) == 0) + return 0; + st->st_atim.tv_nsec = -2; + st->st_ino = errno; + } + errno = st->st_ino; + return -1; +#else + return fstatat (fd, file, st, flag); +#endif +} + +/* Initialize a fstatat cache *ST. Return ST for convenience. */ +static inline struct stat * +cache_stat_init (struct stat *st) +{ +#if HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC + st->st_atim.tv_nsec = -1; +#endif + return st; +} + +/* Return 1 if FILE is an unwritable non-symlink, + 0 if it is writable or some other type of file, + -1 and set errno if there is some problem in determining the answer. + Set *BUF to the file status. */ +static int +write_protected_non_symlink (int fd_cwd, + char const *file, + struct stat *buf) +{ + if (can_write_any_file ()) + return 0; + if (cache_fstatat (fd_cwd, file, buf, AT_SYMLINK_NOFOLLOW) != 0) + return -1; + if (S_ISLNK (buf->st_mode)) + return 0; + /* Here, we know FILE is not a symbolic link. */ + + /* In order to be reentrant -- i.e., to avoid changing the working + directory, and at the same time to be able to deal with alternate + access control mechanisms (ACLs, xattr-style attributes) and + arbitrarily deep trees -- we need a function like eaccessat, i.e., + like Solaris' eaccess, but fd-relative, in the spirit of openat. */ + + /* In the absence of a native eaccessat function, here are some of + the implementation choices [#4 and #5 were suggested by Paul Eggert]: + 1) call openat with O_WRONLY|O_NOCTTY + Disadvantage: may create the file and doesn't work for directory, + may mistakenly report 'unwritable' for EROFS or ACLs even though + perm bits say the file is writable. + + 2) fake eaccessat (save_cwd, fchdir, call euidaccess, restore_cwd) + Disadvantage: changes working directory (not reentrant) and can't + work if save_cwd fails. + + 3) if (euidaccess (full_name, W_OK) == 0) + Disadvantage: doesn't work if full_name is too long. + Inefficient for very deep trees (O(Depth^2)). + + 4) If the full pathname is sufficiently short (say, less than + PATH_MAX or 8192 bytes, whichever is shorter): + use method (3) (i.e., euidaccess (full_name, W_OK)); + Otherwise: vfork, fchdir in the child, run euidaccess in the + child, then the child exits with a status that tells the parent + whether euidaccess succeeded. + + This avoids the O(N**2) algorithm of method (3), and it also avoids + the failure-due-to-too-long-file-names of method (3), but it's fast + in the normal shallow case. It also avoids the lack-of-reentrancy + and the save_cwd problems. + Disadvantage; it uses a process slot for very-long file names, + and would be very slow for hierarchies with many such files. + + 5) If the full file name is sufficiently short (say, less than + PATH_MAX or 8192 bytes, whichever is shorter): + use method (3) (i.e., euidaccess (full_name, W_OK)); + Otherwise: look just at the file bits. Perhaps issue a warning + the first time this occurs. + + This is like (4), except for the "Otherwise" case where it isn't as + "perfect" as (4) but is considerably faster. It conforms to current + POSIX, and is uniformly better than what Solaris and FreeBSD do (they + mess up with long file names). */ + + { + if (faccessat (fd_cwd, file, W_OK, AT_EACCESS) == 0) + return 0; + + return errno == EACCES ? 1 : -1; + } +} + +/* Return the status of the directory identified by FTS and ENT. + This is -1 if the directory is empty, 0 if it is nonempty, + and a positive error number if there was trouble determining the status, + e.g., it is not a directory, or permissions problems, or I/O errors. + Use *DIR_STATUS as a cache for the status. */ +static int +get_dir_status (FTS const *fts, FTSENT const *ent, int *dir_status) +{ + if (*dir_status == DS_UNKNOWN) + *dir_status = directory_status (fts->fts_cwd_fd, ent->fts_accpath); + return *dir_status; +} + +/* Prompt whether to remove FILENAME, if required via a combination of + the options specified by X and/or file attributes. If the file may + be removed, return RM_OK or RM_USER_ACCEPTED, the latter if the user + was prompted and accepted. If the user declines to remove the file, + return RM_USER_DECLINED. If not ignoring missing files and we + cannot lstat FILENAME, then return RM_ERROR. + + IS_DIR is true if ENT designates a directory, false otherwise. + + Depending on MODE, ask whether to 'descend into' or to 'remove' the + directory FILENAME. MODE is ignored when FILENAME is not a directory. + Use and update *DIR_STATUS as needed, via the conventions of + get_dir_status. */ +static enum RM_status +prompt (FTS const *fts, FTSENT const *ent, bool is_dir, + struct rm_options const *x, enum Prompt_action mode, + int *dir_status) +{ + int fd_cwd = fts->fts_cwd_fd; + char const *full_name = ent->fts_path; + char const *filename = ent->fts_accpath; + struct stat st; + struct stat *sbuf = &st; + cache_stat_init (sbuf); + + int dirent_type = is_dir ? DT_DIR : DT_UNKNOWN; + int write_protected = 0; + + /* When nonzero, this indicates that we failed to remove a child entry, + either because the user declined an interactive prompt, or due to + some other failure, like permissions. */ + if (ent->fts_number) + return RM_USER_DECLINED; + + if (x->interactive == RMI_NEVER) + return RM_OK; + + int wp_errno = 0; + if (!x->ignore_missing_files + && (x->interactive == RMI_ALWAYS || x->stdin_tty) + && dirent_type != DT_LNK) + { + write_protected = write_protected_non_symlink (fd_cwd, filename, sbuf); + wp_errno = errno; + } + + if (write_protected || x->interactive == RMI_ALWAYS) + { + if (0 <= write_protected && dirent_type == DT_UNKNOWN) + { + if (cache_fstatat (fd_cwd, filename, sbuf, AT_SYMLINK_NOFOLLOW) == 0) + { + if (S_ISLNK (sbuf->st_mode)) + dirent_type = DT_LNK; + else if (S_ISDIR (sbuf->st_mode)) + dirent_type = DT_DIR; + /* Otherwise it doesn't matter, so leave it DT_UNKNOWN. */ + } + else + { + /* This happens, e.g., with 'rm '''. */ + write_protected = -1; + wp_errno = errno; + } + } + + if (0 <= write_protected) + switch (dirent_type) + { + case DT_LNK: + /* Using permissions doesn't make sense for symlinks. */ + if (x->interactive != RMI_ALWAYS) + return RM_OK; + break; + + case DT_DIR: + /* Unless we're either deleting directories or deleting + recursively, we want to raise an EISDIR error rather than + prompting the user */ + if ( ! (x->recursive + || (x->remove_empty_directories + && get_dir_status (fts, ent, dir_status) != 0))) + { + write_protected = -1; + wp_errno = *dir_status <= 0 ? EISDIR : *dir_status; + } + break; + } + + char const *quoted_name = quoteaf (full_name); + + if (write_protected < 0) + { + error (0, wp_errno, _("cannot remove %s"), quoted_name); + return RM_ERROR; + } + + /* Issue the prompt. */ + if (dirent_type == DT_DIR + && mode == PA_DESCEND_INTO_DIR + && get_dir_status (fts, ent, dir_status) == DS_NONEMPTY) + fprintf (stderr, + (write_protected + ? _("%s: descend into write-protected directory %s? ") + : _("%s: descend into directory %s? ")), + program_name, quoted_name); + else if (0 < *dir_status) + { + if ( ! (x->remove_empty_directories && *dir_status == EACCES)) + { + error (0, *dir_status, _("cannot remove %s"), quoted_name); + return RM_ERROR; + } + + /* The following code can lead to a successful deletion only with + the --dir (-d) option (remove_empty_directories) and an empty + inaccessible directory. In the first prompt call for a directory, + we'd normally ask whether to descend into it, but in this case + (it's inaccessible), that is not possible, so don't prompt. */ + if (mode == PA_DESCEND_INTO_DIR) + return RM_OK; + + fprintf (stderr, + _("%s: attempt removal of inaccessible directory %s? "), + program_name, quoted_name); + } + else + { + if (cache_fstatat (fd_cwd, filename, sbuf, AT_SYMLINK_NOFOLLOW) != 0) + { + error (0, errno, _("cannot remove %s"), quoted_name); + return RM_ERROR; + } + + fprintf (stderr, + (write_protected + /* TRANSLATORS: In the next two strings the second %s is + replaced by the type of the file. To avoid grammatical + problems, it may be more convenient to translate these + strings instead as: "%1$s: %3$s is write-protected and + is of type '%2$s' -- remove it? ". */ + ? _("%s: remove write-protected %s %s? ") + : _("%s: remove %s %s? ")), + program_name, file_type (sbuf), quoted_name); + } + + return yesno () ? RM_USER_ACCEPTED : RM_USER_DECLINED; + } + return RM_OK; +} + +/* When a function like unlink, rmdir, or fstatat fails with an errno + value of ERRNUM, return true if the specified file system object + is guaranteed not to exist; otherwise, return false. */ +static inline bool +nonexistent_file_errno (int errnum) +{ + /* Do not include ELOOP here, since the specified file may indeed + exist, but be (in)accessible only via too long a symlink chain. + Likewise for ENAMETOOLONG, since rm -f ./././.../foo may fail + if the "..." part expands to a long enough sequence of "./"s, + even though ./foo does indeed exist. + + Another case to consider is when a particular name is invalid for + a given file system. In 2011, smbfs returns EINVAL, but the next + revision of POSIX will require EILSEQ for that situation: + https://austingroupbugs.net/view.php?id=293 + */ + + switch (errnum) + { + case EILSEQ: + case EINVAL: + case ENOENT: + case ENOTDIR: + return true; + default: + return false; + } +} + +/* Encapsulate the test for whether the errno value, ERRNUM, is ignorable. */ +static inline bool +ignorable_missing (struct rm_options const *x, int errnum) +{ + return x->ignore_missing_files && nonexistent_file_errno (errnum); +} + +/* Tell fts not to traverse into the hierarchy at ENT. */ +static void +fts_skip_tree (FTS *fts, FTSENT *ent) +{ + fts_set (fts, ent, FTS_SKIP); + /* Ensure that we do not process ENT a second time. */ + ignore_value (fts_read (fts)); +} + +/* Upon unlink failure, or when the user declines to remove ENT, mark + each of its ancestor directories, so that we know not to prompt for + its removal. */ +static void +mark_ancestor_dirs (FTSENT *ent) +{ + FTSENT *p; + for (p = ent->fts_parent; FTS_ROOTLEVEL <= p->fts_level; p = p->fts_parent) + { + if (p->fts_number) + break; + p->fts_number = 1; + } +} + +/* Remove the file system object specified by ENT. IS_DIR specifies + whether it is expected to be a directory or non-directory. + Return RM_OK upon success, else RM_ERROR. */ +static enum RM_status +excise (FTS *fts, FTSENT *ent, struct rm_options const *x, bool is_dir) +{ + int flag = is_dir ? AT_REMOVEDIR : 0; + if (unlinkat (fts->fts_cwd_fd, ent->fts_accpath, flag) == 0) + { + if (x->verbose) + { + printf ((is_dir + ? _("removed directory %s\n") + : _("removed %s\n")), quoteaf (ent->fts_path)); + } + return RM_OK; + } + + /* The unlinkat from kernels like linux-2.6.32 reports EROFS even for + nonexistent files. When the file is indeed missing, map that to ENOENT, + so that rm -f ignores it, as required. Even without -f, this is useful + because it makes rm print the more precise diagnostic. */ + if (errno == EROFS) + { + struct stat st; + if ( ! (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st, + AT_SYMLINK_NOFOLLOW) + && errno == ENOENT)) + errno = EROFS; + } + + if (ignorable_missing (x, errno)) + return RM_OK; + + /* When failing to rmdir an unreadable directory, we see errno values + like EISDIR or ENOTDIR (or, on Solaris 10, EEXIST), but they would be + meaningless in a diagnostic. When that happens, use the earlier, more + descriptive errno value. */ + if (ent->fts_info == FTS_DNR + && (errno == ENOTEMPTY || errno == EISDIR || errno == ENOTDIR + || errno == EEXIST) + && ent->fts_errno != 0) + errno = ent->fts_errno; + error (0, errno, _("cannot remove %s"), quoteaf (ent->fts_path)); + mark_ancestor_dirs (ent); + return RM_ERROR; +} + +/* This function is called once for every file system object that fts + encounters. fts performs a depth-first traversal. + A directory is usually processed twice, first with fts_info == FTS_D, + and later, after all of its entries have been processed, with FTS_DP. + Return RM_ERROR upon error, RM_USER_DECLINED for a negative response + to an interactive prompt, and otherwise, RM_OK. */ +static enum RM_status +rm_fts (FTS *fts, FTSENT *ent, struct rm_options const *x) +{ + int dir_status = DS_UNKNOWN; + + switch (ent->fts_info) + { + case FTS_D: /* preorder directory */ + if (! x->recursive + && !(x->remove_empty_directories + && get_dir_status (fts, ent, &dir_status) != 0)) + { + /* This is the first (pre-order) encounter with a directory + that we cannot delete. + Not recursive, and it's not an empty directory (if we're removing + them) so arrange to skip contents. */ + int err = x->remove_empty_directories ? ENOTEMPTY : EISDIR; + error (0, err, _("cannot remove %s"), quoteaf (ent->fts_path)); + mark_ancestor_dirs (ent); + fts_skip_tree (fts, ent); + return RM_ERROR; + } + + /* Perform checks that can apply only for command-line arguments. */ + if (ent->fts_level == FTS_ROOTLEVEL) + { + /* POSIX says: + If the basename of a command line argument is "." or "..", + diagnose it and do nothing more with that argument. */ + if (dot_or_dotdot (last_component (ent->fts_accpath))) + { + error (0, 0, + _("refusing to remove %s or %s directory: skipping %s"), + quoteaf_n (0, "."), quoteaf_n (1, ".."), + quoteaf_n (2, ent->fts_path)); + fts_skip_tree (fts, ent); + return RM_ERROR; + } + + /* POSIX also says: + If a command line argument resolves to "/" (and --preserve-root + is in effect -- default) diagnose and skip it. */ + if (ROOT_DEV_INO_CHECK (x->root_dev_ino, ent->fts_statp)) + { + ROOT_DEV_INO_WARN (ent->fts_path); + fts_skip_tree (fts, ent); + return RM_ERROR; + } + + /* If a command line argument is a mount point and + --preserve-root=all is in effect, diagnose and skip it. + This doesn't handle "/", but that's handled above. */ + if (x->preserve_all_root) + { + bool failed = false; + char *parent = file_name_concat (ent->fts_accpath, "..", nullptr); + struct stat statbuf; + + if (!parent || lstat (parent, &statbuf)) + { + error (0, 0, + _("failed to stat %s: skipping %s"), + quoteaf_n (0, parent), + quoteaf_n (1, ent->fts_accpath)); + failed = true; + } + + free (parent); + + if (failed || fts->fts_dev != statbuf.st_dev) + { + if (! failed) + { + error (0, 0, + _("skipping %s, since it's on a different device"), + quoteaf (ent->fts_path)); + error (0, 0, _("and --preserve-root=all is in effect")); + } + fts_skip_tree (fts, ent); + return RM_ERROR; + } + } + } + + { + enum RM_status s = prompt (fts, ent, true /*is_dir*/, x, + PA_DESCEND_INTO_DIR, &dir_status); + + if (s == RM_USER_ACCEPTED && dir_status == DS_EMPTY) + { + /* When we know (from prompt when in interactive mode) + that this is an empty directory, don't prompt twice. */ + s = excise (fts, ent, x, true); + if (s == RM_OK) + fts_skip_tree (fts, ent); + } + + if (! (s == RM_OK || s == RM_USER_ACCEPTED)) + { + mark_ancestor_dirs (ent); + fts_skip_tree (fts, ent); + } + + return s; + } + + case FTS_F: /* regular file */ + case FTS_NS: /* stat(2) failed */ + case FTS_SL: /* symbolic link */ + case FTS_SLNONE: /* symbolic link without target */ + case FTS_DP: /* postorder directory */ + case FTS_DNR: /* unreadable directory */ + case FTS_NSOK: /* e.g., dangling symlink */ + case FTS_DEFAULT: /* none of the above */ + { + /* With --one-file-system, do not attempt to remove a mount point. + fts' FTS_XDEV ensures that we don't process any entries under + the mount point. */ + if (ent->fts_info == FTS_DP + && x->one_file_system + && FTS_ROOTLEVEL < ent->fts_level + && ent->fts_statp->st_dev != fts->fts_dev) + { + mark_ancestor_dirs (ent); + error (0, 0, _("skipping %s, since it's on a different device"), + quoteaf (ent->fts_path)); + return RM_ERROR; + } + + bool is_dir = ent->fts_info == FTS_DP || ent->fts_info == FTS_DNR; + enum RM_status s = prompt (fts, ent, is_dir, x, PA_REMOVE_DIR, + &dir_status); + if (! (s == RM_OK || s == RM_USER_ACCEPTED)) + return s; + return excise (fts, ent, x, is_dir); + } + + case FTS_DC: /* directory that causes cycles */ + emit_cycle_warning (ent->fts_path); + fts_skip_tree (fts, ent); + return RM_ERROR; + + case FTS_ERR: + /* Various failures, from opendir to ENOMEM, to failure to "return" + to preceding directory, can provoke this. */ + error (0, ent->fts_errno, _("traversal failed: %s"), + quotef (ent->fts_path)); + fts_skip_tree (fts, ent); + return RM_ERROR; + + default: + error (0, 0, _("unexpected failure: fts_info=%d: %s\n" + "please report to %s"), + ent->fts_info, + quotef (ent->fts_path), + PACKAGE_BUGREPORT); + abort (); + } +} + +/* Remove FILEs, honoring options specified via X. + Return RM_OK if successful. */ +enum RM_status +rm (char *const *file, struct rm_options const *x) +{ + enum RM_status rm_status = RM_OK; + + if (*file) + { + int bit_flags = (FTS_CWDFD + | FTS_NOSTAT + | FTS_PHYSICAL); + + if (x->one_file_system) + bit_flags |= FTS_XDEV; + + FTS *fts = xfts_open (file, bit_flags, nullptr); + + while (true) + { + FTSENT *ent; + + ent = fts_read (fts); + if (ent == nullptr) + { + if (errno != 0) + { + error (0, errno, _("fts_read failed")); + rm_status = RM_ERROR; + } + break; + } + + enum RM_status s = rm_fts (fts, ent, x); + + affirm (VALID_STATUS (s)); + UPDATE_STATUS (rm_status, s); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + rm_status = RM_ERROR; + } + } + + return rm_status; +} diff --git a/src/remove.h b/src/remove.h new file mode 100644 index 0000000..fa81a4f --- /dev/null +++ b/src/remove.h @@ -0,0 +1,103 @@ +/* Remove directory entries. + + Copyright (C) 1998-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef REMOVE_H +# define REMOVE_H + +# include "dev-ino.h" + +enum rm_interactive +{ + /* Start with any number larger than 1, so that any legacy tests + against values of 0 or 1 will fail. */ + RMI_ALWAYS = 3, + RMI_SOMETIMES, + RMI_NEVER +}; + +struct rm_options +{ + /* If true, ignore nonexistent files. */ + bool ignore_missing_files; + + /* If true, query the user about whether to remove each file. */ + enum rm_interactive interactive; + + /* FIXME: remove */ + /* If true, do not traverse into (or remove) any directory that is + on a file system (i.e., that has a different device number) other + than that of the corresponding command line argument. Note that + even without this option, rm will fail in the end, due to its + probable inability to remove the mount point. But there, the + diagnostic comes too late -- after removing all contents. */ + bool one_file_system; + + /* If true, recursively remove directories. */ + bool recursive; + + /* If true, remove empty directories. */ + bool remove_empty_directories; + + /* Pointer to the device and inode numbers of '/', when --recursive + and preserving '/'. Otherwise null. */ + struct dev_ino *root_dev_ino; + + /* If true, do not traverse into (or remove) any directory that is + the root of a file system. I.e., a separate device. */ + bool preserve_all_root; + + /* If nonzero, stdin is a tty. */ + bool stdin_tty; + + /* If true, display the name of each file removed. */ + bool verbose; + + /* If true, treat the failure by the rm function to restore the + current working directory as a fatal error. I.e., if this field + is true and the rm function cannot restore cwd, it must exit with + a nonzero status. Some applications require that the rm function + restore cwd (e.g., mv) and some others do not (e.g., rm, + in many cases). */ + bool require_restore_cwd; +}; + +enum RM_status +{ + /* These must be listed in order of increasing seriousness. */ + RM_OK = 2, + RM_USER_ACCEPTED, + RM_USER_DECLINED, + RM_ERROR, + RM_NONEMPTY_DIR +}; + +# define VALID_STATUS(S) \ + ((S) == RM_OK || (S) == RM_USER_ACCEPTED || (S) == RM_USER_DECLINED \ + || (S) == RM_ERROR) + +# define UPDATE_STATUS(S, New_value) \ + do \ + { \ + if ((New_value) == RM_ERROR \ + || ((New_value) == RM_USER_DECLINED && (S) == RM_OK)) \ + (S) = (New_value); \ + } \ + while (0) + +extern enum RM_status rm (char *const *file, struct rm_options const *x); + +#endif diff --git a/src/rm.c b/src/rm.c new file mode 100644 index 0000000..ecd7699 --- /dev/null +++ b/src/rm.c @@ -0,0 +1,369 @@ +/* 'rm' file deletion utility for GNU. + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Initially written by Paul Rubin, David MacKenzie, and Richard Stallman. + Reworked to use chdir and avoid recursion, and later, rewritten + once again, to use fts, by Jim Meyering. */ + +#include +#include +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "remove.h" +#include "root-dev-ino.h" +#include "yesno.h" +#include "priv-set.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "rm" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Richard M. Stallman"), \ + proper_name ("Jim Meyering") + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + INTERACTIVE_OPTION = CHAR_MAX + 1, + ONE_FILE_SYSTEM, + NO_PRESERVE_ROOT, + PRESERVE_ROOT, + PRESUME_INPUT_TTY_OPTION +}; + +enum interactive_type + { + interactive_never, /* 0: no option or --interactive=never */ + interactive_once, /* 1: -I or --interactive=once */ + interactive_always /* 2: default, -i or --interactive=always */ + }; + +static struct option const long_opts[] = +{ + {"force", no_argument, nullptr, 'f'}, + {"interactive", optional_argument, nullptr, INTERACTIVE_OPTION}, + + {"one-file-system", no_argument, nullptr, ONE_FILE_SYSTEM}, + {"no-preserve-root", no_argument, nullptr, NO_PRESERVE_ROOT}, + {"preserve-root", optional_argument, nullptr, PRESERVE_ROOT}, + + /* This is solely for testing. Do not document. */ + /* It is relatively difficult to ensure that there is a tty on stdin. + Since rm acts differently depending on that, without this option, + it'd be harder to test the parts of rm that depend on that setting. */ + {"-presume-input-tty", no_argument, nullptr, PRESUME_INPUT_TTY_OPTION}, + + {"recursive", no_argument, nullptr, 'r'}, + {"dir", no_argument, nullptr, 'd'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static char const *const interactive_args[] = +{ + "never", "no", "none", + "once", + "always", "yes", nullptr +}; +static enum interactive_type const interactive_types[] = +{ + interactive_never, interactive_never, interactive_never, + interactive_once, + interactive_always, interactive_always +}; +ARGMATCH_VERIFY (interactive_args, interactive_types); + +/* Advise the user about invalid usages like "rm -foo" if the file + "-foo" exists, assuming ARGC and ARGV are as with 'main'. */ + +static void +diagnose_leading_hyphen (int argc, char **argv) +{ + /* OPTIND is unreliable, so iterate through the arguments looking + for a file name that looks like an option. */ + + for (int i = 1; i < argc; i++) + { + char const *arg = argv[i]; + struct stat st; + + if (arg[0] == '-' && arg[1] && lstat (arg, &st) == 0) + { + fprintf (stderr, + _("Try '%s ./%s' to remove the file %s.\n"), + argv[0], + quotearg_n_style (1, shell_escape_quoting_style, arg), + quoteaf (arg)); + break; + } + } +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name); + fputs (_("\ +Remove (unlink) the FILE(s).\n\ +\n\ + -f, --force ignore nonexistent files and arguments, never prompt\n\ + -i prompt before every removal\n\ +"), stdout); + fputs (_("\ + -I prompt once before removing more than three files, or\n\ + when removing recursively; less intrusive than -i,\n\ + while still giving protection against most mistakes\n\ + --interactive[=WHEN] prompt according to WHEN: never, once (-I), or\n\ + always (-i); without WHEN, prompt always\n\ +"), stdout); + fputs (_("\ + --one-file-system when removing a hierarchy recursively, skip any\n\ + directory that is on a file system different from\n\ + that of the corresponding command line argument\n\ +"), stdout); + fputs (_("\ + --no-preserve-root do not treat '/' specially\n\ + --preserve-root[=all] do not remove '/' (default);\n\ + with 'all', reject any command line argument\n\ + on a separate device from its parent\n\ +"), stdout); + fputs (_("\ + -r, -R, --recursive remove directories and their contents recursively\n\ + -d, --dir remove empty directories\n\ + -v, --verbose explain what is being done\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +By default, rm does not remove directories. Use the --recursive (-r or -R)\n\ +option to remove each listed directory, too, along with all of its contents.\n\ +"), stdout); + printf (_("\ +\n\ +To remove a file whose name starts with a '-', for example '-foo',\n\ +use one of these commands:\n\ + %s -- -foo\n\ +\n\ + %s ./-foo\n\ +"), + program_name, program_name); + fputs (_("\ +\n\ +Note that if you use rm to remove a file, it might be possible to recover\n\ +some of its contents, given sufficient expertise and/or time. For greater\n\ +assurance that the contents are truly unrecoverable, consider using shred(1).\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +rm_option_init (struct rm_options *x) +{ + x->ignore_missing_files = false; + x->interactive = RMI_SOMETIMES; + x->one_file_system = false; + x->remove_empty_directories = false; + x->recursive = false; + x->root_dev_ino = nullptr; + x->preserve_all_root = false; + x->stdin_tty = isatty (STDIN_FILENO); + x->verbose = false; + + /* Since this program exits immediately after calling 'rm', rm need not + expend unnecessary effort to preserve the initial working directory. */ + x->require_restore_cwd = false; +} + +int +main (int argc, char **argv) +{ + bool preserve_root = true; + struct rm_options x; + bool prompt_once = false; + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdin); + + rm_option_init (&x); + + /* Try to disable the ability to unlink a directory. */ + priv_set_remove_linkdir (); + + while ((c = getopt_long (argc, argv, "dfirvIR", long_opts, nullptr)) != -1) + { + switch (c) + { + case 'd': + x.remove_empty_directories = true; + break; + + case 'f': + x.interactive = RMI_NEVER; + x.ignore_missing_files = true; + prompt_once = false; + break; + + case 'i': + x.interactive = RMI_ALWAYS; + x.ignore_missing_files = false; + prompt_once = false; + break; + + case 'I': + x.interactive = RMI_SOMETIMES; + x.ignore_missing_files = false; + prompt_once = true; + break; + + case 'r': + case 'R': + x.recursive = true; + break; + + case INTERACTIVE_OPTION: + { + int i; + if (optarg) + i = XARGMATCH ("--interactive", optarg, interactive_args, + interactive_types); + else + i = interactive_always; + switch (i) + { + case interactive_never: + x.interactive = RMI_NEVER; + prompt_once = false; + break; + + case interactive_once: + x.interactive = RMI_SOMETIMES; + x.ignore_missing_files = false; + prompt_once = true; + break; + + case interactive_always: + x.interactive = RMI_ALWAYS; + x.ignore_missing_files = false; + prompt_once = false; + break; + } + break; + } + + case ONE_FILE_SYSTEM: + x.one_file_system = true; + break; + + case NO_PRESERVE_ROOT: + if (! STREQ (argv[optind - 1], "--no-preserve-root")) + error (EXIT_FAILURE, 0, + _("you may not abbreviate the --no-preserve-root option")); + preserve_root = false; + break; + + case PRESERVE_ROOT: + if (optarg) + { + if STREQ (optarg, "all") + x.preserve_all_root = true; + else + error (EXIT_FAILURE, 0, + _("unrecognized --preserve-root argument: %s"), + quoteaf (optarg)); + } + preserve_root = true; + break; + + case PRESUME_INPUT_TTY_OPTION: + x.stdin_tty = true; + break; + + case 'v': + x.verbose = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + diagnose_leading_hyphen (argc, argv); + usage (EXIT_FAILURE); + } + } + + if (argc <= optind) + { + if (x.ignore_missing_files) + return EXIT_SUCCESS; + else + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + } + + if (x.recursive && preserve_root) + { + static struct dev_ino dev_ino_buf; + x.root_dev_ino = get_root_dev_ino (&dev_ino_buf); + if (x.root_dev_ino == nullptr) + error (EXIT_FAILURE, errno, _("failed to get attributes of %s"), + quoteaf ("/")); + } + + uintmax_t n_files = argc - optind; + char **file = argv + optind; + + if (prompt_once && (x.recursive || 3 < n_files)) + { + fprintf (stderr, + (x.recursive + ? ngettext ("%s: remove %"PRIuMAX" argument recursively? ", + "%s: remove %"PRIuMAX" arguments recursively? ", + select_plural (n_files)) + : ngettext ("%s: remove %"PRIuMAX" argument? ", + "%s: remove %"PRIuMAX" arguments? ", + select_plural (n_files))), + program_name, n_files); + if (!yesno ()) + return EXIT_SUCCESS; + } + + enum RM_status status = rm (file, &x); + affirm (VALID_STATUS (status)); + return status == RM_ERROR ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/rmdir.c b/src/rmdir.c new file mode 100644 index 0000000..1a95869 --- /dev/null +++ b/src/rmdir.c @@ -0,0 +1,298 @@ +/* rmdir -- remove directories + + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Options: + -p, --parent Remove any parent dirs that are explicitly mentioned + in an argument, if they become empty after the + argument file is removed. + + David MacKenzie */ + +#include +#include +#include +#include + +#include "system.h" +#include "prog-fprintf.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "rmdir" + +#define AUTHORS proper_name ("David MacKenzie") + +/* If true, remove empty parent directories. */ +static bool remove_empty_parents; + +/* If true, don't treat failure to remove a nonempty directory + as an error. */ +static bool ignore_fail_on_non_empty; + +/* If true, output a diagnostic for every directory processed. */ +static bool verbose; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + IGNORE_FAIL_ON_NON_EMPTY_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + /* Don't name this '--force' because it's not close enough in meaning + to e.g. rm's -f option. */ + {"ignore-fail-on-non-empty", no_argument, nullptr, + IGNORE_FAIL_ON_NON_EMPTY_OPTION}, + + {"path", no_argument, nullptr, 'p'}, /* Deprecated. */ + {"parents", no_argument, nullptr, 'p'}, + {"verbose", no_argument, nullptr, 'v'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Return true if ERROR_NUMBER is one of the values associated + with a failed rmdir due to non-empty target directory. */ +static bool +errno_rmdir_non_empty (int error_number) +{ + return error_number == ENOTEMPTY || error_number == EEXIST; +} + +/* Return true if when rmdir fails with errno == ERROR_NUMBER + the directory may be non empty. */ +static bool +errno_may_be_non_empty (int error_number) +{ + switch (error_number) + { + case EACCES: + case EPERM: + case EROFS: + case EBUSY: + return true; + default: + return false; + } +} + +/* Return true if an rmdir failure with errno == error_number + for DIR is ignorable. */ +static bool +ignorable_failure (int error_number, char const *dir) +{ + return (ignore_fail_on_non_empty + && (errno_rmdir_non_empty (error_number) + || (errno_may_be_non_empty (error_number) + && directory_status (AT_FDCWD, dir) == DS_NONEMPTY))); +} + +/* Remove any empty parent directories of DIR. + If DIR contains slash characters, at least one of them + (beginning with the rightmost) is replaced with a NUL byte. + Return true if successful. */ + +static bool +remove_parents (char *dir) +{ + char *slash; + bool ok = true; + + strip_trailing_slashes (dir); + while (true) + { + slash = strrchr (dir, '/'); + if (slash == nullptr) + break; + /* Remove any characters after the slash, skipping any extra + slashes in a row. */ + while (slash > dir && *slash == '/') + --slash; + slash[1] = 0; + + /* Give a diagnostic for each attempted removal if --verbose. */ + if (verbose) + prog_fprintf (stdout, _("removing directory, %s"), quoteaf (dir)); + + ok = (rmdir (dir) == 0); + int rmdir_errno = errno; + + if (! ok) + { + /* Stop quietly if --ignore-fail-on-non-empty. */ + if (ignorable_failure (rmdir_errno, dir)) + { + ok = true; + } + else + { + char const *error_msg; + if (rmdir_errno != ENOTDIR) + { + /* Barring race conditions, + DIR is expected to be a directory. */ + error_msg = N_("failed to remove directory %s"); + } + else + { + /* A path component could be a symbolic link */ + error_msg = N_("failed to remove %s"); + } + error (0, rmdir_errno, _(error_msg), quoteaf (dir)); + } + break; + } + } + return ok; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... DIRECTORY...\n"), program_name); + fputs (_("\ +Remove the DIRECTORY(ies), if they are empty.\n\ +\n\ +"), stdout); + fputs (_("\ + --ignore-fail-on-non-empty\n\ + ignore each failure to remove a non-empty directory\n\ +"), stdout); + fputs (_("\ + -p, --parents remove DIRECTORY and its ancestors;\n\ + e.g., 'rmdir -p a/b' is similar to 'rmdir a/b a'\n\ +\n\ +"), stdout); + fputs (_("\ + -v, --verbose output a diagnostic for every directory processed\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + bool ok = true; + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + remove_empty_parents = false; + + while ((optc = getopt_long (argc, argv, "pv", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'p': + remove_empty_parents = true; + break; + case IGNORE_FAIL_ON_NON_EMPTY_OPTION: + ignore_fail_on_non_empty = true; + break; + case 'v': + verbose = true; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (optind == argc) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + for (; optind < argc; ++optind) + { + char *dir = argv[optind]; + + /* Give a diagnostic for each attempted removal if --verbose. */ + if (verbose) + prog_fprintf (stdout, _("removing directory, %s"), quoteaf (dir)); + + if (rmdir (dir) != 0) + { + int rmdir_errno = errno; + if (ignorable_failure (rmdir_errno, dir)) + continue; + + /* Distinguish the case for a symlink with trailing slash. + On Linux, rmdir(2) confusingly does not follow the symlink, + thus giving the errno ENOTDIR, while on other systems the symlink + is followed. We don't provide consistent behavior here, + but at least we provide a more accurate error message. */ + bool custom_error = false; + if (rmdir_errno == ENOTDIR) + { + char const *last_unix_slash = strrchr (dir, '/'); + if (last_unix_slash && (*(last_unix_slash + 1) == '\0')) + { + struct stat st; + int ret = stat (dir, &st); + /* Some other issue following, or is actually a directory. */ + if ((ret != 0 && errno != ENOTDIR) + || (ret == 0 && S_ISDIR (st.st_mode))) + { + /* Ensure the last component was a symlink. */ + char *dir_arg = xstrdup (dir); + strip_trailing_slashes (dir); + ret = lstat (dir, &st); + if (ret == 0 && S_ISLNK (st.st_mode)) + { + error (0, 0, + _("failed to remove %s:" + " Symbolic link not followed"), + quoteaf (dir_arg)); + custom_error = true; + } + free (dir_arg); + } + } + } + + if (! custom_error) + error (0, rmdir_errno, _("failed to remove %s"), quoteaf (dir)); + + ok = false; + } + else if (remove_empty_parents) + { + ok &= remove_parents (dir); + } + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/runcon.c b/src/runcon.c new file mode 100644 index 0000000..0487451 --- /dev/null +++ b/src/runcon.c @@ -0,0 +1,263 @@ +/* runcon -- run command with specified security context + Copyright (C) 2005-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* + * runcon [ context + * | ( [ -c ] [ -r role ] [-t type] [ -u user ] [ -l levelrange ] ) + * command [arg1 [arg2 ...] ] + * + * attempt to run the specified command with the specified context. + * + * -r role : use the current context with the specified role + * -t type : use the current context with the specified type + * -u user : use the current context with the specified user + * -l level : use the current context with the specified level range + * -c : compute process transition context before modifying + * + * Contexts are interpreted as follows: + * + * Number of MLS + * components system? + * + * 1 - type + * 2 - role:type + * 3 Y role:type:range + * 3 N user:role:type + * 4 Y user:role:type:range + * 4 N error + */ + +#include +#include +#include +#include +#include +#include +#include "system.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "runcon" + +#define AUTHORS proper_name ("Russell Coker") + +static struct option const long_options[] = +{ + {"role", required_argument, nullptr, 'r'}, + {"type", required_argument, nullptr, 't'}, + {"user", required_argument, nullptr, 'u'}, + {"range", required_argument, nullptr, 'l'}, + {"compute", no_argument, nullptr, 'c'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s CONTEXT COMMAND [args]\n\ + or: %s [ -c ] [-u USER] [-r ROLE] [-t TYPE] [-l RANGE] COMMAND [args]\n\ +"), program_name, program_name); + fputs (_("\ +Run a program in a different SELinux security context.\n\ +With neither CONTEXT nor COMMAND, print the current security context.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + CONTEXT Complete security context\n\ + -c, --compute compute process transition context before modifying\n\ + -t, --type=TYPE type (for same role as parent)\n\ + -u, --user=USER user identity\n\ + -r, --role=ROLE role\n\ + -l, --range=RANGE levelrange\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_exec_status (PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + char *role = nullptr; + char *range = nullptr; + char *user = nullptr; + char *type = nullptr; + char *context = nullptr; + char *cur_context = nullptr; + char *file_context = nullptr; + char *new_context = nullptr; + bool compute_trans = false; + + context_t con; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXIT_CANCELED); + atexit (close_stdout); + + while (true) + { + int option_index = 0; + int c = getopt_long (argc, argv, "+r:t:u:l:c", long_options, + &option_index); + if (c == -1) + break; + switch (c) + { + case 'r': + if (role) + error (EXIT_CANCELED, 0, _("multiple roles")); + role = optarg; + break; + case 't': + if (type) + error (EXIT_CANCELED, 0, _("multiple types")); + type = optarg; + break; + case 'u': + if (user) + error (EXIT_CANCELED, 0, _("multiple users")); + user = optarg; + break; + case 'l': + if (range) + error (EXIT_CANCELED, 0, _("multiple levelranges")); + range = optarg; + break; + case 'c': + compute_trans = true; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_CANCELED); + break; + } + } + + if (argc - optind == 0) + { + if (getcon (&cur_context) < 0) + error (EXIT_CANCELED, errno, _("failed to get current context")); + fputs (cur_context, stdout); + fputc ('\n', stdout); + return EXIT_SUCCESS; + } + + if (!(user || role || type || range || compute_trans)) + { + if (optind >= argc) + { + error (0, 0, _("you must specify -c, -t, -u, -l, -r, or context")); + usage (EXIT_CANCELED); + } + context = argv[optind++]; + } + + if (optind >= argc) + { + error (0, 0, _("no command specified")); + usage (EXIT_CANCELED); + } + + if (is_selinux_enabled () != 1) + error (EXIT_CANCELED, 0, _("%s may be used only on a SELinux kernel"), + program_name); + + if (context) + { + con = context_new (context); + if (!con) + error (EXIT_CANCELED, errno, _("failed to create security context: %s"), + quote (context)); + } + else + { + if (getcon (&cur_context) < 0) + error (EXIT_CANCELED, errno, _("failed to get current context")); + + /* We will generate context based on process transition */ + if (compute_trans) + { + /* Get context of file to be executed */ + if (getfilecon (argv[optind], &file_context) == -1) + error (EXIT_CANCELED, errno, + _("failed to get security context of %s"), + quoteaf (argv[optind])); + /* compute result of process transition */ + if (security_compute_create (cur_context, file_context, + string_to_security_class ("process"), + &new_context) != 0) + error (EXIT_CANCELED, errno, _("failed to compute a new context")); + /* free contexts */ + freecon (file_context); + freecon (cur_context); + + /* set cur_context equal to new_context */ + cur_context = new_context; + } + + con = context_new (cur_context); + if (!con) + error (EXIT_CANCELED, errno, _("failed to create security context: %s"), + quote (cur_context)); + if (user && context_user_set (con, user)) + error (EXIT_CANCELED, errno, _("failed to set new user: %s"), + quote (user)); + if (type && context_type_set (con, type)) + error (EXIT_CANCELED, errno, _("failed to set new type: %s"), + quote (type)); + if (range && context_range_set (con, range)) + error (EXIT_CANCELED, errno, _("failed to set new range: %s"), + quote (range)); + if (role && context_role_set (con, role)) + error (EXIT_CANCELED, errno, _("failed to set new role: %s"), + quote (role)); + } + + if (security_check_context (context_str (con)) < 0) + error (EXIT_CANCELED, errno, _("invalid context: %s"), + quote (context_str (con))); + + if (setexeccon (context_str (con)) != 0) + error (EXIT_CANCELED, errno, _("unable to set security context %s"), + quote (context_str (con))); + if (cur_context != nullptr) + freecon (cur_context); + + (compute_trans ? execv : execvp) (argv[optind], argv + optind); + + int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; + error (0, errno, "%s", quote (argv[optind])); + return exit_status; +} diff --git a/src/selinux.c b/src/selinux.c new file mode 100644 index 0000000..0fdd0c8 --- /dev/null +++ b/src/selinux.c @@ -0,0 +1,327 @@ +/* selinux - core functions for maintaining SELinux labeling + Copyright (C) 2012-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Daniel Walsh */ + +#include +#include +#include +#include + +#include "system.h" +#include "canonicalize.h" +#include "xfts.h" +#include "selinux.h" + +#if HAVE_SELINUX_LABEL_H + +# if ! HAVE_MODE_TO_SECURITY_CLASS +/* + This function has been added to libselinux-2.1.12-5, but is here + for support with older versions of SELinux + + Translates a mode into an Internal SELinux security_class definition. + Returns 0 on failure, with errno set to EINVAL. +*/ +static security_class_t +mode_to_security_class (mode_t m) +{ + + if (S_ISREG (m)) + return string_to_security_class ("file"); + if (S_ISDIR (m)) + return string_to_security_class ("dir"); + if (S_ISCHR (m)) + return string_to_security_class ("chr_file"); + if (S_ISBLK (m)) + return string_to_security_class ("blk_file"); + if (S_ISFIFO (m)) + return string_to_security_class ("fifo_file"); + if (S_ISLNK (m)) + return string_to_security_class ("lnk_file"); + if (S_ISSOCK (m)) + return string_to_security_class ("sock_file"); + + errno = EINVAL; + return 0; +} +# endif + +/* + This function takes a PATH and a MODE and then asks SELinux what the label + of the path object would be if the current process label created it. + It then returns the label. + + Returns -1 on failure. errno will be set appropriately. +*/ + +static int +computecon (char const *path, mode_t mode, char **con) +{ + char *scon = nullptr; + char *tcon = nullptr; + security_class_t tclass; + int rc = -1; + + char *dir = dir_name (path); + if (!dir) + goto quit; + if (getcon (&scon) < 0) + goto quit; + if (getfilecon (dir, &tcon) < 0) + goto quit; + tclass = mode_to_security_class (mode); + if (!tclass) + goto quit; + rc = security_compute_create (scon, tcon, tclass, con); + + quit:; + int err = errno; + free (dir); + freecon (scon); + freecon (tcon); + errno = err; + return rc; +} + +/* + This function takes a handle, path and mode, it calls computecon to get the + label of the path object if the current process created it, then it calls + selabel_lookup to get the default type for the object. It substitutes the + default type into label. It tells the SELinux Kernel to label all new file + system objects created by the current process with this label. + + Returns -1 on failure. errno will be set appropriately. +*/ +int +defaultcon (struct selabel_handle *selabel_handle, + char const *path, mode_t mode) +{ + int rc = -1; + char *scon = nullptr; + char *tcon = nullptr; + context_t scontext = 0, tcontext = 0; + char const *contype; + char const *constr; + char *newpath = nullptr; + + if (! IS_ABSOLUTE_FILE_NAME (path)) + { + /* Generate absolute name as required by subsequent selabel_lookup. */ + newpath = canonicalize_filename_mode (path, CAN_MISSING); + if (! newpath) + goto quit; + path = newpath; + } + + if (selabel_lookup (selabel_handle, &scon, path, mode) < 0) + { + /* "No such file or directory" is a confusing error, + when processing files, when in fact it was the + associated default context that was not found. + Therefore map the error to something more appropriate + to the context in which we're using selabel_lookup(). */ + if (errno == ENOENT) + errno = ENODATA; + goto quit; + } + if (computecon (path, mode, &tcon) < 0) + goto quit; + if (!(scontext = context_new (scon))) + goto quit; + if (!(tcontext = context_new (tcon))) + goto quit; + + if (!(contype = context_type_get (scontext))) + goto quit; + if (context_type_set (tcontext, contype)) + goto quit; + if (!(constr = context_str (tcontext))) + goto quit; + + rc = setfscreatecon (constr); + + quit:; + int err = errno; + context_free (scontext); + context_free (tcontext); + freecon (scon); + freecon (tcon); + free (newpath); + errno = err; + return rc; +} + +/* + If SELABEL_HANDLE is null, set PATH's label to the default to the + local process. Otherwise use selabel_lookup to determine the + default label, extract the type field and then modify the file + system object. Note only the type field is updated, thus preserving MLS + levels and user identity etc. of the PATH. + + Returns -1 on failure. errno will be set appropriately. +*/ +static int +restorecon_private (struct selabel_handle *selabel_handle, char const *path) +{ + int rc = -1; + struct stat sb; + char *scon = nullptr; + char *tcon = nullptr; + context_t scontext = 0, tcontext = 0; + char const *contype; + char const *constr; + int fd; + + if (!selabel_handle) + { + if (getfscreatecon (&tcon) < 0) + return rc; + if (!tcon) + { + errno = ENODATA; + return rc; + } + rc = lsetfilecon (path, tcon); + int err = errno; + freecon (tcon); + errno = err; + return rc; + } + + fd = open (path, O_RDONLY | O_NOFOLLOW); + if (fd == -1 && (errno != ELOOP)) + goto quit; + + if (fd != -1) + { + if (fstat (fd, &sb) < 0) + goto quit; + } + else + { + if (lstat (path, &sb) < 0) + goto quit; + } + + if (selabel_lookup (selabel_handle, &scon, path, sb.st_mode) < 0) + { + /* "No such file or directory" is a confusing error, + when processing files, when in fact it was the + associated default context that was not found. + Therefore map the error to something more appropriate + to the context in which we're using selabel_lookup. */ + if (errno == ENOENT) + errno = ENODATA; + goto quit; + } + if (!(scontext = context_new (scon))) + goto quit; + + if (fd != -1) + { + if (fgetfilecon (fd, &tcon) < 0) + goto quit; + } + else + { + if (lgetfilecon (path, &tcon) < 0) + goto quit; + } + + if (!(tcontext = context_new (tcon))) + goto quit; + + if (!(contype = context_type_get (scontext))) + goto quit; + if (context_type_set (tcontext, contype)) + goto quit; + if (!(constr = context_str (tcontext))) + goto quit; + + if (fd != -1) + rc = fsetfilecon (fd, constr); + else + rc = lsetfilecon (path, constr); + + quit:; + int err = errno; + if (fd != -1) + close (fd); + context_free (scontext); + context_free (tcontext); + freecon (scon); + freecon (tcon); + errno = err; + return rc; +} + +/* + This function takes three parameters: + + SELABEL_HANDLE for selabel_lookup, or null to preserve. + + PATH of an existing file system object. + + A RECURSE boolean which if the file system object is a directory, will + call restorecon_private on every file system object in the directory. + + Return false on failure. errno will be set appropriately. +*/ +bool +restorecon (struct selabel_handle *selabel_handle, + char const *path, bool recurse) +{ + char *newpath = nullptr; + + if (! IS_ABSOLUTE_FILE_NAME (path)) + { + /* Generate absolute name as required by subsequent selabel_lookup. + When RECURSE, this also generates absolute names in the + fts entries, which may be quicker to process in any case. */ + newpath = canonicalize_filename_mode (path, CAN_MISSING); + if (! newpath) + return false; + path = newpath; + } + + if (! recurse) + { + bool ok = restorecon_private (selabel_handle, path) != -1; + int err = errno; + free (newpath); + errno = err; + return ok; + } + + char const *ftspath[2] = { path, nullptr }; + FTS *fts = xfts_open ((char *const *) ftspath, FTS_PHYSICAL, nullptr); + + int err = 0; + for (FTSENT *ent; (ent = fts_read (fts)); ) + if (restorecon_private (selabel_handle, fts->fts_path) < 0) + err = errno; + + if (errno != 0) + err = errno; + + if (fts_close (fts) != 0) + err = errno; + + free (newpath); + return !err; +} +#endif diff --git a/src/selinux.h b/src/selinux.h new file mode 100644 index 0000000..680fc84 --- /dev/null +++ b/src/selinux.h @@ -0,0 +1,55 @@ +/* selinux - core functions for maintaining SELinux labeling + Copyright (C) 2012-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Daniel Walsh */ + +#ifndef COREUTILS_SELINUX_H +# define COREUTILS_SELINUX_H + +struct selabel_handle; + +/* Return true if ERR corresponds to an unsupported request, + or if there is no context or it's inaccessible. */ +static inline bool +ignorable_ctx_err (int err) +{ + return err == ENOTSUP || err == ENODATA; +} + +# if HAVE_SELINUX_LABEL_H + +extern bool +restorecon (struct selabel_handle *selabel_handle, + char const *path, bool recurse); +extern int +defaultcon (struct selabel_handle *selabel_handle, + char const *path, mode_t mode); + +# else + +static inline bool +restorecon (struct selabel_handle *selabel_handle, + char const *path, bool recurse) +{ errno = ENOTSUP; return false; } + +static inline int +defaultcon (struct selabel_handle *selabel_handle, + char const *path, mode_t mode) +{ errno = ENOTSUP; return -1; } + +# endif + +#endif diff --git a/src/seq.c b/src/seq.c new file mode 100644 index 0000000..2822d5c --- /dev/null +++ b/src/seq.c @@ -0,0 +1,724 @@ +/* seq - print sequence of numbers to standard output. + Copyright (C) 1994-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Ulrich Drepper. */ + +#include +#include +#include +#include + +#include "system.h" +#include "cl-strtod.h" +#include "quote.h" +#include "xstrtod.h" + +/* Roll our own isfinite/isnan rather than using , so that we don't + have to worry about linking -lm just for isfinite. */ +#ifndef isfinite +# define isfinite(x) ((x) * 0 == 0) +#endif +#ifndef isnan +# define isnan(x) ((x) != (x)) +#endif + +/* Limit below which seq_fast has more throughput. + Determined with: seq 0 200 inf | pv > /dev/null */ +#define SEQ_FAST_STEP_LIMIT 200 /* Keep in sync with texinfo description. */ +#define SEQ_FAST_STEP_LIMIT_DIGITS 3 + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "seq" + +#define AUTHORS proper_name ("Ulrich Drepper") + +/* True if the locale settings were honored. */ +static bool locale_ok; + +/* If true print all number with equal width. */ +static bool equal_width; + +/* The string used to separate two numbers. */ +static char const *separator; + +/* The string output after all numbers have been output. + Usually "\n" or "\0". */ +static char const terminator[] = "\n"; + +static struct option const long_options[] = +{ + { "equal-width", no_argument, nullptr, 'w'}, + { "format", required_argument, nullptr, 'f'}, + { "separator", required_argument, nullptr, 's'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + { nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... LAST\n\ + or: %s [OPTION]... FIRST LAST\n\ + or: %s [OPTION]... FIRST INCREMENT LAST\n\ +"), program_name, program_name, program_name); + fputs (_("\ +Print numbers from FIRST to LAST, in steps of INCREMENT.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -f, --format=FORMAT use printf style floating-point FORMAT\n\ + -s, --separator=STRING use STRING to separate numbers (default: \\n)\n\ + -w, --equal-width equalize width by padding with leading zeroes\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +If FIRST or INCREMENT is omitted, it defaults to 1. That is, an\n\ +omitted INCREMENT defaults to 1 even when LAST is smaller than FIRST.\n\ +The sequence of numbers ends when the sum of the current number and\n\ +INCREMENT would become greater than LAST.\n\ +FIRST, INCREMENT, and LAST are interpreted as floating point values.\n\ +INCREMENT is usually positive if FIRST is smaller than LAST, and\n\ +INCREMENT is usually negative if FIRST is greater than LAST.\n\ +INCREMENT must not be 0; none of FIRST, INCREMENT and LAST may be NaN.\n\ +"), stdout); + fputs (_("\ +FORMAT must be suitable for printing one argument of type 'double';\n\ +it defaults to %.PRECf if FIRST, INCREMENT, and LAST are all fixed point\n\ +decimal numbers with maximum precision PREC, and to %g otherwise.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* A command-line operand. */ +struct operand +{ + /* Its value, converted to 'long double'. */ + long double value; + + /* Its print width, if it were printed out in a form similar to its + input form. An input like "-.1" is treated like "-0.1", and an + input like "1." is treated like "1", but otherwise widths are + left alone. */ + size_t width; + + /* Number of digits after the decimal point, or INT_MAX if the + number can't easily be expressed as a fixed-point number. */ + int precision; +}; +typedef struct operand operand; + +/* Description of what a number-generating format will generate. */ +struct layout +{ + /* Number of bytes before and after the number. */ + size_t prefix_len; + size_t suffix_len; +}; + +/* Read a long double value from the command line. + Return if the string is correct else signal error. */ + +static operand +scan_arg (char const *arg) +{ + operand ret; + + if (! xstrtold (arg, nullptr, &ret.value, cl_strtold)) + { + error (0, 0, _("invalid floating point argument: %s"), quote (arg)); + usage (EXIT_FAILURE); + } + + if (isnan (ret.value)) + { + error (0, 0, _("invalid %s argument: %s"), quote_n (0, "not-a-number"), + quote_n (1, arg)); + usage (EXIT_FAILURE); + } + + /* We don't output spaces or '+' so don't include in width */ + while (isspace (to_uchar (*arg)) || *arg == '+') + arg++; + + /* Default to auto width and precision. */ + ret.width = 0; + ret.precision = INT_MAX; + + /* Use no precision (and possibly fast generation) for integers. */ + char const *decimal_point = strchr (arg, '.'); + if (! decimal_point && ! strchr (arg, 'p') /* not a hex float */) + ret.precision = 0; + + /* auto set width and precision for decimal inputs. */ + if (! arg[strcspn (arg, "xX")] && isfinite (ret.value)) + { + size_t fraction_len = 0; + ret.width = strlen (arg); + + if (decimal_point) + { + fraction_len = strcspn (decimal_point + 1, "eE"); + if (fraction_len <= INT_MAX) + ret.precision = fraction_len; + ret.width += (fraction_len == 0 /* #. -> # */ + ? -1 + : (decimal_point == arg /* .# -> 0.# */ + || ! ISDIGIT (decimal_point[-1]))); /* -.# -> 0.# */ + } + char const *e = strchr (arg, 'e'); + if (! e) + e = strchr (arg, 'E'); + if (e) + { + long exponent = MAX (strtol (e + 1, nullptr, 10), -LONG_MAX); + ret.precision += exponent < 0 ? -exponent + : - MIN (ret.precision, exponent); + /* Don't account for e.... in the width since this is not output. */ + ret.width -= strlen (arg) - (e - arg); + /* Adjust the width as per the exponent. */ + if (exponent < 0) + { + if (decimal_point) + { + if (e == decimal_point + 1) /* undo #. -> # above */ + ret.width++; + } + else + ret.width++; + exponent = -exponent; + } + else + { + if (decimal_point && ret.precision == 0 && fraction_len) + ret.width--; /* discount space for '.' */ + exponent -= MIN (fraction_len, exponent); + } + ret.width += exponent; + } + } + + return ret; +} + +/* If FORMAT is a valid printf format for a double argument, return + its long double equivalent, allocated from dynamic storage, and + store into *LAYOUT a description of the output layout; otherwise, + report an error and exit. */ + +static char const * +long_double_format (char const *fmt, struct layout *layout) +{ + size_t i; + size_t prefix_len = 0; + size_t suffix_len = 0; + size_t length_modifier_offset; + bool has_L; + + for (i = 0; ! (fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1) + { + if (!fmt[i]) + error (EXIT_FAILURE, 0, + _("format %s has no %% directive"), quote (fmt)); + prefix_len++; + } + + i++; + i += strspn (fmt + i, "-+#0 '"); + i += strspn (fmt + i, "0123456789"); + if (fmt[i] == '.') + { + i++; + i += strspn (fmt + i, "0123456789"); + } + + length_modifier_offset = i; + has_L = (fmt[i] == 'L'); + i += has_L; + if (fmt[i] == '\0') + error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt)); + if (! strchr ("efgaEFGA", fmt[i])) + error (EXIT_FAILURE, 0, + _("format %s has unknown %%%c directive"), quote (fmt), fmt[i]); + + for (i++; ; i += (fmt[i] == '%') + 1) + if (fmt[i] == '%' && fmt[i + 1] != '%') + error (EXIT_FAILURE, 0, _("format %s has too many %% directives"), + quote (fmt)); + else if (fmt[i]) + suffix_len++; + else + { + size_t format_size = i + 1; + char *ldfmt = xmalloc (format_size + 1); + memcpy (ldfmt, fmt, length_modifier_offset); + ldfmt[length_modifier_offset] = 'L'; + strcpy (ldfmt + length_modifier_offset + 1, + fmt + length_modifier_offset + has_L); + layout->prefix_len = prefix_len; + layout->suffix_len = suffix_len; + return ldfmt; + } +} + +/* Actually print the sequence of numbers in the specified range, with the + given or default stepping and format. */ + +static void +print_numbers (char const *fmt, struct layout layout, + long double first, long double step, long double last) +{ + bool out_of_range = (step < 0 ? first < last : last < first); + + if (! out_of_range) + { + long double x = first; + long double i; + + for (i = 1; ; i++) + { + long double x0 = x; + if (printf (fmt, x) < 0) + write_error (); + if (out_of_range) + break; + x = first + i * step; + out_of_range = (step < 0 ? x < last : last < x); + + if (out_of_range) + { + /* If the number just past LAST prints as a value equal + to LAST, and prints differently from the previous + number, then print the number. This avoids problems + with rounding. For example, with the x86 it causes + "seq 0 0.000001 0.000003" to print 0.000003 instead + of stopping at 0.000002. */ + + bool print_extra_number = false; + long double x_val; + char *x_str; + int x_strlen; + if (locale_ok) + setlocale (LC_NUMERIC, "C"); + x_strlen = asprintf (&x_str, fmt, x); + if (locale_ok) + setlocale (LC_NUMERIC, ""); + if (x_strlen < 0) + xalloc_die (); + x_str[x_strlen - layout.suffix_len] = '\0'; + + if (xstrtold (x_str + layout.prefix_len, nullptr, + &x_val, cl_strtold) + && x_val == last) + { + char *x0_str = nullptr; + int x0_strlen = asprintf (&x0_str, fmt, x0); + if (x0_strlen < 0) + xalloc_die (); + x0_str[x0_strlen - layout.suffix_len] = '\0'; + print_extra_number = !STREQ (x0_str, x_str); + free (x0_str); + } + + free (x_str); + if (! print_extra_number) + break; + } + + if (fputs (separator, stdout) == EOF) + write_error (); + } + + if (fputs (terminator, stdout) == EOF) + write_error (); + } +} + +/* Return the default format given FIRST, STEP, and LAST. */ +static char const * +get_default_format (operand first, operand step, operand last) +{ + static char format_buf[sizeof "%0.Lf" + 2 * INT_STRLEN_BOUND (int)]; + + int prec = MAX (first.precision, step.precision); + + if (prec != INT_MAX && last.precision != INT_MAX) + { + if (equal_width) + { + /* increase first_width by any increased precision in step */ + size_t first_width = first.width + (prec - first.precision); + /* adjust last_width to use precision from first/step */ + size_t last_width = last.width + (prec - last.precision); + if (last.precision && prec == 0) + last_width--; /* don't include space for '.' */ + if (last.precision == 0 && prec) + last_width++; /* include space for '.' */ + if (first.precision == 0 && prec) + first_width++; /* include space for '.' */ + size_t width = MAX (first_width, last_width); + if (width <= INT_MAX) + { + int w = width; + sprintf (format_buf, "%%0%d.%dLf", w, prec); + return format_buf; + } + } + else + { + sprintf (format_buf, "%%.%dLf", prec); + return format_buf; + } + } + + return "%Lg"; +} + +/* The NUL-terminated string S0 of length S_LEN represents a valid + non-negative decimal integer. Adjust the string and length so + that the pair describe the next-larger value. */ +static void +incr (char **s0, size_t *s_len) +{ + char *s = *s0; + char *endp = s + *s_len - 1; + + do + { + if ((*endp)++ < '9') + return; + *endp-- = '0'; + } + while (endp >= s); + *--(*s0) = '1'; + ++*s_len; +} + +/* Compare A and B (each a NUL-terminated digit string), with lengths + given by A_LEN and B_LEN. Return +1 if A < B, -1 if B < A, else 0. */ +static int +cmp (char const *a, size_t a_len, char const *b, size_t b_len) +{ + if (a_len < b_len) + return -1; + if (b_len < a_len) + return 1; + return (memcmp (a, b, a_len)); +} + +/* Trim leading 0's from S, but if S is all 0's, leave one. + Return a pointer to the trimmed string. */ +ATTRIBUTE_PURE +static char const * +trim_leading_zeros (char const *s) +{ + char const *p = s; + while (*s == '0') + ++s; + + /* If there were only 0's, back up, to leave one. */ + if (!*s && s != p) + --s; + return s; +} + +/* Print all whole numbers from A to B, inclusive -- to stdout, each + followed by a newline. If B < A, return and print nothing. + Otherwise, do all the work and exit. */ +static void +seq_fast (char const *a, char const *b, uintmax_t step) +{ + bool inf = STREQ (b, "inf"); + + /* Skip past any leading 0's. Without this, our naive cmp + function would declare 000 to be larger than 99. */ + a = trim_leading_zeros (a); + b = trim_leading_zeros (b); + + size_t p_len = strlen (a); + size_t q_len = inf ? 0 : strlen (b); + + /* Allow for at least 31 digits without realloc. + 1 more than p_len is needed for the inf case. */ +#define INITIAL_ALLOC_DIGITS 31 + size_t inc_size = MAX (MAX (p_len + 1, q_len), INITIAL_ALLOC_DIGITS); + /* Ensure we only increase by at most 1 digit at buffer boundaries. */ + static_assert (SEQ_FAST_STEP_LIMIT_DIGITS < INITIAL_ALLOC_DIGITS - 1); + + /* Copy input strings (incl NUL) to end of new buffers. */ + char *p0 = xmalloc (inc_size + 1); + char *p = memcpy (p0 + inc_size - p_len, a, p_len + 1); + char *q; + char *q0; + if (! inf) + { + q0 = xmalloc (inc_size + 1); + q = memcpy (q0 + inc_size - q_len, b, q_len + 1); + } + else + q = q0 = nullptr; + + bool ok = inf || cmp (p, p_len, q, q_len) <= 0; + if (ok) + { + /* Reduce number of fwrite calls which is seen to + give a speed-up of more than 2x over the unbuffered code + when printing the first 10^9 integers. */ + size_t buf_size = MAX (BUFSIZ, (inc_size + 1) * 2); + char *buf = xmalloc (buf_size); + char const *buf_end = buf + buf_size; + + char *bufp = buf; + + /* Write first number to buffer. */ + bufp = mempcpy (bufp, p, p_len); + + /* Append separator then number. */ + while (true) + { + for (uintmax_t n_incr = step; n_incr; n_incr--) + incr (&p, &p_len); + + if (! inf && 0 < cmp (p, p_len, q, q_len)) + break; + + *bufp++ = *separator; + + /* Double up the buffers when needed for the inf case. */ + if (p_len == inc_size) + { + inc_size *= 2; + p0 = xrealloc (p0, inc_size + 1); + p = memmove (p0 + p_len, p0, p_len + 1); + + if (buf_size < (inc_size + 1) * 2) + { + size_t buf_offset = bufp - buf; + buf_size = (inc_size + 1) * 2; + buf = xrealloc (buf, buf_size); + buf_end = buf + buf_size; + bufp = buf + buf_offset; + } + } + + bufp = mempcpy (bufp, p, p_len); + /* If no place for another separator + number then + output buffer so far, and reset to start of buffer. */ + if (buf_end - (p_len + 1) < bufp) + { + if (fwrite (buf, bufp - buf, 1, stdout) != 1) + write_error (); + bufp = buf; + } + } + + /* Write any remaining buffered output, and the terminator. */ + *bufp++ = *terminator; + if (fwrite (buf, bufp - buf, 1, stdout) != 1) + write_error (); + } + + if (ok) + exit (EXIT_SUCCESS); + + free (p0); + free (q0); +} + +/* Return true if S consists of at least one digit and no non-digits. */ +ATTRIBUTE_PURE +static bool +all_digits_p (char const *s) +{ + size_t n = strlen (s); + return ISDIGIT (s[0]) && n == strspn (s, "0123456789"); +} + +int +main (int argc, char **argv) +{ + int optc; + operand first = { 1, 1, 0 }; + operand step = { 1, 1, 0 }; + operand last; + struct layout layout = { 0, 0 }; + + /* The printf(3) format used for output. */ + char const *format_str = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + locale_ok = !!setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + equal_width = false; + separator = "\n"; + + /* We have to handle negative numbers in the command line but this + conflicts with the command line arguments. So explicitly check first + whether the next argument looks like a negative number. */ + while (optind < argc) + { + if (argv[optind][0] == '-' + && ((optc = argv[optind][1]) == '.' || ISDIGIT (optc))) + { + /* means negative number */ + break; + } + + optc = getopt_long (argc, argv, "+f:s:w", long_options, nullptr); + if (optc == -1) + break; + + switch (optc) + { + case 'f': + format_str = optarg; + break; + + case 's': + separator = optarg; + break; + + case 'w': + equal_width = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + int n_args = argc - optind; + if (n_args < 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (3 < n_args) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 3])); + usage (EXIT_FAILURE); + } + + if (format_str) + format_str = long_double_format (format_str, &layout); + + if (format_str != nullptr && equal_width) + { + error (0, 0, _("format string may not be specified" + " when printing equal width strings")); + usage (EXIT_FAILURE); + } + + /* If the following hold: + - no format string, [FIXME: relax this, eventually] + - integer start (or no start) + - integer end + - integer increment <= SEQ_FAST_STEP_LIMIT + then use the much more efficient integer-only code, + operating on arbitrarily large numbers. */ + bool fast_step_ok = false; + if (n_args != 3 + || (all_digits_p (argv[optind + 1]) + && xstrtold (argv[optind + 1], nullptr, &step.value, cl_strtold) + && 0 < step.value && step.value <= SEQ_FAST_STEP_LIMIT)) + fast_step_ok = true; + + if (all_digits_p (argv[optind]) + && (n_args == 1 || all_digits_p (argv[optind + 1])) + && (n_args < 3 || (fast_step_ok + && all_digits_p (argv[optind + 2]))) + && !equal_width && !format_str && strlen (separator) == 1) + { + char const *s1 = n_args == 1 ? "1" : argv[optind]; + char const *s2 = argv[optind + (n_args - 1)]; + seq_fast (s1, s2, step.value); + + /* Upon any failure, let the more general code deal with it. */ + } + + last = scan_arg (argv[optind++]); + + if (optind < argc) + { + first = last; + last = scan_arg (argv[optind++]); + + if (optind < argc) + { + step = last; + if (step.value == 0) + { + error (0, 0, _("invalid Zero increment value: %s"), + quote (argv[optind - 1])); + usage (EXIT_FAILURE); + } + + last = scan_arg (argv[optind++]); + } + } + + /* Try the fast method again, for integers of the form 1e1 etc., + or "inf" end value. */ + if (first.precision == 0 && step.precision == 0 && last.precision == 0 + && isfinite (first.value) && 0 <= first.value && 0 <= last.value + && 0 < step.value && step.value <= SEQ_FAST_STEP_LIMIT + && !equal_width && !format_str && strlen (separator) == 1) + { + char *s1; + char *s2; + if (asprintf (&s1, "%0.Lf", first.value) < 0) + xalloc_die (); + if (! isfinite (last.value)) + s2 = xstrdup ("inf"); /* Ensure "inf" is used. */ + else if (asprintf (&s2, "%0.Lf", last.value) < 0) + xalloc_die (); + + if (*s1 != '-' && *s2 != '-') + seq_fast (s1, s2, step.value); + + free (s1); + free (s2); + /* Upon any failure, let the more general code deal with it. */ + } + + if (format_str == nullptr) + format_str = get_default_format (first, step, last); + + print_numbers (format_str, layout, first.value, step.value, last.value); + + main_exit (EXIT_SUCCESS); +} diff --git a/src/set-fields.c b/src/set-fields.c new file mode 100644 index 0000000..b299280 --- /dev/null +++ b/src/set-fields.c @@ -0,0 +1,308 @@ +/* set-fields.c -- common functions for parsing field list + Copyright (C) 2015-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from cut.c by Assaf Gordon */ + +#include + +#include "system.h" +#include "quote.h" +#include "set-fields.h" + +/* Array of `struct field_range_pair' holding all the finite ranges. */ +struct field_range_pair *frp; + +/* Number of finite ranges specified by the user. */ +size_t n_frp; + +/* Number of `struct field_range_pair's allocated. */ +static size_t n_frp_allocated; + +#define FATAL_ERROR(Message) \ + do \ + { \ + error (0, 0, (Message)); \ + usage (EXIT_FAILURE); \ + } \ + while (0) + +/* Append LOW, HIGH to the list RP of range pairs, allocating additional + space if necessary. Update global variable N_FRP. When allocating, + update global variable N_FRP_ALLOCATED. */ +static void +add_range_pair (uintmax_t lo, uintmax_t hi) +{ + if (n_frp == n_frp_allocated) + frp = X2NREALLOC (frp, &n_frp_allocated); + frp[n_frp].lo = lo; + frp[n_frp].hi = hi; + ++n_frp; +} + + +/* Comparison function for qsort to order the list of + struct field_range_pairs. */ +static int +compare_ranges (const void *a, const void *b) +{ + struct field_range_pair const *ap = a, *bp = b; + return (ap->lo > bp->lo) - (ap->lo < bp->lo); +} + +/* Reallocate Range Pair entries, with corresponding + entries outside the range of each specified entry. */ + +static void +complement_rp (void) +{ + struct field_range_pair *c = frp; + size_t n = n_frp; + + frp = nullptr; + n_frp = 0; + n_frp_allocated = 0; + + if (c[0].lo > 1) + add_range_pair (1, c[0].lo - 1); + + for (size_t i = 1; i < n; ++i) + { + if (c[i - 1].hi + 1 == c[i].lo) + continue; + + add_range_pair (c[i - 1].hi + 1, c[i].lo - 1); + } + + if (c[n - 1].hi < UINTMAX_MAX) + add_range_pair (c[n - 1].hi + 1, UINTMAX_MAX); + + free (c); +} + +/* Given the list of field or byte range specifications FIELDSTR, + allocate and initialize the FRP array. FIELDSTR should + be composed of one or more numbers or ranges of numbers, separated + by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; + 'n-' means 'n' through end of line. + n=0 and n>=UINTMAX_MAX values will trigger an error. + + if SETFLD_ALLOW_DASH option is used, a single '-' means all fields + (otherwise a single dash triggers an error). + + if SETFLD_COMPLEMENT option is used, the specified field list + is complemented (e.g. '1-3' will result in fields '4-'). + + if SETFLD_ERRMSG_USE_POS option is used, error messages + will say 'position' (or 'byte/character positions') + instead of fields (used with cut -b/-c). + + The function terminates on failure. + + Upon return, the FRP array is initialized to contain + a non-overlapping, increasing list of field ranges. + + N_FRP holds the number of field ranges in the FRP array. + + The first field is stored as 1 (zero is not used). + An open-ended range (i.e., until the last field of the input line) + is indicated with hi = UINTMAX_MAX. + + A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last + field range pair. + + Examples: + given '1-2,4', frp = [ { .lo = 1, .hi = 2 }, + { .lo = 4, .hi = 4 }, + { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ]; + + given '3-', frp = [ { .lo = 3, .hi = UINTMAX_MAX }, + { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ]; +*/ +void +set_fields (char const *fieldstr, unsigned int options) +{ + uintmax_t initial = 1; /* Value of first number in a range. */ + uintmax_t value = 0; /* If nonzero, a number being accumulated. */ + bool lhs_specified = false; + bool rhs_specified = false; + bool dash_found = false; /* True if a '-' is found in this field. */ + + bool in_digits = false; + + /* Collect and store in RP the range end points. */ + + /* Special case: '--field=-' means all fields, emulate '--field=1-' . */ + if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-")) + { + value = 1; + lhs_specified = true; + dash_found = true; + fieldstr++; + } + + while (true) + { + if (*fieldstr == '-') + { + in_digits = false; + /* Starting a range. */ + if (dash_found) + FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS) + ? _("invalid byte or character range") + : _("invalid field range")); + + dash_found = true; + fieldstr++; + + if (lhs_specified && !value) + FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS) + ? _("byte/character positions are numbered from 1") + : _("fields are numbered from 1")); + + initial = (lhs_specified ? value : 1); + value = 0; + } + else if (*fieldstr == ',' + || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') + { + in_digits = false; + /* Ending the string, or this field/byte sublist. */ + if (dash_found) + { + dash_found = false; + + if (!lhs_specified && !rhs_specified) + { + /* if a lone dash is allowed, emulate '1-' for all fields */ + if (options & SETFLD_ALLOW_DASH) + initial = 1; + else + FATAL_ERROR (_("invalid range with no endpoint: -")); + } + + /* A range. Possibilities: -n, m-n, n-. + In any case, 'initial' contains the start of the range. */ + if (!rhs_specified) + { + /* 'n-'. From 'initial' to end of line. */ + add_range_pair (initial, UINTMAX_MAX); + } + else + { + /* 'm-n' or '-n' (1-n). */ + if (value < initial) + FATAL_ERROR (_("invalid decreasing range")); + + add_range_pair (initial, value); + } + value = 0; + } + else + { + /* A simple field number, not a range. */ + if (value == 0) + FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS) + ? _("byte/character positions are numbered from 1") + : _("fields are numbered from 1")); + + add_range_pair (value, value); + value = 0; + } + + if (*fieldstr == '\0') + break; + + fieldstr++; + lhs_specified = false; + rhs_specified = false; + } + else if (ISDIGIT (*fieldstr)) + { + /* Record beginning of digit string, in case we have to + complain about it. */ + static char const *num_start; + if (!in_digits || !num_start) + num_start = fieldstr; + in_digits = true; + + if (dash_found) + rhs_specified = 1; + else + lhs_specified = 1; + + /* Detect overflow. */ + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t) + || value == UINTMAX_MAX) + { + /* In case the user specified -c$(echo 2^64|bc),22, + complain only about the first number. */ + /* Determine the length of the offending number. */ + size_t len = strspn (num_start, "0123456789"); + char *bad_num = ximemdup0 (num_start, len); + error (0, 0, (options & SETFLD_ERRMSG_USE_POS) + ?_("byte/character offset %s is too large") + :_("field number %s is too large"), + quote (bad_num)); + free (bad_num); + usage (EXIT_FAILURE); + } + + fieldstr++; + } + else + { + error (0, 0, (options & SETFLD_ERRMSG_USE_POS) + ?_("invalid byte/character position %s") + :_("invalid field value %s"), + quote (fieldstr)); + usage (EXIT_FAILURE); + } + } + + if (!n_frp) + FATAL_ERROR ((options&SETFLD_ERRMSG_USE_POS) + ? _("missing list of byte/character positions") + : _("missing list of fields")); + + qsort (frp, n_frp, sizeof (frp[0]), compare_ranges); + + /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */ + for (size_t i = 0; i < n_frp; ++i) + { + for (size_t j = i + 1; j < n_frp; ++j) + { + if (frp[j].lo <= frp[i].hi) + { + frp[i].hi = MAX (frp[j].hi, frp[i].hi); + memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp); + n_frp--; + j--; + } + else + break; + } + } + + if (options & SETFLD_COMPLEMENT) + complement_rp (); + + /* After merging, reallocate RP so we release memory to the system. + Also add a sentinel at the end of RP, to avoid out of bounds access + and for performance reasons. */ + ++n_frp; + frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair)); + frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX; +} diff --git a/src/set-fields.h b/src/set-fields.h new file mode 100644 index 0000000..313eea8 --- /dev/null +++ b/src/set-fields.h @@ -0,0 +1,44 @@ +/* set-fields.h -- parse field list argument + + Copyright (C) 2015-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ +#ifndef SET_FIELDS_H +# define SET_FIELDS_H + +struct field_range_pair + { + uintmax_t lo; + uintmax_t hi; + }; + +/* Array of `struct range_pair' holding all the finite ranges. */ +extern struct field_range_pair *frp; + +/* Number of finite ranges specified by the user. */ +extern size_t n_frp; + +/* field list parsing options */ +enum +{ + SETFLD_ALLOW_DASH = 0x01, /* allow single dash meaning 'all fields' */ + SETFLD_COMPLEMENT = 0x02, /* complement the field list */ + SETFLD_ERRMSG_USE_POS = 0x04 /* when reporting errors, say 'position' instead + of 'field' (used with cut -b/-c) */ +}; + +/* allocates and initializes the FRP array and N_FRP count */ +extern void set_fields (char const *fieldstr, unsigned int options); + +#endif diff --git a/src/shred.c b/src/shred.c new file mode 100644 index 0000000..a5da4e0 --- /dev/null +++ b/src/shred.c @@ -0,0 +1,1273 @@ +/* shred.c - overwrite files and devices to make it harder to recover data + + Copyright (C) 1999-2023 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999 Colin Plumb. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Colin Plumb. */ + +/* + * Do a more secure overwrite of given files or devices, to make it harder + * for even very expensive hardware probing to recover the data. + * + * Although this process is also known as "wiping", I prefer the longer + * name both because I think it is more evocative of what is happening and + * because a longer name conveys a more appropriate sense of deliberateness. + * + * For the theory behind this, see "Secure Deletion of Data from Magnetic + * and Solid-State Memory", on line at + * https://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html + * + * Just for the record, reversing one or two passes of disk overwrite + * is not terribly difficult with hardware help. Hook up a good-quality + * digitizing oscilloscope to the output of the head preamplifier and copy + * the high-res digitized data to a computer for some off-line analysis. + * Read the "current" data and average all the pulses together to get an + * "average" pulse on the disk. Subtract this average pulse from all of + * the actual pulses and you can clearly see the "echo" of the previous + * data on the disk. + * + * Real hard drives have to balance the cost of the media, the head, + * and the read circuitry. They use better-quality media than absolutely + * necessary to limit the cost of the read circuitry. By throwing that + * assumption out, and the assumption that you want the data processed + * as fast as the hard drive can spin, you can do better. + * + * If asked to wipe a file, this also unlinks it, renaming it in a + * clever way to try to leave no trace of the original filename. + * + * This was inspired by a desire to improve on some code titled: + * Wipe V1.0-- Overwrite and delete files. S. 2/3/96 + * but I've rewritten everything here so completely that no trace of + * the original remains. + * + * Thanks to: + * Bob Jenkins, for his good RNG work and patience with the FSF copyright + * paperwork. + * Jim Meyering, for his work merging this into the GNU fileutils while + * still letting me feel a sense of ownership and pride. Getting me to + * tolerate the GNU brace style was quite a feat of diplomacy. + * Paul Eggert, for lots of useful discussion and code. I disagree with + * an awful lot of his suggestions, but they're disagreements worth having. + * + * Things to think about: + * - Security: Is there any risk to the race + * between overwriting and unlinking a file? Will it do anything + * drastically bad if told to attack a named pipe or socket? + */ + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "shred" + +#define AUTHORS proper_name ("Colin Plumb") + +#include + +#include +#include +#include +#include +#if defined __linux__ && HAVE_SYS_MTIO_H +# include +#endif + +#include "system.h" +#include "alignalloc.h" +#include "argmatch.h" +#include "assure.h" +#include "xdectoint.h" +#include "fcntl--.h" +#include "human.h" +#include "randint.h" +#include "randread.h" +#include "renameatu.h" +#include "stat-size.h" + +/* Default number of times to overwrite. */ +enum { DEFAULT_PASSES = 3 }; + +/* How many seconds to wait before checking whether to output another + verbose output line. */ +enum { VERBOSE_UPDATE = 5 }; + +/* Sector size and corresponding mask, for recovering after write failures. + The size must be a power of 2. */ +enum { SECTOR_SIZE = 512 }; +enum { SECTOR_MASK = SECTOR_SIZE - 1 }; +static_assert (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0); + +enum remove_method +{ + remove_none = 0, /* the default: only wipe data. */ + remove_unlink, /* don't obfuscate name, just unlink. */ + remove_wipe, /* obfuscate name before unlink. */ + remove_wipesync /* obfuscate name, syncing each byte, before unlink. */ +}; + +static char const *const remove_args[] = +{ + "unlink", "wipe", "wipesync", nullptr +}; + +static enum remove_method const remove_methods[] = +{ + remove_unlink, remove_wipe, remove_wipesync +}; + +struct Options +{ + bool force; /* -f flag: chmod files if necessary */ + size_t n_iterations; /* -n flag: Number of iterations */ + off_t size; /* -s flag: size of file */ + enum remove_method remove_file; /* -u flag: remove file after shredding */ + bool verbose; /* -v flag: Print progress */ + bool exact; /* -x flag: Do not round up file size */ + bool zero_fill; /* -z flag: Add a final zero pass */ +}; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + RANDOM_SOURCE_OPTION = CHAR_MAX + 1 +}; + +static struct option const long_opts[] = +{ + {"exact", no_argument, nullptr, 'x'}, + {"force", no_argument, nullptr, 'f'}, + {"iterations", required_argument, nullptr, 'n'}, + {"size", required_argument, nullptr, 's'}, + {"random-source", required_argument, nullptr, RANDOM_SOURCE_OPTION}, + {"remove", optional_argument, nullptr, 'u'}, + {"verbose", no_argument, nullptr, 'v'}, + {"zero", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... FILE...\n"), program_name); + fputs (_("\ +Overwrite the specified FILE(s) repeatedly, in order to make it harder\n\ +for even very expensive hardware probing to recover the data.\n\ +"), stdout); + fputs (_("\ +\n\ +If FILE is -, shred standard output.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + printf (_("\ + -f, --force change permissions to allow writing if necessary\n\ + -n, --iterations=N overwrite N times instead of the default (%d)\n\ + --random-source=FILE get random bytes from FILE\n\ + -s, --size=N shred this many bytes (suffixes like K, M, G accepted)\n\ +"), DEFAULT_PASSES); + fputs (_("\ + -u deallocate and remove file after overwriting\n\ + --remove[=HOW] like -u but give control on HOW to delete; See below\n\ + -v, --verbose show progress\n\ + -x, --exact do not round file sizes up to the next full block;\n\ + this is the default for non-regular files\n\ + -z, --zero add a final overwrite with zeros to hide shredding\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Delete FILE(s) if --remove (-u) is specified. The default is not to remove\n\ +the files because it is common to operate on device files like /dev/hda,\n\ +and those files usually should not be removed.\n\ +The optional HOW parameter indicates how to remove a directory entry:\n\ +'unlink' => use a standard unlink call.\n\ +'wipe' => also first obfuscate bytes in the name.\n\ +'wipesync' => also sync each obfuscated byte to the device.\n\ +The default mode is 'wipesync', but note it can be expensive.\n\ +\n\ +"), stdout); + fputs (_("\ +CAUTION: shred assumes the file system and hardware overwrite data in place.\n\ +Although this is common, many platforms operate otherwise. Also, backups\n\ +and mirrors may contain unremovable copies that will let a shredded file\n\ +be recovered later. See the GNU coreutils manual for details.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* + * Determine if pattern type is periodic or not. + */ +static bool +periodic_pattern (int type) +{ + if (type <= 0) + return false; + + unsigned char r[3]; + unsigned int bits = type & 0xfff; + + bits |= bits << 12; + r[0] = (bits >> 4) & 255; + r[1] = (bits >> 8) & 255; + r[2] = bits & 255; + + return (r[0] != r[1]) || (r[0] != r[2]); +} + +/* + * Fill a buffer with a fixed pattern. + * + * The buffer must be at least 3 bytes long, even if + * size is less. Larger sizes are filled exactly. + */ +static void +fillpattern (int type, unsigned char *r, size_t size) +{ + size_t i; + unsigned int bits = type & 0xfff; + + bits |= bits << 12; + r[0] = (bits >> 4) & 255; + r[1] = (bits >> 8) & 255; + r[2] = bits & 255; + for (i = 3; i <= size / 2; i *= 2) + memcpy (r + i, r, i); + if (i < size) + memcpy (r + i, r, size - i); + + /* Invert the first bit of every sector. */ + if (type & 0x1000) + for (i = 0; i < size; i += SECTOR_SIZE) + r[i] ^= 0x80; +} + +/* + * Generate a 6-character (+ nul) pass name string + * FIXME: allow translation of "random". + */ +#define PASS_NAME_SIZE 7 +static void +passname (unsigned char const *data, char name[PASS_NAME_SIZE]) +{ + if (data) + sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]); + else + memcpy (name, "random", PASS_NAME_SIZE); +} + +/* Return true when it's ok to ignore an fsync or fdatasync + failure that set errno to ERRNO_VAL. */ +static bool +ignorable_sync_errno (int errno_val) +{ + return (errno_val == EINVAL + || errno_val == EBADF + /* HP-UX does this */ + || errno_val == EISDIR); +} + +/* Request that all data for FD be transferred to the corresponding + storage device. QNAME is the file name (quoted for colons). + Report any errors found. Return 0 on success, -1 + (setting errno) on failure. It is not an error if fdatasync and/or + fsync is not supported for this file, or if the file is not a + writable file descriptor. */ +static int +dosync (int fd, char const *qname) +{ + int err; + +#if HAVE_FDATASYNC + if (fdatasync (fd) == 0) + return 0; + err = errno; + if ( ! ignorable_sync_errno (err)) + { + error (0, err, _("%s: fdatasync failed"), qname); + errno = err; + return -1; + } +#endif + + if (fsync (fd) == 0) + return 0; + err = errno; + if ( ! ignorable_sync_errno (err)) + { + error (0, err, _("%s: fsync failed"), qname); + errno = err; + return -1; + } + + sync (); + return 0; +} + +/* Turn on or off direct I/O mode for file descriptor FD, if possible. + Try to turn it on if ENABLE is true. Otherwise, try to turn it off. */ +static void +direct_mode (int fd, bool enable) +{ + if (O_DIRECT) + { + int fd_flags = fcntl (fd, F_GETFL); + if (0 < fd_flags) + { + int new_flags = (enable + ? (fd_flags | O_DIRECT) + : (fd_flags & ~O_DIRECT)); + if (new_flags != fd_flags) + fcntl (fd, F_SETFL, new_flags); + } + } + +#if HAVE_DIRECTIO && defined DIRECTIO_ON && defined DIRECTIO_OFF + /* This is Solaris-specific. */ + directio (fd, enable ? DIRECTIO_ON : DIRECTIO_OFF); +#endif +} + +/* Rewind FD; its status is ST. */ +static bool +dorewind (int fd, struct stat const *st) +{ + if (S_ISCHR (st->st_mode)) + { +#if defined __linux__ && HAVE_SYS_MTIO_H + /* In the Linux kernel, lseek does not work on tape devices; it + returns a randomish value instead. Try the low-level tape + rewind operation first. */ + struct mtop op; + op.mt_op = MTREW; + op.mt_count = 1; + if (ioctl (fd, MTIOCTOP, &op) == 0) + return true; +#endif + } + off_t offset = lseek (fd, 0, SEEK_SET); + if (0 < offset) + errno = EINVAL; + return offset == 0; +} + +/* By convention, negative sizes represent unknown values. */ + +static bool +known (off_t size) +{ + return 0 <= size; +} + +/* + * Do pass number K of N, writing *SIZEP bytes of the given pattern TYPE + * to the file descriptor FD. K and N are passed in only for verbose + * progress message purposes. If N == 0, no progress messages are printed. + * + * If *SIZEP == -1, the size is unknown, and it will be filled in as soon + * as writing fails with ENOSPC. + * + * Return 1 on write error, -1 on other error, 0 on success. + */ +static int +dopass (int fd, struct stat const *st, char const *qname, off_t *sizep, + int type, struct randread_source *s, + unsigned long int k, unsigned long int n) +{ + off_t size = *sizep; + off_t offset; /* Current file position */ + time_t thresh IF_LINT ( = 0); /* Time to maybe print next status update */ + time_t now = 0; /* Current time */ + size_t lim; /* Amount of data to try writing */ + size_t soff; /* Offset into buffer for next write */ + ssize_t ssize; /* Return value from write */ + + /* Fill pattern buffer. Aligning it to a page so we can do direct I/O. */ + size_t page_size = getpagesize (); +#define PERIODIC_OUTPUT_SIZE (60 * 1024) +#define NONPERIODIC_OUTPUT_SIZE (64 * 1024) + static_assert (PERIODIC_OUTPUT_SIZE % 3 == 0); + size_t output_size = periodic_pattern (type) + ? PERIODIC_OUTPUT_SIZE : NONPERIODIC_OUTPUT_SIZE; +#define FILLPATTERN_SIZE (((output_size + 2) / 3) * 3) /* Multiple of 3 */ + unsigned char *pbuf = xalignalloc (page_size, FILLPATTERN_SIZE); + + char pass_string[PASS_NAME_SIZE]; /* Name of current pass */ + bool write_error = false; + bool other_error = false; + + /* Printable previous offset into the file */ + char previous_offset_buf[LONGEST_HUMAN_READABLE + 1]; + char const *previous_human_offset; + + /* As a performance tweak, avoid direct I/O for small sizes, + as it's just a performance rather then security consideration, + and direct I/O can often be unsupported for small non aligned sizes. */ + bool try_without_directio = 0 < size && size < output_size; + if (! try_without_directio) + direct_mode (fd, true); + + if (! dorewind (fd, st)) + { + error (0, errno, _("%s: cannot rewind"), qname); + other_error = true; + goto free_pattern_mem; + } + + /* Constant fill patterns need only be set up once. */ + if (type >= 0) + { + lim = known (size) && size < FILLPATTERN_SIZE ? size : FILLPATTERN_SIZE; + fillpattern (type, pbuf, lim); + passname (pbuf, pass_string); + } + else + { + passname (0, pass_string); + } + + /* Set position if first status update */ + if (n) + { + error (0, 0, _("%s: pass %lu/%lu (%s)..."), qname, k, n, pass_string); + thresh = time (nullptr) + VERBOSE_UPDATE; + previous_human_offset = ""; + } + + offset = 0; + while (true) + { + /* How much to write this time? */ + lim = output_size; + if (known (size) && size - offset < output_size) + { + if (size < offset) + break; + lim = size - offset; + if (!lim) + break; + } + if (type < 0) + randread (s, pbuf, lim); + /* Loop to retry partial writes. */ + for (soff = 0; soff < lim; soff += ssize) + { + ssize = write (fd, pbuf + soff, lim - soff); + if (ssize <= 0) + { + if (! known (size) && (ssize == 0 || errno == ENOSPC)) + { + /* We have found the end of the file. */ + if (soff <= OFF_T_MAX - offset) + *sizep = size = offset + soff; + break; + } + else + { + int errnum = errno; + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + + /* Retry without direct I/O since this may not be supported + at all on some (file) systems, or with the current size. + I.e., a specified --size that is not aligned, or when + dealing with slop at the end of a file with --exact. */ + if (! try_without_directio && errno == EINVAL) + { + direct_mode (fd, false); + ssize = 0; + try_without_directio = true; + continue; + } + error (0, errnum, _("%s: error writing at offset %s"), + qname, umaxtostr (offset + soff, buf)); + + /* 'shred' is often used on bad media, before throwing it + out. Thus, it shouldn't give up on bad blocks. This + code works because lim is always a multiple of + SECTOR_SIZE, except at the end. This size constraint + also enables direct I/O on some (file) systems. */ + static_assert (PERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0); + static_assert (NONPERIODIC_OUTPUT_SIZE % SECTOR_SIZE == 0); + if (errnum == EIO && known (size) + && (soff | SECTOR_MASK) < lim) + { + size_t soff1 = (soff | SECTOR_MASK) + 1; + if (lseek (fd, offset + soff1, SEEK_SET) != -1) + { + /* Arrange to skip this block. */ + ssize = soff1 - soff; + write_error = true; + continue; + } + error (0, errno, _("%s: lseek failed"), qname); + } + other_error = true; + goto free_pattern_mem; + } + } + } + + /* Okay, we have written "soff" bytes. */ + + if (OFF_T_MAX - offset < soff) + { + error (0, 0, _("%s: file too large"), qname); + other_error = true; + goto free_pattern_mem; + } + + offset += soff; + + bool done = offset == size; + + /* Time to print progress? */ + if (n && ((done && *previous_human_offset) + || thresh <= (now = time (nullptr)))) + { + char offset_buf[LONGEST_HUMAN_READABLE + 1]; + char size_buf[LONGEST_HUMAN_READABLE + 1]; + int human_progress_opts = (human_autoscale | human_SI + | human_base_1024 | human_B); + char const *human_offset + = human_readable (offset, offset_buf, + human_floor | human_progress_opts, 1, 1); + + if (done || !STREQ (previous_human_offset, human_offset)) + { + if (! known (size)) + error (0, 0, _("%s: pass %lu/%lu (%s)...%s"), + qname, k, n, pass_string, human_offset); + else + { + uintmax_t off = offset; + int percent = (size == 0 + ? 100 + : (off <= TYPE_MAXIMUM (uintmax_t) / 100 + ? off * 100 / size + : off / (size / 100))); + char const *human_size + = human_readable (size, size_buf, + human_ceiling | human_progress_opts, + 1, 1); + if (done) + human_offset = human_size; + error (0, 0, _("%s: pass %lu/%lu (%s)...%s/%s %d%%"), + qname, k, n, pass_string, human_offset, human_size, + percent); + } + + strcpy (previous_offset_buf, human_offset); + previous_human_offset = previous_offset_buf; + thresh = now + VERBOSE_UPDATE; + + /* + * Force periodic syncs to keep displayed progress accurate + * FIXME: Should these be present even if -v is not enabled, + * to keep the buffer cache from filling with dirty pages? + * It's a common problem with programs that do lots of writes, + * like mkfs. + */ + if (dosync (fd, qname) != 0) + { + if (errno != EIO) + { + other_error = true; + goto free_pattern_mem; + } + write_error = true; + } + } + } + } + + /* Force what we just wrote to hit the media. */ + if (dosync (fd, qname) != 0) + { + if (errno != EIO) + { + other_error = true; + goto free_pattern_mem; + } + write_error = true; + } + +free_pattern_mem: + alignfree (pbuf); + + return other_error ? -1 : write_error; +} + +/* + * The passes start and end with a random pass, and the passes in between + * are done in random order. The idea is to deprive someone trying to + * reverse the process of knowledge of the overwrite patterns, so they + * have the additional step of figuring out what was done to the device + * before they can try to reverse or cancel it. + * + * First, all possible 1-bit patterns. There are two of them. + * Then, all possible 2-bit patterns. There are four, but the two + * which are also 1-bit patterns can be omitted. + * Then, all possible 3-bit patterns. Likewise, 8-2 = 6. + * Then, all possible 4-bit patterns. 16-4 = 12. + * + * The basic passes are: + * 1-bit: 0x000, 0xFFF + * 2-bit: 0x555, 0xAAA + * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit) + * 100100100100 110110110110 + * 9 2 4 D B 6 + * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777, + * 0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit) + * Adding three random passes at the beginning, middle and end + * produces the default 25-pass structure. + * + * The next extension would be to 5-bit and 6-bit patterns. + * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered + * 6-bit patterns, so they would increase the time required + * significantly. 4-bit patterns are enough for most purposes. + * + * The main gotcha is that this would require a trickier encoding, + * since lcm(2,3,4) = 12 bits is easy to fit into an int, but + * lcm(2,3,4,5) = 60 bits is not. + * + * One extension that is included is to complement the first bit in each + * 512-byte block, to alter the phase of the encoded data in the more + * complex encodings. This doesn't apply to MFM, so the 1-bit patterns + * are considered part of the 3-bit ones and the 2-bit patterns are + * considered part of the 4-bit patterns. + * + * + * How does the generalization to variable numbers of passes work? + * + * Here's how... + * Have an ordered list of groups of passes. Each group is a set. + * Take as many groups as will fit, plus a random subset of the + * last partial group, and place them into the passes list. + * Then shuffle the passes list into random order and use that. + * + * One extra detail: if we can't include a large enough fraction of the + * last group to be interesting, then just substitute random passes. + * + * If you want more passes than the entire list of groups can + * provide, just start repeating from the beginning of the list. + */ +static int const + patterns[] = +{ + -2, /* 2 random passes */ + 2, 0x000, 0xFFF, /* 1-bit */ + 2, 0x555, 0xAAA, /* 2-bit */ + -1, /* 1 random pass */ + 6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6, /* 3-bit */ + 12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777, + 0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE, /* 4-bit */ + -1, /* 1 random pass */ + /* The following patterns have the first bit per block flipped */ + 8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF, + 14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777, + 0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE, + -1, /* 1 random pass */ + 0 /* End */ +}; + +/* + * Generate a random wiping pass pattern with num passes. + * This is a two-stage process. First, the passes to include + * are chosen, and then they are shuffled into the desired + * order. + */ +static void +genpattern (int *dest, size_t num, struct randint_source *s) +{ + size_t randpasses; + int const *p; + int *d; + size_t n; + size_t accum, top, swap; + int k; + + if (!num) + return; + + /* Stage 1: choose the passes to use */ + p = patterns; + randpasses = 0; + d = dest; /* Destination for generated pass list */ + n = num; /* Passes remaining to fill */ + + while (true) + { + k = *p++; /* Block descriptor word */ + if (!k) + { /* Loop back to the beginning */ + p = patterns; + } + else if (k < 0) + { /* -k random passes */ + k = -k; + if ((size_t) k >= n) + { + randpasses += n; + break; + } + randpasses += k; + n -= k; + } + else if ((size_t) k <= n) + { /* Full block of patterns */ + memcpy (d, p, k * sizeof (int)); + p += k; + d += k; + n -= k; + } + else if (n < 2 || 3 * n < (size_t) k) + { /* Finish with random */ + randpasses += n; + break; + } + else + { /* Pad out with n of the k available */ + do + { + if (n == (size_t) k || randint_choose (s, k) < n) + { + *d++ = *p; + n--; + } + p++; + k--; + } + while (n); + break; + } + } + top = num - randpasses; /* Top of initialized data */ + /* affirm (d == dest + top); */ + + /* + * We now have fixed patterns in the dest buffer up to + * "top", and we need to scramble them, with "randpasses" + * random passes evenly spaced among them. + * + * We want one at the beginning, one at the end, and + * evenly spaced in between. To do this, we basically + * use Bresenham's line draw (a.k.a DDA) algorithm + * to draw a line with slope (randpasses-1)/(num-1). + * (We use a positive accumulator and count down to + * do this.) + * + * So for each desired output value, we do the following: + * - If it should be a random pass, copy the pass type + * to top++, out of the way of the other passes, and + * set the current pass to -1 (random). + * - If it should be a normal pattern pass, choose an + * entry at random between here and top-1 (inclusive) + * and swap the current entry with that one. + */ + randpasses--; /* To speed up later math */ + accum = randpasses; /* Bresenham DDA accumulator */ + for (n = 0; n < num; n++) + { + if (accum <= randpasses) + { + accum += num - 1; + dest[top++] = dest[n]; + dest[n] = -1; + } + else + { + swap = n + randint_choose (s, top - n); + k = dest[n]; + dest[n] = dest[swap]; + dest[swap] = k; + } + accum -= randpasses; + } + /* affirm (top == num); */ +} + +/* + * The core routine to actually do the work. This overwrites the first + * size bytes of the given fd. Return true if successful. + */ +static bool +do_wipefd (int fd, char const *qname, struct randint_source *s, + struct Options const *flags) +{ + size_t i; + struct stat st; + off_t size; /* Size to write, size to read */ + off_t i_size = 0; /* For small files, initial size to overwrite inode */ + unsigned long int n; /* Number of passes for printing purposes */ + int *passarray; + bool ok = true; + struct randread_source *rs; + + n = 0; /* dopass takes n == 0 to mean "don't print progress" */ + if (flags->verbose) + n = flags->n_iterations + flags->zero_fill; + + if (fstat (fd, &st)) + { + error (0, errno, _("%s: fstat failed"), qname); + return false; + } + + /* If we know that we can't possibly shred the file, give up now. + Otherwise, we may go into an infinite loop writing data before we + find that we can't rewind the device. */ + if ((S_ISCHR (st.st_mode) && isatty (fd)) + || S_ISFIFO (st.st_mode) + || S_ISSOCK (st.st_mode)) + { + error (0, 0, _("%s: invalid file type"), qname); + return false; + } + else if (S_ISREG (st.st_mode) && st.st_size < 0) + { + error (0, 0, _("%s: file has negative size"), qname); + return false; + } + + /* Allocate pass array */ + passarray = xnmalloc (flags->n_iterations, sizeof *passarray); + + size = flags->size; + if (size == -1) + { + if (S_ISREG (st.st_mode)) + { + size = st.st_size; + + if (! flags->exact) + { + /* Round up to the nearest block size to clear slack space. */ + off_t remainder = size % ST_BLKSIZE (st); + if (size && size < ST_BLKSIZE (st)) + i_size = size; + if (remainder != 0) + { + off_t size_incr = ST_BLKSIZE (st) - remainder; + size += MIN (size_incr, OFF_T_MAX - size); + } + } + } + else + { + /* The behavior of lseek is unspecified, but in practice if + it returns a positive number that's the size of this + device. */ + size = lseek (fd, 0, SEEK_END); + if (size <= 0) + { + /* We are unable to determine the length, up front. + Let dopass do that as part of its first iteration. */ + size = -1; + } + } + } + else if (S_ISREG (st.st_mode) + && st.st_size < MIN (ST_BLKSIZE (st), size)) + i_size = st.st_size; + + /* Schedule the passes in random order. */ + genpattern (passarray, flags->n_iterations, s); + + rs = randint_get_source (s); + + while (true) + { + off_t pass_size; + unsigned long int pn = n; + + if (i_size) + { + pass_size = i_size; + i_size = 0; + pn = 0; + } + else if (size) + { + pass_size = size; + size = 0; + } + /* TODO: consider handling tail packing by + writing the tail padding as a separate pass, + (that would not rewind). */ + else + break; + + for (i = 0; i < flags->n_iterations + flags->zero_fill; i++) + { + int err = 0; + int type = i < flags->n_iterations ? passarray[i] : 0; + + err = dopass (fd, &st, qname, &pass_size, type, rs, i + 1, pn); + + if (err) + { + ok = false; + if (err < 0) + goto wipefd_out; + } + } + } + + /* Now deallocate the data. The effect of ftruncate is specified + on regular files and shared memory objects (also directories, but + they are not possible here); don't worry about errors reported + for other file types. */ + + if (flags->remove_file && ftruncate (fd, 0) != 0 + && (S_ISREG (st.st_mode) || S_TYPEISSHM (&st))) + { + error (0, errno, _("%s: error truncating"), qname); + ok = false; + goto wipefd_out; + } + +wipefd_out: + free (passarray); + return ok; +} + +/* A wrapper with a little more checking for fds on the command line */ +static bool +wipefd (int fd, char const *qname, struct randint_source *s, + struct Options const *flags) +{ + int fd_flags = fcntl (fd, F_GETFL); + + if (fd_flags < 0) + { + error (0, errno, _("%s: fcntl failed"), qname); + return false; + } + if (fd_flags & O_APPEND) + { + error (0, 0, _("%s: cannot shred append-only file descriptor"), qname); + return false; + } + return do_wipefd (fd, qname, s, flags); +} + +/* --- Name-wiping code --- */ + +/* Characters allowed in a file name - a safe universal set. */ +static char const nameset[] = +"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_."; + +/* Increment NAME (with LEN bytes). NAME must be a big-endian base N + number with the digits taken from nameset. Return true if successful. + Otherwise, (because NAME already has the greatest possible value) + return false. */ + +static bool +incname (char *name, size_t len) +{ + while (len--) + { + char const *p = strchr (nameset, name[len]); + + /* Given that NAME is composed of bytes from NAMESET, + P will never be null here. */ + + /* If this character has a successor, use it. */ + if (p[1]) + { + name[len] = p[1]; + return true; + } + + /* Otherwise, set this digit to 0 and increment the prefix. */ + name[len] = nameset[0]; + } + + return false; +} + +/* + * Repeatedly rename a file with shorter and shorter names, + * to obliterate all traces of the file name (and length) on any system + * that adds a trailing delimiter to on-device file names and reuses + * the same directory slot. Finally, unlink it. + * The passed-in filename is modified in place to the new filename. + * (Which is unlinked if this function succeeds, but is still present if + * it fails for some reason.) + * + * The main loop is written carefully to not get stuck if all possible + * names of a given length are occupied. It counts down the length from + * the original to 0. While the length is non-zero, it tries to find an + * unused file name of the given length. It continues until either the + * name is available and the rename succeeds, or it runs out of names + * to try (incname wraps and returns 1). Finally, it unlinks the file. + * + * The unlink is Unix-specific, as ANSI-standard remove has more + * portability problems with C libraries making it "safe". rename + * is ANSI-standard. + * + * To force the directory data out, we try to open the directory and + * invoke fdatasync and/or fsync on it. This is non-standard, so don't + * insist that it works: just fall back to a global sync in that case. + * This is fairly significantly Unix-specific. Of course, on any + * file system with synchronous metadata updates, this is unnecessary. + */ +static bool +wipename (char *oldname, char const *qoldname, struct Options const *flags) +{ + char *newname = xstrdup (oldname); + char *base = last_component (newname); + char *dir = dir_name (newname); + char *qdir = xstrdup (quotef (dir)); + bool first = true; + bool ok = true; + int dir_fd = -1; + + if (flags->remove_file == remove_wipesync) + dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK); + + if (flags->verbose) + error (0, 0, _("%s: removing"), qoldname); + + if (flags->remove_file != remove_unlink) + for (size_t len = base_len (base); len != 0; len--) + { + memset (base, nameset[0], len); + base[len] = 0; + bool rename_ok; + while (! (rename_ok = (renameatu (AT_FDCWD, oldname, AT_FDCWD, newname, + RENAME_NOREPLACE) + == 0)) + && errno == EEXIST && incname (base, len)) + continue; + if (rename_ok) + { + if (0 <= dir_fd && dosync (dir_fd, qdir) != 0) + ok = false; + if (flags->verbose) + { + /* People seem to understand this better than talking + about renaming OLDNAME. NEWNAME doesn't need + quoting because we picked it. OLDNAME needs to be + quoted only the first time. */ + char const *old = first ? qoldname : oldname; + error (0, 0, + _("%s: renamed to %s"), old, newname); + first = false; + } + memcpy (oldname + (base - newname), base, len + 1); + } + } + + if (unlink (oldname) != 0) + { + error (0, errno, _("%s: failed to remove"), qoldname); + ok = false; + } + else if (flags->verbose) + error (0, 0, _("%s: removed"), qoldname); + if (0 <= dir_fd) + { + if (dosync (dir_fd, qdir) != 0) + ok = false; + if (close (dir_fd) != 0) + { + error (0, errno, _("%s: failed to close"), qdir); + ok = false; + } + } + free (newname); + free (dir); + free (qdir); + return ok; +} + +/* + * Finally, the function that actually takes a filename and grinds + * it into hamburger. + * + * FIXME + * Detail to note: since we do not restore errno to EACCES after + * a failed chmod, we end up printing the error code from the chmod. + * This is actually the error that stopped us from proceeding, so + * it's arguably the right one, and in practice it'll be either EACCES + * again or EPERM, which both give similar error messages. + * Does anyone disagree? + */ +static bool +wipefile (char *name, char const *qname, + struct randint_source *s, struct Options const *flags) +{ + bool ok; + int fd; + + fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY); + if (fd < 0 + && (errno == EACCES && flags->force) + && chmod (name, S_IWUSR) == 0) + fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY); + if (fd < 0) + { + error (0, errno, _("%s: failed to open for writing"), qname); + return false; + } + + ok = do_wipefd (fd, qname, s, flags); + if (close (fd) != 0) + { + error (0, errno, _("%s: failed to close"), qname); + ok = false; + } + if (ok && flags->remove_file) + ok = wipename (name, qname, flags); + return ok; +} + + +/* Buffers for random data. */ +static struct randint_source *randint_source; + +/* Just on general principles, wipe buffers containing information + that may be related to the possibly-pseudorandom values used during + shredding. */ +static void +clear_random_data (void) +{ + randint_all_free (randint_source); +} + + +int +main (int argc, char **argv) +{ + bool ok = true; + struct Options flags = { 0, }; + char **file; + int n_files; + int c; + int i; + char const *random_source = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + flags.n_iterations = DEFAULT_PASSES; + flags.size = -1; + + while ((c = getopt_long (argc, argv, "fn:s:uvxz", long_opts, nullptr)) != -1) + { + switch (c) + { + case 'f': + flags.force = true; + break; + + case 'n': + flags.n_iterations = xdectoumax (optarg, 0, + MIN (ULONG_MAX, + SIZE_MAX / sizeof (int)), "", + _("invalid number of passes"), 0); + break; + + case RANDOM_SOURCE_OPTION: + if (random_source && !STREQ (random_source, optarg)) + error (EXIT_FAILURE, 0, _("multiple random sources specified")); + random_source = optarg; + break; + + case 'u': + if (optarg == nullptr) + flags.remove_file = remove_wipesync; + else + flags.remove_file = XARGMATCH ("--remove", optarg, + remove_args, remove_methods); + break; + + case 's': + flags.size = xnumtoumax (optarg, 0, 0, OFF_T_MAX, "cbBkKMGTPEZYRQ0", + _("invalid file size"), 0); + break; + + case 'v': + flags.verbose = true; + break; + + case 'x': + flags.exact = true; + break; + + case 'z': + flags.zero_fill = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + file = argv + optind; + n_files = argc - optind; + + if (n_files == 0) + { + error (0, 0, _("missing file operand")); + usage (EXIT_FAILURE); + } + + randint_source = randint_all_new (random_source, SIZE_MAX); + if (! randint_source) + error (EXIT_FAILURE, errno, "%s", + quotef (random_source ? random_source : "getrandom")); + atexit (clear_random_data); + + for (i = 0; i < n_files; i++) + { + char *qname = xstrdup (quotef (file[i])); + if (STREQ (file[i], "-")) + { + ok &= wipefd (STDOUT_FILENO, qname, randint_source, &flags); + } + else + { + /* Plain filename - Note that this overwrites *argv! */ + ok &= wipefile (file[i], qname, randint_source, &flags); + } + free (qname); + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} +/* + * vim:sw=2:sts=2: + */ diff --git a/src/shuf.c b/src/shuf.c new file mode 100644 index 0000000..be07c4a --- /dev/null +++ b/src/shuf.c @@ -0,0 +1,603 @@ +/* Shuffle lines of text. + + Copyright (C) 2006-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Paul Eggert. */ + +#include + +#include +#include "system.h" + +#include "fadvise.h" +#include "getopt.h" +#include "linebuffer.h" +#include "quote.h" +#include "randint.h" +#include "randperm.h" +#include "read-file.h" +#include "stdio--.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "shuf" + +#define AUTHORS proper_name ("Paul Eggert") + +/* For reservoir-sampling, allocate the reservoir lines in batches. */ +enum { RESERVOIR_LINES_INCREMENT = 1024 }; + +/* reservoir-sampling introduces CPU overhead for small inputs. + So only enable it for inputs >= this limit. + This limit was determined using these commands: + $ for p in $(seq 7); do src/seq $((10**$p)) > 10p$p.in; done + $ for p in $(seq 7); do time shuf-nores -n10 10p$p.in >/dev/null; done + $ for p in $(seq 7); do time shuf -n10 10p$p.in >/dev/null; done .*/ +enum { RESERVOIR_MIN_INPUT = 8192 * 1024 }; + + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]\n\ + or: %s -e [OPTION]... [ARG]...\n\ + or: %s -i LO-HI [OPTION]...\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Write a random permutation of the input lines to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -e, --echo treat each ARG as an input line\n\ + -i, --input-range=LO-HI treat each number LO through HI as an input line\n\ + -n, --head-count=COUNT output at most COUNT lines\n\ + -o, --output=FILE write result to FILE instead of standard output\n\ + --random-source=FILE get random bytes from FILE\n\ + -r, --repeat output lines can be repeated\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + RANDOM_SOURCE_OPTION = CHAR_MAX + 1 +}; + +static struct option const long_opts[] = +{ + {"echo", no_argument, nullptr, 'e'}, + {"input-range", required_argument, nullptr, 'i'}, + {"head-count", required_argument, nullptr, 'n'}, + {"output", required_argument, nullptr, 'o'}, + {"random-source", required_argument, nullptr, RANDOM_SOURCE_OPTION}, + {"repeat", no_argument, nullptr, 'r'}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {0, 0, 0, 0}, +}; + +static void +input_from_argv (char **operand, int n_operands, char eolbyte) +{ + char *p; + size_t size = n_operands; + int i; + + for (i = 0; i < n_operands; i++) + size += strlen (operand[i]); + p = xmalloc (size); + + for (i = 0; i < n_operands; i++) + { + char *p1 = stpcpy (p, operand[i]); + operand[i] = p; + p = p1; + *p++ = eolbyte; + } + + operand[n_operands] = p; +} + +/* Return the start of the next line after LINE, which is guaranteed + to end in EOLBYTE. */ + +static char * +next_line (char *line, char eolbyte) +{ + char *p = rawmemchr (line, eolbyte); + return p + 1; +} + +/* Return the size of the input if possible or OFF_T_MAX if not. */ + +static off_t +input_size (void) +{ + off_t file_size; + + struct stat stat_buf; + if (fstat (STDIN_FILENO, &stat_buf) != 0) + return OFF_T_MAX; + if (usable_st_size (&stat_buf)) + file_size = stat_buf.st_size; + else + return OFF_T_MAX; + + off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR); + if (input_offset < 0) + return OFF_T_MAX; + + file_size -= input_offset; + + return file_size; +} + +/* Read all lines and store up to K permuted lines in *OUT_RSRV. + Return the number of lines read, up to a maximum of K. */ + +static size_t +read_input_reservoir_sampling (FILE *in, char eolbyte, size_t k, + struct randint_source *s, + struct linebuffer **out_rsrv) +{ + randint n_lines = 0; + size_t n_alloc_lines = MIN (k, RESERVOIR_LINES_INCREMENT); + struct linebuffer *line = nullptr; + struct linebuffer *rsrv; + + rsrv = xcalloc (n_alloc_lines, sizeof (struct linebuffer)); + + /* Fill the first K lines, directly into the reservoir. */ + while (n_lines < k + && (line = + readlinebuffer_delim (&rsrv[n_lines], in, eolbyte)) != nullptr) + { + n_lines++; + + /* Enlarge reservoir. */ + if (n_lines >= n_alloc_lines) + { + n_alloc_lines += RESERVOIR_LINES_INCREMENT; + rsrv = xnrealloc (rsrv, n_alloc_lines, sizeof (struct linebuffer)); + memset (&rsrv[n_lines], 0, + RESERVOIR_LINES_INCREMENT * sizeof (struct linebuffer)); + } + } + + /* last line wasn't null - so there may be more lines to read. */ + if (line != nullptr) + { + struct linebuffer dummy; + initbuffer (&dummy); /* space for lines not put in reservoir. */ + + /* Choose the fate of the next line, with decreasing probability (as + n_lines increases in size). + + If the line will be used, store it directly in the reservoir. + Otherwise, store it in dummy space. + + With 'struct linebuffer', storing into existing buffer will reduce + re-allocations (will only re-allocate if the new line is longer than + the currently allocated space). */ + do + { + randint j = randint_choose (s, n_lines + 1); /* 0 .. n_lines. */ + line = (j < k) ? (&rsrv[j]) : (&dummy); + } + while (readlinebuffer_delim (line, in, eolbyte) != nullptr && n_lines++); + + if (! n_lines) + error (EXIT_FAILURE, EOVERFLOW, _("too many input lines")); + + freebuffer (&dummy); + } + + /* no more input lines, or an input error. */ + if (ferror (in)) + error (EXIT_FAILURE, errno, _("read error")); + + *out_rsrv = rsrv; + return MIN (k, n_lines); +} + +static int +write_permuted_output_reservoir (size_t n_lines, struct linebuffer *lines, + size_t const *permutation) +{ + for (size_t i = 0; i < n_lines; i++) + { + const struct linebuffer *p = &lines[permutation[i]]; + if (fwrite (p->buffer, sizeof (char), p->length, stdout) != p->length) + return -1; + } + + return 0; +} + +/* Read data from file IN. Input lines are delimited by EOLBYTE; + silently append a trailing EOLBYTE if the file ends in some other + byte. Store a pointer to the resulting array of lines into *PLINE. + Return the number of lines read. Report an error and exit on + failure. */ + +static size_t +read_input (FILE *in, char eolbyte, char ***pline) +{ + char *p; + char *buf = nullptr; + size_t used; + char *lim; + char **line; + size_t n_lines; + + /* TODO: We should limit the amount of data read here, + to less than RESERVOIR_MIN_INPUT. I.e., adjust fread_file() to support + taking a byte limit. We'd then need to ensure we handle a line spanning + this boundary. With that in place we could set use_reservoir_sampling + when used==RESERVOIR_MIN_INPUT, and have read_input_reservoir_sampling() + call a wrapper function to populate a linebuffer from the internal pline + or if none left, stdin. Doing that would give better performance by + avoiding the reservoir CPU overhead when reading < RESERVOIR_MIN_INPUT + from a pipe, and allow us to dispense with the input_size() function. */ + if (!(buf = fread_file (in, 0, &used))) + error (EXIT_FAILURE, errno, _("read error")); + + if (used && buf[used - 1] != eolbyte) + buf[used++] = eolbyte; + + lim = buf + used; + + n_lines = 0; + for (p = buf; p < lim; p = next_line (p, eolbyte)) + n_lines++; + + *pline = line = xnmalloc (n_lines + 1, sizeof *line); + + line[0] = p = buf; + for (size_t i = 1; i <= n_lines; i++) + line[i] = p = next_line (p, eolbyte); + + return n_lines; +} + +/* Output N_LINES lines to stdout from LINE array, + chosen by the indices in PERMUTATION. + PERMUTATION and LINE must have at least N_LINES elements. + Strings in LINE must include the line-terminator character. */ +static int +write_permuted_lines (size_t n_lines, char *const *line, + size_t const *permutation) +{ + for (size_t i = 0; i < n_lines; i++) + { + char *const *p = line + permutation[i]; + size_t len = p[1] - p[0]; + if (fwrite (p[0], sizeof *p[0], len, stdout) != len) + return -1; + } + + return 0; +} + +/* Output N_LINES of numbers to stdout, from PERMUTATION array. + PERMUTATION must have at least N_LINES elements. */ +static int +write_permuted_numbers (size_t n_lines, size_t lo_input, + size_t const *permutation, char eolbyte) +{ + for (size_t i = 0; i < n_lines; i++) + { + unsigned long int n = lo_input + permutation[i]; + if (printf ("%lu%c", n, eolbyte) < 0) + return -1; + } + + return 0; +} + +/* Output COUNT numbers to stdout, chosen randomly from range + LO_INPUT through HI_INPUT. */ +static int +write_random_numbers (struct randint_source *s, size_t count, + size_t lo_input, size_t hi_input, char eolbyte) +{ + const randint range = hi_input - lo_input + 1; + + for (size_t i = 0; i < count; i++) + { + unsigned long int j = lo_input + randint_choose (s, range); + if (printf ("%lu%c", j, eolbyte) < 0) + return -1; + } + + return 0; +} + +/* Output COUNT lines to stdout from LINES array. + LINES must have at least N_LINES elements in it. + Strings in LINES_ must include the line-terminator character. */ +static int +write_random_lines (struct randint_source *s, size_t count, + char *const *lines, size_t n_lines) +{ + for (size_t i = 0; i < count; i++) + { + const randint j = randint_choose (s, n_lines); + char *const *p = lines + j; + size_t len = p[1] - p[0]; + if (fwrite (p[0], sizeof *p[0], len, stdout) != len) + return -1; + } + + return 0; +} + +int +main (int argc, char **argv) +{ + bool echo = false; + bool input_range = false; + size_t lo_input = SIZE_MAX; + size_t hi_input = 0; + size_t head_lines = SIZE_MAX; + char const *outfile = nullptr; + char *random_source = nullptr; + char eolbyte = '\n'; + char **input_lines = nullptr; + bool use_reservoir_sampling = false; + bool repeat = false; + + int optc; + int n_operands; + char **operand; + size_t n_lines; + char **line = nullptr; + struct linebuffer *reservoir = nullptr; + struct randint_source *randint_source; + size_t *permutation = nullptr; + int i; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "ei:n:o:rz", long_opts, nullptr)) + != -1) + switch (optc) + { + case 'e': + echo = true; + break; + + case 'i': + { + if (input_range) + error (EXIT_FAILURE, 0, _("multiple -i options specified")); + input_range = true; + + uintmax_t u; + char *lo_end; + strtol_error err = xstrtoumax (optarg, &lo_end, 10, &u, nullptr); + if (err == LONGINT_OK) + { + lo_input = u; + if (lo_input != u) + err = LONGINT_OVERFLOW; + else if (*lo_end != '-') + err = LONGINT_INVALID; + else + { + err = xstrtoumax (lo_end + 1, nullptr, 10, &u, ""); + if (err == LONGINT_OK) + { + hi_input = u; + if (hi_input != u) + err = LONGINT_OVERFLOW; + } + } + } + + n_lines = hi_input - lo_input + 1; + + if (err != LONGINT_OK || (lo_input <= hi_input) == (n_lines == 0)) + error (EXIT_FAILURE, err == LONGINT_OVERFLOW ? EOVERFLOW : 0, + "%s: %s", _("invalid input range"), quote (optarg)); + } + break; + + case 'n': + { + uintmax_t argval; + strtol_error e = xstrtoumax (optarg, nullptr, 10, &argval, ""); + + if (e == LONGINT_OK) + head_lines = MIN (head_lines, argval); + else if (e != LONGINT_OVERFLOW) + error (EXIT_FAILURE, 0, _("invalid line count: %s"), + quote (optarg)); + } + break; + + case 'o': + if (outfile && !STREQ (outfile, optarg)) + error (EXIT_FAILURE, 0, _("multiple output files specified")); + outfile = optarg; + break; + + case RANDOM_SOURCE_OPTION: + if (random_source && !STREQ (random_source, optarg)) + error (EXIT_FAILURE, 0, _("multiple random sources specified")); + random_source = optarg; + break; + + case 'r': + repeat = true; + break; + + case 'z': + eolbyte = '\0'; + break; + + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + + n_operands = argc - optind; + operand = argv + optind; + + /* Check invalid usage. */ + if (echo && input_range) + { + error (0, 0, _("cannot combine -e and -i options")); + usage (EXIT_FAILURE); + } + if (input_range ? 0 < n_operands : !echo && 1 < n_operands) + { + error (0, 0, _("extra operand %s"), quote (operand[!input_range])); + usage (EXIT_FAILURE); + } + + /* Prepare input. */ + if (head_lines == 0) + { + n_lines = 0; + line = nullptr; + } + else if (echo) + { + input_from_argv (operand, n_operands, eolbyte); + n_lines = n_operands; + line = operand; + } + else if (input_range) + { + n_lines = hi_input - lo_input + 1; + line = nullptr; + } + else + { + /* If an input file is specified, re-open it as stdin. */ + if (n_operands == 1 + && ! (STREQ (operand[0], "-") + || freopen (operand[0], "r", stdin))) + error (EXIT_FAILURE, errno, "%s", quotef (operand[0])); + + fadvise (stdin, FADVISE_SEQUENTIAL); + + if (repeat || head_lines == SIZE_MAX + || input_size () <= RESERVOIR_MIN_INPUT) + { + n_lines = read_input (stdin, eolbyte, &input_lines); + line = input_lines; + } + else + { + use_reservoir_sampling = true; + n_lines = SIZE_MAX; /* unknown number of input lines, for now. */ + } + } + + /* The adjusted head line count; can be less than HEAD_LINES if the + input is small and if not repeating. */ + size_t ahead_lines = repeat || head_lines < n_lines ? head_lines : n_lines; + + randint_source = randint_all_new (random_source, + (use_reservoir_sampling || repeat + ? SIZE_MAX + : randperm_bound (ahead_lines, n_lines))); + if (! randint_source) + error (EXIT_FAILURE, errno, "%s", + quotef (random_source ? random_source : "getrandom")); + + if (use_reservoir_sampling) + { + /* Instead of reading the entire file into 'line', + use reservoir-sampling to store just AHEAD_LINES random lines. */ + n_lines = read_input_reservoir_sampling (stdin, eolbyte, ahead_lines, + randint_source, &reservoir); + ahead_lines = n_lines; + } + + /* Close stdin now, rather than earlier, so that randint_all_new + doesn't have to worry about opening something other than + stdin. */ + if (! (head_lines == 0 || echo || input_range || fclose (stdin) == 0)) + error (EXIT_FAILURE, errno, _("read error")); + + if (!repeat) + permutation = randperm_new (randint_source, ahead_lines, n_lines); + + if (outfile && ! freopen (outfile, "w", stdout)) + error (EXIT_FAILURE, errno, "%s", quotef (outfile)); + + /* Generate output according to requested method */ + if (repeat) + { + if (head_lines == 0) + i = 0; + else + { + if (n_lines == 0) + error (EXIT_FAILURE, 0, _("no lines to repeat")); + if (input_range) + i = write_random_numbers (randint_source, ahead_lines, + lo_input, hi_input, eolbyte); + else + i = write_random_lines (randint_source, ahead_lines, line, n_lines); + } + } + else + { + if (use_reservoir_sampling) + i = write_permuted_output_reservoir (n_lines, reservoir, permutation); + else if (input_range) + i = write_permuted_numbers (ahead_lines, lo_input, + permutation, eolbyte); + else + i = write_permuted_lines (ahead_lines, line, permutation); + } + + if (i != 0) + write_error (); + + main_exit (EXIT_SUCCESS); +} diff --git a/src/single-binary.mk b/src/single-binary.mk new file mode 100644 index 0000000..20a45f0 --- /dev/null +++ b/src/single-binary.mk @@ -0,0 +1,602 @@ +## Automatically generated by gen-single-binary.sh. DO NOT EDIT BY HAND! +src_libsinglebin_dir_a_DEPENDENCIES = src/libsinglebin_ls.a +src_libsinglebin_vdir_a_DEPENDENCIES = src/libsinglebin_ls.a +src_libsinglebin_arch_a_DEPENDENCIES = src/libsinglebin_uname.a +# Command arch +noinst_LIBRARIES += src/libsinglebin_arch.a +src_libsinglebin_arch_a_SOURCES = src/coreutils-arch.c +src_libsinglebin_arch_a_ldadd = src/libsinglebin_uname.a +src_libsinglebin_arch_a_CFLAGS = "-Dmain=single_binary_main_arch (int, char **); int single_binary_main_arch" -Dusage=_usage_arch $(src_coreutils_CFLAGS) +# Command hostname +noinst_LIBRARIES += src/libsinglebin_hostname.a +src_libsinglebin_hostname_a_SOURCES = src/hostname.c +src_libsinglebin_hostname_a_ldadd = $(GETHOSTNAME_LIB) +src_libsinglebin_hostname_a_DEPENDENCIES = $(src_hostname_DEPENDENCIES) +src_libsinglebin_hostname_a_CFLAGS = "-Dmain=single_binary_main_hostname (int, char **); int single_binary_main_hostname" -Dusage=_usage_hostname $(src_coreutils_CFLAGS) +# Command chroot +noinst_LIBRARIES += src/libsinglebin_chroot.a +src_libsinglebin_chroot_a_SOURCES = src/chroot.c +src_libsinglebin_chroot_a_DEPENDENCIES = $(src_chroot_DEPENDENCIES) +src_libsinglebin_chroot_a_CFLAGS = "-Dmain=single_binary_main_chroot (int, char **); int single_binary_main_chroot" -Dusage=_usage_chroot $(src_coreutils_CFLAGS) +# Command df +noinst_LIBRARIES += src/libsinglebin_df.a +src_libsinglebin_df_a_SOURCES = src/df.c src/find-mount-point.c +src_libsinglebin_df_a_DEPENDENCIES = $(src_df_DEPENDENCIES) +src_libsinglebin_df_a_CFLAGS = "-Dmain=single_binary_main_df (int, char **); int single_binary_main_df" -Dusage=_usage_df $(src_coreutils_CFLAGS) +# Command hostid +noinst_LIBRARIES += src/libsinglebin_hostid.a +src_libsinglebin_hostid_a_SOURCES = src/hostid.c +src_libsinglebin_hostid_a_DEPENDENCIES = $(src_hostid_DEPENDENCIES) +src_libsinglebin_hostid_a_CFLAGS = "-Dmain=single_binary_main_hostid (int, char **); int single_binary_main_hostid" -Dusage=_usage_hostid $(src_coreutils_CFLAGS) +# Command nice +noinst_LIBRARIES += src/libsinglebin_nice.a +src_libsinglebin_nice_a_SOURCES = src/nice.c +src_libsinglebin_nice_a_DEPENDENCIES = $(src_nice_DEPENDENCIES) +src_libsinglebin_nice_a_CFLAGS = "-Dmain=single_binary_main_nice (int, char **); int single_binary_main_nice" -Dusage=_usage_nice $(src_coreutils_CFLAGS) +# Command pinky +noinst_LIBRARIES += src/libsinglebin_pinky.a +src_libsinglebin_pinky_a_SOURCES = src/pinky.c +src_libsinglebin_pinky_a_ldadd = $(GETADDRINFO_LIB) $(READUTMP_LIB) +src_libsinglebin_pinky_a_DEPENDENCIES = $(src_pinky_DEPENDENCIES) +src_libsinglebin_pinky_a_CFLAGS = "-Dmain=single_binary_main_pinky (int, char **); int single_binary_main_pinky" -Dusage=_usage_pinky $(src_coreutils_CFLAGS) +# Command stdbuf +noinst_LIBRARIES += src/libsinglebin_stdbuf.a +src_libsinglebin_stdbuf_a_SOURCES = src/stdbuf.c +src_libsinglebin_stdbuf_a_DEPENDENCIES = $(src_stdbuf_DEPENDENCIES) +src_libsinglebin_stdbuf_a_CFLAGS = "-Dmain=single_binary_main_stdbuf (int, char **); int single_binary_main_stdbuf" -Dusage=_usage_stdbuf $(src_coreutils_CFLAGS) +# Command stty +noinst_LIBRARIES += src/libsinglebin_stty.a +src_libsinglebin_stty_a_SOURCES = src/stty.c +src_libsinglebin_stty_a_DEPENDENCIES = $(src_stty_DEPENDENCIES) +src_libsinglebin_stty_a_CFLAGS = "-Dmain=single_binary_main_stty (int, char **); int single_binary_main_stty" -Dusage=_usage_stty $(src_coreutils_CFLAGS) +# Command timeout +noinst_LIBRARIES += src/libsinglebin_timeout.a +src_libsinglebin_timeout_a_SOURCES = src/timeout.c src/operand2sig.c +src_libsinglebin_timeout_a_ldadd = $(LIB_TIMER_TIME) +src_libsinglebin_timeout_a_DEPENDENCIES = $(src_timeout_DEPENDENCIES) +src_libsinglebin_timeout_a_CFLAGS = "-Dmain=single_binary_main_timeout (int, char **); int single_binary_main_timeout" -Dusage=_usage_timeout $(src_coreutils_CFLAGS) +# Command users +noinst_LIBRARIES += src/libsinglebin_users.a +src_libsinglebin_users_a_SOURCES = src/users.c +src_libsinglebin_users_a_ldadd = $(READUTMP_LIB) +src_libsinglebin_users_a_DEPENDENCIES = $(src_users_DEPENDENCIES) +src_libsinglebin_users_a_CFLAGS = "-Dmain=single_binary_main_users (int, char **); int single_binary_main_users" -Dusage=_usage_users $(src_coreutils_CFLAGS) +# Command who +noinst_LIBRARIES += src/libsinglebin_who.a +src_libsinglebin_who_a_SOURCES = src/who.c +src_libsinglebin_who_a_ldadd = $(GETADDRINFO_LIB) $(READUTMP_LIB) +src_libsinglebin_who_a_DEPENDENCIES = $(src_who_DEPENDENCIES) +src_libsinglebin_who_a_CFLAGS = "-Dmain=single_binary_main_who (int, char **); int single_binary_main_who" -Dusage=_usage_who $(src_coreutils_CFLAGS) +# Command _ +noinst_LIBRARIES += src/libsinglebin__.a +src_libsinglebin___a_SOURCES = src/lbracket.c +src_libsinglebin___a_ldadd = $(src_test_LDADD) +src_libsinglebin___a_DEPENDENCIES = $(src___DEPENDENCIES) +src_libsinglebin___a_CFLAGS = "-Dmain=single_binary_main__ (int, char **); int single_binary_main__" -Dusage=_usage__ $(src_coreutils_CFLAGS) +# Command b2sum +noinst_LIBRARIES += src/libsinglebin_b2sum.a +src_libsinglebin_b2sum_a_SOURCES = src/digest.c src/blake2/blake2.h src/blake2/blake2-impl.h src/blake2/blake2b-ref.c src/blake2/b2sum.c src/blake2/b2sum.h +src_libsinglebin_b2sum_a_DEPENDENCIES = $(src_b2sum_DEPENDENCIES) +src_libsinglebin_b2sum_a_CFLAGS = "-Dmain=single_binary_main_b2sum (int, char **); int single_binary_main_b2sum" -Dusage=_usage_b2sum $(src_coreutils_CFLAGS) +src_libsinglebin_b2sum_a_CPPFLAGS = -DHASH_ALGO_BLAKE2=1 -DHAVE_CONFIG_H $(AM_CPPFLAGS) +# Command base64 +noinst_LIBRARIES += src/libsinglebin_base64.a +src_libsinglebin_base64_a_SOURCES = src/basenc.c +src_libsinglebin_base64_a_DEPENDENCIES = $(src_base64_DEPENDENCIES) +src_libsinglebin_base64_a_CFLAGS = "-Dmain=single_binary_main_base64 (int, char **); int single_binary_main_base64" -Dusage=_usage_base64 $(src_coreutils_CFLAGS) +src_libsinglebin_base64_a_CPPFLAGS = -DBASE_TYPE=64 $(AM_CPPFLAGS) +# Command base32 +noinst_LIBRARIES += src/libsinglebin_base32.a +src_libsinglebin_base32_a_SOURCES = src/basenc.c +src_libsinglebin_base32_a_DEPENDENCIES = $(src_base32_DEPENDENCIES) +src_libsinglebin_base32_a_CFLAGS = "-Dmain=single_binary_main_base32 (int, char **); int single_binary_main_base32" -Dusage=_usage_base32 $(src_coreutils_CFLAGS) +src_libsinglebin_base32_a_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS) +# Command basenc +noinst_LIBRARIES += src/libsinglebin_basenc.a +src_libsinglebin_basenc_a_SOURCES = src/basenc.c +src_libsinglebin_basenc_a_DEPENDENCIES = $(src_basenc_DEPENDENCIES) +src_libsinglebin_basenc_a_CFLAGS = "-Dmain=single_binary_main_basenc (int, char **); int single_binary_main_basenc" -Dusage=_usage_basenc $(src_coreutils_CFLAGS) +src_libsinglebin_basenc_a_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS) +# Command basename +noinst_LIBRARIES += src/libsinglebin_basename.a +src_libsinglebin_basename_a_SOURCES = src/basename.c +src_libsinglebin_basename_a_DEPENDENCIES = $(src_basename_DEPENDENCIES) +src_libsinglebin_basename_a_CFLAGS = "-Dmain=single_binary_main_basename (int, char **); int single_binary_main_basename" -Dusage=_usage_basename $(src_coreutils_CFLAGS) +# Command cat +noinst_LIBRARIES += src/libsinglebin_cat.a +src_libsinglebin_cat_a_SOURCES = src/cat.c +src_libsinglebin_cat_a_DEPENDENCIES = $(src_cat_DEPENDENCIES) +src_libsinglebin_cat_a_CFLAGS = "-Dmain=single_binary_main_cat (int, char **); int single_binary_main_cat" -Dusage=_usage_cat $(src_coreutils_CFLAGS) +# Command chcon +noinst_LIBRARIES += src/libsinglebin_chcon.a +src_libsinglebin_chcon_a_SOURCES = src/chcon.c +src_libsinglebin_chcon_a_ldadd = $(LIB_SELINUX) +src_libsinglebin_chcon_a_DEPENDENCIES = $(src_chcon_DEPENDENCIES) +src_libsinglebin_chcon_a_CFLAGS = "-Dmain=single_binary_main_chcon (int, char **); int single_binary_main_chcon" -Dusage=_usage_chcon $(src_coreutils_CFLAGS) +# Command chgrp +noinst_LIBRARIES += src/libsinglebin_chgrp.a +src_libsinglebin_chgrp_a_SOURCES = src/chgrp.c src/chown-core.c +src_libsinglebin_chgrp_a_DEPENDENCIES = $(src_chgrp_DEPENDENCIES) +src_libsinglebin_chgrp_a_CFLAGS = "-Dmain=single_binary_main_chgrp (int, char **); int single_binary_main_chgrp" -Dusage=_usage_chgrp $(src_coreutils_CFLAGS) +# Command chmod +noinst_LIBRARIES += src/libsinglebin_chmod.a +src_libsinglebin_chmod_a_SOURCES = src/chmod.c +src_libsinglebin_chmod_a_DEPENDENCIES = $(src_chmod_DEPENDENCIES) +src_libsinglebin_chmod_a_CFLAGS = "-Dmain=single_binary_main_chmod (int, char **); int single_binary_main_chmod" -Dusage=_usage_chmod $(src_coreutils_CFLAGS) +# Command chown +noinst_LIBRARIES += src/libsinglebin_chown.a +src_libsinglebin_chown_a_SOURCES = src/chown.c src/chown-core.c +src_libsinglebin_chown_a_DEPENDENCIES = $(src_chown_DEPENDENCIES) +src_libsinglebin_chown_a_CFLAGS = "-Dmain=single_binary_main_chown (int, char **); int single_binary_main_chown" -Dusage=_usage_chown $(src_coreutils_CFLAGS) +# Command cksum +noinst_LIBRARIES += src/libsinglebin_cksum.a +src_libsinglebin_cksum_a_SOURCES = $(src_b2sum_SOURCES) src/sum.c src/sum.h src/cksum.c src/cksum.h src/crctab.c +src_libsinglebin_cksum_a_ldadd = $(LIB_CRYPTO) $(cksum_pclmul_ldadd) +src_libsinglebin_cksum_a_DEPENDENCIES = $(src_cksum_DEPENDENCIES) +src_libsinglebin_cksum_a_CFLAGS = "-Dmain=single_binary_main_cksum (int, char **); int single_binary_main_cksum" -Dusage=_usage_cksum $(src_coreutils_CFLAGS) +src_libsinglebin_cksum_a_CPPFLAGS = -DHASH_ALGO_CKSUM=1 -DHAVE_CONFIG_H $(AM_CPPFLAGS) +# Command comm +noinst_LIBRARIES += src/libsinglebin_comm.a +src_libsinglebin_comm_a_SOURCES = src/comm.c +src_libsinglebin_comm_a_DEPENDENCIES = $(src_comm_DEPENDENCIES) +src_libsinglebin_comm_a_CFLAGS = "-Dmain=single_binary_main_comm (int, char **); int single_binary_main_comm" -Dusage=_usage_comm $(src_coreutils_CFLAGS) +# Command cp +noinst_LIBRARIES += src/libsinglebin_cp.a +src_libsinglebin_cp_a_SOURCES = src/cp.c $(copy_sources) $(selinux_sources) +src_libsinglebin_cp_a_ldadd = $(copy_ldadd) +src_libsinglebin_cp_a_DEPENDENCIES = $(src_cp_DEPENDENCIES) +src_libsinglebin_cp_a_CFLAGS = "-Dmain=single_binary_main_cp (int, char **); int single_binary_main_cp" -Dusage=_usage_cp $(src_coreutils_CFLAGS) +# Command csplit +noinst_LIBRARIES += src/libsinglebin_csplit.a +src_libsinglebin_csplit_a_SOURCES = src/csplit.c +src_libsinglebin_csplit_a_DEPENDENCIES = $(src_csplit_DEPENDENCIES) +src_libsinglebin_csplit_a_CFLAGS = "-Dmain=single_binary_main_csplit (int, char **); int single_binary_main_csplit" -Dusage=_usage_csplit $(src_coreutils_CFLAGS) +# Command cut +noinst_LIBRARIES += src/libsinglebin_cut.a +src_libsinglebin_cut_a_SOURCES = src/cut.c src/set-fields.c +src_libsinglebin_cut_a_DEPENDENCIES = $(src_cut_DEPENDENCIES) +src_libsinglebin_cut_a_CFLAGS = "-Dmain=single_binary_main_cut (int, char **); int single_binary_main_cut" -Dusage=_usage_cut $(src_coreutils_CFLAGS) +# Command date +noinst_LIBRARIES += src/libsinglebin_date.a +src_libsinglebin_date_a_SOURCES = src/date.c +src_libsinglebin_date_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_date_a_DEPENDENCIES = $(src_date_DEPENDENCIES) +src_libsinglebin_date_a_CFLAGS = "-Dmain=single_binary_main_date (int, char **); int single_binary_main_date" -Dusage=_usage_date $(src_coreutils_CFLAGS) +# Command dd +noinst_LIBRARIES += src/libsinglebin_dd.a +src_libsinglebin_dd_a_SOURCES = src/dd.c +src_libsinglebin_dd_a_ldadd = $(GETHRXTIME_LIB) $(LIB_FDATASYNC) +src_libsinglebin_dd_a_DEPENDENCIES = $(src_dd_DEPENDENCIES) +src_libsinglebin_dd_a_CFLAGS = "-Dmain=single_binary_main_dd (int, char **); int single_binary_main_dd" -Dusage=_usage_dd $(src_coreutils_CFLAGS) +# Command dir +noinst_LIBRARIES += src/libsinglebin_dir.a +src_libsinglebin_dir_a_SOURCES = src/coreutils-dir.c +src_libsinglebin_dir_a_ldadd = $(src_ls_LDADD) src/libsinglebin_ls.a +src_libsinglebin_dir_a_CFLAGS = "-Dmain=single_binary_main_dir (int, char **); int single_binary_main_dir" -Dusage=_usage_dir $(src_coreutils_CFLAGS) +# Command dircolors +noinst_LIBRARIES += src/libsinglebin_dircolors.a +src_libsinglebin_dircolors_a_SOURCES = src/dircolors.c +src_libsinglebin_dircolors_a_DEPENDENCIES = $(src_dircolors_DEPENDENCIES) +src_libsinglebin_dircolors_a_CFLAGS = "-Dmain=single_binary_main_dircolors (int, char **); int single_binary_main_dircolors" -Dusage=_usage_dircolors $(src_coreutils_CFLAGS) +# Command dirname +noinst_LIBRARIES += src/libsinglebin_dirname.a +src_libsinglebin_dirname_a_SOURCES = src/dirname.c +src_libsinglebin_dirname_a_DEPENDENCIES = $(src_dirname_DEPENDENCIES) +src_libsinglebin_dirname_a_CFLAGS = "-Dmain=single_binary_main_dirname (int, char **); int single_binary_main_dirname" -Dusage=_usage_dirname $(src_coreutils_CFLAGS) +# Command du +noinst_LIBRARIES += src/libsinglebin_du.a +src_libsinglebin_du_a_SOURCES = src/du.c +src_libsinglebin_du_a_DEPENDENCIES = $(src_du_DEPENDENCIES) +src_libsinglebin_du_a_CFLAGS = "-Dmain=single_binary_main_du (int, char **); int single_binary_main_du" -Dusage=_usage_du $(src_coreutils_CFLAGS) +# Command echo +noinst_LIBRARIES += src/libsinglebin_echo.a +src_libsinglebin_echo_a_SOURCES = src/echo.c +src_libsinglebin_echo_a_DEPENDENCIES = $(src_echo_DEPENDENCIES) +src_libsinglebin_echo_a_CFLAGS = "-Dmain=single_binary_main_echo (int, char **); int single_binary_main_echo" -Dusage=_usage_echo $(src_coreutils_CFLAGS) +# Command env +noinst_LIBRARIES += src/libsinglebin_env.a +src_libsinglebin_env_a_SOURCES = src/env.c src/operand2sig.c +src_libsinglebin_env_a_DEPENDENCIES = $(src_env_DEPENDENCIES) +src_libsinglebin_env_a_CFLAGS = "-Dmain=single_binary_main_env (int, char **); int single_binary_main_env" -Dusage=_usage_env $(src_coreutils_CFLAGS) +# Command expand +noinst_LIBRARIES += src/libsinglebin_expand.a +src_libsinglebin_expand_a_SOURCES = src/expand.c src/expand-common.c +src_libsinglebin_expand_a_DEPENDENCIES = $(src_expand_DEPENDENCIES) +src_libsinglebin_expand_a_CFLAGS = "-Dmain=single_binary_main_expand (int, char **); int single_binary_main_expand" -Dusage=_usage_expand $(src_coreutils_CFLAGS) +# Command expr +noinst_LIBRARIES += src/libsinglebin_expr.a +src_libsinglebin_expr_a_SOURCES = src/expr.c +src_libsinglebin_expr_a_ldadd = $(LIBGMP) +src_libsinglebin_expr_a_DEPENDENCIES = $(src_expr_DEPENDENCIES) +src_libsinglebin_expr_a_CFLAGS = "-Dmain=single_binary_main_expr (int, char **); int single_binary_main_expr" -Dusage=_usage_expr $(src_coreutils_CFLAGS) +# Command factor +noinst_LIBRARIES += src/libsinglebin_factor.a +src_libsinglebin_factor_a_SOURCES = src/factor.c +src_libsinglebin_factor_a_ldadd = $(LIBGMP) +src_libsinglebin_factor_a_DEPENDENCIES = $(src_factor_DEPENDENCIES) +src_libsinglebin_factor_a_CFLAGS = "-Dmain=single_binary_main_factor (int, char **); int single_binary_main_factor" -Dusage=_usage_factor $(src_coreutils_CFLAGS) +# Command false +noinst_LIBRARIES += src/libsinglebin_false.a +src_libsinglebin_false_a_SOURCES = src/false.c +src_libsinglebin_false_a_DEPENDENCIES = $(src_false_DEPENDENCIES) +src_libsinglebin_false_a_CFLAGS = "-Dmain=single_binary_main_false (int, char **); int single_binary_main_false" -Dusage=_usage_false $(src_coreutils_CFLAGS) +# Command fmt +noinst_LIBRARIES += src/libsinglebin_fmt.a +src_libsinglebin_fmt_a_SOURCES = src/fmt.c +src_libsinglebin_fmt_a_DEPENDENCIES = $(src_fmt_DEPENDENCIES) +src_libsinglebin_fmt_a_CFLAGS = "-Dmain=single_binary_main_fmt (int, char **); int single_binary_main_fmt" -Dusage=_usage_fmt $(src_coreutils_CFLAGS) +# Command fold +noinst_LIBRARIES += src/libsinglebin_fold.a +src_libsinglebin_fold_a_SOURCES = src/fold.c +src_libsinglebin_fold_a_DEPENDENCIES = $(src_fold_DEPENDENCIES) +src_libsinglebin_fold_a_CFLAGS = "-Dmain=single_binary_main_fold (int, char **); int single_binary_main_fold" -Dusage=_usage_fold $(src_coreutils_CFLAGS) +# Command ginstall +noinst_LIBRARIES += src/libsinglebin_ginstall.a +src_libsinglebin_ginstall_a_SOURCES = src/install.c src/prog-fprintf.c $(copy_sources) $(selinux_sources) +src_libsinglebin_ginstall_a_ldadd = $(copy_ldadd) $(LIB_SELINUX) $(CLOCK_TIME_LIB) +src_libsinglebin_ginstall_a_DEPENDENCIES = $(src_ginstall_DEPENDENCIES) +src_libsinglebin_ginstall_a_CFLAGS = "-Dmain=single_binary_main_ginstall (int, char **); int single_binary_main_ginstall" -Dusage=_usage_ginstall $(src_coreutils_CFLAGS) +# Command groups +noinst_LIBRARIES += src/libsinglebin_groups.a +src_libsinglebin_groups_a_SOURCES = src/groups.c src/group-list.c +src_libsinglebin_groups_a_DEPENDENCIES = $(src_groups_DEPENDENCIES) +src_libsinglebin_groups_a_CFLAGS = "-Dmain=single_binary_main_groups (int, char **); int single_binary_main_groups" -Dusage=_usage_groups $(src_coreutils_CFLAGS) +# Command head +noinst_LIBRARIES += src/libsinglebin_head.a +src_libsinglebin_head_a_SOURCES = src/head.c +src_libsinglebin_head_a_DEPENDENCIES = $(src_head_DEPENDENCIES) +src_libsinglebin_head_a_CFLAGS = "-Dmain=single_binary_main_head (int, char **); int single_binary_main_head" -Dusage=_usage_head $(src_coreutils_CFLAGS) +# Command id +noinst_LIBRARIES += src/libsinglebin_id.a +src_libsinglebin_id_a_SOURCES = src/id.c src/group-list.c +src_libsinglebin_id_a_ldadd = $(LIB_SELINUX) $(LIB_SMACK) +src_libsinglebin_id_a_DEPENDENCIES = $(src_id_DEPENDENCIES) +src_libsinglebin_id_a_CFLAGS = "-Dmain=single_binary_main_id (int, char **); int single_binary_main_id" -Dusage=_usage_id $(src_coreutils_CFLAGS) +# Command join +noinst_LIBRARIES += src/libsinglebin_join.a +src_libsinglebin_join_a_SOURCES = src/join.c +src_libsinglebin_join_a_DEPENDENCIES = $(src_join_DEPENDENCIES) +src_libsinglebin_join_a_CFLAGS = "-Dmain=single_binary_main_join (int, char **); int single_binary_main_join" -Dusage=_usage_join $(src_coreutils_CFLAGS) +# Command kill +noinst_LIBRARIES += src/libsinglebin_kill.a +src_libsinglebin_kill_a_SOURCES = src/kill.c src/operand2sig.c +src_libsinglebin_kill_a_ldadd = $(LIBTHREAD) +src_libsinglebin_kill_a_DEPENDENCIES = $(src_kill_DEPENDENCIES) +src_libsinglebin_kill_a_CFLAGS = "-Dmain=single_binary_main_kill (int, char **); int single_binary_main_kill" -Dusage=_usage_kill $(src_coreutils_CFLAGS) +# Command link +noinst_LIBRARIES += src/libsinglebin_link.a +src_libsinglebin_link_a_SOURCES = src/link.c +src_libsinglebin_link_a_DEPENDENCIES = $(src_link_DEPENDENCIES) +src_libsinglebin_link_a_CFLAGS = "-Dmain=single_binary_main_link (int, char **); int single_binary_main_link" -Dusage=_usage_link $(src_coreutils_CFLAGS) +# Command ln +noinst_LIBRARIES += src/libsinglebin_ln.a +src_libsinglebin_ln_a_SOURCES = src/ln.c src/force-link.c src/force-link.h src/relpath.c src/relpath.h +src_libsinglebin_ln_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_ln_a_DEPENDENCIES = $(src_ln_DEPENDENCIES) +src_libsinglebin_ln_a_CFLAGS = "-Dmain=single_binary_main_ln (int, char **); int single_binary_main_ln" -Dusage=_usage_ln $(src_coreutils_CFLAGS) +# Command logname +noinst_LIBRARIES += src/libsinglebin_logname.a +src_libsinglebin_logname_a_SOURCES = src/logname.c +src_libsinglebin_logname_a_DEPENDENCIES = $(src_logname_DEPENDENCIES) +src_libsinglebin_logname_a_CFLAGS = "-Dmain=single_binary_main_logname (int, char **); int single_binary_main_logname" -Dusage=_usage_logname $(src_coreutils_CFLAGS) +# Command ls +noinst_LIBRARIES += src/libsinglebin_ls.a +src_libsinglebin_ls_a_SOURCES = src/ls.c src/ls-ls.c +src_libsinglebin_ls_a_ldadd = $(LIB_SELINUX) $(LIB_SMACK) $(CLOCK_TIME_LIB) $(LIB_CAP) $(FILE_HAS_ACL_LIB) +src_libsinglebin_ls_a_DEPENDENCIES = $(src_ls_DEPENDENCIES) +src_libsinglebin_ls_a_CFLAGS = "-Dmain=single_binary_main_ls (int, char **); int single_binary_main_ls" -Dusage=_usage_ls $(src_coreutils_CFLAGS) +# Command md5sum +noinst_LIBRARIES += src/libsinglebin_md5sum.a +src_libsinglebin_md5sum_a_SOURCES = src/digest.c +src_libsinglebin_md5sum_a_ldadd = $(LIB_CRYPTO) +src_libsinglebin_md5sum_a_DEPENDENCIES = $(src_md5sum_DEPENDENCIES) +src_libsinglebin_md5sum_a_CFLAGS = "-Dmain=single_binary_main_md5sum (int, char **); int single_binary_main_md5sum" -Dusage=_usage_md5sum $(src_coreutils_CFLAGS) +src_libsinglebin_md5sum_a_CPPFLAGS = -DHASH_ALGO_MD5=1 $(AM_CPPFLAGS) +# Command mkdir +noinst_LIBRARIES += src/libsinglebin_mkdir.a +src_libsinglebin_mkdir_a_SOURCES = src/mkdir.c src/prog-fprintf.c $(selinux_sources) +src_libsinglebin_mkdir_a_ldadd = $(LIB_SELINUX) $(LIB_SMACK) +src_libsinglebin_mkdir_a_DEPENDENCIES = $(src_mkdir_DEPENDENCIES) +src_libsinglebin_mkdir_a_CFLAGS = "-Dmain=single_binary_main_mkdir (int, char **); int single_binary_main_mkdir" -Dusage=_usage_mkdir $(src_coreutils_CFLAGS) +# Command mkfifo +noinst_LIBRARIES += src/libsinglebin_mkfifo.a +src_libsinglebin_mkfifo_a_SOURCES = src/mkfifo.c $(selinux_sources) +src_libsinglebin_mkfifo_a_ldadd = $(LIB_SELINUX) $(LIB_SMACK) +src_libsinglebin_mkfifo_a_DEPENDENCIES = $(src_mkfifo_DEPENDENCIES) +src_libsinglebin_mkfifo_a_CFLAGS = "-Dmain=single_binary_main_mkfifo (int, char **); int single_binary_main_mkfifo" -Dusage=_usage_mkfifo $(src_coreutils_CFLAGS) +# Command mknod +noinst_LIBRARIES += src/libsinglebin_mknod.a +src_libsinglebin_mknod_a_SOURCES = src/mknod.c $(selinux_sources) +src_libsinglebin_mknod_a_ldadd = $(LIB_SELINUX) $(LIB_SMACK) +src_libsinglebin_mknod_a_DEPENDENCIES = $(src_mknod_DEPENDENCIES) +src_libsinglebin_mknod_a_CFLAGS = "-Dmain=single_binary_main_mknod (int, char **); int single_binary_main_mknod" -Dusage=_usage_mknod $(src_coreutils_CFLAGS) +# Command mktemp +noinst_LIBRARIES += src/libsinglebin_mktemp.a +src_libsinglebin_mktemp_a_SOURCES = src/mktemp.c +src_libsinglebin_mktemp_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_mktemp_a_DEPENDENCIES = $(src_mktemp_DEPENDENCIES) +src_libsinglebin_mktemp_a_CFLAGS = "-Dmain=single_binary_main_mktemp (int, char **); int single_binary_main_mktemp" -Dusage=_usage_mktemp $(src_coreutils_CFLAGS) +# Command mv +noinst_LIBRARIES += src/libsinglebin_mv.a +src_libsinglebin_mv_a_SOURCES = src/mv.c src/remove.c $(copy_sources) $(selinux_sources) +src_libsinglebin_mv_a_ldadd = $(copy_ldadd) $(remove_ldadd) +src_libsinglebin_mv_a_DEPENDENCIES = $(src_mv_DEPENDENCIES) +src_libsinglebin_mv_a_CFLAGS = "-Dmain=single_binary_main_mv (int, char **); int single_binary_main_mv" -Dusage=_usage_mv $(src_coreutils_CFLAGS) +# Command nl +noinst_LIBRARIES += src/libsinglebin_nl.a +src_libsinglebin_nl_a_SOURCES = src/nl.c +src_libsinglebin_nl_a_DEPENDENCIES = $(src_nl_DEPENDENCIES) +src_libsinglebin_nl_a_CFLAGS = "-Dmain=single_binary_main_nl (int, char **); int single_binary_main_nl" -Dusage=_usage_nl $(src_coreutils_CFLAGS) +# Command nproc +noinst_LIBRARIES += src/libsinglebin_nproc.a +src_libsinglebin_nproc_a_SOURCES = src/nproc.c +src_libsinglebin_nproc_a_DEPENDENCIES = $(src_nproc_DEPENDENCIES) +src_libsinglebin_nproc_a_CFLAGS = "-Dmain=single_binary_main_nproc (int, char **); int single_binary_main_nproc" -Dusage=_usage_nproc $(src_coreutils_CFLAGS) +# Command nohup +noinst_LIBRARIES += src/libsinglebin_nohup.a +src_libsinglebin_nohup_a_SOURCES = src/nohup.c +src_libsinglebin_nohup_a_DEPENDENCIES = $(src_nohup_DEPENDENCIES) +src_libsinglebin_nohup_a_CFLAGS = "-Dmain=single_binary_main_nohup (int, char **); int single_binary_main_nohup" -Dusage=_usage_nohup $(src_coreutils_CFLAGS) +# Command numfmt +noinst_LIBRARIES += src/libsinglebin_numfmt.a +src_libsinglebin_numfmt_a_SOURCES = src/numfmt.c src/set-fields.c +src_libsinglebin_numfmt_a_DEPENDENCIES = $(src_numfmt_DEPENDENCIES) +src_libsinglebin_numfmt_a_CFLAGS = "-Dmain=single_binary_main_numfmt (int, char **); int single_binary_main_numfmt" -Dusage=_usage_numfmt $(src_coreutils_CFLAGS) +# Command od +noinst_LIBRARIES += src/libsinglebin_od.a +src_libsinglebin_od_a_SOURCES = src/od.c +src_libsinglebin_od_a_DEPENDENCIES = $(src_od_DEPENDENCIES) +src_libsinglebin_od_a_CFLAGS = "-Dmain=single_binary_main_od (int, char **); int single_binary_main_od" -Dusage=_usage_od $(src_coreutils_CFLAGS) +# Command paste +noinst_LIBRARIES += src/libsinglebin_paste.a +src_libsinglebin_paste_a_SOURCES = src/paste.c +src_libsinglebin_paste_a_DEPENDENCIES = $(src_paste_DEPENDENCIES) +src_libsinglebin_paste_a_CFLAGS = "-Dmain=single_binary_main_paste (int, char **); int single_binary_main_paste" -Dusage=_usage_paste $(src_coreutils_CFLAGS) +# Command pathchk +noinst_LIBRARIES += src/libsinglebin_pathchk.a +src_libsinglebin_pathchk_a_SOURCES = src/pathchk.c +src_libsinglebin_pathchk_a_DEPENDENCIES = $(src_pathchk_DEPENDENCIES) +src_libsinglebin_pathchk_a_CFLAGS = "-Dmain=single_binary_main_pathchk (int, char **); int single_binary_main_pathchk" -Dusage=_usage_pathchk $(src_coreutils_CFLAGS) +# Command pr +noinst_LIBRARIES += src/libsinglebin_pr.a +src_libsinglebin_pr_a_SOURCES = src/pr.c +src_libsinglebin_pr_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_pr_a_DEPENDENCIES = $(src_pr_DEPENDENCIES) +src_libsinglebin_pr_a_CFLAGS = "-Dmain=single_binary_main_pr (int, char **); int single_binary_main_pr" -Dusage=_usage_pr $(src_coreutils_CFLAGS) +# Command printenv +noinst_LIBRARIES += src/libsinglebin_printenv.a +src_libsinglebin_printenv_a_SOURCES = src/printenv.c +src_libsinglebin_printenv_a_DEPENDENCIES = $(src_printenv_DEPENDENCIES) +src_libsinglebin_printenv_a_CFLAGS = "-Dmain=single_binary_main_printenv (int, char **); int single_binary_main_printenv" -Dusage=_usage_printenv $(src_coreutils_CFLAGS) +# Command printf +noinst_LIBRARIES += src/libsinglebin_printf.a +src_libsinglebin_printf_a_SOURCES = src/printf.c +src_libsinglebin_printf_a_ldadd = $(LIBICONV) +src_libsinglebin_printf_a_DEPENDENCIES = $(src_printf_DEPENDENCIES) +src_libsinglebin_printf_a_CFLAGS = "-Dmain=single_binary_main_printf (int, char **); int single_binary_main_printf" -Dusage=_usage_printf $(src_coreutils_CFLAGS) +# Command ptx +noinst_LIBRARIES += src/libsinglebin_ptx.a +src_libsinglebin_ptx_a_SOURCES = src/ptx.c +src_libsinglebin_ptx_a_DEPENDENCIES = $(src_ptx_DEPENDENCIES) +src_libsinglebin_ptx_a_CFLAGS = "-Dmain=single_binary_main_ptx (int, char **); int single_binary_main_ptx" -Dusage=_usage_ptx $(src_coreutils_CFLAGS) +# Command pwd +noinst_LIBRARIES += src/libsinglebin_pwd.a +src_libsinglebin_pwd_a_SOURCES = src/pwd.c +src_libsinglebin_pwd_a_DEPENDENCIES = $(src_pwd_DEPENDENCIES) +src_libsinglebin_pwd_a_CFLAGS = "-Dmain=single_binary_main_pwd (int, char **); int single_binary_main_pwd" -Dusage=_usage_pwd $(src_coreutils_CFLAGS) +# Command readlink +noinst_LIBRARIES += src/libsinglebin_readlink.a +src_libsinglebin_readlink_a_SOURCES = src/readlink.c +src_libsinglebin_readlink_a_DEPENDENCIES = $(src_readlink_DEPENDENCIES) +src_libsinglebin_readlink_a_CFLAGS = "-Dmain=single_binary_main_readlink (int, char **); int single_binary_main_readlink" -Dusage=_usage_readlink $(src_coreutils_CFLAGS) +# Command realpath +noinst_LIBRARIES += src/libsinglebin_realpath.a +src_libsinglebin_realpath_a_SOURCES = src/realpath.c src/relpath.c src/relpath.h +src_libsinglebin_realpath_a_DEPENDENCIES = $(src_realpath_DEPENDENCIES) +src_libsinglebin_realpath_a_CFLAGS = "-Dmain=single_binary_main_realpath (int, char **); int single_binary_main_realpath" -Dusage=_usage_realpath $(src_coreutils_CFLAGS) +# Command rm +noinst_LIBRARIES += src/libsinglebin_rm.a +src_libsinglebin_rm_a_SOURCES = src/rm.c src/remove.c +src_libsinglebin_rm_a_ldadd = $(remove_ldadd) +src_libsinglebin_rm_a_DEPENDENCIES = $(src_rm_DEPENDENCIES) +src_libsinglebin_rm_a_CFLAGS = "-Dmain=single_binary_main_rm (int, char **); int single_binary_main_rm" -Dusage=_usage_rm $(src_coreutils_CFLAGS) +# Command rmdir +noinst_LIBRARIES += src/libsinglebin_rmdir.a +src_libsinglebin_rmdir_a_SOURCES = src/rmdir.c src/prog-fprintf.c +src_libsinglebin_rmdir_a_DEPENDENCIES = $(src_rmdir_DEPENDENCIES) +src_libsinglebin_rmdir_a_CFLAGS = "-Dmain=single_binary_main_rmdir (int, char **); int single_binary_main_rmdir" -Dusage=_usage_rmdir $(src_coreutils_CFLAGS) +# Command runcon +noinst_LIBRARIES += src/libsinglebin_runcon.a +src_libsinglebin_runcon_a_SOURCES = src/runcon.c +src_libsinglebin_runcon_a_ldadd = $(LIB_SELINUX) +src_libsinglebin_runcon_a_DEPENDENCIES = $(src_runcon_DEPENDENCIES) +src_libsinglebin_runcon_a_CFLAGS = "-Dmain=single_binary_main_runcon (int, char **); int single_binary_main_runcon" -Dusage=_usage_runcon $(src_coreutils_CFLAGS) +# Command seq +noinst_LIBRARIES += src/libsinglebin_seq.a +src_libsinglebin_seq_a_SOURCES = src/seq.c +src_libsinglebin_seq_a_DEPENDENCIES = $(src_seq_DEPENDENCIES) +src_libsinglebin_seq_a_CFLAGS = "-Dmain=single_binary_main_seq (int, char **); int single_binary_main_seq" -Dusage=_usage_seq $(src_coreutils_CFLAGS) +# Command sha1sum +noinst_LIBRARIES += src/libsinglebin_sha1sum.a +src_libsinglebin_sha1sum_a_SOURCES = src/digest.c +src_libsinglebin_sha1sum_a_ldadd = $(LIB_CRYPTO) +src_libsinglebin_sha1sum_a_DEPENDENCIES = $(src_sha1sum_DEPENDENCIES) +src_libsinglebin_sha1sum_a_CFLAGS = "-Dmain=single_binary_main_sha1sum (int, char **); int single_binary_main_sha1sum" -Dusage=_usage_sha1sum $(src_coreutils_CFLAGS) +src_libsinglebin_sha1sum_a_CPPFLAGS = -DHASH_ALGO_SHA1=1 $(AM_CPPFLAGS) +# Command sha224sum +noinst_LIBRARIES += src/libsinglebin_sha224sum.a +src_libsinglebin_sha224sum_a_SOURCES = src/digest.c +src_libsinglebin_sha224sum_a_ldadd = $(LIB_CRYPTO) +src_libsinglebin_sha224sum_a_DEPENDENCIES = $(src_sha224sum_DEPENDENCIES) +src_libsinglebin_sha224sum_a_CFLAGS = "-Dmain=single_binary_main_sha224sum (int, char **); int single_binary_main_sha224sum" -Dusage=_usage_sha224sum $(src_coreutils_CFLAGS) +src_libsinglebin_sha224sum_a_CPPFLAGS = -DHASH_ALGO_SHA224=1 $(AM_CPPFLAGS) +# Command sha256sum +noinst_LIBRARIES += src/libsinglebin_sha256sum.a +src_libsinglebin_sha256sum_a_SOURCES = src/digest.c +src_libsinglebin_sha256sum_a_ldadd = $(LIB_CRYPTO) +src_libsinglebin_sha256sum_a_DEPENDENCIES = $(src_sha256sum_DEPENDENCIES) +src_libsinglebin_sha256sum_a_CFLAGS = "-Dmain=single_binary_main_sha256sum (int, char **); int single_binary_main_sha256sum" -Dusage=_usage_sha256sum $(src_coreutils_CFLAGS) +src_libsinglebin_sha256sum_a_CPPFLAGS = -DHASH_ALGO_SHA256=1 $(AM_CPPFLAGS) +# Command sha384sum +noinst_LIBRARIES += src/libsinglebin_sha384sum.a +src_libsinglebin_sha384sum_a_SOURCES = src/digest.c +src_libsinglebin_sha384sum_a_ldadd = $(LIB_CRYPTO) +src_libsinglebin_sha384sum_a_DEPENDENCIES = $(src_sha384sum_DEPENDENCIES) +src_libsinglebin_sha384sum_a_CFLAGS = "-Dmain=single_binary_main_sha384sum (int, char **); int single_binary_main_sha384sum" -Dusage=_usage_sha384sum $(src_coreutils_CFLAGS) +src_libsinglebin_sha384sum_a_CPPFLAGS = -DHASH_ALGO_SHA384=1 $(AM_CPPFLAGS) +# Command sha512sum +noinst_LIBRARIES += src/libsinglebin_sha512sum.a +src_libsinglebin_sha512sum_a_SOURCES = src/digest.c +src_libsinglebin_sha512sum_a_ldadd = $(LIB_CRYPTO) +src_libsinglebin_sha512sum_a_DEPENDENCIES = $(src_sha512sum_DEPENDENCIES) +src_libsinglebin_sha512sum_a_CFLAGS = "-Dmain=single_binary_main_sha512sum (int, char **); int single_binary_main_sha512sum" -Dusage=_usage_sha512sum $(src_coreutils_CFLAGS) +src_libsinglebin_sha512sum_a_CPPFLAGS = -DHASH_ALGO_SHA512=1 $(AM_CPPFLAGS) +# Command shred +noinst_LIBRARIES += src/libsinglebin_shred.a +src_libsinglebin_shred_a_SOURCES = src/shred.c +src_libsinglebin_shred_a_ldadd = $(LIB_FDATASYNC) +src_libsinglebin_shred_a_DEPENDENCIES = $(src_shred_DEPENDENCIES) +src_libsinglebin_shred_a_CFLAGS = "-Dmain=single_binary_main_shred (int, char **); int single_binary_main_shred" -Dusage=_usage_shred $(src_coreutils_CFLAGS) +# Command shuf +noinst_LIBRARIES += src/libsinglebin_shuf.a +src_libsinglebin_shuf_a_SOURCES = src/shuf.c +src_libsinglebin_shuf_a_DEPENDENCIES = $(src_shuf_DEPENDENCIES) +src_libsinglebin_shuf_a_CFLAGS = "-Dmain=single_binary_main_shuf (int, char **); int single_binary_main_shuf" -Dusage=_usage_shuf $(src_coreutils_CFLAGS) +# Command sleep +noinst_LIBRARIES += src/libsinglebin_sleep.a +src_libsinglebin_sleep_a_SOURCES = src/sleep.c +src_libsinglebin_sleep_a_ldadd = $(NANOSLEEP_LIB) +src_libsinglebin_sleep_a_DEPENDENCIES = $(src_sleep_DEPENDENCIES) +src_libsinglebin_sleep_a_CFLAGS = "-Dmain=single_binary_main_sleep (int, char **); int single_binary_main_sleep" -Dusage=_usage_sleep $(src_coreutils_CFLAGS) +# Command sort +noinst_LIBRARIES += src/libsinglebin_sort.a +src_libsinglebin_sort_a_SOURCES = src/sort.c +src_libsinglebin_sort_a_ldadd = $(EUIDACCESS_LIBGEN) $(CLOCK_TIME_LIB) $(NANOSLEEP_LIB) $(LIB_CRYPTO) $(LIBPMULTITHREAD) $(PTHREAD_SIGMASK_LIB) +src_libsinglebin_sort_a_DEPENDENCIES = $(src_sort_DEPENDENCIES) +src_libsinglebin_sort_a_CFLAGS = "-Dmain=single_binary_main_sort (int, char **); int single_binary_main_sort" -Dusage=_usage_sort $(src_coreutils_CFLAGS) +# Command split +noinst_LIBRARIES += src/libsinglebin_split.a +src_libsinglebin_split_a_SOURCES = src/split.c src/temp-stream.c +src_libsinglebin_split_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_split_a_DEPENDENCIES = $(src_split_DEPENDENCIES) +src_libsinglebin_split_a_CFLAGS = "-Dmain=single_binary_main_split (int, char **); int single_binary_main_split" -Dusage=_usage_split $(src_coreutils_CFLAGS) +# Command stat +noinst_LIBRARIES += src/libsinglebin_stat.a +src_libsinglebin_stat_a_SOURCES = src/stat.c src/find-mount-point.c +src_libsinglebin_stat_a_ldadd = $(LIB_SELINUX) $(LIB_NVPAIR) +src_libsinglebin_stat_a_DEPENDENCIES = $(src_stat_DEPENDENCIES) +src_libsinglebin_stat_a_CFLAGS = "-Dmain=single_binary_main_stat (int, char **); int single_binary_main_stat" -Dusage=_usage_stat $(src_coreutils_CFLAGS) +# Command sum +noinst_LIBRARIES += src/libsinglebin_sum.a +src_libsinglebin_sum_a_SOURCES = src/sum.c src/sum.h src/digest.c +src_libsinglebin_sum_a_DEPENDENCIES = $(src_sum_DEPENDENCIES) +src_libsinglebin_sum_a_CFLAGS = "-Dmain=single_binary_main_sum (int, char **); int single_binary_main_sum" -Dusage=_usage_sum $(src_coreutils_CFLAGS) +src_libsinglebin_sum_a_CPPFLAGS = -DHASH_ALGO_SUM=1 $(AM_CPPFLAGS) +# Command sync +noinst_LIBRARIES += src/libsinglebin_sync.a +src_libsinglebin_sync_a_SOURCES = src/sync.c +src_libsinglebin_sync_a_ldadd = $(LIB_FDATASYNC) +src_libsinglebin_sync_a_DEPENDENCIES = $(src_sync_DEPENDENCIES) +src_libsinglebin_sync_a_CFLAGS = "-Dmain=single_binary_main_sync (int, char **); int single_binary_main_sync" -Dusage=_usage_sync $(src_coreutils_CFLAGS) +# Command tac +noinst_LIBRARIES += src/libsinglebin_tac.a +src_libsinglebin_tac_a_SOURCES = src/tac.c src/temp-stream.c +src_libsinglebin_tac_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_tac_a_DEPENDENCIES = $(src_tac_DEPENDENCIES) +src_libsinglebin_tac_a_CFLAGS = "-Dmain=single_binary_main_tac (int, char **); int single_binary_main_tac" -Dusage=_usage_tac $(src_coreutils_CFLAGS) +# Command tail +noinst_LIBRARIES += src/libsinglebin_tail.a +src_libsinglebin_tail_a_SOURCES = src/tail.c src/iopoll.c +src_libsinglebin_tail_a_ldadd = $(NANOSLEEP_LIB) +src_libsinglebin_tail_a_DEPENDENCIES = $(src_tail_DEPENDENCIES) +src_libsinglebin_tail_a_CFLAGS = "-Dmain=single_binary_main_tail (int, char **); int single_binary_main_tail" -Dusage=_usage_tail $(src_coreutils_CFLAGS) +# Command tee +noinst_LIBRARIES += src/libsinglebin_tee.a +src_libsinglebin_tee_a_SOURCES = src/tee.c src/iopoll.c +src_libsinglebin_tee_a_DEPENDENCIES = $(src_tee_DEPENDENCIES) +src_libsinglebin_tee_a_CFLAGS = "-Dmain=single_binary_main_tee (int, char **); int single_binary_main_tee" -Dusage=_usage_tee $(src_coreutils_CFLAGS) +# Command test +noinst_LIBRARIES += src/libsinglebin_test.a +src_libsinglebin_test_a_SOURCES = src/test.c +src_libsinglebin_test_a_ldadd = $(EUIDACCESS_LIBGEN) +src_libsinglebin_test_a_DEPENDENCIES = $(src_test_DEPENDENCIES) +src_libsinglebin_test_a_CFLAGS = "-Dmain=single_binary_main_test (int, char **); int single_binary_main_test" -Dusage=_usage_test $(src_coreutils_CFLAGS) +# Command touch +noinst_LIBRARIES += src/libsinglebin_touch.a +src_libsinglebin_touch_a_SOURCES = src/touch.c +src_libsinglebin_touch_a_ldadd = $(CLOCK_TIME_LIB) +src_libsinglebin_touch_a_DEPENDENCIES = $(src_touch_DEPENDENCIES) +src_libsinglebin_touch_a_CFLAGS = "-Dmain=single_binary_main_touch (int, char **); int single_binary_main_touch" -Dusage=_usage_touch $(src_coreutils_CFLAGS) +# Command tr +noinst_LIBRARIES += src/libsinglebin_tr.a +src_libsinglebin_tr_a_SOURCES = src/tr.c +src_libsinglebin_tr_a_DEPENDENCIES = $(src_tr_DEPENDENCIES) +src_libsinglebin_tr_a_CFLAGS = "-Dmain=single_binary_main_tr (int, char **); int single_binary_main_tr" -Dusage=_usage_tr $(src_coreutils_CFLAGS) +# Command true +noinst_LIBRARIES += src/libsinglebin_true.a +src_libsinglebin_true_a_SOURCES = src/true.c +src_libsinglebin_true_a_DEPENDENCIES = $(src_true_DEPENDENCIES) +src_libsinglebin_true_a_CFLAGS = "-Dmain=single_binary_main_true (int, char **); int single_binary_main_true" -Dusage=_usage_true $(src_coreutils_CFLAGS) +# Command truncate +noinst_LIBRARIES += src/libsinglebin_truncate.a +src_libsinglebin_truncate_a_SOURCES = src/truncate.c +src_libsinglebin_truncate_a_DEPENDENCIES = $(src_truncate_DEPENDENCIES) +src_libsinglebin_truncate_a_CFLAGS = "-Dmain=single_binary_main_truncate (int, char **); int single_binary_main_truncate" -Dusage=_usage_truncate $(src_coreutils_CFLAGS) +# Command tsort +noinst_LIBRARIES += src/libsinglebin_tsort.a +src_libsinglebin_tsort_a_SOURCES = src/tsort.c +src_libsinglebin_tsort_a_DEPENDENCIES = $(src_tsort_DEPENDENCIES) +src_libsinglebin_tsort_a_CFLAGS = "-Dmain=single_binary_main_tsort (int, char **); int single_binary_main_tsort" -Dusage=_usage_tsort $(src_coreutils_CFLAGS) +# Command tty +noinst_LIBRARIES += src/libsinglebin_tty.a +src_libsinglebin_tty_a_SOURCES = src/tty.c +src_libsinglebin_tty_a_DEPENDENCIES = $(src_tty_DEPENDENCIES) +src_libsinglebin_tty_a_CFLAGS = "-Dmain=single_binary_main_tty (int, char **); int single_binary_main_tty" -Dusage=_usage_tty $(src_coreutils_CFLAGS) +# Command uname +noinst_LIBRARIES += src/libsinglebin_uname.a +src_libsinglebin_uname_a_SOURCES = src/uname.c src/uname-uname.c +src_libsinglebin_uname_a_ldadd = $(GETHOSTNAME_LIB) +src_libsinglebin_uname_a_DEPENDENCIES = $(src_uname_DEPENDENCIES) +src_libsinglebin_uname_a_CFLAGS = "-Dmain=single_binary_main_uname (int, char **); int single_binary_main_uname" -Dusage=_usage_uname $(src_coreutils_CFLAGS) +# Command unexpand +noinst_LIBRARIES += src/libsinglebin_unexpand.a +src_libsinglebin_unexpand_a_SOURCES = src/unexpand.c src/expand-common.c +src_libsinglebin_unexpand_a_DEPENDENCIES = $(src_unexpand_DEPENDENCIES) +src_libsinglebin_unexpand_a_CFLAGS = "-Dmain=single_binary_main_unexpand (int, char **); int single_binary_main_unexpand" -Dusage=_usage_unexpand $(src_coreutils_CFLAGS) +# Command uniq +noinst_LIBRARIES += src/libsinglebin_uniq.a +src_libsinglebin_uniq_a_SOURCES = src/uniq.c +src_libsinglebin_uniq_a_DEPENDENCIES = $(src_uniq_DEPENDENCIES) +src_libsinglebin_uniq_a_CFLAGS = "-Dmain=single_binary_main_uniq (int, char **); int single_binary_main_uniq" -Dusage=_usage_uniq $(src_coreutils_CFLAGS) +# Command unlink +noinst_LIBRARIES += src/libsinglebin_unlink.a +src_libsinglebin_unlink_a_SOURCES = src/unlink.c +src_libsinglebin_unlink_a_DEPENDENCIES = $(src_unlink_DEPENDENCIES) +src_libsinglebin_unlink_a_CFLAGS = "-Dmain=single_binary_main_unlink (int, char **); int single_binary_main_unlink" -Dusage=_usage_unlink $(src_coreutils_CFLAGS) +# Command uptime +noinst_LIBRARIES += src/libsinglebin_uptime.a +src_libsinglebin_uptime_a_SOURCES = src/uptime.c +src_libsinglebin_uptime_a_ldadd = $(GETLOADAVG_LIBS) $(READUTMP_LIB) +src_libsinglebin_uptime_a_DEPENDENCIES = $(src_uptime_DEPENDENCIES) +src_libsinglebin_uptime_a_CFLAGS = "-Dmain=single_binary_main_uptime (int, char **); int single_binary_main_uptime" -Dusage=_usage_uptime $(src_coreutils_CFLAGS) +# Command vdir +noinst_LIBRARIES += src/libsinglebin_vdir.a +src_libsinglebin_vdir_a_SOURCES = src/coreutils-vdir.c +src_libsinglebin_vdir_a_ldadd = $(src_ls_LDADD) src/libsinglebin_ls.a +src_libsinglebin_vdir_a_CFLAGS = "-Dmain=single_binary_main_vdir (int, char **); int single_binary_main_vdir" -Dusage=_usage_vdir $(src_coreutils_CFLAGS) +# Command wc +noinst_LIBRARIES += src/libsinglebin_wc.a +src_libsinglebin_wc_a_SOURCES = src/wc.c +src_libsinglebin_wc_a_ldadd = $(wc_avx2_ldadd) +src_libsinglebin_wc_a_DEPENDENCIES = $(src_wc_DEPENDENCIES) +src_libsinglebin_wc_a_CFLAGS = "-Dmain=single_binary_main_wc (int, char **); int single_binary_main_wc" -Dusage=_usage_wc $(src_coreutils_CFLAGS) +# Command whoami +noinst_LIBRARIES += src/libsinglebin_whoami.a +src_libsinglebin_whoami_a_SOURCES = src/whoami.c +src_libsinglebin_whoami_a_DEPENDENCIES = $(src_whoami_DEPENDENCIES) +src_libsinglebin_whoami_a_CFLAGS = "-Dmain=single_binary_main_whoami (int, char **); int single_binary_main_whoami" -Dusage=_usage_whoami $(src_coreutils_CFLAGS) +# Command yes +noinst_LIBRARIES += src/libsinglebin_yes.a +src_libsinglebin_yes_a_SOURCES = src/yes.c +src_libsinglebin_yes_a_DEPENDENCIES = $(src_yes_DEPENDENCIES) +src_libsinglebin_yes_a_CFLAGS = "-Dmain=single_binary_main_yes (int, char **); int single_binary_main_yes" -Dusage=_usage_yes $(src_coreutils_CFLAGS) diff --git a/src/sleep.c b/src/sleep.c new file mode 100644 index 0000000..22b4793 --- /dev/null +++ b/src/sleep.c @@ -0,0 +1,144 @@ +/* sleep - delay for a specified amount of time. + Copyright (C) 1984-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include + +#include "system.h" +#include "cl-strtod.h" +#include "long-options.h" +#include "quote.h" +#include "xnanosleep.h" +#include "xstrtod.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "sleep" + +#define AUTHORS \ + proper_name ("Jim Meyering"), \ + proper_name ("Paul Eggert") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s NUMBER[SUFFIX]...\n\ + or: %s OPTION\n\ +Pause for NUMBER seconds. SUFFIX may be 's' for seconds (the default),\n\ +'m' for minutes, 'h' for hours or 'd' for days. NUMBER need not be an\n\ +integer. Given two or more arguments, pause for the amount of time\n\ +specified by the sum of their values.\n\ +\n\ +"), + program_name, program_name); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Given a floating point value *X, and a suffix character, SUFFIX_CHAR, + scale *X by the multiplier implied by SUFFIX_CHAR. SUFFIX_CHAR may + be the NUL byte or 's' to denote seconds, 'm' for minutes, 'h' for + hours, or 'd' for days. If SUFFIX_CHAR is invalid, don't modify *X + and return false. Otherwise return true. */ + +static bool +apply_suffix (double *x, char suffix_char) +{ + int multiplier; + + switch (suffix_char) + { + case 0: + case 's': + multiplier = 1; + break; + case 'm': + multiplier = 60; + break; + case 'h': + multiplier = 60 * 60; + break; + case 'd': + multiplier = 60 * 60 * 24; + break; + default: + return false; + } + + *x *= multiplier; + + return true; +} + +int +main (int argc, char **argv) +{ + double seconds = 0.0; + bool ok = true; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (argc == 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + for (int i = optind; i < argc; i++) + { + double s; + char const *p; + if (! (xstrtod (argv[i], &p, &s, cl_strtod) || errno == ERANGE) + /* Nonnegative interval. */ + || ! (0 <= s) + /* No extra chars after the number and an optional s,m,h,d char. */ + || (*p && *(p + 1)) + /* Check any suffix char and update S based on the suffix. */ + || ! apply_suffix (&s, *p)) + { + error (0, 0, _("invalid time interval %s"), quote (argv[i])); + ok = false; + } + + seconds += s; + } + + if (!ok) + usage (EXIT_FAILURE); + + if (xnanosleep (seconds)) + error (EXIT_FAILURE, errno, _("cannot read realtime clock")); + + return EXIT_SUCCESS; +} diff --git a/src/sort.c b/src/sort.c new file mode 100644 index 0000000..e779845 --- /dev/null +++ b/src/sort.c @@ -0,0 +1,4846 @@ +/* sort - sort lines of text (with all kinds of options). + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written December 1988 by Mike Haertel. + The author may be reached (Email) at the address mike@gnu.ai.mit.edu, + or (US mail) as Mike Haertel c/o Free Software Foundation. + + Ørn E. Hansen added NLS support in 1997. */ + +#include + +#include +#include +#include +#include +#include +#include +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "fadvise.h" +#include "filevercmp.h" +#include "flexmember.h" +#include "hard-locale.h" +#include "hash.h" +#include "heap.h" +#include "ignore-value.h" +#include "md5.h" +#include "mbswidth.h" +#include "nproc.h" +#include "physmem.h" +#include "posixver.h" +#include "quote.h" +#include "randread.h" +#include "readtokens0.h" +#include "stdlib--.h" +#include "strnumcmp.h" +#include "xmemcoll.h" +#include "xnanosleep.h" +#include "xstrtol.h" +#include "xstrtol-error.h" + +#ifndef RLIMIT_DATA +struct rlimit { size_t rlim_cur; }; +# define getrlimit(Resource, Rlp) (-1) +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "sort" + +#define AUTHORS \ + proper_name ("Mike Haertel"), \ + proper_name ("Paul Eggert") + +#if HAVE_LANGINFO_CODESET +# include +#endif + +/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is + present. */ +#ifndef SA_NOCLDSTOP +# define SA_NOCLDSTOP 0 +/* No sigprocmask. Always 'return' zero. */ +# define sigprocmask(How, Set, Oset) (0) +# define sigset_t int +# if ! HAVE_SIGINTERRUPT +# define siginterrupt(sig, flag) /* empty */ +# endif +#endif + +#if !defined OPEN_MAX && defined NR_OPEN +# define OPEN_MAX NR_OPEN +#endif +#if !defined OPEN_MAX +# define OPEN_MAX 20 +#endif + +#define UCHAR_LIM (UCHAR_MAX + 1) + +#ifndef DEFAULT_TMPDIR +# define DEFAULT_TMPDIR "/tmp" +#endif + +/* Maximum number of lines to merge every time a NODE is taken from + the merge queue. Node is at LEVEL in the binary merge tree, + and is responsible for merging TOTAL lines. */ +#define MAX_MERGE(total, level) (((total) >> (2 * ((level) + 1))) + 1) + +/* Heuristic value for the number of lines for which it is worth creating + a subthread, during an internal merge sort. I.e., it is a small number + of "average" lines for which sorting via two threads is faster than + sorting via one on an "average" system. On a dual-core 2.0 GHz i686 + system with 3GB of RAM and 2MB of L2 cache, a file containing 128K + lines of gensort -a output is sorted slightly faster with --parallel=2 + than with --parallel=1. By contrast, using --parallel=1 is about 10% + faster than using --parallel=2 with a 64K-line input. */ +enum { SUBTHREAD_LINES_HEURISTIC = 128 * 1024 }; +static_assert (4 <= SUBTHREAD_LINES_HEURISTIC); + +/* The number of threads after which there are + diminishing performance gains. */ +enum { DEFAULT_MAX_THREADS = 8 }; + +/* Exit statuses. */ +enum + { + /* POSIX says to exit with status 1 if invoked with -c and the + input is not properly sorted. */ + SORT_OUT_OF_ORDER = 1, + + /* POSIX says any other irregular exit must exit with a status + code greater than 1. */ + SORT_FAILURE = 2 + }; + +enum + { + /* The number of times we should try to fork a compression process + (we retry if the fork call fails). We don't _need_ to compress + temp files, this is just to reduce file system access, so this number + can be small. Each retry doubles in duration. */ + MAX_FORK_TRIES_COMPRESS = 4, + + /* The number of times we should try to fork a decompression process. + If we can't fork a decompression process, we can't sort, so this + number should be big. Each retry doubles in duration. */ + MAX_FORK_TRIES_DECOMPRESS = 9 + }; + +enum + { + /* Level of the end-of-merge node, one level above the root. */ + MERGE_END = 0, + + /* Level of the root node in merge tree. */ + MERGE_ROOT = 1 + }; + +/* The representation of the decimal point in the current locale. */ +static char decimal_point; + +/* Thousands separator; if outside char range, there is no separator. */ +static int thousands_sep; +/* We currently ignore multi-byte grouping chars. */ +static bool thousands_sep_ignored; + +/* Nonzero if the corresponding locales are hard. */ +static bool hard_LC_COLLATE; +#if HAVE_NL_LANGINFO +static bool hard_LC_TIME; +#endif + +#define NONZERO(x) ((x) != 0) + +/* The kind of blanks for '-b' to skip in various options. */ +enum blanktype { bl_start, bl_end, bl_both }; + +/* The character marking end of line. Default to \n. */ +static char eolchar = '\n'; + +/* Lines are held in memory as counted strings. */ +struct line +{ + char *text; /* Text of the line. */ + size_t length; /* Length including final newline. */ + char *keybeg; /* Start of first key. */ + char *keylim; /* Limit of first key. */ +}; + +/* Input buffers. */ +struct buffer +{ + char *buf; /* Dynamically allocated buffer, + partitioned into 3 regions: + - input data; + - unused area; + - an array of lines, in reverse order. */ + size_t used; /* Number of bytes used for input data. */ + size_t nlines; /* Number of lines in the line array. */ + size_t alloc; /* Number of bytes allocated. */ + size_t left; /* Number of bytes left from previous reads. */ + size_t line_bytes; /* Number of bytes to reserve for each line. */ + bool eof; /* An EOF has been read. */ +}; + +/* Sort key. */ +struct keyfield +{ + size_t sword; /* Zero-origin 'word' to start at. */ + size_t schar; /* Additional characters to skip. */ + size_t eword; /* Zero-origin last 'word' of key. */ + size_t echar; /* Additional characters in field. */ + bool const *ignore; /* Boolean array of characters to ignore. */ + char const *translate; /* Translation applied to characters. */ + bool skipsblanks; /* Skip leading blanks when finding start. */ + bool skipeblanks; /* Skip leading blanks when finding end. */ + bool numeric; /* Flag for numeric comparison. Handle + strings of digits with optional decimal + point, but no exponential notation. */ + bool random; /* Sort by random hash of key. */ + bool general_numeric; /* Flag for general, numeric comparison. + Handle numbers in exponential notation. */ + bool human_numeric; /* Flag for sorting by human readable + units with either SI or IEC prefixes. */ + bool month; /* Flag for comparison by month name. */ + bool reverse; /* Reverse the sense of comparison. */ + bool version; /* sort by version number */ + bool traditional_used; /* Traditional key option format is used. */ + struct keyfield *next; /* Next keyfield to try. */ +}; + +struct month +{ + char const *name; + int val; +}; + +/* Binary merge tree node. */ +struct merge_node +{ + struct line *lo; /* Lines to merge from LO child node. */ + struct line *hi; /* Lines to merge from HI child node. */ + struct line *end_lo; /* End of available lines from LO. */ + struct line *end_hi; /* End of available lines from HI. */ + struct line **dest; /* Pointer to destination of merge. */ + size_t nlo; /* Total Lines remaining from LO. */ + size_t nhi; /* Total lines remaining from HI. */ + struct merge_node *parent; /* Parent node. */ + struct merge_node *lo_child; /* LO child node. */ + struct merge_node *hi_child; /* HI child node. */ + unsigned int level; /* Level in merge tree. */ + bool queued; /* Node is already in heap. */ + pthread_mutex_t lock; /* Lock for node operations. */ +}; + +/* Priority queue of merge nodes. */ +struct merge_node_queue +{ + struct heap *priority_queue; /* Priority queue of merge tree nodes. */ + pthread_mutex_t mutex; /* Lock for queue operations. */ + pthread_cond_t cond; /* Conditional wait for empty queue to populate + when popping. */ +}; + +/* Used to implement --unique (-u). */ +static struct line saved_line; + +/* FIXME: None of these tables work with multibyte character sets. + Also, there are many other bugs when handling multibyte characters. + One way to fix this is to rewrite 'sort' to use wide characters + internally, but doing this with good performance is a bit + tricky. */ + +/* Table of blanks. */ +static bool blanks[UCHAR_LIM]; + +/* Table of non-printing characters. */ +static bool nonprinting[UCHAR_LIM]; + +/* Table of non-dictionary characters (not letters, digits, or blanks). */ +static bool nondictionary[UCHAR_LIM]; + +/* Translation table folding lower case to upper. */ +static char fold_toupper[UCHAR_LIM]; + +#define MONTHS_PER_YEAR 12 + +/* Table mapping month names to integers. + Alphabetic order allows binary search. */ +static struct month monthtab[] = +{ + {"APR", 4}, + {"AUG", 8}, + {"DEC", 12}, + {"FEB", 2}, + {"JAN", 1}, + {"JUL", 7}, + {"JUN", 6}, + {"MAR", 3}, + {"MAY", 5}, + {"NOV", 11}, + {"OCT", 10}, + {"SEP", 9} +}; + +/* During the merge phase, the number of files to merge at once. */ +#define NMERGE_DEFAULT 16 + +/* Minimum size for a merge or check buffer. */ +#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line)) + +/* Minimum sort size; the code might not work with smaller sizes. */ +#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE) + +/* The number of bytes needed for a merge or check buffer, which can + function relatively efficiently even if it holds only one line. If + a longer line is seen, this value is increased. */ +static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024); + +/* The approximate maximum number of bytes of main memory to use, as + specified by the user. Zero if the user has not specified a size. */ +static size_t sort_size; + +/* The initial allocation factor for non-regular files. + This is used, e.g., when reading from a pipe. + Don't make it too big, since it is multiplied by ~130 to + obtain the size of the actual buffer sort will allocate. + Also, there may be 8 threads all doing this at the same time. */ +#define INPUT_FILE_SIZE_GUESS (128 * 1024) + +/* Array of directory names in which any temporary files are to be created. */ +static char const **temp_dirs; + +/* Number of temporary directory names used. */ +static size_t temp_dir_count; + +/* Number of allocated slots in temp_dirs. */ +static size_t temp_dir_alloc; + +/* Flag to reverse the order of all comparisons. */ +static bool reverse; + +/* Flag for stable sort. This turns off the last ditch bytewise + comparison of lines, and instead leaves lines in the same order + they were read if all keys compare equal. */ +static bool stable; + +/* An int value outside char range. */ +enum { NON_CHAR = CHAR_MAX + 1 }; + +/* If TAB has this value, blanks separate fields. */ +enum { TAB_DEFAULT = CHAR_MAX + 1 }; + +/* Tab character separating fields. If TAB_DEFAULT, then fields are + separated by the empty string between a non-blank character and a blank + character. */ +static int tab = TAB_DEFAULT; + +/* Flag to remove consecutive duplicate lines from the output. + Only the last of a sequence of equal lines will be output. */ +static bool unique; + +/* Nonzero if any of the input files are the standard input. */ +static bool have_read_stdin; + +/* List of key field comparisons to be tried. */ +static struct keyfield *keylist; + +/* Program used to (de)compress temp files. Must accept -d. */ +static char const *compress_program; + +/* Annotate the output with extra info to aid the user. */ +static bool debug; + +/* Maximum number of files to merge in one go. If more than this + number are present, temp files will be used. */ +static unsigned int nmerge = NMERGE_DEFAULT; + +/* Output an error to stderr and exit using async-signal-safe routines. + This can be used safely from signal handlers, + and between fork and exec of multithreaded processes. */ + +static _Noreturn void +async_safe_die (int errnum, char const *errstr) +{ + ignore_value (write (STDERR_FILENO, errstr, strlen (errstr))); + + /* Even if defined HAVE_STRERROR_R, we can't use it, + as it may return a translated string etc. and even if not + may call malloc which is unsafe. We might improve this + by testing for sys_errlist and using that if available. + For now just report the error number. */ + if (errnum) + { + char errbuf[INT_BUFSIZE_BOUND (errnum)]; + char *p = inttostr (errnum, errbuf); + ignore_value (write (STDERR_FILENO, ": errno ", 8)); + ignore_value (write (STDERR_FILENO, p, strlen (p))); + } + + ignore_value (write (STDERR_FILENO, "\n", 1)); + + _exit (SORT_FAILURE); +} + +/* Report MESSAGE for FILE, then clean up and exit. + If FILE is null, it represents standard output. */ + +static void +sort_die (char const *message, char const *file) +{ + error (SORT_FAILURE, errno, "%s: %s", message, + quotef (file ? file : _("standard output"))); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ + or: %s [OPTION]... --files0-from=F\n\ +"), + program_name, program_name); + fputs (_("\ +Write sorted concatenation of all FILE(s) to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ +Ordering options:\n\ +\n\ +"), stdout); + fputs (_("\ + -b, --ignore-leading-blanks ignore leading blanks\n\ + -d, --dictionary-order consider only blanks and alphanumeric characters\ +\n\ + -f, --ignore-case fold lower case to upper case characters\n\ +"), stdout); + fputs (_("\ + -g, --general-numeric-sort compare according to general numerical value\n\ + -i, --ignore-nonprinting consider only printable characters\n\ + -M, --month-sort compare (unknown) < 'JAN' < ... < 'DEC'\n\ +"), stdout); + fputs (_("\ + -h, --human-numeric-sort compare human readable numbers (e.g., 2K 1G)\n\ +"), stdout); + fputs (_("\ + -n, --numeric-sort compare according to string numerical value\n\ + -R, --random-sort shuffle, but group identical keys. See shuf(1)\n\ + --random-source=FILE get random bytes from FILE\n\ + -r, --reverse reverse the result of comparisons\n\ +"), stdout); + fputs (_("\ + --sort=WORD sort according to WORD:\n\ + general-numeric -g, human-numeric -h, month -M,\ +\n\ + numeric -n, random -R, version -V\n\ + -V, --version-sort natural sort of (version) numbers within text\n\ +\n\ +"), stdout); + fputs (_("\ +Other options:\n\ +\n\ +"), stdout); + fputs (_("\ + --batch-size=NMERGE merge at most NMERGE inputs at once;\n\ + for more use temp files\n\ +"), stdout); + fputs (_("\ + -c, --check, --check=diagnose-first check for sorted input; do not sort\n\ + -C, --check=quiet, --check=silent like -c, but do not report first bad line\ +\n\ + --compress-program=PROG compress temporaries with PROG;\n\ + decompress them with PROG -d\n\ +"), stdout); + fputs (_("\ + --debug annotate the part of the line used to sort,\n\ + and warn about questionable usage to stderr\n\ + --files0-from=F read input from the files specified by\n\ + NUL-terminated names in file F;\n\ + If F is - then read names from standard input\n\ +"), stdout); + fputs (_("\ + -k, --key=KEYDEF sort via a key; KEYDEF gives location and type\n\ + -m, --merge merge already sorted files; do not sort\n\ +"), stdout); + fputs (_("\ + -o, --output=FILE write result to FILE instead of standard output\n\ + -s, --stable stabilize sort by disabling last-resort comparison\ +\n\ + -S, --buffer-size=SIZE use SIZE for main memory buffer\n\ +"), stdout); + printf (_("\ + -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ + -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ + multiple options specify multiple directories\n\ + --parallel=N change the number of sorts run concurrently to N\n\ + -u, --unique with -c, check for strict ordering;\n\ + without -c, output only the first of an equal run\ +\n\ +"), DEFAULT_TMPDIR); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +KEYDEF is F[.C][OPTS][,F[.C][OPTS]] for start and stop position, where F is a\n\ +field number and C a character position in the field; both are origin 1, and\n\ +the stop position defaults to the line's end. If neither -t nor -b is in\n\ +effect, characters in a field are counted from the beginning of the preceding\n\ +whitespace. OPTS is one or more single-letter ordering options [bdfgiMhnRrV],\ +\n\ +which override global ordering options for that key. If no key is given, use\n\ +the entire line as the key. Use --debug to diagnose incorrect key usage.\n\ +\n\ +SIZE may be followed by the following multiplicative suffixes:\n\ +"), stdout); + fputs (_("\ +% 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y, R, Q.\ +\n\n\ +*** WARNING ***\n\ +The locale specified by the environment affects sort order.\n\ +Set LC_ALL=C to get the traditional sort order that uses\n\ +native byte values.\n\ +"), stdout ); + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + CHECK_OPTION = CHAR_MAX + 1, + COMPRESS_PROGRAM_OPTION, + DEBUG_PROGRAM_OPTION, + FILES0_FROM_OPTION, + NMERGE_OPTION, + RANDOM_SOURCE_OPTION, + SORT_OPTION, + PARALLEL_OPTION +}; + +static char const short_options[] = "-bcCdfghik:mMno:rRsS:t:T:uVy:z"; + +static struct option const long_options[] = +{ + {"ignore-leading-blanks", no_argument, nullptr, 'b'}, + {"check", optional_argument, nullptr, CHECK_OPTION}, + {"compress-program", required_argument, nullptr, COMPRESS_PROGRAM_OPTION}, + {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION}, + {"dictionary-order", no_argument, nullptr, 'd'}, + {"ignore-case", no_argument, nullptr, 'f'}, + {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION}, + {"general-numeric-sort", no_argument, nullptr, 'g'}, + {"ignore-nonprinting", no_argument, nullptr, 'i'}, + {"key", required_argument, nullptr, 'k'}, + {"merge", no_argument, nullptr, 'm'}, + {"month-sort", no_argument, nullptr, 'M'}, + {"numeric-sort", no_argument, nullptr, 'n'}, + {"human-numeric-sort", no_argument, nullptr, 'h'}, + {"version-sort", no_argument, nullptr, 'V'}, + {"random-sort", no_argument, nullptr, 'R'}, + {"random-source", required_argument, nullptr, RANDOM_SOURCE_OPTION}, + {"sort", required_argument, nullptr, SORT_OPTION}, + {"output", required_argument, nullptr, 'o'}, + {"reverse", no_argument, nullptr, 'r'}, + {"stable", no_argument, nullptr, 's'}, + {"batch-size", required_argument, nullptr, NMERGE_OPTION}, + {"buffer-size", required_argument, nullptr, 'S'}, + {"field-separator", required_argument, nullptr, 't'}, + {"temporary-directory", required_argument, nullptr, 'T'}, + {"unique", no_argument, nullptr, 'u'}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {"parallel", required_argument, nullptr, PARALLEL_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0}, +}; + +#define CHECK_TABLE \ + _ct_("quiet", 'C') \ + _ct_("silent", 'C') \ + _ct_("diagnose-first", 'c') + +static char const *const check_args[] = +{ +#define _ct_(_s, _c) _s, + CHECK_TABLE nullptr +#undef _ct_ +}; +static char const check_types[] = +{ +#define _ct_(_s, _c) _c, + CHECK_TABLE +#undef _ct_ +}; + +#define SORT_TABLE \ + _st_("general-numeric", 'g') \ + _st_("human-numeric", 'h') \ + _st_("month", 'M') \ + _st_("numeric", 'n') \ + _st_("random", 'R') \ + _st_("version", 'V') + +static char const *const sort_args[] = +{ +#define _st_(_s, _c) _s, + SORT_TABLE nullptr +#undef _st_ +}; +static char const sort_types[] = +{ +#define _st_(_s, _c) _c, + SORT_TABLE +#undef _st_ +}; + +/* The set of signals that are caught. */ +static sigset_t caught_signals; + +/* Critical section status. */ +struct cs_status +{ + bool valid; + sigset_t sigs; +}; + +/* Enter a critical section. */ +static void +cs_enter (struct cs_status *status) +{ + int ret = pthread_sigmask (SIG_BLOCK, &caught_signals, &status->sigs); + status->valid = ret == 0; +} + +/* Leave a critical section. */ +static void +cs_leave (struct cs_status const *status) +{ + if (status->valid) + { + /* Ignore failure when restoring the signal mask. */ + pthread_sigmask (SIG_SETMASK, &status->sigs, nullptr); + } +} + +/* Possible states for a temp file. If compressed, the file's status + is unreaped or reaped, depending on whether 'sort' has waited for + the subprocess to finish. */ +enum { UNCOMPRESSED, UNREAPED, REAPED }; + +/* The list of temporary files. */ +struct tempnode +{ + struct tempnode *volatile next; + pid_t pid; /* The subprocess PID; undefined if state == UNCOMPRESSED. */ + char state; + char name[FLEXIBLE_ARRAY_MEMBER]; +}; +static struct tempnode *volatile temphead; +static struct tempnode *volatile *temptail = &temphead; + +/* A file to be sorted. */ +struct sortfile +{ + /* The file's name. */ + char const *name; + + /* Non-null if this is a temporary file, in which case NAME == TEMP->name. */ + struct tempnode *temp; +}; + +/* Map PIDs of unreaped subprocesses to their struct tempnode objects. */ +static Hash_table *proctab; + +enum { INIT_PROCTAB_SIZE = 47 }; + +static size_t +proctab_hasher (void const *entry, size_t tabsize) +{ + struct tempnode const *node = entry; + return node->pid % tabsize; +} + +static bool +proctab_comparator (void const *e1, void const *e2) +{ + struct tempnode const *n1 = e1; + struct tempnode const *n2 = e2; + return n1->pid == n2->pid; +} + +/* The number of unreaped child processes. */ +static pid_t nprocs; + +static bool delete_proc (pid_t); + +/* If PID is positive, wait for the child process with that PID to + exit, and assume that PID has already been removed from the process + table. If PID is 0 or -1, clean up some child that has exited (by + waiting for it, and removing it from the proc table) and return the + child's process ID. However, if PID is 0 and no children have + exited, return 0 without waiting. */ + +static pid_t +reap (pid_t pid) +{ + int status; + pid_t cpid = waitpid ((pid ? pid : -1), &status, (pid ? 0 : WNOHANG)); + + if (cpid < 0) + error (SORT_FAILURE, errno, _("waiting for %s [-d]"), + quoteaf (compress_program)); + else if (0 < cpid && (0 < pid || delete_proc (cpid))) + { + if (! WIFEXITED (status) || WEXITSTATUS (status)) + error (SORT_FAILURE, 0, _("%s [-d] terminated abnormally"), + quoteaf (compress_program)); + --nprocs; + } + + return cpid; +} + +/* TEMP represents a new process; add it to the process table. Create + the process table the first time it's called. */ + +static void +register_proc (struct tempnode *temp) +{ + if (! proctab) + { + proctab = hash_initialize (INIT_PROCTAB_SIZE, nullptr, + proctab_hasher, + proctab_comparator, + nullptr); + if (! proctab) + xalloc_die (); + } + + temp->state = UNREAPED; + + if (! hash_insert (proctab, temp)) + xalloc_die (); +} + +/* If PID is in the process table, remove it and return true. + Otherwise, return false. */ + +static bool +delete_proc (pid_t pid) +{ + struct tempnode test; + + test.pid = pid; + struct tempnode *node = hash_remove (proctab, &test); + if (! node) + return false; + node->state = REAPED; + return true; +} + +/* Remove PID from the process table, and wait for it to exit if it + hasn't already. */ + +static void +wait_proc (pid_t pid) +{ + if (delete_proc (pid)) + reap (pid); +} + +/* Reap any exited children. Do not block; reap only those that have + already exited. */ + +static void +reap_exited (void) +{ + while (0 < nprocs && reap (0)) + continue; +} + +/* Reap at least one exited child, waiting if necessary. */ + +static void +reap_some (void) +{ + reap (-1); + reap_exited (); +} + +/* Reap all children, waiting if necessary. */ + +static void +reap_all (void) +{ + while (0 < nprocs) + reap (-1); +} + +/* Clean up any remaining temporary files. */ + +static void +cleanup (void) +{ + struct tempnode const *node; + + for (node = temphead; node; node = node->next) + unlink (node->name); + temphead = nullptr; +} + +/* Cleanup actions to take when exiting. */ + +static void +exit_cleanup (void) +{ + if (temphead) + { + /* Clean up any remaining temporary files in a critical section so + that a signal handler does not try to clean them too. */ + struct cs_status cs; + cs_enter (&cs); + cleanup (); + cs_leave (&cs); + } + + close_stdout (); +} + +/* Create a new temporary file, returning its newly allocated tempnode. + Store into *PFD the file descriptor open for writing. + If the creation fails, return nullptr and store -1 into *PFD if the + failure is due to file descriptor exhaustion and + SURVIVE_FD_EXHAUSTION; otherwise, die. */ + +static struct tempnode * +create_temp_file (int *pfd, bool survive_fd_exhaustion) +{ + static char const slashbase[] = "/sortXXXXXX"; + static size_t temp_dir_index; + int fd; + int saved_errno; + char const *temp_dir = temp_dirs[temp_dir_index]; + size_t len = strlen (temp_dir); + struct tempnode *node = + xmalloc (FLEXSIZEOF (struct tempnode, name, len + sizeof slashbase)); + char *file = node->name; + struct cs_status cs; + + memcpy (file, temp_dir, len); + memcpy (file + len, slashbase, sizeof slashbase); + node->next = nullptr; + if (++temp_dir_index == temp_dir_count) + temp_dir_index = 0; + + /* Create the temporary file in a critical section, to avoid races. */ + cs_enter (&cs); + fd = mkostemp (file, O_CLOEXEC); + if (0 <= fd) + { + *temptail = node; + temptail = &node->next; + } + saved_errno = errno; + cs_leave (&cs); + errno = saved_errno; + + if (fd < 0) + { + if (! (survive_fd_exhaustion && errno == EMFILE)) + error (SORT_FAILURE, errno, _("cannot create temporary file in %s"), + quoteaf (temp_dir)); + free (node); + node = nullptr; + } + + *pfd = fd; + return node; +} + +/* Return a pointer to stdout status, or nullptr on failure. */ + +static struct stat * +get_outstatus (void) +{ + static int outstat_errno; + static struct stat outstat; + if (outstat_errno == 0) + outstat_errno = fstat (STDOUT_FILENO, &outstat) == 0 ? -1 : errno; + return outstat_errno < 0 ? &outstat : nullptr; +} + +/* Return a stream for FILE, opened with mode HOW. If HOW is "w", + the file is already open on standard output, and needs to be + truncated unless FILE is null. When opening for input, "-" + means standard input. To avoid confusion, do not return file + descriptors STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO when + opening an ordinary FILE. Return nullptr if unsuccessful. + + Use fadvise to specify an access pattern for input files. + There are a few hints we could possibly provide, + and after careful testing it was decided that + specifying FADVISE_SEQUENTIAL was not detrimental + to any cases. On Linux 2.6.31, this option doubles + the size of read ahead performed and thus was seen to + benefit these cases: + Merging + Sorting with a smaller internal buffer + Reading from faster flash devices + + In _addition_ one could also specify other hints... + + FADVISE_WILLNEED was tested, but Linux 2.6.31 + at least uses that to _synchronously_ prepopulate the cache + with the specified range. While sort does need to + read all of its input before outputting, a synchronous + read of the whole file up front precludes any processing + that sort could do in parallel with the system doing + read ahead of the data. This was seen to have negative effects + in a couple of cases: + Merging + Sorting with a smaller internal buffer + This option was seen to shorten the runtime for sort + on a multicore system with lots of RAM and other processes + competing for CPU. It could be argued that more explicit + scheduling hints with 'nice' et. al. are more appropriate + for this situation. + + FADVISE_NOREUSE is a possibility as it could lower + the priority of input data in the cache as sort will + only need to process it once. However its functionality + has changed over Linux kernel versions and as of 2.6.31 + it does nothing and thus we can't depend on what it might + do in future. + + FADVISE_DONTNEED is not appropriate for user specified + input files, but for temp files we do want to drop the + cache immediately after processing. This is done implicitly + however when the files are unlinked. */ + +static FILE * +stream_open (char const *file, char const *how) +{ + FILE *fp; + + if (*how == 'r') + { + if (STREQ (file, "-")) + { + have_read_stdin = true; + fp = stdin; + } + else + { + int fd = open (file, O_RDONLY | O_CLOEXEC); + fp = fd < 0 ? nullptr : fdopen (fd, how); + } + fadvise (fp, FADVISE_SEQUENTIAL); + } + else if (*how == 'w') + { + if (file && ftruncate (STDOUT_FILENO, 0) != 0) + { + int ftruncate_errno = errno; + struct stat *outst = get_outstatus (); + if (!outst || S_ISREG (outst->st_mode) || S_TYPEISSHM (outst)) + error (SORT_FAILURE, ftruncate_errno, _("%s: error truncating"), + quotef (file)); + } + fp = stdout; + } + else + affirm (!"unexpected mode passed to stream_open"); + + return fp; +} + +/* Same as stream_open, except always return a non-null value; die on + failure. */ + +static FILE * +xfopen (char const *file, char const *how) +{ + FILE *fp = stream_open (file, how); + if (!fp) + sort_die (_("open failed"), file); + return fp; +} + +/* Close FP, whose name is FILE, and report any errors. */ + +static void +xfclose (FILE *fp, char const *file) +{ + switch (fileno (fp)) + { + case STDIN_FILENO: + /* Allow reading stdin from tty more than once. */ + clearerr (fp); + break; + + case STDOUT_FILENO: + /* Don't close stdout just yet. close_stdout does that. */ + if (fflush (fp) != 0) + sort_die (_("fflush failed"), file); + break; + + default: + if (fclose (fp) != 0) + sort_die (_("close failed"), file); + break; + } +} + +/* Move OLDFD to NEWFD. If OLDFD != NEWFD, NEWFD is not close-on-exec. */ + +static void +move_fd (int oldfd, int newfd) +{ + if (oldfd != newfd) + { + /* These should never fail for our usage. */ + ignore_value (dup2 (oldfd, newfd)); + ignore_value (close (oldfd)); + } +} + +/* Fork a child process for piping to and do common cleanup. The + TRIES parameter specifies how many times to try to fork before + giving up. Return the PID of the child, or -1 (setting errno) + on failure. */ + +static pid_t +pipe_fork (int pipefds[2], size_t tries) +{ +#if HAVE_WORKING_FORK + struct tempnode *saved_temphead; + int saved_errno; + double wait_retry = 0.25; + pid_t pid; + struct cs_status cs; + + if (pipe2 (pipefds, O_CLOEXEC) < 0) + return -1; + + /* At least NMERGE + 1 subprocesses are needed. More could be created, but + uncontrolled subprocess generation can hurt performance significantly. + Allow at most NMERGE + 2 subprocesses, on the theory that there + may be some useful parallelism by letting compression for the + previous merge finish (1 subprocess) in parallel with the current + merge (NMERGE + 1 subprocesses). */ + + if (nmerge + 1 < nprocs) + reap_some (); + + while (tries--) + { + /* This is so the child process won't delete our temp files + if it receives a signal before exec-ing. */ + cs_enter (&cs); + saved_temphead = temphead; + temphead = nullptr; + + pid = fork (); + saved_errno = errno; + if (pid) + temphead = saved_temphead; + + cs_leave (&cs); + errno = saved_errno; + + if (0 <= pid || errno != EAGAIN) + break; + else + { + xnanosleep (wait_retry); + wait_retry *= 2; + reap_exited (); + } + } + + if (pid < 0) + { + saved_errno = errno; + close (pipefds[0]); + close (pipefds[1]); + errno = saved_errno; + } + else if (pid == 0) + { + close (STDIN_FILENO); + close (STDOUT_FILENO); + } + else + ++nprocs; + + return pid; + +#else /* ! HAVE_WORKING_FORK */ + return -1; +#endif +} + +/* Create a temporary file and, if asked for, start a compressor + to that file. Set *PFP to the file handle and return + the address of the new temp node. If the creation + fails, return nullptr if the failure is due to file descriptor + exhaustion and SURVIVE_FD_EXHAUSTION; otherwise, die. */ + +static struct tempnode * +maybe_create_temp (FILE **pfp, bool survive_fd_exhaustion) +{ + int tempfd; + struct tempnode *node = create_temp_file (&tempfd, survive_fd_exhaustion); + if (! node) + return nullptr; + + node->state = UNCOMPRESSED; + + if (compress_program) + { + int pipefds[2]; + + node->pid = pipe_fork (pipefds, MAX_FORK_TRIES_COMPRESS); + if (0 < node->pid) + { + close (tempfd); + close (pipefds[0]); + tempfd = pipefds[1]; + + register_proc (node); + } + else if (node->pid == 0) + { + /* Being the child of a multithreaded program before exec, + we're restricted to calling async-signal-safe routines here. */ + close (pipefds[1]); + move_fd (tempfd, STDOUT_FILENO); + move_fd (pipefds[0], STDIN_FILENO); + + execlp (compress_program, compress_program, (char *) nullptr); + + async_safe_die (errno, "couldn't execute compress program"); + } + } + + *pfp = fdopen (tempfd, "w"); + if (! *pfp) + sort_die (_("couldn't create temporary file"), node->name); + + return node; +} + +/* Create a temporary file and, if asked for, start a compressor + to that file. Set *PFP to the file handle and return the address + of the new temp node. Die on failure. */ + +static struct tempnode * +create_temp (FILE **pfp) +{ + return maybe_create_temp (pfp, false); +} + +/* Open a compressed temp file and start a decompression process through + which to filter the input. Return nullptr (setting errno to + EMFILE) if we ran out of file descriptors, and die on any other + kind of failure. */ + +static FILE * +open_temp (struct tempnode *temp) +{ + int tempfd, pipefds[2]; + FILE *fp = nullptr; + + if (temp->state == UNREAPED) + wait_proc (temp->pid); + + tempfd = open (temp->name, O_RDONLY); + if (tempfd < 0) + return nullptr; + + pid_t child = pipe_fork (pipefds, MAX_FORK_TRIES_DECOMPRESS); + + switch (child) + { + case -1: + if (errno != EMFILE) + error (SORT_FAILURE, errno, _("couldn't create process for %s -d"), + quoteaf (compress_program)); + close (tempfd); + errno = EMFILE; + break; + + case 0: + /* Being the child of a multithreaded program before exec, + we're restricted to calling async-signal-safe routines here. */ + close (pipefds[0]); + move_fd (tempfd, STDIN_FILENO); + move_fd (pipefds[1], STDOUT_FILENO); + + execlp (compress_program, compress_program, "-d", (char *) nullptr); + + async_safe_die (errno, "couldn't execute compress program (with -d)"); + + default: + temp->pid = child; + register_proc (temp); + close (tempfd); + close (pipefds[1]); + + fp = fdopen (pipefds[0], "r"); + if (! fp) + { + int saved_errno = errno; + close (pipefds[0]); + errno = saved_errno; + } + break; + } + + return fp; +} + +/* Append DIR to the array of temporary directory names. */ +static void +add_temp_dir (char const *dir) +{ + if (temp_dir_count == temp_dir_alloc) + temp_dirs = X2NREALLOC (temp_dirs, &temp_dir_alloc); + + temp_dirs[temp_dir_count++] = dir; +} + +/* Remove NAME from the list of temporary files. */ + +static void +zaptemp (char const *name) +{ + struct tempnode *volatile *pnode; + struct tempnode *node; + struct tempnode *next; + int unlink_status; + int unlink_errno = 0; + struct cs_status cs; + + for (pnode = &temphead; (node = *pnode)->name != name; pnode = &node->next) + continue; + + if (node->state == UNREAPED) + wait_proc (node->pid); + + /* Unlink the temporary file in a critical section to avoid races. */ + next = node->next; + cs_enter (&cs); + unlink_status = unlink (name); + unlink_errno = errno; + *pnode = next; + cs_leave (&cs); + + if (unlink_status != 0) + error (0, unlink_errno, _("warning: cannot remove: %s"), quotef (name)); + if (! next) + temptail = pnode; + free (node); +} + +#if HAVE_NL_LANGINFO + +static int +struct_month_cmp (void const *m1, void const *m2) +{ + struct month const *month1 = m1; + struct month const *month2 = m2; + return strcmp (month1->name, month2->name); +} + +#endif + +/* Initialize the character class tables. */ + +static void +inittables (void) +{ + size_t i; + + for (i = 0; i < UCHAR_LIM; ++i) + { + blanks[i] = field_sep (i); + nonprinting[i] = ! isprint (i); + nondictionary[i] = ! isalnum (i) && ! field_sep (i); + fold_toupper[i] = toupper (i); + } + +#if HAVE_NL_LANGINFO + /* If we're not in the "C" locale, read different names for months. */ + if (hard_LC_TIME) + { + for (i = 0; i < MONTHS_PER_YEAR; i++) + { + char const *s; + size_t s_len; + size_t j, k; + char *name; + + s = nl_langinfo (ABMON_1 + i); + s_len = strlen (s); + monthtab[i].name = name = xmalloc (s_len + 1); + monthtab[i].val = i + 1; + + for (j = k = 0; j < s_len; j++) + if (! isblank (to_uchar (s[j]))) + name[k++] = fold_toupper[to_uchar (s[j])]; + name[k] = '\0'; + } + qsort (monthtab, MONTHS_PER_YEAR, sizeof *monthtab, struct_month_cmp); + } +#endif +} + +/* Specify how many inputs may be merged at once. + This may be set on the command-line with the + --batch-size option. */ +static void +specify_nmerge (int oi, char c, char const *s) +{ + uintmax_t n; + struct rlimit rlimit; + enum strtol_error e = xstrtoumax (s, nullptr, 10, &n, ""); + + /* Try to find out how many file descriptors we'll be able + to open. We need at least nmerge + 3 (STDIN_FILENO, + STDOUT_FILENO and STDERR_FILENO). */ + unsigned int max_nmerge = ((getrlimit (RLIMIT_NOFILE, &rlimit) == 0 + ? rlimit.rlim_cur + : OPEN_MAX) + - 3); + + if (e == LONGINT_OK) + { + nmerge = n; + if (nmerge != n) + e = LONGINT_OVERFLOW; + else + { + if (nmerge < 2) + { + error (0, 0, _("invalid --%s argument %s"), + long_options[oi].name, quote (s)); + error (SORT_FAILURE, 0, + _("minimum --%s argument is %s"), + long_options[oi].name, quote ("2")); + } + else if (max_nmerge < nmerge) + { + e = LONGINT_OVERFLOW; + } + else + return; + } + } + + if (e == LONGINT_OVERFLOW) + { + char max_nmerge_buf[INT_BUFSIZE_BOUND (max_nmerge)]; + error (0, 0, _("--%s argument %s too large"), + long_options[oi].name, quote (s)); + error (SORT_FAILURE, 0, + _("maximum --%s argument with current rlimit is %s"), + long_options[oi].name, + uinttostr (max_nmerge, max_nmerge_buf)); + } + else + xstrtol_fatal (e, oi, c, long_options, s); +} + +/* Specify the amount of main memory to use when sorting. */ +static void +specify_sort_size (int oi, char c, char const *s) +{ + uintmax_t n; + char *suffix; + enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPQRtTYZ"); + + /* The default unit is KiB. */ + if (e == LONGINT_OK && ISDIGIT (suffix[-1])) + { + if (n <= UINTMAX_MAX / 1024) + n *= 1024; + else + e = LONGINT_OVERFLOW; + } + + /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */ + if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1]) + switch (suffix[0]) + { + case 'b': + e = LONGINT_OK; + break; + + case '%': + { + double mem = physmem_total () * n / 100; + + /* Use "<", not "<=", to avoid problems with rounding. */ + if (mem < UINTMAX_MAX) + { + n = mem; + e = LONGINT_OK; + } + else + e = LONGINT_OVERFLOW; + } + break; + } + + if (e == LONGINT_OK) + { + /* If multiple sort sizes are specified, take the maximum, so + that option order does not matter. */ + if (n < sort_size) + return; + + sort_size = n; + if (sort_size == n) + { + sort_size = MAX (sort_size, MIN_SORT_SIZE); + return; + } + + e = LONGINT_OVERFLOW; + } + + xstrtol_fatal (e, oi, c, long_options, s); +} + +/* Specify the number of threads to spawn during internal sort. */ +static size_t +specify_nthreads (int oi, char c, char const *s) +{ + uintmax_t nthreads; + enum strtol_error e = xstrtoumax (s, nullptr, 10, &nthreads, ""); + if (e == LONGINT_OVERFLOW) + return SIZE_MAX; + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, s); + if (SIZE_MAX < nthreads) + nthreads = SIZE_MAX; + if (nthreads == 0) + error (SORT_FAILURE, 0, _("number in parallel must be nonzero")); + return nthreads; +} + +/* Return the default sort size. */ +static size_t +default_sort_size (void) +{ + /* Let SIZE be MEM, but no more than the maximum object size, + total memory, or system resource limits. Don't bother to check + for values like RLIM_INFINITY since in practice they are not much + less than SIZE_MAX. */ + size_t size = SIZE_MAX; + struct rlimit rlimit; + if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size) + size = rlimit.rlim_cur; +#ifdef RLIMIT_AS + if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size) + size = rlimit.rlim_cur; +#endif + + /* Leave a large safety margin for the above limits, as failure can + occur when they are exceeded. */ + size /= 2; + +#ifdef RLIMIT_RSS + /* Leave a 1/16 margin for RSS to leave room for code, stack, etc. + Exceeding RSS is not fatal, but can be quite slow. */ + if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size) + size = rlimit.rlim_cur / 16 * 15; +#endif + + /* Let MEM be available memory or 1/8 of total memory, whichever + is greater. */ + double avail = physmem_available (); + double total = physmem_total (); + double mem = MAX (avail, total / 8); + + /* Leave a 1/4 margin for physical memory. */ + if (total * 0.75 < size) + size = total * 0.75; + + /* Return the minimum of MEM and SIZE, but no less than + MIN_SORT_SIZE. Avoid the MIN macro here, as it is not quite + right when only one argument is floating point. */ + if (mem < size) + size = mem; + return MAX (size, MIN_SORT_SIZE); +} + +/* Return the sort buffer size to use with the input files identified + by FPS and FILES, which are alternate names of the same files. + NFILES gives the number of input files; NFPS may be less. Assume + that each input line requires LINE_BYTES extra bytes' worth of line + information. Do not exceed the size bound specified by the user + (or a default size bound, if the user does not specify one). */ + +static size_t +sort_buffer_size (FILE *const *fps, size_t nfps, + char *const *files, size_t nfiles, + size_t line_bytes) +{ + /* A bound on the input size. If zero, the bound hasn't been + determined yet. */ + static size_t size_bound; + + /* In the worst case, each input byte is a newline. */ + size_t worst_case_per_input_byte = line_bytes + 1; + + /* Keep enough room for one extra input line and an extra byte. + This extra room might be needed when preparing to read EOF. */ + size_t size = worst_case_per_input_byte + 1; + + for (size_t i = 0; i < nfiles; i++) + { + struct stat st; + off_t file_size; + size_t worst_case; + + if ((i < nfps ? fstat (fileno (fps[i]), &st) + : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st) + : stat (files[i], &st)) + != 0) + sort_die (_("stat failed"), files[i]); + + if (S_ISREG (st.st_mode)) + file_size = st.st_size; + else + { + /* The file has unknown size. If the user specified a sort + buffer size, use that; otherwise, guess the size. */ + if (sort_size) + return sort_size; + file_size = INPUT_FILE_SIZE_GUESS; + } + + if (! size_bound) + { + size_bound = sort_size; + if (! size_bound) + size_bound = default_sort_size (); + } + + /* Add the amount of memory needed to represent the worst case + where the input consists entirely of newlines followed by a + single non-newline. Check for overflow. */ + worst_case = file_size * worst_case_per_input_byte + 1; + if (file_size != worst_case / worst_case_per_input_byte + || size_bound - size <= worst_case) + return size_bound; + size += worst_case; + } + + return size; +} + +/* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES + must be at least sizeof (struct line). Allocate ALLOC bytes + initially. */ + +static void +initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) +{ + /* Ensure that the line array is properly aligned. If the desired + size cannot be allocated, repeatedly halve it until allocation + succeeds. The smaller allocation may hurt overall performance, + but that's better than failing. */ + while (true) + { + alloc += sizeof (struct line) - alloc % sizeof (struct line); + buf->buf = malloc (alloc); + if (buf->buf) + break; + alloc /= 2; + if (alloc <= line_bytes + 1) + xalloc_die (); + } + + buf->line_bytes = line_bytes; + buf->alloc = alloc; + buf->used = buf->left = buf->nlines = 0; + buf->eof = false; +} + +/* Return one past the limit of the line array. */ + +static inline struct line * +buffer_linelim (struct buffer const *buf) +{ + void *linelim = buf->buf + buf->alloc; + return linelim; +} + +/* Return a pointer to the first character of the field specified + by KEY in LINE. */ + +static char * +begfield (struct line const *line, struct keyfield const *key) +{ + char *ptr = line->text, *lim = ptr + line->length - 1; + size_t sword = key->sword; + size_t schar = key->schar; + + /* The leading field separator itself is included in a field when -t + is absent. */ + + if (tab != TAB_DEFAULT) + while (ptr < lim && sword--) + { + while (ptr < lim && *ptr != tab) + ++ptr; + if (ptr < lim) + ++ptr; + } + else + while (ptr < lim && sword--) + { + while (ptr < lim && blanks[to_uchar (*ptr)]) + ++ptr; + while (ptr < lim && !blanks[to_uchar (*ptr)]) + ++ptr; + } + + /* If we're ignoring leading blanks when computing the Start + of the field, skip past them here. */ + if (key->skipsblanks) + while (ptr < lim && blanks[to_uchar (*ptr)]) + ++ptr; + + /* Advance PTR by SCHAR (if possible), but no further than LIM. */ + ptr = MIN (lim, ptr + schar); + + return ptr; +} + +/* Return the limit of (a pointer to the first character after) the field + in LINE specified by KEY. */ + +ATTRIBUTE_PURE +static char * +limfield (struct line const *line, struct keyfield const *key) +{ + char *ptr = line->text, *lim = ptr + line->length - 1; + size_t eword = key->eword, echar = key->echar; + + if (echar == 0) + eword++; /* Skip all of end field. */ + + /* Move PTR past EWORD fields or to one past the last byte on LINE, + whichever comes first. If there are more than EWORD fields, leave + PTR pointing at the beginning of the field having zero-based index, + EWORD. If a delimiter character was specified (via -t), then that + 'beginning' is the first character following the delimiting TAB. + Otherwise, leave PTR pointing at the first 'blank' character after + the preceding field. */ + if (tab != TAB_DEFAULT) + while (ptr < lim && eword--) + { + while (ptr < lim && *ptr != tab) + ++ptr; + if (ptr < lim && (eword || echar)) + ++ptr; + } + else + while (ptr < lim && eword--) + { + while (ptr < lim && blanks[to_uchar (*ptr)]) + ++ptr; + while (ptr < lim && !blanks[to_uchar (*ptr)]) + ++ptr; + } + +#ifdef POSIX_UNSPECIFIED + /* The following block of code makes GNU sort incompatible with + standard Unix sort, so it's ifdef'd out for now. + The POSIX spec isn't clear on how to interpret this. + FIXME: request clarification. + + From: kwzh@gnu.ai.mit.edu (Karl Heuer) + Date: Thu, 30 May 96 12:20:41 -0400 + [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.] + + [...]I believe I've found another bug in 'sort'. + + $ cat /tmp/sort.in + a b c 2 d + pq rs 1 t + $ textutils-1.15/src/sort -k1.7,1.7 skipeblanks) + while (ptr < lim && blanks[to_uchar (*ptr)]) + ++ptr; + + /* Advance PTR by ECHAR (if possible), but no further than LIM. */ + ptr = MIN (lim, ptr + echar); + } + + return ptr; +} + +/* Fill BUF reading from FP, moving buf->left bytes from the end + of buf->buf to the beginning first. If EOF is reached and the + file wasn't terminated by a newline, supply one. Set up BUF's line + table too. FILE is the name of the file corresponding to FP. + Return true if some input was read. */ + +static bool +fillbuf (struct buffer *buf, FILE *fp, char const *file) +{ + struct keyfield const *key = keylist; + char eol = eolchar; + size_t line_bytes = buf->line_bytes; + size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; + + if (buf->eof) + return false; + + if (buf->used != buf->left) + { + memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); + buf->used = buf->left; + buf->nlines = 0; + } + + while (true) + { + char *ptr = buf->buf + buf->used; + struct line *linelim = buffer_linelim (buf); + struct line *line = linelim - buf->nlines; + size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr; + char *line_start = buf->nlines ? line->text + line->length : buf->buf; + + while (line_bytes + 1 < avail) + { + /* Read as many bytes as possible, but do not read so many + bytes that there might not be enough room for the + corresponding line array. The worst case is when the + rest of the input file consists entirely of newlines, + except that the last byte is not a newline. */ + size_t readsize = (avail - 1) / (line_bytes + 1); + size_t bytes_read = fread (ptr, 1, readsize, fp); + char *ptrlim = ptr + bytes_read; + char *p; + avail -= bytes_read; + + if (bytes_read != readsize) + { + if (ferror (fp)) + sort_die (_("read failed"), file); + if (feof (fp)) + { + buf->eof = true; + if (buf->buf == ptrlim) + return false; + if (line_start != ptrlim && ptrlim[-1] != eol) + *ptrlim++ = eol; + } + } + + /* Find and record each line in the just-read input. */ + while ((p = memchr (ptr, eol, ptrlim - ptr))) + { + /* Delimit the line with NUL. This eliminates the need to + temporarily replace the last byte with NUL when calling + xmemcoll, which increases performance. */ + *p = '\0'; + ptr = p + 1; + line--; + line->text = line_start; + line->length = ptr - line_start; + mergesize = MAX (mergesize, line->length); + avail -= line_bytes; + + if (key) + { + /* Precompute the position of the first key for + efficiency. */ + line->keylim = (key->eword == SIZE_MAX + ? p + : limfield (line, key)); + + if (key->sword != SIZE_MAX) + line->keybeg = begfield (line, key); + else + { + if (key->skipsblanks) + while (blanks[to_uchar (*line_start)]) + line_start++; + line->keybeg = line_start; + } + } + + line_start = ptr; + } + + ptr = ptrlim; + if (buf->eof) + break; + } + + buf->used = ptr - buf->buf; + buf->nlines = buffer_linelim (buf) - line; + if (buf->nlines != 0) + { + buf->left = ptr - line_start; + merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; + return true; + } + + { + /* The current input line is too long to fit in the buffer. + Increase the buffer size and try again, keeping it properly + aligned. */ + size_t line_alloc = buf->alloc / sizeof (struct line); + buf->buf = x2nrealloc (buf->buf, &line_alloc, sizeof (struct line)); + buf->alloc = line_alloc * sizeof (struct line); + } + } +} + +/* Table that maps characters to order-of-magnitude values. */ +static char const unit_order[UCHAR_LIM] = + { +#if ! ('K' == 75 && 'M' == 77 && 'G' == 71 && 'T' == 84 && 'P' == 80 \ + && 'E' == 69 && 'Z' == 90 && 'Y' == 89 && 'R' == 82 && 'Q' == 81 \ + && 'k' == 107) + /* This initializer syntax works on all C99 hosts. For now, use + it only on non-ASCII hosts, to ease the pain of porting to + pre-C99 ASCII hosts. */ + ['K']=1, ['M']=2, ['G']=3, ['T']=4, ['P']=5, ['E']=6, ['Z']=7, ['Y']=8, + ['R']=9, ['Q']=10, + ['k']=1, +#else + /* Generate the following table with this command: + perl -e 'my %a=(k=>1,K=>1,M=>2,G=>3,T=>4,P=>5,E=>6,Z=>7,Y=>8,R=>9,Q=>10); + foreach my $i (0..255) {my $c=chr($i); $a{$c} ||= 0;print "$a{$c}, "}'\ + |fmt */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 3, + 0, 0, 0, 1, 0, 2, 0, 0, 5, 10, 9, 0, 4, 0, 0, 0, 0, 8, 7, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#endif + }; + +/* Traverse number given as *number consisting of digits, thousands_sep, and + decimal_point chars only. Returns the highest digit found in the number, + or '\0' if no digit has been found. Upon return *number points at the + character that immediately follows after the given number. */ +static char +traverse_raw_number (char const **number) +{ + char const *p = *number; + char ch; + char max_digit = '\0'; + bool ends_with_thousands_sep = false; + + /* Scan to end of number. + Decimals or separators not followed by digits stop the scan. + Numbers ending in decimals or separators are thus considered + to be lacking in units. + FIXME: add support for multibyte thousands_sep and decimal_point. */ + + while (ISDIGIT (ch = *p++)) + { + if (max_digit < ch) + max_digit = ch; + + /* Allow to skip only one occurrence of thousands_sep to avoid finding + the unit in the next column in case thousands_sep matches as blank + and is used as column delimiter. */ + ends_with_thousands_sep = (*p == thousands_sep); + if (ends_with_thousands_sep) + ++p; + } + + if (ends_with_thousands_sep) + { + /* thousands_sep not followed by digit is not allowed. */ + *number = p - 2; + return max_digit; + } + + if (ch == decimal_point) + while (ISDIGIT (ch = *p++)) + if (max_digit < ch) + max_digit = ch; + + *number = p - 1; + return max_digit; +} + +/* Return an integer that represents the order of magnitude of the + unit following the number. The number may contain thousands + separators and a decimal point, but it may not contain leading blanks. + Negative numbers get negative orders; zero numbers have a zero order. */ + +ATTRIBUTE_PURE +static int +find_unit_order (char const *number) +{ + bool minus_sign = (*number == '-'); + char const *p = number + minus_sign; + char max_digit = traverse_raw_number (&p); + if ('0' < max_digit) + { + unsigned char ch = *p; + int order = unit_order[ch]; + return (minus_sign ? -order : order); + } + else + return 0; +} + +/* Compare numbers A and B ending in units with SI or IEC prefixes + < K/k < M < G < T < P < E < Z < Y < R < Q */ + +ATTRIBUTE_PURE +static int +human_numcompare (char const *a, char const *b) +{ + while (blanks[to_uchar (*a)]) + a++; + while (blanks[to_uchar (*b)]) + b++; + + int diff = find_unit_order (a) - find_unit_order (b); + return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep)); +} + +/* Compare strings A and B as numbers without explicitly converting them to + machine numbers. Comparatively slow for short strings, but asymptotically + hideously fast. */ + +ATTRIBUTE_PURE +static int +numcompare (char const *a, char const *b) +{ + while (blanks[to_uchar (*a)]) + a++; + while (blanks[to_uchar (*b)]) + b++; + + return strnumcmp (a, b, decimal_point, thousands_sep); +} + +static int +nan_compare (long double a, long double b) +{ + char buf[2][sizeof "-nan""()" + CHAR_BIT * sizeof a]; + snprintf (buf[0], sizeof buf[0], "%Lf", a); + snprintf (buf[1], sizeof buf[1], "%Lf", b); + return strcmp (buf[0], buf[1]); +} + +static int +general_numcompare (char const *sa, char const *sb) +{ + /* FIXME: maybe add option to try expensive FP conversion + only if A and B can't be compared more cheaply/accurately. */ + + char *ea; + char *eb; + long double a = strtold (sa, &ea); + long double b = strtold (sb, &eb); + + /* Put conversion errors at the start of the collating sequence. */ + if (sa == ea) + return sb == eb ? 0 : -1; + if (sb == eb) + return 1; + + /* Sort numbers in the usual way, where -0 == +0. Put NaNs after + conversion errors but before numbers; sort them by internal + bit-pattern, for lack of a more portable alternative. */ + return (a < b ? -1 + : a > b ? 1 + : a == b ? 0 + : b == b ? -1 + : a == a ? 1 + : nan_compare (a, b)); +} + +/* Return an integer in 1..12 of the month name MONTH. + Return 0 if the name in S is not recognized. */ + +static int +getmonth (char const *month, char **ea) +{ + size_t lo = 0; + size_t hi = MONTHS_PER_YEAR; + + while (blanks[to_uchar (*month)]) + month++; + + do + { + size_t ix = (lo + hi) / 2; + char const *m = month; + char const *n = monthtab[ix].name; + + for (;; m++, n++) + { + if (!*n) + { + if (ea) + *ea = (char *) m; + return monthtab[ix].val; + } + if (to_uchar (fold_toupper[to_uchar (*m)]) < to_uchar (*n)) + { + hi = ix; + break; + } + else if (to_uchar (fold_toupper[to_uchar (*m)]) > to_uchar (*n)) + { + lo = ix + 1; + break; + } + } + } + while (lo < hi); + + return 0; +} + +/* A randomly chosen MD5 state, used for random comparison. */ +static struct md5_ctx random_md5_state; + +/* Initialize the randomly chosen MD5 state. */ + +static void +random_md5_state_init (char const *random_source) +{ + unsigned char buf[MD5_DIGEST_SIZE]; + struct randread_source *r = randread_new (random_source, sizeof buf); + if (! r) + sort_die (_("open failed"), random_source ? random_source : "getrandom"); + randread (r, buf, sizeof buf); + if (randread_free (r) != 0) + sort_die (_("close failed"), random_source); + md5_init_ctx (&random_md5_state); + md5_process_bytes (buf, sizeof buf, &random_md5_state); +} + +/* This is like strxfrm, except it reports any error and exits. */ + +static size_t +xstrxfrm (char *restrict dest, char const *restrict src, size_t destsize) +{ + errno = 0; + size_t translated_size = strxfrm (dest, src, destsize); + + if (errno) + { + error (0, errno, _("string transformation failed")); + error (0, 0, _("set LC_ALL='C' to work around the problem")); + error (SORT_FAILURE, 0, + _("the original string was %s"), + quotearg_n_style (0, locale_quoting_style, src)); + } + + return translated_size; +} + +/* Compare the keys TEXTA (of length LENA) and TEXTB (of length LENB) + using one or more random hash functions. TEXTA[LENA] and + TEXTB[LENB] must be zero. */ + +static int +compare_random (char *restrict texta, size_t lena, + char *restrict textb, size_t lenb) +{ + /* XFRM_DIFF records the equivalent of memcmp on the transformed + data. This is used to break ties if there is a checksum + collision, and this is good enough given the astronomically low + probability of a collision. */ + int xfrm_diff = 0; + + char stackbuf[4000]; + char *buf = stackbuf; + size_t bufsize = sizeof stackbuf; + void *allocated = nullptr; + uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)]; + struct md5_ctx s[2]; + s[0] = s[1] = random_md5_state; + + if (hard_LC_COLLATE) + { + char const *lima = texta + lena; + char const *limb = textb + lenb; + + while (true) + { + /* Transform the text into the basis of comparison, so that byte + strings that would otherwise considered to be equal are + considered equal here even if their bytes differ. + + Each time through this loop, transform one + null-terminated string's worth from TEXTA or from TEXTB + or both. That way, there's no need to store the + transformation of the whole line, if it contains many + null-terminated strings. */ + + /* Store the transformed data into a big-enough buffer. */ + + /* A 3X size guess avoids the overhead of calling strxfrm + twice on typical implementations. Don't worry about + size_t overflow, as the guess need not be correct. */ + size_t guess_bufsize = 3 * (lena + lenb) + 2; + if (bufsize < guess_bufsize) + { + bufsize = MAX (guess_bufsize, bufsize * 3 / 2); + free (allocated); + buf = allocated = malloc (bufsize); + if (! buf) + { + buf = stackbuf; + bufsize = sizeof stackbuf; + } + } + + size_t sizea = + (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0); + bool a_fits = sizea <= bufsize; + size_t sizeb = + (textb < limb + ? (xstrxfrm ((a_fits ? buf + sizea : nullptr), textb, + (a_fits ? bufsize - sizea : 0)) + + 1) + : 0); + + if (! (a_fits && sizea + sizeb <= bufsize)) + { + bufsize = sizea + sizeb; + if (bufsize < SIZE_MAX / 3) + bufsize = bufsize * 3 / 2; + free (allocated); + buf = allocated = xmalloc (bufsize); + if (texta < lima) + strxfrm (buf, texta, sizea); + if (textb < limb) + strxfrm (buf + sizea, textb, sizeb); + } + + /* Advance past NULs to the next part of each input string, + exiting the loop if both strings are exhausted. When + exiting the loop, prepare to finish off the tiebreaker + comparison properly. */ + if (texta < lima) + texta += strlen (texta) + 1; + if (textb < limb) + textb += strlen (textb) + 1; + if (! (texta < lima || textb < limb)) + { + lena = sizea; texta = buf; + lenb = sizeb; textb = buf + sizea; + break; + } + + /* Accumulate the transformed data in the corresponding + checksums. */ + md5_process_bytes (buf, sizea, &s[0]); + md5_process_bytes (buf + sizea, sizeb, &s[1]); + + /* Update the tiebreaker comparison of the transformed data. */ + if (! xfrm_diff) + { + xfrm_diff = memcmp (buf, buf + sizea, MIN (sizea, sizeb)); + if (! xfrm_diff) + xfrm_diff = (sizea > sizeb) - (sizea < sizeb); + } + } + } + + /* Compute and compare the checksums. */ + md5_process_bytes (texta, lena, &s[0]); md5_finish_ctx (&s[0], dig[0]); + md5_process_bytes (textb, lenb, &s[1]); md5_finish_ctx (&s[1], dig[1]); + int diff = memcmp (dig[0], dig[1], sizeof dig[0]); + + /* Fall back on the tiebreaker if the checksums collide. */ + if (! diff) + { + if (! xfrm_diff) + { + xfrm_diff = memcmp (texta, textb, MIN (lena, lenb)); + if (! xfrm_diff) + xfrm_diff = (lena > lenb) - (lena < lenb); + } + + diff = xfrm_diff; + } + + free (allocated); + + return diff; +} + +/* Return the printable width of the block of memory starting at + TEXT and ending just before LIM, counting each tab as one byte. + FIXME: Should we generally be counting non printable chars? */ + +static size_t +debug_width (char const *text, char const *lim) +{ + size_t width = mbsnwidth (text, lim - text, 0); + while (text < lim) + width += (*text++ == '\t'); + return width; +} + +/* For debug mode, "underline" a key at the + specified offset and screen width. */ + +static void +mark_key (size_t offset, size_t width) +{ + while (offset--) + putchar (' '); + + if (!width) + printf (_("^ no match for key\n")); + else + { + do + putchar ('_'); + while (--width); + + putchar ('\n'); + } +} + +/* Return true if KEY is a numeric key. */ + +static inline bool +key_numeric (struct keyfield const *key) +{ + return key->numeric || key->general_numeric || key->human_numeric; +} + +/* For LINE, output a debugging line that underlines KEY in LINE. + If KEY is null, underline the whole line. */ + +static void +debug_key (struct line const *line, struct keyfield const *key) +{ + char *text = line->text; + char *beg = text; + char *lim = text + line->length - 1; + + if (key) + { + if (key->sword != SIZE_MAX) + beg = begfield (line, key); + if (key->eword != SIZE_MAX) + lim = limfield (line, key); + + if ((key->skipsblanks && key->sword == SIZE_MAX) + || key->month || key_numeric (key)) + { + char saved = *lim; + *lim = '\0'; + + while (blanks[to_uchar (*beg)]) + beg++; + + char *tighter_lim = beg; + + if (lim < beg) + tighter_lim = lim; + else if (key->month) + getmonth (beg, &tighter_lim); + else if (key->general_numeric) + ignore_value (strtold (beg, &tighter_lim)); + else if (key->numeric || key->human_numeric) + { + char const *p = beg + (beg < lim && *beg == '-'); + char max_digit = traverse_raw_number (&p); + if ('0' <= max_digit) + { + unsigned char ch = *p; + tighter_lim = (char *) p + + (key->human_numeric && unit_order[ch]); + } + } + else + tighter_lim = lim; + + *lim = saved; + lim = tighter_lim; + } + } + + size_t offset = debug_width (text, beg); + size_t width = debug_width (beg, lim); + mark_key (offset, width); +} + +/* Debug LINE by underlining its keys. */ + +static void +debug_line (struct line const *line) +{ + struct keyfield const *key = keylist; + + do + debug_key (line, key); + while (key && ((key = key->next) || ! (unique || stable))); +} + +/* Return whether sorting options specified for key. */ + +static bool +default_key_compare (struct keyfield const *key) +{ + return ! (key->ignore + || key->translate + || key->skipsblanks + || key->skipeblanks + || key_numeric (key) + || key->month + || key->version + || key->random + /* || key->reverse */ + ); +} + +/* Convert a key to the short options used to specify it. */ + +static void +key_to_opts (struct keyfield const *key, char *opts) +{ + if (key->skipsblanks || key->skipeblanks) + *opts++ = 'b';/* either disables global -b */ + if (key->ignore == nondictionary) + *opts++ = 'd'; + if (key->translate) + *opts++ = 'f'; + if (key->general_numeric) + *opts++ = 'g'; + if (key->human_numeric) + *opts++ = 'h'; + if (key->ignore == nonprinting) + *opts++ = 'i'; + if (key->month) + *opts++ = 'M'; + if (key->numeric) + *opts++ = 'n'; + if (key->random) + *opts++ = 'R'; + if (key->reverse) + *opts++ = 'r'; + if (key->version) + *opts++ = 'V'; + *opts = '\0'; +} + +/* Output data independent key warnings to stderr. */ + +static void +key_warnings (struct keyfield const *gkey, bool gkey_only) +{ + struct keyfield const *key; + struct keyfield ugkey = *gkey; + unsigned long keynum = 1; + bool basic_numeric_field = false; + bool general_numeric_field = false; + bool basic_numeric_field_span = false; + bool general_numeric_field_span = false; + + for (key = keylist; key; key = key->next, keynum++) + { + if (key_numeric (key)) + { + if (key->general_numeric) + general_numeric_field = true; + else + basic_numeric_field = true; + } + + if (key->traditional_used) + { + size_t sword = key->sword; + size_t eword = key->eword; + char tmp[INT_BUFSIZE_BOUND (uintmax_t)]; + /* obsolescent syntax +A.x -B.y is equivalent to: + -k A+1.x+1,B.y (when y = 0) + -k A+1.x+1,B+1.y (when y > 0) */ + char obuf[INT_BUFSIZE_BOUND (sword) * 2 + 4]; /* +# -# */ + char nbuf[INT_BUFSIZE_BOUND (sword) * 2 + 5]; /* -k #,# */ + char *po = obuf; + char *pn = nbuf; + + if (sword == SIZE_MAX) + sword++; + + po = stpcpy (stpcpy (po, "+"), umaxtostr (sword, tmp)); + pn = stpcpy (stpcpy (pn, "-k "), umaxtostr (sword + 1, tmp)); + if (key->eword != SIZE_MAX) + { + stpcpy (stpcpy (po, " -"), umaxtostr (eword + 1, tmp)); + stpcpy (stpcpy (pn, ","), + umaxtostr (eword + 1 + + (key->echar == SIZE_MAX), tmp)); + } + error (0, 0, _("obsolescent key %s used; consider %s instead"), + quote_n (0, obuf), quote_n (1, nbuf)); + } + + /* Warn about field specs that will never match. */ + bool zero_width = key->sword != SIZE_MAX && key->eword < key->sword; + if (zero_width) + error (0, 0, _("key %lu has zero width and will be ignored"), keynum); + + /* Warn about significant leading blanks. */ + bool implicit_skip = key_numeric (key) || key->month; + bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y */ + if (!zero_width && !gkey_only && tab == TAB_DEFAULT && !line_offset + && ((!key->skipsblanks && !implicit_skip) + || (!key->skipsblanks && key->schar) + || (!key->skipeblanks && key->echar))) + error (0, 0, _("leading blanks are significant in key %lu; " + "consider also specifying 'b'"), keynum); + + /* Warn about numeric comparisons spanning fields, + as field delimiters could be interpreted as part + of the number (maybe only in other locales). */ + if (!gkey_only && key_numeric (key)) + { + size_t sword = key->sword + 1; + size_t eword = key->eword + 1; + if (!sword) + sword++; + if (!eword || sword < eword) + { + error (0, 0, _("key %lu is numeric and spans multiple fields"), + keynum); + if (key->general_numeric) + general_numeric_field_span = true; + else + basic_numeric_field_span = true; + } + } + + /* Flag global options not copied or specified in any key. */ + if (ugkey.ignore && (ugkey.ignore == key->ignore)) + ugkey.ignore = nullptr; + if (ugkey.translate && (ugkey.translate == key->translate)) + ugkey.translate = nullptr; + ugkey.skipsblanks &= !key->skipsblanks; + ugkey.skipeblanks &= !key->skipeblanks; + ugkey.month &= !key->month; + ugkey.numeric &= !key->numeric; + ugkey.general_numeric &= !key->general_numeric; + ugkey.human_numeric &= !key->human_numeric; + ugkey.random &= !key->random; + ugkey.version &= !key->version; + ugkey.reverse &= !key->reverse; + } + + /* Explicitly warn if field delimiters in this locale + don't constrain numbers. */ + bool number_locale_warned = false; + if (basic_numeric_field_span) + { + if (tab == TAB_DEFAULT + ? thousands_sep != NON_CHAR && (isblank (to_uchar (thousands_sep))) + : tab == thousands_sep) + { + error (0, 0, + _("field separator %s is treated as a " + "group separator in numbers"), + quote (((char []) {thousands_sep, 0}))); + number_locale_warned = true; + } + } + if (basic_numeric_field_span || general_numeric_field_span) + { + if (tab == TAB_DEFAULT + ? thousands_sep != NON_CHAR && (isblank (to_uchar (decimal_point))) + : tab == decimal_point) + { + error (0, 0, + _("field separator %s is treated as a " + "decimal point in numbers"), + quote (((char []) {decimal_point, 0}))); + number_locale_warned = true; + } + else if (tab == '-') + { + error (0, 0, + _("field separator %s is treated as a " + "minus sign in numbers"), + quote (((char []) {tab, 0}))); + } + else if (general_numeric_field_span && tab == '+') + { + error (0, 0, + _("field separator %s is treated as a " + "plus sign in numbers"), + quote (((char []) {tab, 0}))); + } + } + + /* Explicitly indicate the decimal point used in this locale, + as it suggests that robust scripts need to consider + setting the locale when comparing numbers. */ + if ((basic_numeric_field || general_numeric_field) && ! number_locale_warned) + { + error (0, 0, + _("%snumbers use %s as a decimal point in this locale"), + tab == decimal_point ? "" : _("note "), + quote (((char []) {decimal_point, 0}))); + + } + + if (basic_numeric_field && thousands_sep_ignored) + { + error (0, 0, + _("the multi-byte number group separator " + "in this locale is not supported")); + } + + /* Warn about ignored global options flagged above. + This clears all flags if UGKEY is the only one in the list. */ + if (!default_key_compare (&ugkey) + || (ugkey.reverse && (stable || unique) && keylist)) + { + bool ugkey_reverse = ugkey.reverse; + if (!(stable || unique)) + ugkey.reverse = false; + /* The following is too big, but guaranteed to be "big enough". */ + char opts[sizeof short_options]; + key_to_opts (&ugkey, opts); + error (0, 0, + ngettext ("option '-%s' is ignored", + "options '-%s' are ignored", + select_plural (strlen (opts))), opts); + ugkey.reverse = ugkey_reverse; + } + if (ugkey.reverse && !(stable || unique) && keylist) + error (0, 0, _("option '-r' only applies to last-resort comparison")); +} + +/* Return either the sense of DIFF or its reverse, depending on REVERSED. + If REVERSED, do not simply negate DIFF as that can mishandle INT_MIN. */ + +static int +diff_reversed (int diff, bool reversed) +{ + return reversed ? (diff < 0) - (diff > 0) : diff; +} + +/* Compare two lines A and B trying every key in sequence until there + are no more keys or a difference is found. */ + +static int +keycompare (struct line const *a, struct line const *b) +{ + struct keyfield *key = keylist; + + /* For the first iteration only, the key positions have been + precomputed for us. */ + char *texta = a->keybeg; + char *textb = b->keybeg; + char *lima = a->keylim; + char *limb = b->keylim; + + int diff; + + while (true) + { + char const *translate = key->translate; + bool const *ignore = key->ignore; + + /* Treat field ends before field starts as empty fields. */ + lima = MAX (texta, lima); + limb = MAX (textb, limb); + + /* Find the lengths. */ + size_t lena = lima - texta; + size_t lenb = limb - textb; + + if (hard_LC_COLLATE || key_numeric (key) + || key->month || key->random || key->version) + { + /* Ordinarily use the keys in-place, temporarily null-terminated. */ + char *ta = texta; + char *tb = textb; + size_t tlena = lena; + size_t tlenb = lenb; + char enda = ta[tlena]; + char endb = tb[tlenb]; + + void *allocated = nullptr; + char stackbuf[4000]; + + if (ignore || translate) + { + /* Compute with copies of the keys, which are the result of + translating or ignoring characters, and which need their + own storage. */ + + size_t i; + + /* Allocate space for copies. */ + size_t size = lena + 1 + lenb + 1; + if (size <= sizeof stackbuf) + ta = stackbuf; + else + ta = allocated = xmalloc (size); + tb = ta + lena + 1; + + /* Put into each copy a version of the key in which the + requested characters are ignored or translated. */ + for (tlena = i = 0; i < lena; i++) + if (! (ignore && ignore[to_uchar (texta[i])])) + ta[tlena++] = (translate + ? translate[to_uchar (texta[i])] + : texta[i]); + + for (tlenb = i = 0; i < lenb; i++) + if (! (ignore && ignore[to_uchar (textb[i])])) + tb[tlenb++] = (translate + ? translate[to_uchar (textb[i])] + : textb[i]); + } + + ta[tlena] = '\0'; + tb[tlenb] = '\0'; + + if (key->numeric) + diff = numcompare (ta, tb); + else if (key->general_numeric) + diff = general_numcompare (ta, tb); + else if (key->human_numeric) + diff = human_numcompare (ta, tb); + else if (key->month) + diff = getmonth (ta, nullptr) - getmonth (tb, nullptr); + else if (key->random) + diff = compare_random (ta, tlena, tb, tlenb); + else if (key->version) + diff = filenvercmp (ta, tlena, tb, tlenb); + else + { + /* Locale-dependent string sorting. This is slower than + C-locale sorting, which is implemented below. */ + if (tlena == 0) + diff = - NONZERO (tlenb); + else if (tlenb == 0) + diff = 1; + else + diff = xmemcoll0 (ta, tlena + 1, tb, tlenb + 1); + } + + ta[tlena] = enda; + tb[tlenb] = endb; + + free (allocated); + } + else if (ignore) + { +#define CMP_WITH_IGNORE(A, B) \ + do \ + { \ + while (true) \ + { \ + while (texta < lima && ignore[to_uchar (*texta)]) \ + ++texta; \ + while (textb < limb && ignore[to_uchar (*textb)]) \ + ++textb; \ + if (! (texta < lima && textb < limb)) \ + { \ + diff = (texta < lima) - (textb < limb); \ + break; \ + } \ + diff = to_uchar (A) - to_uchar (B); \ + if (diff) \ + break; \ + ++texta; \ + ++textb; \ + } \ + \ + } \ + while (0) + + if (translate) + CMP_WITH_IGNORE (translate[to_uchar (*texta)], + translate[to_uchar (*textb)]); + else + CMP_WITH_IGNORE (*texta, *textb); + } + else + { + size_t lenmin = MIN (lena, lenb); + if (lenmin == 0) + diff = 0; + else if (translate) + { + size_t i = 0; + do + { + diff = (to_uchar (translate[to_uchar (texta[i])]) + - to_uchar (translate[to_uchar (textb[i])])); + if (diff) + break; + i++; + } + while (i < lenmin); + } + else + diff = memcmp (texta, textb, lenmin); + + if (! diff) + diff = (lena > lenb) - (lena < lenb); + } + + if (diff) + break; + + key = key->next; + if (! key) + return 0; + + /* Find the beginning and limit of the next field. */ + if (key->eword != SIZE_MAX) + lima = limfield (a, key), limb = limfield (b, key); + else + lima = a->text + a->length - 1, limb = b->text + b->length - 1; + + if (key->sword != SIZE_MAX) + texta = begfield (a, key), textb = begfield (b, key); + else + { + texta = a->text, textb = b->text; + if (key->skipsblanks) + { + while (texta < lima && blanks[to_uchar (*texta)]) + ++texta; + while (textb < limb && blanks[to_uchar (*textb)]) + ++textb; + } + } + } + + return diff_reversed (diff, key->reverse); +} + +/* Compare two lines A and B, returning negative, zero, or positive + depending on whether A compares less than, equal to, or greater than B. */ + +static int +compare (struct line const *a, struct line const *b) +{ + int diff; + size_t alen, blen; + + /* First try to compare on the specified keys (if any). + The only two cases with no key at all are unadorned sort, + and unadorned sort -r. */ + if (keylist) + { + diff = keycompare (a, b); + if (diff || unique || stable) + return diff; + } + + /* If the keys all compare equal (or no keys were specified) + fall through to the default comparison. */ + alen = a->length - 1, blen = b->length - 1; + + if (alen == 0) + diff = - NONZERO (blen); + else if (blen == 0) + diff = 1; + else if (hard_LC_COLLATE) + { + /* xmemcoll0 is a performance enhancement as + it will not unconditionally write '\0' after the + passed in buffers, which was seen to give around + a 3% increase in performance for short lines. */ + diff = xmemcoll0 (a->text, alen + 1, b->text, blen + 1); + } + else + { + diff = memcmp (a->text, b->text, MIN (alen, blen)); + if (!diff) + diff = (alen > blen) - (alen < blen); + } + + return diff_reversed (diff, reverse); +} + +/* Write LINE to output stream FP; the output file's name is + OUTPUT_FILE if OUTPUT_FILE is non-null, and is the standard output + otherwise. If debugging is enabled and FP is standard output, + append some debugging information. */ + +static void +write_line (struct line const *line, FILE *fp, char const *output_file) +{ + char *buf = line->text; + size_t n_bytes = line->length; + char *ebuf = buf + n_bytes; + + if (!output_file && debug) + { + /* Convert TAB to '>' and EOL to \n, and then output debugging info. */ + char const *c = buf; + + while (c < ebuf) + { + char wc = *c++; + if (wc == '\t') + wc = '>'; + else if (c == ebuf) + wc = '\n'; + if (fputc (wc, fp) == EOF) + sort_die (_("write failed"), output_file); + } + + debug_line (line); + } + else + { + ebuf[-1] = eolchar; + if (fwrite (buf, 1, n_bytes, fp) != n_bytes) + sort_die (_("write failed"), output_file); + ebuf[-1] = '\0'; + } +} + +/* Check that the lines read from FILE_NAME come in order. Return + true if they are in order. If CHECKONLY == 'c', also print a + diagnostic (FILE_NAME, line number, contents of line) to stderr if + they are not in order. */ + +static bool +check (char const *file_name, char checkonly) +{ + FILE *fp = xfopen (file_name, "r"); + struct buffer buf; /* Input buffer. */ + struct line temp; /* Copy of previous line. */ + size_t alloc = 0; + uintmax_t line_number = 0; + struct keyfield const *key = keylist; + bool nonunique = ! unique; + bool ordered = true; + + initbuf (&buf, sizeof (struct line), + MAX (merge_buffer_size, sort_size)); + temp.text = nullptr; + + while (fillbuf (&buf, fp, file_name)) + { + struct line const *line = buffer_linelim (&buf); + struct line const *linebase = line - buf.nlines; + + /* Make sure the line saved from the old buffer contents is + less than or equal to the first line of the new buffer. */ + if (alloc && nonunique <= compare (&temp, line - 1)) + { + found_disorder: + { + if (checkonly == 'c') + { + struct line const *disorder_line = line - 1; + uintmax_t disorder_line_number = + buffer_linelim (&buf) - disorder_line + line_number; + char hr_buf[INT_BUFSIZE_BOUND (disorder_line_number)]; + fprintf (stderr, _("%s: %s:%s: disorder: "), + program_name, file_name, + umaxtostr (disorder_line_number, hr_buf)); + write_line (disorder_line, stderr, _("standard error")); + } + + ordered = false; + break; + } + } + + /* Compare each line in the buffer with its successor. */ + while (linebase < --line) + if (nonunique <= compare (line, line - 1)) + goto found_disorder; + + line_number += buf.nlines; + + /* Save the last line of the buffer. */ + if (alloc < line->length) + { + do + { + alloc *= 2; + if (! alloc) + { + alloc = line->length; + break; + } + } + while (alloc < line->length); + + free (temp.text); + temp.text = xmalloc (alloc); + } + memcpy (temp.text, line->text, line->length); + temp.length = line->length; + if (key) + { + temp.keybeg = temp.text + (line->keybeg - line->text); + temp.keylim = temp.text + (line->keylim - line->text); + } + } + + xfclose (fp, file_name); + free (buf.buf); + free (temp.text); + return ordered; +} + +/* Open FILES (there are NFILES of them) and store the resulting array + of stream pointers into (*PFPS). Allocate the array. Return the + number of successfully opened files, setting errno if this value is + less than NFILES. */ + +static size_t +open_input_files (struct sortfile *files, size_t nfiles, FILE ***pfps) +{ + FILE **fps = *pfps = xnmalloc (nfiles, sizeof *fps); + int i; + + /* Open as many input files as we can. */ + for (i = 0; i < nfiles; i++) + { + fps[i] = (files[i].temp && files[i].temp->state != UNCOMPRESSED + ? open_temp (files[i].temp) + : stream_open (files[i].name, "r")); + if (!fps[i]) + break; + } + + return i; +} + +/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary + files (all of which are at the start of the FILES array), and + NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. + FPS is the vector of open stream corresponding to the files. + Close input and output streams before returning. + OUTPUT_FILE gives the name of the output file. If it is null, + the output file is standard output. */ + +static void +mergefps (struct sortfile *files, size_t ntemps, size_t nfiles, + FILE *ofp, char const *output_file, FILE **fps) +{ + struct buffer *buffer = xnmalloc (nfiles, sizeof *buffer); + /* Input buffers for each file. */ + struct line saved; /* Saved line storage for unique check. */ + struct line const *savedline = nullptr; + /* &saved if there is a saved line. */ + size_t savealloc = 0; /* Size allocated for the saved line. */ + struct line const **cur = xnmalloc (nfiles, sizeof *cur); + /* Current line in each line table. */ + struct line const **base = xnmalloc (nfiles, sizeof *base); + /* Base of each line table. */ + size_t *ord = xnmalloc (nfiles, sizeof *ord); + /* Table representing a permutation of fps, + such that cur[ord[0]] is the smallest line + and will be next output. */ + size_t i; + size_t j; + size_t t; + struct keyfield const *key = keylist; + saved.text = nullptr; + + /* Read initial lines from each input file. */ + for (i = 0; i < nfiles; ) + { + initbuf (&buffer[i], sizeof (struct line), + MAX (merge_buffer_size, sort_size / nfiles)); + if (fillbuf (&buffer[i], fps[i], files[i].name)) + { + struct line const *linelim = buffer_linelim (&buffer[i]); + cur[i] = linelim - 1; + base[i] = linelim - buffer[i].nlines; + i++; + } + else + { + /* fps[i] is empty; eliminate it from future consideration. */ + xfclose (fps[i], files[i].name); + if (i < ntemps) + { + ntemps--; + zaptemp (files[i].name); + } + free (buffer[i].buf); + --nfiles; + for (j = i; j < nfiles; ++j) + { + files[j] = files[j + 1]; + fps[j] = fps[j + 1]; + } + } + } + + /* Set up the ord table according to comparisons among input lines. + Since this only reorders two items if one is strictly greater than + the other, it is stable. */ + for (i = 0; i < nfiles; ++i) + ord[i] = i; + for (i = 1; i < nfiles; ++i) + if (0 < compare (cur[ord[i - 1]], cur[ord[i]])) + t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; + + /* Repeatedly output the smallest line until no input remains. */ + while (nfiles) + { + struct line const *smallest = cur[ord[0]]; + + /* If uniquified output is turned on, output only the first of + an identical series of lines. */ + if (unique) + { + if (savedline && compare (savedline, smallest)) + { + savedline = nullptr; + write_line (&saved, ofp, output_file); + } + if (!savedline) + { + savedline = &saved; + if (savealloc < smallest->length) + { + do + if (! savealloc) + { + savealloc = smallest->length; + break; + } + while ((savealloc *= 2) < smallest->length); + + free (saved.text); + saved.text = xmalloc (savealloc); + } + saved.length = smallest->length; + memcpy (saved.text, smallest->text, saved.length); + if (key) + { + saved.keybeg = + saved.text + (smallest->keybeg - smallest->text); + saved.keylim = + saved.text + (smallest->keylim - smallest->text); + } + } + } + else + write_line (smallest, ofp, output_file); + + /* Check if we need to read more lines into memory. */ + if (base[ord[0]] < smallest) + cur[ord[0]] = smallest - 1; + else + { + if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]].name)) + { + struct line const *linelim = buffer_linelim (&buffer[ord[0]]); + cur[ord[0]] = linelim - 1; + base[ord[0]] = linelim - buffer[ord[0]].nlines; + } + else + { + /* We reached EOF on fps[ord[0]]. */ + for (i = 1; i < nfiles; ++i) + if (ord[i] > ord[0]) + --ord[i]; + --nfiles; + xfclose (fps[ord[0]], files[ord[0]].name); + if (ord[0] < ntemps) + { + ntemps--; + zaptemp (files[ord[0]].name); + } + free (buffer[ord[0]].buf); + for (i = ord[0]; i < nfiles; ++i) + { + fps[i] = fps[i + 1]; + files[i] = files[i + 1]; + buffer[i] = buffer[i + 1]; + cur[i] = cur[i + 1]; + base[i] = base[i + 1]; + } + for (i = 0; i < nfiles; ++i) + ord[i] = ord[i + 1]; + continue; + } + } + + /* The new line just read in may be larger than other lines + already in main memory; push it back in the queue until we + encounter a line larger than it. Optimize for the common + case where the new line is smallest. */ + { + size_t lo = 1; + size_t hi = nfiles; + size_t probe = lo; + size_t ord0 = ord[0]; + size_t count_of_smaller_lines; + + while (lo < hi) + { + int cmp = compare (cur[ord0], cur[ord[probe]]); + if (cmp < 0 || (cmp == 0 && ord0 < ord[probe])) + hi = probe; + else + lo = probe + 1; + probe = (lo + hi) / 2; + } + + count_of_smaller_lines = lo - 1; + for (j = 0; j < count_of_smaller_lines; j++) + ord[j] = ord[j + 1]; + ord[count_of_smaller_lines] = ord0; + } + } + + if (unique && savedline) + { + write_line (&saved, ofp, output_file); + free (saved.text); + } + + xfclose (ofp, output_file); + free (fps); + free (buffer); + free (ord); + free (base); + free (cur); +} + +/* Merge lines from FILES onto OFP. NTEMPS is the number of temporary + files (all of which are at the start of the FILES array), and + NFILES is the number of files; 0 <= NTEMPS <= NFILES <= NMERGE. + Close input and output files before returning. + OUTPUT_FILE gives the name of the output file. + + Return the number of files successfully merged. This number can be + less than NFILES if we ran low on file descriptors, but in this + case it is never less than 2. */ + +static size_t +mergefiles (struct sortfile *files, size_t ntemps, size_t nfiles, + FILE *ofp, char const *output_file) +{ + FILE **fps; + size_t nopened = open_input_files (files, nfiles, &fps); + if (nopened < nfiles && nopened < 2) + sort_die (_("open failed"), files[nopened].name); + mergefps (files, ntemps, nopened, ofp, output_file, fps); + return nopened; +} + +/* Merge into T (of size NLINES) the two sorted arrays of lines + LO (with NLINES / 2 members), and + T - (NLINES / 2) (with NLINES - NLINES / 2 members). + T and LO point just past their respective arrays, and the arrays + are in reverse order. NLINES must be at least 2. */ + +static void +mergelines (struct line *restrict t, size_t nlines, + struct line const *restrict lo) +{ + size_t nlo = nlines / 2; + size_t nhi = nlines - nlo; + struct line *hi = t - nlo; + + while (true) + if (compare (lo - 1, hi - 1) <= 0) + { + *--t = *--lo; + if (! --nlo) + { + /* HI must equal T now, and there is no need to copy from + HI to T. */ + return; + } + } + else + { + *--t = *--hi; + if (! --nhi) + { + do + *--t = *--lo; + while (--nlo); + + return; + } + } +} + +/* Sort the array LINES with NLINES members, using TEMP for temporary space. + Do this all within one thread. NLINES must be at least 2. + If TO_TEMP, put the sorted output into TEMP, and TEMP is as large as LINES. + Otherwise the sort is in-place and TEMP is half-sized. + The input and output arrays are in reverse order, and LINES and + TEMP point just past the end of their respective arrays. + + Use a recursive divide-and-conquer algorithm, in the style + suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use + the optimization suggested by exercise 5.2.4-10; this requires room + for only 1.5*N lines, rather than the usual 2*N lines. Knuth + writes that this memory optimization was originally published by + D. A. Bell, Comp J. 1 (1958), 75. */ + +static void +sequential_sort (struct line *restrict lines, size_t nlines, + struct line *restrict temp, bool to_temp) +{ + if (nlines == 2) + { + /* Declare 'swap' as int, not bool, to work around a bug + + in the IBM xlc 6.0.0.0 compiler in 64-bit mode. */ + int swap = (0 < compare (&lines[-1], &lines[-2])); + if (to_temp) + { + temp[-1] = lines[-1 - swap]; + temp[-2] = lines[-2 + swap]; + } + else if (swap) + { + temp[-1] = lines[-1]; + lines[-1] = lines[-2]; + lines[-2] = temp[-1]; + } + } + else + { + size_t nlo = nlines / 2; + size_t nhi = nlines - nlo; + struct line *lo = lines; + struct line *hi = lines - nlo; + + sequential_sort (hi, nhi, temp - (to_temp ? nlo : 0), to_temp); + if (1 < nlo) + sequential_sort (lo, nlo, temp, !to_temp); + else if (!to_temp) + temp[-1] = lo[-1]; + + struct line *dest; + struct line const *sorted_lo; + if (to_temp) + { + dest = temp; + sorted_lo = lines; + } + else + { + dest = lines; + sorted_lo = temp; + } + mergelines (dest, nlines, sorted_lo); + } +} + +static struct merge_node *init_node (struct merge_node *restrict, + struct merge_node *restrict, + struct line *, size_t, size_t, bool); + + +/* Create and return a merge tree for NTHREADS threads, sorting NLINES + lines, with destination DEST. */ +static struct merge_node * +merge_tree_init (size_t nthreads, size_t nlines, struct line *dest) +{ + struct merge_node *merge_tree = xmalloc (2 * sizeof *merge_tree * nthreads); + + struct merge_node *root = merge_tree; + root->lo = root->hi = root->end_lo = root->end_hi = nullptr; + root->dest = nullptr; + root->nlo = root->nhi = nlines; + root->parent = nullptr; + root->level = MERGE_END; + root->queued = false; + pthread_mutex_init (&root->lock, nullptr); + + init_node (root, root + 1, dest, nthreads, nlines, false); + return merge_tree; +} + +/* Destroy the merge tree. */ +static void +merge_tree_destroy (size_t nthreads, struct merge_node *merge_tree) +{ + size_t n_nodes = nthreads * 2; + struct merge_node *node = merge_tree; + + while (n_nodes--) + { + pthread_mutex_destroy (&node->lock); + node++; + } + + free (merge_tree); +} + +/* Initialize a merge tree node and its descendants. The node's + parent is PARENT. The node and its descendants are taken from the + array of nodes NODE_POOL. Their destination starts at DEST; they + will consume NTHREADS threads. The total number of sort lines is + TOTAL_LINES. IS_LO_CHILD is true if the node is the low child of + its parent. */ + +static struct merge_node * +init_node (struct merge_node *restrict parent, + struct merge_node *restrict node_pool, + struct line *dest, size_t nthreads, + size_t total_lines, bool is_lo_child) +{ + size_t nlines = (is_lo_child ? parent->nlo : parent->nhi); + size_t nlo = nlines / 2; + size_t nhi = nlines - nlo; + struct line *lo = dest - total_lines; + struct line *hi = lo - nlo; + struct line **parent_end = (is_lo_child ? &parent->end_lo : &parent->end_hi); + + struct merge_node *node = node_pool++; + node->lo = node->end_lo = lo; + node->hi = node->end_hi = hi; + node->dest = parent_end; + node->nlo = nlo; + node->nhi = nhi; + node->parent = parent; + node->level = parent->level + 1; + node->queued = false; + pthread_mutex_init (&node->lock, nullptr); + + if (nthreads > 1) + { + size_t lo_threads = nthreads / 2; + size_t hi_threads = nthreads - lo_threads; + node->lo_child = node_pool; + node_pool = init_node (node, node_pool, lo, lo_threads, + total_lines, true); + node->hi_child = node_pool; + node_pool = init_node (node, node_pool, hi, hi_threads, + total_lines, false); + } + else + { + node->lo_child = nullptr; + node->hi_child = nullptr; + } + return node_pool; +} + + +/* Compare two merge nodes A and B for priority. */ + +static int +compare_nodes (void const *a, void const *b) +{ + struct merge_node const *nodea = a; + struct merge_node const *nodeb = b; + if (nodea->level == nodeb->level) + return (nodea->nlo + nodea->nhi) < (nodeb->nlo + nodeb->nhi); + return nodea->level < nodeb->level; +} + +/* Lock a merge tree NODE. */ + +static inline void +lock_node (struct merge_node *node) +{ + pthread_mutex_lock (&node->lock); +} + +/* Unlock a merge tree NODE. */ + +static inline void +unlock_node (struct merge_node *node) +{ + pthread_mutex_unlock (&node->lock); +} + +/* Destroy merge QUEUE. */ + +static void +queue_destroy (struct merge_node_queue *queue) +{ + heap_free (queue->priority_queue); + pthread_cond_destroy (&queue->cond); + pthread_mutex_destroy (&queue->mutex); +} + +/* Initialize merge QUEUE, allocating space suitable for a maximum of + NTHREADS threads. */ + +static void +queue_init (struct merge_node_queue *queue, size_t nthreads) +{ + /* Though it's highly unlikely all nodes are in the heap at the same + time, the heap should accommodate all of them. Counting a null + dummy head for the heap, reserve 2 * NTHREADS nodes. */ + queue->priority_queue = heap_alloc (compare_nodes, 2 * nthreads); + pthread_mutex_init (&queue->mutex, nullptr); + pthread_cond_init (&queue->cond, nullptr); +} + +/* Insert NODE into QUEUE. The caller either holds a lock on NODE, or + does not need to lock NODE. */ + +static void +queue_insert (struct merge_node_queue *queue, struct merge_node *node) +{ + pthread_mutex_lock (&queue->mutex); + heap_insert (queue->priority_queue, node); + node->queued = true; + pthread_cond_signal (&queue->cond); + pthread_mutex_unlock (&queue->mutex); +} + +/* Pop the top node off the priority QUEUE, lock the node, return it. */ + +static struct merge_node * +queue_pop (struct merge_node_queue *queue) +{ + struct merge_node *node; + pthread_mutex_lock (&queue->mutex); + while (! (node = heap_remove_top (queue->priority_queue))) + pthread_cond_wait (&queue->cond, &queue->mutex); + pthread_mutex_unlock (&queue->mutex); + lock_node (node); + node->queued = false; + return node; +} + +/* Output LINE to TFP, unless -u is specified and the line compares + equal to the previous line. TEMP_OUTPUT is the name of TFP, or + is null if TFP is standard output. + + This function does not save the line for comparison later, so it is + appropriate only for internal sort. */ + +static void +write_unique (struct line const *line, FILE *tfp, char const *temp_output) +{ + if (unique) + { + if (saved_line.text && ! compare (line, &saved_line)) + return; + saved_line = *line; + } + + write_line (line, tfp, temp_output); +} + +/* Merge the lines currently available to a NODE in the binary + merge tree. Merge a number of lines appropriate for this merge + level, assuming TOTAL_LINES is the total number of lines. + + If merging at the top level, send output to TFP. TEMP_OUTPUT is + the name of TFP, or is null if TFP is standard output. */ + +static void +mergelines_node (struct merge_node *restrict node, size_t total_lines, + FILE *tfp, char const *temp_output) +{ + struct line *lo_orig = node->lo; + struct line *hi_orig = node->hi; + size_t to_merge = MAX_MERGE (total_lines, node->level); + size_t merged_lo; + size_t merged_hi; + + if (node->level > MERGE_ROOT) + { + /* Merge to destination buffer. */ + struct line *dest = *node->dest; + while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--) + if (compare (node->lo - 1, node->hi - 1) <= 0) + *--dest = *--node->lo; + else + *--dest = *--node->hi; + + merged_lo = lo_orig - node->lo; + merged_hi = hi_orig - node->hi; + + if (node->nhi == merged_hi) + while (node->lo != node->end_lo && to_merge--) + *--dest = *--node->lo; + else if (node->nlo == merged_lo) + while (node->hi != node->end_hi && to_merge--) + *--dest = *--node->hi; + *node->dest = dest; + } + else + { + /* Merge directly to output. */ + while (node->lo != node->end_lo && node->hi != node->end_hi && to_merge--) + { + if (compare (node->lo - 1, node->hi - 1) <= 0) + write_unique (--node->lo, tfp, temp_output); + else + write_unique (--node->hi, tfp, temp_output); + } + + merged_lo = lo_orig - node->lo; + merged_hi = hi_orig - node->hi; + + if (node->nhi == merged_hi) + { + while (node->lo != node->end_lo && to_merge--) + write_unique (--node->lo, tfp, temp_output); + } + else if (node->nlo == merged_lo) + { + while (node->hi != node->end_hi && to_merge--) + write_unique (--node->hi, tfp, temp_output); + } + } + + /* Update NODE. */ + merged_lo = lo_orig - node->lo; + merged_hi = hi_orig - node->hi; + node->nlo -= merged_lo; + node->nhi -= merged_hi; +} + +/* Into QUEUE, insert NODE if it is not already queued, and if one of + NODE's children has available lines and the other either has + available lines or has exhausted its lines. */ + +static void +queue_check_insert (struct merge_node_queue *queue, struct merge_node *node) +{ + if (! node->queued) + { + bool lo_avail = (node->lo - node->end_lo) != 0; + bool hi_avail = (node->hi - node->end_hi) != 0; + if (lo_avail ? hi_avail || ! node->nhi : hi_avail && ! node->nlo) + queue_insert (queue, node); + } +} + +/* Into QUEUE, insert NODE's parent if the parent can now be worked on. */ + +static void +queue_check_insert_parent (struct merge_node_queue *queue, + struct merge_node *node) +{ + if (node->level > MERGE_ROOT) + { + lock_node (node->parent); + queue_check_insert (queue, node->parent); + unlock_node (node->parent); + } + else if (node->nlo + node->nhi == 0) + { + /* If the MERGE_ROOT NODE has finished merging, insert the + MERGE_END node. */ + queue_insert (queue, node->parent); + } +} + +/* Repeatedly pop QUEUE for a node with lines to merge, and merge at least + some of those lines, until the MERGE_END node is popped. + TOTAL_LINES is the total number of lines. If merging at the top + level, send output to TFP. TEMP_OUTPUT is the name of TFP, or is + null if TFP is standard output. */ + +static void +merge_loop (struct merge_node_queue *queue, + size_t total_lines, FILE *tfp, char const *temp_output) +{ + while (true) + { + struct merge_node *node = queue_pop (queue); + + if (node->level == MERGE_END) + { + unlock_node (node); + /* Reinsert so other threads can pop it. */ + queue_insert (queue, node); + break; + } + mergelines_node (node, total_lines, tfp, temp_output); + queue_check_insert (queue, node); + queue_check_insert_parent (queue, node); + + unlock_node (node); + } +} + + +static void sortlines (struct line *restrict, size_t, size_t, + struct merge_node *, struct merge_node_queue *, + FILE *, char const *); + +/* Thread arguments for sortlines_thread. */ + +struct thread_args +{ + /* Source, i.e., the array of lines to sort. This points just past + the end of the array. */ + struct line *lines; + + /* Number of threads to use. If 0 or 1, sort single-threaded. */ + size_t nthreads; + + /* Number of lines in LINES and DEST. */ + size_t const total_lines; + + /* Merge node. Lines from this node and this node's sibling will merged + to this node's parent. */ + struct merge_node *const node; + + /* The priority queue controlling available work for the entire + internal sort. */ + struct merge_node_queue *const queue; + + /* If at the top level, the file to output to, and the file's name. + If the file is standard output, the file's name is null. */ + FILE *tfp; + char const *output_temp; +}; + +/* Like sortlines, except with a signature acceptable to pthread_create. */ + +static void * +sortlines_thread (void *data) +{ + struct thread_args const *args = data; + sortlines (args->lines, args->nthreads, args->total_lines, + args->node, args->queue, args->tfp, + args->output_temp); + return nullptr; +} + +/* Sort lines, possibly in parallel. The arguments are as in struct + thread_args above. + + The algorithm has three phases: node creation, sequential sort, + and binary merge. + + During node creation, sortlines recursively visits each node in the + binary merge tree and creates a NODE structure corresponding to all the + future line merging NODE is responsible for. For each call to + sortlines, half the available threads are assigned to each recursive + call, until a leaf node having only 1 available thread is reached. + + Each leaf node then performs two sequential sorts, one on each half of + the lines it is responsible for. It records in its NODE structure that + there are two sorted sublists available to merge from, and inserts its + NODE into the priority queue. + + The binary merge phase then begins. Each thread drops into a loop + where the thread retrieves a NODE from the priority queue, merges lines + available to that NODE, and potentially insert NODE or its parent back + into the queue if there are sufficient available lines for them to + merge. This continues until all lines at all nodes of the merge tree + have been merged. */ + +static void +sortlines (struct line *restrict lines, size_t nthreads, + size_t total_lines, struct merge_node *node, + struct merge_node_queue *queue, FILE *tfp, char const *temp_output) +{ + size_t nlines = node->nlo + node->nhi; + + /* Calculate thread arguments. */ + size_t lo_threads = nthreads / 2; + size_t hi_threads = nthreads - lo_threads; + pthread_t thread; + struct thread_args args = {lines, lo_threads, total_lines, + node->lo_child, queue, tfp, temp_output}; + + if (nthreads > 1 && SUBTHREAD_LINES_HEURISTIC <= nlines + && pthread_create (&thread, nullptr, sortlines_thread, &args) == 0) + { + sortlines (lines - node->nlo, hi_threads, total_lines, + node->hi_child, queue, tfp, temp_output); + pthread_join (thread, nullptr); + } + else + { + /* Nthreads = 1, this is a leaf NODE, or pthread_create failed. + Sort with 1 thread. */ + size_t nlo = node->nlo; + size_t nhi = node->nhi; + struct line *temp = lines - total_lines; + if (1 < nhi) + sequential_sort (lines - nlo, nhi, temp - nlo / 2, false); + if (1 < nlo) + sequential_sort (lines, nlo, temp, false); + + /* Update merge NODE. No need to lock yet. */ + node->lo = lines; + node->hi = lines - nlo; + node->end_lo = lines - nlo; + node->end_hi = lines - nlo - nhi; + + queue_insert (queue, node); + merge_loop (queue, total_lines, tfp, temp_output); + } +} + +/* Scan through FILES[NTEMPS .. NFILES-1] looking for files that are + the same as OUTFILE. If found, replace each with the same + temporary copy that can be merged into OUTFILE without destroying + OUTFILE before it is completely read. This temporary copy does not + count as a merge temp, so don't worry about incrementing NTEMPS in + the caller; final cleanup will remove it, not zaptemp. + + This test ensures that an otherwise-erroneous use like + "sort -m -o FILE ... FILE ..." copies FILE before writing to it. + It's not clear that POSIX requires this nicety. + Detect common error cases, but don't try to catch obscure cases like + "cat ... FILE ... | sort -m -o FILE" + where traditional "sort" doesn't copy the input and where + people should know that they're getting into trouble anyway. + Catching these obscure cases would slow down performance in + common cases. */ + +static void +avoid_trashing_input (struct sortfile *files, size_t ntemps, + size_t nfiles, char const *outfile) +{ + struct tempnode *tempcopy = nullptr; + + for (size_t i = ntemps; i < nfiles; i++) + { + bool is_stdin = STREQ (files[i].name, "-"); + bool same; + struct stat instat; + + if (outfile && STREQ (outfile, files[i].name) && !is_stdin) + same = true; + else + { + struct stat *outst = get_outstatus (); + if (!outst) + break; + + same = (((is_stdin + ? fstat (STDIN_FILENO, &instat) + : stat (files[i].name, &instat)) + == 0) + && SAME_INODE (instat, *outst)); + } + + if (same) + { + if (! tempcopy) + { + FILE *tftp; + tempcopy = create_temp (&tftp); + mergefiles (&files[i], 0, 1, tftp, tempcopy->name); + } + + files[i].name = tempcopy->name; + files[i].temp = tempcopy; + } + } +} + +/* Scan the input files to ensure all are accessible. + Otherwise exit with a diagnostic. + + This will catch common issues with permissions etc. + but will fail to notice issues where you can open but not read, + like when a directory is specified on some systems. + Catching these obscure cases could slow down performance in + common cases. */ + +static void +check_inputs (char *const *files, size_t nfiles) +{ + for (size_t i = 0; i < nfiles; i++) + { + if (STREQ (files[i], "-")) + continue; + + if (euidaccess (files[i], R_OK) != 0) + sort_die (_("cannot read"), files[i]); + } +} + +/* Ensure a specified output file can be created or written to, + and point stdout to it. Do not truncate the file. + Exit with a diagnostic on failure. */ + +static void +check_output (char const *outfile) +{ + if (outfile) + { + int oflags = O_WRONLY | O_BINARY | O_CLOEXEC | O_CREAT; + int outfd = open (outfile, oflags, MODE_RW_UGO); + if (outfd < 0) + sort_die (_("open failed"), outfile); + move_fd (outfd, STDOUT_FILENO); + } +} + +/* Merge the input FILES. NTEMPS is the number of files at the + start of FILES that are temporary; it is zero at the top level. + NFILES is the total number of files. Put the output in + OUTPUT_FILE; a null OUTPUT_FILE stands for standard output. */ + +static void +merge (struct sortfile *files, size_t ntemps, size_t nfiles, + char const *output_file) +{ + while (nmerge < nfiles) + { + /* Number of input files processed so far. */ + size_t in; + + /* Number of output files generated so far. */ + size_t out; + + /* nfiles % NMERGE; this counts input files that are left over + after all full-sized merges have been done. */ + size_t remainder; + + /* Number of easily-available slots at the next loop iteration. */ + size_t cheap_slots; + + /* Do as many NMERGE-size merges as possible. In the case that + nmerge is bogus, increment by the maximum number of file + descriptors allowed. */ + for (out = in = 0; nmerge <= nfiles - in; out++) + { + FILE *tfp; + struct tempnode *temp = create_temp (&tfp); + size_t num_merged = mergefiles (&files[in], MIN (ntemps, nmerge), + nmerge, tfp, temp->name); + ntemps -= MIN (ntemps, num_merged); + files[out].name = temp->name; + files[out].temp = temp; + in += num_merged; + } + + remainder = nfiles - in; + cheap_slots = nmerge - out % nmerge; + + if (cheap_slots < remainder) + { + /* So many files remain that they can't all be put into the last + NMERGE-sized output window. Do one more merge. Merge as few + files as possible, to avoid needless I/O. */ + size_t nshortmerge = remainder - cheap_slots + 1; + FILE *tfp; + struct tempnode *temp = create_temp (&tfp); + size_t num_merged = mergefiles (&files[in], MIN (ntemps, nshortmerge), + nshortmerge, tfp, temp->name); + ntemps -= MIN (ntemps, num_merged); + files[out].name = temp->name; + files[out++].temp = temp; + in += num_merged; + } + + /* Put the remaining input files into the last NMERGE-sized output + window, so they will be merged in the next pass. */ + memmove (&files[out], &files[in], (nfiles - in) * sizeof *files); + ntemps += out; + nfiles -= in - out; + } + + avoid_trashing_input (files, ntemps, nfiles, output_file); + + /* We aren't guaranteed that this final mergefiles will work, therefore we + try to merge into the output, and then merge as much as we can into a + temp file if we can't. Repeat. */ + + while (true) + { + /* Merge directly into the output file if possible. */ + FILE **fps; + size_t nopened = open_input_files (files, nfiles, &fps); + + if (nopened == nfiles) + { + FILE *ofp = stream_open (output_file, "w"); + if (ofp) + { + mergefps (files, ntemps, nfiles, ofp, output_file, fps); + break; + } + if (errno != EMFILE || nopened <= 2) + sort_die (_("open failed"), output_file); + } + else if (nopened <= 2) + sort_die (_("open failed"), files[nopened].name); + + /* We ran out of file descriptors. Close one of the input + files, to gain a file descriptor. Then create a temporary + file with our spare file descriptor. Retry if that failed + (e.g., some other process could open a file between the time + we closed and tried to create). */ + FILE *tfp; + struct tempnode *temp; + do + { + nopened--; + xfclose (fps[nopened], files[nopened].name); + temp = maybe_create_temp (&tfp, ! (nopened <= 2)); + } + while (!temp); + + /* Merge into the newly allocated temporary. */ + mergefps (&files[0], MIN (ntemps, nopened), nopened, tfp, temp->name, + fps); + ntemps -= MIN (ntemps, nopened); + files[0].name = temp->name; + files[0].temp = temp; + + memmove (&files[1], &files[nopened], (nfiles - nopened) * sizeof *files); + ntemps++; + nfiles -= nopened - 1; + } +} + +/* Sort NFILES FILES onto OUTPUT_FILE. Use at most NTHREADS threads. */ + +static void +sort (char *const *files, size_t nfiles, char const *output_file, + size_t nthreads) +{ + struct buffer buf; + size_t ntemps = 0; + bool output_file_created = false; + + buf.alloc = 0; + + while (nfiles) + { + char const *temp_output; + char const *file = *files; + FILE *fp = xfopen (file, "r"); + FILE *tfp; + + size_t bytes_per_line; + if (nthreads > 1) + { + /* Get log P. */ + size_t tmp = 1; + size_t mult = 1; + while (tmp < nthreads) + { + tmp *= 2; + mult++; + } + bytes_per_line = (mult * sizeof (struct line)); + } + else + bytes_per_line = sizeof (struct line) * 3 / 2; + + if (! buf.alloc) + initbuf (&buf, bytes_per_line, + sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line)); + buf.eof = false; + files++; + nfiles--; + + while (fillbuf (&buf, fp, file)) + { + struct line *line; + + if (buf.eof && nfiles + && (bytes_per_line + 1 + < (buf.alloc - buf.used - bytes_per_line * buf.nlines))) + { + /* End of file, but there is more input and buffer room. + Concatenate the next input file; this is faster in + the usual case. */ + buf.left = buf.used; + break; + } + + saved_line.text = nullptr; + line = buffer_linelim (&buf); + if (buf.eof && !nfiles && !ntemps && !buf.left) + { + xfclose (fp, file); + tfp = xfopen (output_file, "w"); + temp_output = output_file; + output_file_created = true; + } + else + { + ++ntemps; + temp_output = create_temp (&tfp)->name; + } + if (1 < buf.nlines) + { + struct merge_node_queue queue; + queue_init (&queue, nthreads); + struct merge_node *merge_tree = + merge_tree_init (nthreads, buf.nlines, line); + + sortlines (line, nthreads, buf.nlines, merge_tree + 1, + &queue, tfp, temp_output); + + merge_tree_destroy (nthreads, merge_tree); + queue_destroy (&queue); + } + else + write_unique (line - 1, tfp, temp_output); + + xfclose (tfp, temp_output); + + if (output_file_created) + goto finish; + } + xfclose (fp, file); + } + + finish: + free (buf.buf); + + if (! output_file_created) + { + struct tempnode *node = temphead; + struct sortfile *tempfiles = xnmalloc (ntemps, sizeof *tempfiles); + for (size_t i = 0; node; i++) + { + tempfiles[i].name = node->name; + tempfiles[i].temp = node; + node = node->next; + } + merge (tempfiles, ntemps, ntemps, output_file); + free (tempfiles); + } + + reap_all (); +} + +/* Insert a malloc'd copy of key KEY_ARG at the end of the key list. */ + +static void +insertkey (struct keyfield *key_arg) +{ + struct keyfield **p; + struct keyfield *key = xmemdup (key_arg, sizeof *key); + + for (p = &keylist; *p; p = &(*p)->next) + continue; + *p = key; + key->next = nullptr; +} + +/* Report a bad field specification SPEC, with extra info MSGID. */ + +static void +badfieldspec (char const *spec, char const *msgid) +{ + error (SORT_FAILURE, 0, _("%s: invalid field specification %s"), + _(msgid), quote (spec)); +} + +/* Report incompatible options. */ + +static void +incompatible_options (char const *opts) +{ + error (SORT_FAILURE, 0, _("options '-%s' are incompatible"), (opts)); +} + +/* Check compatibility of ordering options. */ + +static void +check_ordering_compatibility (void) +{ + struct keyfield *key; + + for (key = keylist; key; key = key->next) + if (1 < (key->numeric + key->general_numeric + key->human_numeric + + key->month + (key->version | key->random | !!key->ignore))) + { + /* The following is too big, but guaranteed to be "big enough". */ + char opts[sizeof short_options]; + /* Clear flags we're not interested in. */ + key->skipsblanks = key->skipeblanks = key->reverse = false; + key_to_opts (key, opts); + incompatible_options (opts); + } +} + +/* Parse the leading integer in STRING and store the resulting value + (which must fit into size_t) into *VAL. Return the address of the + suffix after the integer. If the value is too large, silently + substitute SIZE_MAX. If MSGID is null, return nullptr after + failure; otherwise, report MSGID and exit on failure. */ + +static char const * +parse_field_count (char const *string, size_t *val, char const *msgid) +{ + char *suffix; + uintmax_t n; + + switch (xstrtoumax (string, &suffix, 10, &n, "")) + { + case LONGINT_OK: + case LONGINT_INVALID_SUFFIX_CHAR: + *val = n; + if (*val == n) + break; + FALLTHROUGH; + case LONGINT_OVERFLOW: + case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: + *val = SIZE_MAX; + break; + + case LONGINT_INVALID: + if (msgid) + error (SORT_FAILURE, 0, _("%s: invalid count at start of %s"), + _(msgid), quote (string)); + return nullptr; + } + + return suffix; +} + +/* Handle interrupts and hangups. */ + +static void +sighandler (int sig) +{ + if (! SA_NOCLDSTOP) + signal (sig, SIG_IGN); + + cleanup (); + + signal (sig, SIG_DFL); + raise (sig); +} + +/* Set the ordering options for KEY specified in S. + Return the address of the first character in S that + is not a valid ordering option. + BLANKTYPE is the kind of blanks that 'b' should skip. */ + +static char * +set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype) +{ + while (*s) + { + switch (*s) + { + case 'b': + if (blanktype == bl_start || blanktype == bl_both) + key->skipsblanks = true; + if (blanktype == bl_end || blanktype == bl_both) + key->skipeblanks = true; + break; + case 'd': + key->ignore = nondictionary; + break; + case 'f': + key->translate = fold_toupper; + break; + case 'g': + key->general_numeric = true; + break; + case 'h': + key->human_numeric = true; + break; + case 'i': + /* Option order should not matter, so don't let -i override + -d. -d implies -i, but -i does not imply -d. */ + if (! key->ignore) + key->ignore = nonprinting; + break; + case 'M': + key->month = true; + break; + case 'n': + key->numeric = true; + break; + case 'R': + key->random = true; + break; + case 'r': + key->reverse = true; + break; + case 'V': + key->version = true; + break; + default: + return (char *) s; + } + ++s; + } + return (char *) s; +} + +/* Initialize KEY. */ + +static struct keyfield * +key_init (struct keyfield *key) +{ + memset (key, 0, sizeof *key); + key->eword = SIZE_MAX; + return key; +} + +int +main (int argc, char **argv) +{ + struct keyfield *key; + struct keyfield key_buf; + struct keyfield gkey; + bool gkey_only = false; + char const *s; + int c = 0; + char checkonly = 0; + bool mergeonly = false; + char *random_source = nullptr; + bool need_random = false; + size_t nthreads = 0; + size_t nfiles = 0; + bool posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr); + int posix_ver = posix2_version (); + bool traditional_usage = ! (200112 <= posix_ver && posix_ver < 200809); + char **files; + char *files_from = nullptr; + struct Tokens tok; + char const *outfile = nullptr; + bool locale_ok; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + locale_ok = !! setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (SORT_FAILURE); + + hard_LC_COLLATE = hard_locale (LC_COLLATE); +#if HAVE_NL_LANGINFO + hard_LC_TIME = hard_locale (LC_TIME); +#endif + + /* Get locale's representation of the decimal point. */ + { + struct lconv const *locale = localeconv (); + + /* If the locale doesn't define a decimal point, or if the decimal + point is multibyte, use the C locale's decimal point. FIXME: + add support for multibyte decimal points. */ + decimal_point = locale->decimal_point[0]; + if (! decimal_point || locale->decimal_point[1]) + decimal_point = '.'; + + /* FIXME: add support for multibyte thousands separators. */ + thousands_sep = locale->thousands_sep[0]; + if (thousands_sep && locale->thousands_sep[1]) + thousands_sep_ignored = true; + if (! thousands_sep || locale->thousands_sep[1]) + thousands_sep = NON_CHAR; + } + + have_read_stdin = false; + inittables (); + + { + size_t i; + static int const sig[] = + { + /* The usual suspects. */ + SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, +#ifdef SIGPOLL + SIGPOLL, +#endif +#ifdef SIGPROF + SIGPROF, +#endif +#ifdef SIGVTALRM + SIGVTALRM, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + enum { nsigs = ARRAY_CARDINALITY (sig) }; + +#if SA_NOCLDSTOP + struct sigaction act; + + sigemptyset (&caught_signals); + for (i = 0; i < nsigs; i++) + { + sigaction (sig[i], nullptr, &act); + if (act.sa_handler != SIG_IGN) + sigaddset (&caught_signals, sig[i]); + } + + act.sa_handler = sighandler; + act.sa_mask = caught_signals; + act.sa_flags = 0; + + for (i = 0; i < nsigs; i++) + if (sigismember (&caught_signals, sig[i])) + sigaction (sig[i], &act, nullptr); +#else + for (i = 0; i < nsigs; i++) + if (signal (sig[i], SIG_IGN) != SIG_IGN) + { + signal (sig[i], sighandler); + siginterrupt (sig[i], 1); + } +#endif + } + signal (SIGCHLD, SIG_DFL); /* Don't inherit CHLD handling from parent. */ + + /* The signal mask is known, so it is safe to invoke exit_cleanup. */ + atexit (exit_cleanup); + + key_init (&gkey); + gkey.sword = SIZE_MAX; + + files = xnmalloc (argc, sizeof *files); + + while (true) + { + /* Parse an operand as a file after "--" was seen; or if + pedantic and a file was seen, unless the POSIX version + is not 1003.1-2001 and -c was not seen and the operand is + "-o FILE" or "-oFILE". */ + int oi = -1; + + if (c == -1 + || (posixly_correct && nfiles != 0 + && ! (traditional_usage + && ! checkonly + && optind != argc + && argv[optind][0] == '-' && argv[optind][1] == 'o' + && (argv[optind][2] || optind + 1 != argc))) + || ((c = getopt_long (argc, argv, short_options, + long_options, &oi)) + == -1)) + { + if (argc <= optind) + break; + files[nfiles++] = argv[optind++]; + } + else switch (c) + { + case 1: + key = nullptr; + if (optarg[0] == '+') + { + bool minus_pos_usage = (optind != argc && argv[optind][0] == '-' + && ISDIGIT (argv[optind][1])); + traditional_usage |= minus_pos_usage && !posixly_correct; + if (traditional_usage) + { + /* Treat +POS1 [-POS2] as a key if possible; but silently + treat an operand as a file if it is not a valid +POS1. */ + key = key_init (&key_buf); + s = parse_field_count (optarg + 1, &key->sword, nullptr); + if (s && *s == '.') + s = parse_field_count (s + 1, &key->schar, nullptr); + if (! (key->sword || key->schar)) + key->sword = SIZE_MAX; + if (! s || *set_ordering (s, key, bl_start)) + key = nullptr; + else + { + if (minus_pos_usage) + { + char const *optarg1 = argv[optind++]; + s = parse_field_count (optarg1 + 1, &key->eword, + N_("invalid number after '-'")); + if (*s == '.') + s = parse_field_count (s + 1, &key->echar, + N_("invalid number after '.'")); + if (!key->echar && key->eword) + { + /* obsolescent syntax +A.x -B.y is equivalent to: + -k A+1.x+1,B.y (when y = 0) + -k A+1.x+1,B+1.y (when y > 0) + So eword is decremented as in the -k case + only when the end field (B) is specified and + echar (y) is 0. */ + key->eword--; + } + if (*set_ordering (s, key, bl_end)) + badfieldspec (optarg1, + N_("stray character in field spec")); + } + key->traditional_used = true; + insertkey (key); + } + } + } + if (! key) + files[nfiles++] = optarg; + break; + + case SORT_OPTION: + c = XARGMATCH ("--sort", optarg, sort_args, sort_types); + FALLTHROUGH; + case 'b': + case 'd': + case 'f': + case 'g': + case 'h': + case 'i': + case 'M': + case 'n': + case 'r': + case 'R': + case 'V': + { + char str[2]; + str[0] = c; + str[1] = '\0'; + set_ordering (str, &gkey, bl_both); + } + break; + + case CHECK_OPTION: + c = (optarg + ? XARGMATCH ("--check", optarg, check_args, check_types) + : 'c'); + FALLTHROUGH; + case 'c': + case 'C': + if (checkonly && checkonly != c) + incompatible_options ("cC"); + checkonly = c; + break; + + case COMPRESS_PROGRAM_OPTION: + if (compress_program && !STREQ (compress_program, optarg)) + error (SORT_FAILURE, 0, _("multiple compress programs specified")); + compress_program = optarg; + break; + + case DEBUG_PROGRAM_OPTION: + debug = true; + break; + + case FILES0_FROM_OPTION: + files_from = optarg; + break; + + case 'k': + key = key_init (&key_buf); + + /* Get POS1. */ + s = parse_field_count (optarg, &key->sword, + N_("invalid number at field start")); + if (! key->sword--) + { + /* Provoke with 'sort -k0' */ + badfieldspec (optarg, N_("field number is zero")); + } + if (*s == '.') + { + s = parse_field_count (s + 1, &key->schar, + N_("invalid number after '.'")); + if (! key->schar--) + { + /* Provoke with 'sort -k1.0' */ + badfieldspec (optarg, N_("character offset is zero")); + } + } + if (! (key->sword || key->schar)) + key->sword = SIZE_MAX; + s = set_ordering (s, key, bl_start); + if (*s != ',') + { + key->eword = SIZE_MAX; + key->echar = 0; + } + else + { + /* Get POS2. */ + s = parse_field_count (s + 1, &key->eword, + N_("invalid number after ','")); + if (! key->eword--) + { + /* Provoke with 'sort -k1,0' */ + badfieldspec (optarg, N_("field number is zero")); + } + if (*s == '.') + { + s = parse_field_count (s + 1, &key->echar, + N_("invalid number after '.'")); + } + s = set_ordering (s, key, bl_end); + } + if (*s) + badfieldspec (optarg, N_("stray character in field spec")); + insertkey (key); + break; + + case 'm': + mergeonly = true; + break; + + case NMERGE_OPTION: + specify_nmerge (oi, c, optarg); + break; + + case 'o': + if (outfile && !STREQ (outfile, optarg)) + error (SORT_FAILURE, 0, _("multiple output files specified")); + outfile = optarg; + break; + + case RANDOM_SOURCE_OPTION: + if (random_source && !STREQ (random_source, optarg)) + error (SORT_FAILURE, 0, _("multiple random sources specified")); + random_source = optarg; + break; + + case 's': + stable = true; + break; + + case 'S': + specify_sort_size (oi, c, optarg); + break; + + case 't': + { + char newtab = optarg[0]; + if (! newtab) + error (SORT_FAILURE, 0, _("empty tab")); + if (optarg[1]) + { + if (STREQ (optarg, "\\0")) + newtab = '\0'; + else + { + /* Provoke with 'sort -txx'. Complain about + "multi-character tab" instead of "multibyte tab", so + that the diagnostic's wording does not need to be + changed once multibyte characters are supported. */ + error (SORT_FAILURE, 0, _("multi-character tab %s"), + quote (optarg)); + } + } + if (tab != TAB_DEFAULT && tab != newtab) + error (SORT_FAILURE, 0, _("incompatible tabs")); + tab = newtab; + } + break; + + case 'T': + add_temp_dir (optarg); + break; + + case PARALLEL_OPTION: + nthreads = specify_nthreads (oi, c, optarg); + break; + + case 'u': + unique = true; + break; + + case 'y': + /* Accept and ignore e.g. -y0 for compatibility with Solaris 2.x + through Solaris 7. It is also accepted by many non-Solaris + "sort" implementations, e.g., AIX 5.2, HP-UX 11i v2, IRIX 6.5. + -y is marked as obsolete starting with Solaris 8 (1999), but is + still accepted as of Solaris 10 prerelease (2004). + + Solaris 2.5.1 "sort -y 100" reads the input file "100", but + emulate Solaris 8 and 9 "sort -y 100" which ignores the "100", + and which in general ignores the argument after "-y" if it + consists entirely of digits (it can even be empty). */ + if (optarg == argv[optind - 1]) + { + char const *p; + for (p = optarg; ISDIGIT (*p); p++) + continue; + optind -= (*p != '\0'); + } + break; + + case 'z': + eolchar = 0; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (SORT_FAILURE); + } + } + + if (files_from) + { + /* When using --files0-from=F, you may not specify any files + on the command-line. */ + if (nfiles) + { + error (0, 0, _("extra operand %s"), quoteaf (files[0])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (SORT_FAILURE); + } + + FILE *stream = xfopen (files_from, "r"); + + readtokens0_init (&tok); + + if (! readtokens0 (stream, &tok)) + error (SORT_FAILURE, 0, _("cannot read file names from %s"), + quoteaf (files_from)); + xfclose (stream, files_from); + + if (tok.n_tok) + { + free (files); + files = tok.tok; + nfiles = tok.n_tok; + for (size_t i = 0; i < nfiles; i++) + { + if (STREQ (files[i], "-")) + error (SORT_FAILURE, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (files[i])); + else if (files[i][0] == '\0') + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, + not NL, but it might be better than nothing. */ + unsigned long int file_number = i + 1; + error (SORT_FAILURE, 0, + _("%s:%lu: invalid zero-length file name"), + quotef (files_from), file_number); + } + } + } + else + error (SORT_FAILURE, 0, _("no input from %s"), + quoteaf (files_from)); + } + + /* Inheritance of global options to individual keys. */ + for (key = keylist; key; key = key->next) + { + if (default_key_compare (key) && !key->reverse) + { + key->ignore = gkey.ignore; + key->translate = gkey.translate; + key->skipsblanks = gkey.skipsblanks; + key->skipeblanks = gkey.skipeblanks; + key->month = gkey.month; + key->numeric = gkey.numeric; + key->general_numeric = gkey.general_numeric; + key->human_numeric = gkey.human_numeric; + key->version = gkey.version; + key->random = gkey.random; + key->reverse = gkey.reverse; + } + + need_random |= key->random; + } + + if (!keylist && !default_key_compare (&gkey)) + { + gkey_only = true; + insertkey (&gkey); + need_random |= gkey.random; + } + + check_ordering_compatibility (); + + if (debug) + { + if (checkonly || outfile) + { + static char opts[] = "X --debug"; + opts[0] = (checkonly ? checkonly : 'o'); + incompatible_options (opts); + } + + /* Always output the locale in debug mode, since this + is such a common source of confusion. */ + + /* OpenBSD can only set some categories with LC_ALL above, + so set LC_COLLATE explicitly to flag errors. */ + if (locale_ok) + locale_ok = !! setlocale (LC_COLLATE, ""); + if (! locale_ok) + error (0, 0, "%s", _("failed to set locale")); + if (hard_LC_COLLATE) + error (0, 0, _("text ordering performed using %s sorting rules"), + quote (setlocale (LC_COLLATE, nullptr))); + else + error (0, 0, "%s", + _("text ordering performed using simple byte comparison")); + + key_warnings (&gkey, gkey_only); + } + + reverse = gkey.reverse; + + if (need_random) + random_md5_state_init (random_source); + + if (temp_dir_count == 0) + { + char const *tmp_dir = getenv ("TMPDIR"); + add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR); + } + + if (nfiles == 0) + { + nfiles = 1; + free (files); + files = xmalloc (sizeof *files); + *files = (char *) "-"; + } + + /* Need to re-check that we meet the minimum requirement for memory + usage with the final value for NMERGE. */ + if (0 < sort_size) + sort_size = MAX (sort_size, MIN_SORT_SIZE); + + if (checkonly) + { + if (nfiles > 1) + error (SORT_FAILURE, 0, _("extra operand %s not allowed with -%c"), + quoteaf (files[1]), checkonly); + + if (outfile) + { + static char opts[] = {0, 'o', 0}; + opts[0] = checkonly; + incompatible_options (opts); + } + + /* POSIX requires that sort return 1 IFF invoked with -c or -C and the + input is not properly sorted. */ + exit (check (files[0], checkonly) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); + } + + /* Check all inputs are accessible, or exit immediately. */ + check_inputs (files, nfiles); + + /* Check output is writable, or exit immediately. */ + check_output (outfile); + + if (mergeonly) + { + struct sortfile *sortfiles = xcalloc (nfiles, sizeof *sortfiles); + + for (size_t i = 0; i < nfiles; ++i) + sortfiles[i].name = files[i]; + + merge (sortfiles, 0, nfiles, outfile); + } + else + { + if (!nthreads) + { + unsigned long int np = num_processors (NPROC_CURRENT_OVERRIDABLE); + nthreads = MIN (np, DEFAULT_MAX_THREADS); + } + + /* Avoid integer overflow later. */ + size_t nthreads_max = SIZE_MAX / (2 * sizeof (struct merge_node)); + nthreads = MIN (nthreads, nthreads_max); + + sort (files, nfiles, outfile, nthreads); + } + + if (have_read_stdin && fclose (stdin) == EOF) + sort_die (_("close failed"), "-"); + + main_exit (EXIT_SUCCESS); +} diff --git a/src/split.c b/src/split.c new file mode 100644 index 0000000..a32b2d9 --- /dev/null +++ b/src/split.c @@ -0,0 +1,1699 @@ +/* split.c -- split a file into pieces. + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* By tege@sics.se, with rms. + + TODO: + * support -p REGEX as in BSD's split. + * support --suppress-matched as in csplit. */ +#include + +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "alignalloc.h" +#include "assure.h" +#include "fadvise.h" +#include "fd-reopen.h" +#include "fcntl--.h" +#include "full-write.h" +#include "ioblksize.h" +#include "quote.h" +#include "sig2str.h" +#include "sys-limits.h" +#include "temp-stream.h" +#include "xbinary-io.h" +#include "xdectoint.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "split" + +#define AUTHORS \ + proper_name_lite ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name ("Richard M. Stallman") + +/* Shell command to filter through, instead of creating files. */ +static char const *filter_command; + +/* Process ID of the filter. */ +static pid_t filter_pid; + +/* Array of open pipes. */ +static int *open_pipes; +static idx_t open_pipes_alloc; +static int n_open_pipes; + +/* Whether SIGPIPE has the default action, when --filter is used. */ +static bool default_SIGPIPE; + +/* Base name of output files. */ +static char const *outbase; + +/* Name of output files. */ +static char *outfile; + +/* Pointer to the end of the prefix in OUTFILE. + Suffixes are inserted here. */ +static char *outfile_mid; + +/* Generate new suffix when suffixes are exhausted. */ +static bool suffix_auto = true; + +/* Length of OUTFILE's suffix. */ +static idx_t suffix_length; + +/* Alphabet of characters to use in suffix. */ +static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz"; + +/* Numerical suffix start value. */ +static char const *numeric_suffix_start; + +/* Additional suffix to append to output file names. */ +static char const *additional_suffix; + +/* Name of input file. May be "-". */ +static char *infile; + +/* stat buf for input file. */ +static struct stat in_stat_buf; + +/* Descriptor on which output file is open. */ +static int output_desc = -1; + +/* If true, print a diagnostic on standard error just before each + output file is opened. */ +static bool verbose; + +/* If true, don't generate zero length output files. */ +static bool elide_empty_files; + +/* If true, in round robin mode, immediately copy + input to output, which is much slower, so disabled by default. */ +static bool unbuffered; + +/* The character marking end of line. Defaults to \n below. */ +static int eolchar = -1; + +/* The split mode to use. */ +enum Split_type +{ + type_undef, type_bytes, type_byteslines, type_lines, type_digits, + type_chunk_bytes, type_chunk_lines, type_rr +}; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + VERBOSE_OPTION = CHAR_MAX + 1, + FILTER_OPTION, + IO_BLKSIZE_OPTION, + ADDITIONAL_SUFFIX_OPTION +}; + +static struct option const longopts[] = +{ + {"bytes", required_argument, nullptr, 'b'}, + {"lines", required_argument, nullptr, 'l'}, + {"line-bytes", required_argument, nullptr, 'C'}, + {"number", required_argument, nullptr, 'n'}, + {"elide-empty-files", no_argument, nullptr, 'e'}, + {"unbuffered", no_argument, nullptr, 'u'}, + {"suffix-length", required_argument, nullptr, 'a'}, + {"additional-suffix", required_argument, nullptr, + ADDITIONAL_SUFFIX_OPTION}, + {"numeric-suffixes", optional_argument, nullptr, 'd'}, + {"hex-suffixes", optional_argument, nullptr, 'x'}, + {"filter", required_argument, nullptr, FILTER_OPTION}, + {"verbose", no_argument, nullptr, VERBOSE_OPTION}, + {"separator", required_argument, nullptr, 't'}, + {"-io-blksize", required_argument, nullptr, + IO_BLKSIZE_OPTION}, /* do not document */ + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Return true if the errno value, ERR, is ignorable. */ +static inline bool +ignorable (int err) +{ + return filter_command && err == EPIPE; +} + +static void +set_suffix_length (intmax_t n_units, enum Split_type split_type) +{ +#define DEFAULT_SUFFIX_LENGTH 2 + + int suffix_length_needed = 0; + + /* The suffix auto length feature is incompatible with + a user specified start value as the generated suffixes + are not all consecutive. */ + if (numeric_suffix_start) + suffix_auto = false; + + /* Auto-calculate the suffix length if the number of files is given. */ + if (split_type == type_chunk_bytes || split_type == type_chunk_lines + || split_type == type_rr) + { + intmax_t n_units_end = n_units - 1; + if (numeric_suffix_start) + { + intmax_t n_start; + strtol_error e = xstrtoimax (numeric_suffix_start, nullptr, 10, + &n_start, ""); + if (e == LONGINT_OK && n_start < n_units) + { + /* Restrict auto adjustment so we don't keep + incrementing a suffix size arbitrarily, + as that would break sort order for files + generated from multiple split runs. */ + if (ckd_add (&n_units_end, n_units_end, n_start)) + n_units_end = INTMAX_MAX; + } + + } + idx_t alphabet_len = strlen (suffix_alphabet); + do + suffix_length_needed++; + while (n_units_end /= alphabet_len); + + suffix_auto = false; + } + + if (suffix_length) /* set by user */ + { + if (suffix_length < suffix_length_needed) + error (EXIT_FAILURE, 0, + _("the suffix length needs to be at least %d"), + suffix_length_needed); + suffix_auto = false; + return; + } + else + suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_length_needed); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE [PREFIX]]\n\ +"), + program_name); + fputs (_("\ +Output pieces of FILE to PREFIXaa, PREFIXab, ...;\n\ +default size is 1000 lines, and default PREFIX is 'x'.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fprintf (stdout, _("\ + -a, --suffix-length=N generate suffixes of length N (default %d)\n\ + --additional-suffix=SUFFIX append an additional SUFFIX to file names\n\ + -b, --bytes=SIZE put SIZE bytes per output file\n\ + -C, --line-bytes=SIZE put at most SIZE bytes of records per output file\n\ + -d use numeric suffixes starting at 0, not alphabetic\n\ + --numeric-suffixes[=FROM] same as -d, but allow setting the start value\ +\n\ + -x use hex suffixes starting at 0, not alphabetic\n\ + --hex-suffixes[=FROM] same as -x, but allow setting the start value\n\ + -e, --elide-empty-files do not generate empty output files with '-n'\n\ + --filter=COMMAND write to shell COMMAND; file name is $FILE\n\ + -l, --lines=NUMBER put NUMBER lines/records per output file\n\ + -n, --number=CHUNKS generate CHUNKS output files; see explanation below\n\ + -t, --separator=SEP use SEP instead of newline as the record separator;\n\ + '\\0' (zero) specifies the NUL character\n\ + -u, --unbuffered immediately copy input to output with '-n r/...'\n\ +"), DEFAULT_SUFFIX_LENGTH); + fputs (_("\ + --verbose print a diagnostic just before each\n\ + output file is opened\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_size_note (); + fputs (_("\n\ +CHUNKS may be:\n\ + N split into N files based on size of input\n\ + K/N output Kth of N to stdout\n\ + l/N split into N files without splitting lines/records\n\ + l/K/N output Kth of N to stdout without splitting lines/records\n\ + r/N like 'l' but use round robin distribution\n\ + r/K/N likewise but only output Kth of N to stdout\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Copy the data in FD to a temporary file, then make that file FD. + Use BUF, of size BUFSIZE, to copy. Return the number of + bytes copied, or -1 (setting errno) on error. */ +static off_t +copy_to_tmpfile (int fd, char *buf, idx_t bufsize) +{ + FILE *tmp; + if (!temp_stream (&tmp, nullptr)) + return -1; + off_t copied = 0; + off_t r; + + while (0 < (r = read (fd, buf, bufsize))) + { + if (fwrite (buf, 1, r, tmp) != r) + return -1; + if (ckd_add (&copied, copied, r)) + { + errno = EOVERFLOW; + return -1; + } + } + + if (r < 0) + return r; + r = dup2 (fileno (tmp), fd); + if (r < 0) + return r; + if (fclose (tmp) < 0) + return -1; + return copied; +} + +/* Return the number of bytes that can be read from FD with status ST. + Store up to the first BUFSIZE bytes of the file's data into BUF, + and advance the file position by the number of bytes read. On + input error, set errno and return -1. */ + +static off_t +input_file_size (int fd, struct stat const *st, char *buf, idx_t bufsize) +{ + off_t size = 0; + do + { + ssize_t n_read = read (fd, buf + size, bufsize - size); + if (n_read <= 0) + return n_read < 0 ? n_read : size; + size += n_read; + } + while (size < bufsize); + + off_t cur, end; + if ((usable_st_size (st) && st->st_size < size) + || (cur = lseek (fd, 0, SEEK_CUR)) < 0 + || cur < size /* E.g., /dev/zero on GNU/Linux. */ + || (end = lseek (fd, 0, SEEK_END)) < 0) + { + char *tmpbuf = xmalloc (bufsize); + end = copy_to_tmpfile (fd, tmpbuf, bufsize); + free (tmpbuf); + if (end < 0) + return end; + cur = 0; + } + + if (end == OFF_T_MAX /* E.g., /dev/zero on GNU/Hurd. */ + || (cur < end && ckd_add (&size, size, end - cur))) + { + errno = EOVERFLOW; + return -1; + } + + if (cur < end) + { + off_t r = lseek (fd, cur, SEEK_SET); + if (r < 0) + return r; + } + + return size; +} + +/* Compute the next sequential output file name and store it into the + string 'outfile'. */ + +static void +next_file_name (void) +{ + /* Index in suffix_alphabet of each character in the suffix. */ + static idx_t *sufindex; + static idx_t outbase_length; + static idx_t outfile_length; + static idx_t addsuf_length; + + if (! outfile) + { + bool overflow, widen; + +new_name: + widen = !! outfile_length; + + if (! widen) + { + /* Allocate and initialize the first file name. */ + + outbase_length = strlen (outbase); + addsuf_length = additional_suffix ? strlen (additional_suffix) : 0; + overflow = ckd_add (&outfile_length, outbase_length + addsuf_length, + suffix_length); + } + else + { + /* Reallocate and initialize a new wider file name. + We do this by subsuming the unchanging part of + the generated suffix into the prefix (base), and + reinitializing the now one longer suffix. */ + + overflow = ckd_add (&outfile_length, outfile_length, 2); + suffix_length++; + } + + idx_t outfile_size; + overflow |= ckd_add (&outfile_size, outfile_length, 1); + if (overflow) + xalloc_die (); + outfile = xirealloc (outfile, outfile_size); + + if (! widen) + memcpy (outfile, outbase, outbase_length); + else + { + /* Append the last alphabet character to the file name prefix. */ + outfile[outbase_length] = suffix_alphabet[sufindex[0]]; + outbase_length++; + } + + outfile_mid = outfile + outbase_length; + memset (outfile_mid, suffix_alphabet[0], suffix_length); + if (additional_suffix) + memcpy (outfile_mid + suffix_length, additional_suffix, addsuf_length); + outfile[outfile_length] = 0; + + free (sufindex); + sufindex = xicalloc (suffix_length, sizeof *sufindex); + + if (numeric_suffix_start) + { + affirm (! widen); + + /* Update the output file name. */ + idx_t i = strlen (numeric_suffix_start); + memcpy (outfile_mid + suffix_length - i, numeric_suffix_start, i); + + /* Update the suffix index. */ + idx_t *sufindex_end = sufindex + suffix_length; + while (i-- != 0) + *--sufindex_end = numeric_suffix_start[i] - '0'; + } + +#if ! _POSIX_NO_TRUNC && HAVE_PATHCONF && defined _PC_NAME_MAX + /* POSIX requires that if the output file name is too long for + its directory, 'split' must fail without creating any files. + This must be checked for explicitly on operating systems that + silently truncate file names. */ + { + char *dir = dir_name (outfile); + long name_max = pathconf (dir, _PC_NAME_MAX); + if (0 <= name_max && name_max < base_len (last_component (outfile))) + error (EXIT_FAILURE, ENAMETOOLONG, "%s", quotef (outfile)); + free (dir); + } +#endif + } + else + { + /* Increment the suffix in place, if possible. */ + + idx_t i = suffix_length; + while (i-- != 0) + { + sufindex[i]++; + if (suffix_auto && i == 0 && ! suffix_alphabet[sufindex[0] + 1]) + goto new_name; + outfile_mid[i] = suffix_alphabet[sufindex[i]]; + if (outfile_mid[i]) + return; + sufindex[i] = 0; + outfile_mid[i] = suffix_alphabet[sufindex[i]]; + } + error (EXIT_FAILURE, 0, _("output file suffixes exhausted")); + } +} + +/* Create or truncate a file. */ + +static int +create (char const *name) +{ + if (!filter_command) + { + if (verbose) + fprintf (stdout, _("creating file %s\n"), quoteaf (name)); + + int oflags = O_WRONLY | O_CREAT | O_BINARY; + int fd = open (name, oflags | O_EXCL, MODE_RW_UGO); + if (0 <= fd || errno != EEXIST) + return fd; + fd = open (name, oflags, MODE_RW_UGO); + if (fd < 0) + return fd; + struct stat out_stat_buf; + if (fstat (fd, &out_stat_buf) != 0) + error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf (name)); + if (SAME_INODE (in_stat_buf, out_stat_buf)) + error (EXIT_FAILURE, 0, _("%s would overwrite input; aborting"), + quoteaf (name)); + bool regularish + = S_ISREG (out_stat_buf.st_mode) || S_TYPEISSHM (&out_stat_buf); + if (! (regularish && out_stat_buf.st_size == 0) + && ftruncate (fd, 0) < 0 && regularish) + error (EXIT_FAILURE, errno, _("%s: error truncating"), quotef (name)); + + return fd; + } + else + { + int fd_pair[2]; + pid_t child_pid; + char const *shell_prog = getenv ("SHELL"); + if (shell_prog == nullptr) + shell_prog = "/bin/sh"; + if (setenv ("FILE", name, 1) != 0) + error (EXIT_FAILURE, errno, + _("failed to set FILE environment variable")); + if (verbose) + fprintf (stdout, _("executing with FILE=%s\n"), quotef (name)); + if (pipe (fd_pair) != 0) + error (EXIT_FAILURE, errno, _("failed to create pipe")); + child_pid = fork (); + if (child_pid == 0) + { + /* This is the child process. If an error occurs here, the + parent will eventually learn about it after doing a wait, + at which time it will emit its own error message. */ + int j; + /* We have to close any pipes that were opened during an + earlier call, otherwise this process will be holding a + write-pipe that will prevent the earlier process from + reading an EOF on the corresponding read-pipe. */ + for (j = 0; j < n_open_pipes; ++j) + if (close (open_pipes[j]) != 0) + error (EXIT_FAILURE, errno, _("closing prior pipe")); + if (close (fd_pair[1])) + error (EXIT_FAILURE, errno, _("closing output pipe")); + if (fd_pair[0] != STDIN_FILENO) + { + if (dup2 (fd_pair[0], STDIN_FILENO) != STDIN_FILENO) + error (EXIT_FAILURE, errno, _("moving input pipe")); + if (close (fd_pair[0]) != 0) + error (EXIT_FAILURE, errno, _("closing input pipe")); + } + if (default_SIGPIPE) + signal (SIGPIPE, SIG_DFL); + execl (shell_prog, last_component (shell_prog), "-c", + filter_command, (char *) nullptr); + error (EXIT_FAILURE, errno, _("failed to run command: \"%s -c %s\""), + shell_prog, filter_command); + } + if (child_pid < 0) + error (EXIT_FAILURE, errno, _("fork system call failed")); + if (close (fd_pair[0]) != 0) + error (EXIT_FAILURE, errno, _("failed to close input pipe")); + filter_pid = child_pid; + if (n_open_pipes == open_pipes_alloc) + open_pipes = xpalloc (open_pipes, &open_pipes_alloc, 1, + MIN (INT_MAX, IDX_MAX), sizeof *open_pipes); + open_pipes[n_open_pipes++] = fd_pair[1]; + return fd_pair[1]; + } +} + +/* Close the output file, and do any associated cleanup. + If FP and FD are both specified, they refer to the same open file; + in this case FP is closed, but FD is still used in cleanup. */ +static void +closeout (FILE *fp, int fd, pid_t pid, char const *name) +{ + if (fp != nullptr && fclose (fp) != 0 && ! ignorable (errno)) + error (EXIT_FAILURE, errno, "%s", quotef (name)); + if (fd >= 0) + { + if (fp == nullptr && close (fd) < 0) + error (EXIT_FAILURE, errno, "%s", quotef (name)); + int j; + for (j = 0; j < n_open_pipes; ++j) + { + if (open_pipes[j] == fd) + { + open_pipes[j] = open_pipes[--n_open_pipes]; + break; + } + } + } + if (pid > 0) + { + int wstatus; + if (waitpid (pid, &wstatus, 0) < 0) + error (EXIT_FAILURE, errno, _("waiting for child process")); + else if (WIFSIGNALED (wstatus)) + { + int sig = WTERMSIG (wstatus); + if (sig != SIGPIPE) + { + char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))]; + if (sig2str (sig, signame) != 0) + sprintf (signame, "%d", sig); + error (sig + 128, 0, + _("with FILE=%s, signal %s from command: %s"), + quotef (name), signame, filter_command); + } + } + else if (WIFEXITED (wstatus)) + { + int ex = WEXITSTATUS (wstatus); + if (ex != 0) + error (ex, 0, _("with FILE=%s, exit %d from command: %s"), + quotef (name), ex, filter_command); + } + else + { + /* shouldn't happen. */ + error (EXIT_FAILURE, 0, + _("unknown status from command (0x%X)"), wstatus + 0u); + } + } +} + +/* Write BYTES bytes at BP to an output file. + If NEW_FILE_FLAG is true, open the next output file. + Otherwise add to the same output file already in use. + Return true if successful. */ + +static bool +cwrite (bool new_file_flag, char const *bp, idx_t bytes) +{ + if (new_file_flag) + { + if (!bp && bytes == 0 && elide_empty_files) + return true; + closeout (nullptr, output_desc, filter_pid, outfile); + next_file_name (); + output_desc = create (outfile); + if (output_desc < 0) + error (EXIT_FAILURE, errno, "%s", quotef (outfile)); + } + + if (full_write (output_desc, bp, bytes) == bytes) + return true; + else + { + if (! ignorable (errno)) + error (EXIT_FAILURE, errno, "%s", quotef (outfile)); + return false; + } +} + +/* Split into pieces of exactly N_BYTES bytes. + However, the first REM_BYTES pieces should be 1 byte longer. + Use buffer BUF, whose size is BUFSIZE. + If INITIAL_READ is nonnegative, + BUF contains the first INITIAL_READ input bytes. */ + +static void +bytes_split (intmax_t n_bytes, intmax_t rem_bytes, + char *buf, idx_t bufsize, ssize_t initial_read, + intmax_t max_files) +{ + bool new_file_flag = true; + bool filter_ok = true; + intmax_t opened = 0; + intmax_t to_write = n_bytes + (0 < rem_bytes); + bool eof = ! to_write; + + while (! eof) + { + ssize_t n_read; + if (0 <= initial_read) + { + n_read = initial_read; + initial_read = -1; + eof = n_read < bufsize; + } + else + { + if (! filter_ok + && 0 <= lseek (STDIN_FILENO, to_write, SEEK_CUR)) + { + to_write = n_bytes + (opened + 1 < rem_bytes); + new_file_flag = true; + } + + n_read = read (STDIN_FILENO, buf, bufsize); + if (n_read < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + eof = n_read == 0; + } + char *bp_out = buf; + while (0 < to_write && to_write <= n_read) + { + if (filter_ok || new_file_flag) + filter_ok = cwrite (new_file_flag, bp_out, to_write); + opened += new_file_flag; + new_file_flag = !max_files || (opened < max_files); + if (! filter_ok && ! new_file_flag) + { + /* If filters no longer accepting input, stop reading. */ + n_read = 0; + eof = true; + break; + } + bp_out += to_write; + n_read -= to_write; + to_write = n_bytes + (opened < rem_bytes); + } + if (0 < n_read) + { + if (filter_ok || new_file_flag) + filter_ok = cwrite (new_file_flag, bp_out, n_read); + opened += new_file_flag; + new_file_flag = false; + if (! filter_ok && opened == max_files) + { + /* If filters no longer accepting input, stop reading. */ + break; + } + to_write -= n_read; + } + } + + /* Ensure NUMBER files are created, which truncates + any existing files or notifies any consumers on fifos. + FIXME: Should we do this before EXIT_FAILURE? */ + while (opened++ < max_files) + cwrite (true, nullptr, 0); +} + +/* Split into pieces of exactly N_LINES lines. + Use buffer BUF, whose size is BUFSIZE. */ + +static void +lines_split (intmax_t n_lines, char *buf, idx_t bufsize) +{ + ssize_t n_read; + char *bp, *bp_out, *eob; + bool new_file_flag = true; + intmax_t n = 0; + + do + { + n_read = read (STDIN_FILENO, buf, bufsize); + if (n_read < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + bp = bp_out = buf; + eob = bp + n_read; + *eob = eolchar; + while (true) + { + bp = rawmemchr (bp, eolchar); + if (bp == eob) + { + if (eob != bp_out) /* do not write 0 bytes! */ + { + idx_t len = eob - bp_out; + cwrite (new_file_flag, bp_out, len); + new_file_flag = false; + } + break; + } + + ++bp; + if (++n >= n_lines) + { + cwrite (new_file_flag, bp_out, bp - bp_out); + bp_out = bp; + new_file_flag = true; + n = 0; + } + } + } + while (n_read); +} + +/* Split into pieces that are as large as possible while still not more + than N_BYTES bytes, and are split on line boundaries except + where lines longer than N_BYTES bytes occur. */ + +static void +line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize) +{ + ssize_t n_read; + intmax_t n_out = 0; /* for each split. */ + idx_t n_hold = 0; + char *hold = nullptr; /* for lines > bufsize. */ + idx_t hold_size = 0; + bool split_line = false; /* Whether a \n was output in a split. */ + + do + { + n_read = read (STDIN_FILENO, buf, bufsize); + if (n_read < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + idx_t n_left = n_read; + char *sob = buf; + while (n_left) + { + idx_t split_rest = 0; + char *eoc = nullptr; + char *eol; + + /* Determine End Of Chunk and/or End of Line, + which are used below to select what to write or buffer. */ + if (n_bytes - n_out - n_hold <= n_left) + { + /* Have enough for split. */ + split_rest = n_bytes - n_out - n_hold; + eoc = sob + split_rest - 1; + eol = memrchr (sob, eolchar, split_rest); + } + else + eol = memrchr (sob, eolchar, n_left); + + /* Output hold space if possible. */ + if (n_hold && !(!eol && n_out)) + { + cwrite (n_out == 0, hold, n_hold); + n_out += n_hold; + if (n_hold > bufsize) + hold = xirealloc (hold, bufsize); + n_hold = 0; + hold_size = bufsize; + } + + /* Output to eol if present. */ + if (eol) + { + split_line = true; + idx_t n_write = eol - sob + 1; + cwrite (n_out == 0, sob, n_write); + n_out += n_write; + n_left -= n_write; + sob += n_write; + if (eoc) + split_rest -= n_write; + } + + /* Output to eoc or eob if possible. */ + if (n_left && !split_line) + { + idx_t n_write = eoc ? split_rest : n_left; + cwrite (n_out == 0, sob, n_write); + n_out += n_write; + n_left -= n_write; + sob += n_write; + if (eoc) + split_rest -= n_write; + } + + /* Update hold if needed. */ + if ((eoc && split_rest) || (!eoc && n_left)) + { + idx_t n_buf = eoc ? split_rest : n_left; + if (hold_size - n_hold < n_buf) + hold = xpalloc (hold, &hold_size, n_buf - (hold_size - n_hold), + -1, sizeof *hold); + memcpy (hold + n_hold, sob, n_buf); + n_hold += n_buf; + n_left -= n_buf; + sob += n_buf; + } + + /* Reset for new split. */ + if (eoc) + { + n_out = 0; + split_line = false; + } + } + } + while (n_read); + + /* Handle no eol at end of file. */ + if (n_hold) + cwrite (n_out == 0, hold, n_hold); + + free (hold); +} + +/* -n l/[K/]N: Write lines to files of approximately file size / N. + The file is partitioned into file size / N sized portions, with the + last assigned any excess. If a line _starts_ within a partition + it is written completely to the corresponding file. Since lines + are not split even if they overlap a partition, the files written + can be larger or smaller than the partition size, and even empty + if a line is so long as to completely overlap the partition. */ + +static void +lines_chunk_split (intmax_t k, intmax_t n, char *buf, idx_t bufsize, + ssize_t initial_read, off_t file_size) +{ + affirm (n && k <= n); + + intmax_t rem_bytes = file_size % n; + off_t chunk_size = file_size / n; + intmax_t chunk_no = 1; + off_t chunk_end = chunk_size + (0 < rem_bytes); + off_t n_written = 0; + bool new_file_flag = true; + bool chunk_truncated = false; + + if (k > 1 && 0 < file_size) + { + /* Start reading 1 byte before kth chunk of file. */ + off_t start = (k - 1) * chunk_size + MIN (k - 1, rem_bytes) - 1; + if (start < initial_read) + { + memmove (buf, buf + start, initial_read - start); + initial_read -= start; + } + else + { + if (initial_read < start + && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + initial_read = -1; + } + n_written = start; + chunk_no = k - 1; + chunk_end = start + 1; + } + + while (n_written < file_size) + { + char *bp = buf, *eob; + ssize_t n_read; + if (0 <= initial_read) + { + n_read = initial_read; + initial_read = -1; + } + else + { + n_read = read (STDIN_FILENO, buf, + MIN (bufsize, file_size - n_written)); + if (n_read < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + } + if (n_read == 0) + break; /* eof. */ + chunk_truncated = false; + eob = buf + n_read; + + while (bp != eob) + { + idx_t to_write; + bool next = false; + + /* Begin looking for '\n' at last byte of chunk. */ + off_t skip = MIN (n_read, MAX (0, chunk_end - 1 - n_written)); + char *bp_out = memchr (bp + skip, eolchar, n_read - skip); + if (bp_out) + { + bp_out++; + next = true; + } + else + bp_out = eob; + to_write = bp_out - bp; + + if (k == chunk_no) + { + /* We don't use the stdout buffer here since we're writing + large chunks from an existing file, so it's more efficient + to write out directly. */ + if (full_write (STDOUT_FILENO, bp, to_write) != to_write) + write_error (); + } + else if (! k) + cwrite (new_file_flag, bp, to_write); + n_written += to_write; + bp += to_write; + n_read -= to_write; + new_file_flag = next; + + /* A line could have been so long that it skipped + entire chunks. So create empty files in that case. */ + while (next || chunk_end <= n_written) + { + if (!next && bp == eob) + { + /* replenish buf, before going to next chunk. */ + chunk_truncated = true; + break; + } + if (k == chunk_no) + return; + chunk_end += chunk_size + (chunk_no < rem_bytes); + chunk_no++; + if (chunk_end <= n_written) + { + if (! k) + cwrite (true, nullptr, 0); + } + else + next = false; + } + } + } + + if (chunk_truncated) + chunk_no++; + + /* Ensure NUMBER files are created, which truncates + any existing files or notifies any consumers on fifos. + FIXME: Should we do this before EXIT_FAILURE? */ + if (!k) + while (chunk_no++ <= n) + cwrite (true, nullptr, 0); +} + +/* -n K/N: Extract Kth of N chunks. */ + +static void +bytes_chunk_extract (intmax_t k, intmax_t n, char *buf, idx_t bufsize, + ssize_t initial_read, off_t file_size) +{ + off_t start; + off_t end; + + assert (0 < k && k <= n); + + start = (k - 1) * (file_size / n) + MIN (k - 1, file_size % n); + end = k == n ? file_size : k * (file_size / n) + MIN (k, file_size % n); + + if (start < initial_read) + { + memmove (buf, buf + start, initial_read - start); + initial_read -= start; + } + else + { + if (initial_read < start + && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + initial_read = -1; + } + + while (start < end) + { + ssize_t n_read; + if (0 <= initial_read) + { + n_read = initial_read; + initial_read = -1; + } + else + { + n_read = read (STDIN_FILENO, buf, bufsize); + if (n_read < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + } + if (n_read == 0) + break; /* eof. */ + n_read = MIN (n_read, end - start); + if (full_write (STDOUT_FILENO, buf, n_read) != n_read + && ! ignorable (errno)) + error (EXIT_FAILURE, errno, "%s", quotef ("-")); + start += n_read; + } +} + +typedef struct of_info +{ + char *of_name; + int ofd; + FILE *ofile; + pid_t opid; +} of_t; + +enum +{ + OFD_NEW = -1, + OFD_APPEND = -2 +}; + +/* Rotate file descriptors when we're writing to more output files than we + have available file descriptors. + Return whether we came under file resource pressure. + If so, it's probably best to close each file when finished with it. */ + +static bool +ofile_open (of_t *files, idx_t i_check, idx_t nfiles) +{ + bool file_limit = false; + + if (files[i_check].ofd <= OFD_NEW) + { + int fd; + idx_t i_reopen = i_check ? i_check - 1 : nfiles - 1; + + /* Another process could have opened a file in between the calls to + close and open, so we should keep trying until open succeeds or + we've closed all of our files. */ + while (true) + { + if (files[i_check].ofd == OFD_NEW) + fd = create (files[i_check].of_name); + else /* OFD_APPEND */ + { + /* Attempt to append to previously opened file. + We use O_NONBLOCK to support writing to fifos, + where the other end has closed because of our + previous close. In that case we'll immediately + get an error, rather than waiting indefinitely. + In specialized cases the consumer can keep reading + from the fifo, terminating on conditions in the data + itself, or perhaps never in the case of 'tail -f'. + I.e., for fifos it is valid to attempt this reopen. + + We don't handle the filter_command case here, as create() + will exit if there are not enough files in that case. + I.e., we don't support restarting filters, as that would + put too much burden on users specifying --filter commands. */ + fd = open (files[i_check].of_name, + O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK); + } + + if (0 <= fd) + break; + + if (!(errno == EMFILE || errno == ENFILE)) + error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name)); + + file_limit = true; + + /* Search backwards for an open file to close. */ + while (files[i_reopen].ofd < 0) + { + i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1; + /* No more open files to close, exit with E[NM]FILE. */ + if (i_reopen == i_check) + error (EXIT_FAILURE, errno, "%s", + quotef (files[i_check].of_name)); + } + + if (fclose (files[i_reopen].ofile) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (files[i_reopen].of_name)); + files[i_reopen].ofile = nullptr; + files[i_reopen].ofd = OFD_APPEND; + } + + files[i_check].ofd = fd; + FILE *ofile = fdopen (fd, "a"); + if (!ofile) + error (EXIT_FAILURE, errno, "%s", quotef (files[i_check].of_name)); + files[i_check].ofile = ofile; + files[i_check].opid = filter_pid; + filter_pid = 0; + } + + return file_limit; +} + +/* -n r/[K/]N: Divide file into N chunks in round robin fashion. + Use BUF of size BUFSIZE for the buffer, and if allocating storage + put its address into *FILESP to pacify -fsanitize=leak. + When K == 0, we try to keep the files open in parallel. + If we run out of file resources, then we revert + to opening and closing each file for each line. */ + +static void +lines_rr (intmax_t k, intmax_t n, char *buf, idx_t bufsize, of_t **filesp) +{ + bool wrapped = false; + bool wrote = false; + bool file_limit; + idx_t i_file; + of_t *files IF_LINT (= nullptr); + intmax_t line_no; + + if (k) + line_no = 1; + else + { + if (IDX_MAX < n) + xalloc_die (); + files = *filesp = xinmalloc (n, sizeof *files); + + /* Generate output file names. */ + for (i_file = 0; i_file < n; i_file++) + { + next_file_name (); + files[i_file].of_name = xstrdup (outfile); + files[i_file].ofd = OFD_NEW; + files[i_file].ofile = nullptr; + files[i_file].opid = 0; + } + i_file = 0; + file_limit = false; + } + + while (true) + { + char *bp = buf, *eob; + ssize_t n_read = read (STDIN_FILENO, buf, bufsize); + if (n_read < 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + else if (n_read == 0) + break; /* eof. */ + eob = buf + n_read; + + while (bp != eob) + { + idx_t to_write; + bool next = false; + + /* Find end of line. */ + char *bp_out = memchr (bp, eolchar, eob - bp); + if (bp_out) + { + bp_out++; + next = true; + } + else + bp_out = eob; + to_write = bp_out - bp; + + if (k) + { + if (line_no == k && unbuffered) + { + if (full_write (STDOUT_FILENO, bp, to_write) != to_write) + write_error (); + } + else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1) + { + write_error (); + } + if (next) + line_no = (line_no == n) ? 1 : line_no + 1; + } + else + { + /* Secure file descriptor. */ + file_limit |= ofile_open (files, i_file, n); + if (unbuffered) + { + /* Note writing to fd, rather than flushing the FILE gives + an 8% performance benefit, due to reduced data copying. */ + if (full_write (files[i_file].ofd, bp, to_write) != to_write + && ! ignorable (errno)) + error (EXIT_FAILURE, errno, "%s", + quotef (files[i_file].of_name)); + } + else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1 + && ! ignorable (errno)) + error (EXIT_FAILURE, errno, "%s", + quotef (files[i_file].of_name)); + + if (! ignorable (errno)) + wrote = true; + + if (file_limit) + { + if (fclose (files[i_file].ofile) != 0) + error (EXIT_FAILURE, errno, "%s", + quotef (files[i_file].of_name)); + files[i_file].ofile = nullptr; + files[i_file].ofd = OFD_APPEND; + } + if (next && ++i_file == n) + { + wrapped = true; + /* If no filters are accepting input, stop reading. */ + if (! wrote) + goto no_filters; + wrote = false; + i_file = 0; + } + } + + bp = bp_out; + } + } + +no_filters: + /* Ensure all files created, so that any existing files are truncated, + and to signal any waiting fifo consumers. + Also, close any open file descriptors. + FIXME: Should we do this before EXIT_FAILURE? */ + if (!k) + { + idx_t ceiling = wrapped ? n : i_file; + for (i_file = 0; i_file < n; i_file++) + { + if (i_file >= ceiling && !elide_empty_files) + file_limit |= ofile_open (files, i_file, n); + if (files[i_file].ofd >= 0) + closeout (files[i_file].ofile, files[i_file].ofd, + files[i_file].opid, files[i_file].of_name); + files[i_file].ofd = OFD_APPEND; + } + } +} + +#define FAIL_ONLY_ONE_WAY() \ + do \ + { \ + error (0, 0, _("cannot split in more than one way")); \ + usage (EXIT_FAILURE); \ + } \ + while (0) + +/* Report a string-to-integer conversion failure MSGID with ARG. */ + +static _Noreturn void +strtoint_die (char const *msgid, char const *arg) +{ + error (EXIT_FAILURE, errno == EINVAL ? 0 : errno, "%s: %s", + gettext (msgid), quote (arg)); +} + +/* Use OVERFLOW_OK when it is OK to ignore LONGINT_OVERFLOW errors, since the + extreme value will do the right thing anyway on any practical platform. */ +#define OVERFLOW_OK LONGINT_OVERFLOW + +/* Parse ARG for number of bytes or lines. The number can be followed + by MULTIPLIERS, and the resulting value must be positive. + If the number cannot be parsed, diagnose with MSG. + Return the number parsed, or an INTMAX_MAX on overflow. */ + +static intmax_t +parse_n_units (char const *arg, char const *multipliers, char const *msgid) +{ + intmax_t n; + if (OVERFLOW_OK < xstrtoimax (arg, nullptr, 10, &n, multipliers) || n < 1) + strtoint_die (msgid, arg); + return n; +} + +/* Parse K/N syntax of chunk options. */ + +static void +parse_chunk (intmax_t *k_units, intmax_t *n_units, char const *arg) +{ + char *argend; + strtol_error e = xstrtoimax (arg, &argend, 10, n_units, ""); + if (e == LONGINT_INVALID_SUFFIX_CHAR && *argend == '/') + { + *k_units = *n_units; + *n_units = parse_n_units (argend + 1, "", + N_("invalid number of chunks")); + if (! (0 < *k_units && *k_units <= *n_units)) + error (EXIT_FAILURE, 0, "%s: %s", _("invalid chunk number"), + quote_mem (arg, argend - arg)); + } + else if (! (e <= OVERFLOW_OK && 0 < *n_units)) + strtoint_die (N_("invalid number of chunks"), arg); +} + + +int +main (int argc, char **argv) +{ + enum Split_type split_type = type_undef; + idx_t in_blk_size = 0; /* optimal block size of input file device */ + idx_t page_size = getpagesize (); + intmax_t k_units = 0; + intmax_t n_units = 0; + + static char const multipliers[] = "bEGKkMmPQRTYZ0"; + int c; + int digits_optind = 0; + off_t file_size = OFF_T_MAX; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + /* Parse command line options. */ + + infile = bad_cast ("-"); + outbase = bad_cast ("x"); + + while (true) + { + /* This is the argv-index of the option we will read next. */ + int this_optind = optind ? optind : 1; + + c = getopt_long (argc, argv, "0123456789C:a:b:del:n:t:ux", + longopts, nullptr); + if (c == -1) + break; + + switch (c) + { + case 'a': + suffix_length = xdectoimax (optarg, 0, IDX_MAX, + "", _("invalid suffix length"), 0); + break; + + case ADDITIONAL_SUFFIX_OPTION: + { + int suffix_len = strlen (optarg); + if (last_component (optarg) != optarg + || (suffix_len && ISSLASH (optarg[suffix_len - 1]))) + { + error (0, 0, + _("invalid suffix %s, contains directory separator"), + quote (optarg)); + usage (EXIT_FAILURE); + } + } + additional_suffix = optarg; + break; + + case 'b': + if (split_type != type_undef) + FAIL_ONLY_ONE_WAY (); + split_type = type_bytes; + n_units = parse_n_units (optarg, multipliers, + N_("invalid number of bytes")); + break; + + case 'l': + if (split_type != type_undef) + FAIL_ONLY_ONE_WAY (); + split_type = type_lines; + n_units = parse_n_units (optarg, "", N_("invalid number of lines")); + break; + + case 'C': + if (split_type != type_undef) + FAIL_ONLY_ONE_WAY (); + split_type = type_byteslines; + n_units = parse_n_units (optarg, multipliers, + N_("invalid number of lines")); + break; + + case 'n': + if (split_type != type_undef) + FAIL_ONLY_ONE_WAY (); + /* skip any whitespace */ + while (isspace (to_uchar (*optarg))) + optarg++; + if (STRNCMP_LIT (optarg, "r/") == 0) + { + split_type = type_rr; + optarg += 2; + } + else if (STRNCMP_LIT (optarg, "l/") == 0) + { + split_type = type_chunk_lines; + optarg += 2; + } + else + split_type = type_chunk_bytes; + parse_chunk (&k_units, &n_units, optarg); + break; + + case 'u': + unbuffered = true; + break; + + case 't': + { + char neweol = optarg[0]; + if (! neweol) + error (EXIT_FAILURE, 0, _("empty record separator")); + if (optarg[1]) + { + if (STREQ (optarg, "\\0")) + neweol = '\0'; + else + { + /* Provoke with 'split -txx'. Complain about + "multi-character tab" instead of "multibyte tab", so + that the diagnostic's wording does not need to be + changed once multibyte characters are supported. */ + error (EXIT_FAILURE, 0, _("multi-character separator %s"), + quote (optarg)); + } + } + /* Make it explicit we don't support multiple separators. */ + if (0 <= eolchar && neweol != eolchar) + { + error (EXIT_FAILURE, 0, + _("multiple separator characters specified")); + } + + eolchar = neweol; + } + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (split_type == type_undef) + { + split_type = type_digits; + n_units = 0; + } + if (split_type != type_undef && split_type != type_digits) + FAIL_ONLY_ONE_WAY (); + if (digits_optind != 0 && digits_optind != this_optind) + n_units = 0; /* More than one number given; ignore other. */ + digits_optind = this_optind; + if (ckd_mul (&n_units, n_units, 10) + || ckd_add (&n_units, n_units, c - '0')) + n_units = INTMAX_MAX; + break; + + case 'd': + case 'x': + if (c == 'd') + suffix_alphabet = "0123456789"; + else + suffix_alphabet = "0123456789abcdef"; + if (optarg) + { + if (strlen (optarg) != strspn (optarg, suffix_alphabet)) + { + error (0, 0, + (c == 'd') ? + _("%s: invalid start value for numerical suffix") : + _("%s: invalid start value for hexadecimal suffix"), + quote (optarg)); + usage (EXIT_FAILURE); + } + else + { + /* Skip any leading zero. */ + while (*optarg == '0' && *(optarg + 1) != '\0') + optarg++; + numeric_suffix_start = optarg; + } + } + break; + + case 'e': + elide_empty_files = true; + break; + + case FILTER_OPTION: + filter_command = optarg; + break; + + case IO_BLKSIZE_OPTION: + in_blk_size = xdectoumax (optarg, 1, + MIN (SYS_BUFSIZE_MAX, + MIN (IDX_MAX, SIZE_MAX) - 1), + multipliers, _("invalid IO block size"), 0); + break; + + case VERBOSE_OPTION: + verbose = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (k_units != 0 && filter_command) + { + error (0, 0, _("--filter does not process a chunk extracted to stdout")); + usage (EXIT_FAILURE); + } + + /* Handle default case. */ + if (split_type == type_undef) + { + split_type = type_lines; + n_units = 1000; + } + + if (n_units == 0) + { + error (0, 0, _("invalid number of lines: %s"), quote ("0")); + usage (EXIT_FAILURE); + } + + if (eolchar < 0) + eolchar = '\n'; + + set_suffix_length (n_units, split_type); + + /* Get out the filename arguments. */ + + if (optind < argc) + infile = argv[optind++]; + + if (optind < argc) + outbase = argv[optind++]; + + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + /* Check that the suffix length is large enough for the numerical + suffix start value. */ + if (numeric_suffix_start && strlen (numeric_suffix_start) > suffix_length) + { + error (0, 0, _("numerical suffix start value is too large " + "for the suffix length")); + usage (EXIT_FAILURE); + } + + /* Open the input file. */ + if (! STREQ (infile, "-") + && fd_reopen (STDIN_FILENO, infile, O_RDONLY, 0) < 0) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (infile)); + + /* Binary I/O is safer when byte counts are used. */ + xset_binary_mode (STDIN_FILENO, O_BINARY); + + /* Advise the kernel of our access pattern. */ + fdadvise (STDIN_FILENO, 0, 0, FADVISE_SEQUENTIAL); + + /* Get the optimal block size of input device and make a buffer. */ + + if (fstat (STDIN_FILENO, &in_stat_buf) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + + if (in_blk_size == 0) + { + in_blk_size = io_blksize (in_stat_buf); + if (SYS_BUFSIZE_MAX < in_blk_size) + in_blk_size = SYS_BUFSIZE_MAX; + } + + char *buf = xalignalloc (page_size, in_blk_size + 1); + ssize_t initial_read = -1; + + if (split_type == type_chunk_bytes || split_type == type_chunk_lines) + { + file_size = input_file_size (STDIN_FILENO, &in_stat_buf, + buf, in_blk_size); + if (file_size < 0) + error (EXIT_FAILURE, errno, _("%s: cannot determine file size"), + quotef (infile)); + initial_read = MIN (file_size, in_blk_size); + } + + /* When filtering, closure of one pipe must not terminate the process, + as there may still be other streams expecting input from us. */ + if (filter_command) + default_SIGPIPE = signal (SIGPIPE, SIG_IGN) == SIG_DFL; + + switch (split_type) + { + case type_digits: + case type_lines: + lines_split (n_units, buf, in_blk_size); + break; + + case type_bytes: + bytes_split (n_units, 0, buf, in_blk_size, -1, 0); + break; + + case type_byteslines: + line_bytes_split (n_units, buf, in_blk_size); + break; + + case type_chunk_bytes: + if (k_units == 0) + bytes_split (file_size / n_units, file_size % n_units, + buf, in_blk_size, initial_read, n_units); + else + bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read, + file_size); + break; + + case type_chunk_lines: + lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read, + file_size); + break; + + case type_rr: + /* Note, this is like 'sed -n ${k}~${n}p' when k > 0, + but the functionality is provided for symmetry. */ + { + of_t *files; + lines_rr (k_units, n_units, buf, in_blk_size, &files); + } + break; + + default: + affirm (false); + } + + if (close (STDIN_FILENO) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + closeout (nullptr, output_desc, filter_pid, outfile); + + main_exit (EXIT_SUCCESS); +} diff --git a/src/stat.c b/src/stat.c new file mode 100644 index 0000000..dd86450 --- /dev/null +++ b/src/stat.c @@ -0,0 +1,1977 @@ +/* stat.c -- display file or file system status + Copyright (C) 2001-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + Written by Michael Meskes. */ + +#include + +/* Keep this conditional in sync with the similar conditional in + ../m4/stat-prog.m4. */ +#if ((STAT_STATVFS || STAT_STATVFS64) \ + && (HAVE_STRUCT_STATVFS_F_BASETYPE || HAVE_STRUCT_STATVFS_F_FSTYPENAME \ + || (! HAVE_STRUCT_STATFS_F_FSTYPENAME && HAVE_STRUCT_STATVFS_F_TYPE))) +# define USE_STATVFS 1 +#else +# define USE_STATVFS 0 +#endif + +#include +#include +#include +#include +#if USE_STATVFS +# include +#elif HAVE_SYS_VFS_H +# include +#elif HAVE_SYS_MOUNT_H && HAVE_SYS_PARAM_H +/* NOTE: freebsd5.0 needs sys/param.h and sys/mount.h for statfs. + It does have statvfs.h, but shouldn't use it, since it doesn't + HAVE_STRUCT_STATVFS_F_BASETYPE. So find a clean way to fix it. */ +/* NetBSD 1.5.2 needs these, for the declaration of struct statfs. */ +# include +# include +# if HAVE_NFS_NFS_CLNT_H && HAVE_NFS_VFS_H +/* Ultrix 4.4 needs these for the declaration of struct statfs. */ +# include +# include +# include +# endif +#elif HAVE_OS_H /* BeOS */ +# include +#endif +#include + +#include "system.h" + +#include "areadlink.h" +#include "argmatch.h" +#include "file-type.h" +#include "filemode.h" +#include "fs.h" +#include "getopt.h" +#include "mountlist.h" +#include "quote.h" +#include "stat-size.h" +#include "stat-time.h" +#include "strftime.h" +#include "find-mount-point.h" +#include "xvasprintf.h" +#include "statx.h" + +#if HAVE_STATX && defined STATX_INO +# define USE_STATX 1 +#else +# define USE_STATX 0 +#endif + +#if USE_STATVFS +# define STRUCT_STATXFS_F_FSID_IS_INTEGER STRUCT_STATVFS_F_FSID_IS_INTEGER +# define HAVE_STRUCT_STATXFS_F_TYPE HAVE_STRUCT_STATVFS_F_TYPE +# if HAVE_STRUCT_STATVFS_F_NAMEMAX +# define SB_F_NAMEMAX(S) ((S)->f_namemax) +# endif +# if ! STAT_STATVFS && STAT_STATVFS64 +# define STRUCT_STATVFS struct statvfs64 +# define STATFS statvfs64 +# else +# define STRUCT_STATVFS struct statvfs +# define STATFS statvfs +# endif +# define STATFS_FRSIZE(S) ((S)->f_frsize) +#else +# define HAVE_STRUCT_STATXFS_F_TYPE HAVE_STRUCT_STATFS_F_TYPE +# if HAVE_STRUCT_STATFS_F_NAMELEN +# define SB_F_NAMEMAX(S) ((S)->f_namelen) +# elif HAVE_STRUCT_STATFS_F_NAMEMAX +# define SB_F_NAMEMAX(S) ((S)->f_namemax) +# endif +# define STATFS statfs +# if HAVE_OS_H /* BeOS */ +/* BeOS has a statvfs function, but it does not return sensible values + for f_files, f_ffree and f_favail, and lacks f_type, f_basetype and + f_fstypename. Use 'struct fs_info' instead. */ +NODISCARD +static int +statfs (char const *filename, struct fs_info *buf) +{ + dev_t device = dev_for_path (filename); + if (device < 0) + { + errno = (device == B_ENTRY_NOT_FOUND ? ENOENT + : device == B_BAD_VALUE ? EINVAL + : device == B_NAME_TOO_LONG ? ENAMETOOLONG + : device == B_NO_MEMORY ? ENOMEM + : device == B_FILE_ERROR ? EIO + : 0); + return -1; + } + /* If successful, buf->dev will be == device. */ + return fs_stat_dev (device, buf); +} +# define f_fsid dev +# define f_blocks total_blocks +# define f_bfree free_blocks +# define f_bavail free_blocks +# define f_bsize io_size +# define f_files total_nodes +# define f_ffree free_nodes +# define STRUCT_STATVFS struct fs_info +# define STRUCT_STATXFS_F_FSID_IS_INTEGER true +# define STATFS_FRSIZE(S) ((S)->block_size) +# else +# define STRUCT_STATVFS struct statfs +# define STRUCT_STATXFS_F_FSID_IS_INTEGER STRUCT_STATFS_F_FSID_IS_INTEGER +# if HAVE_STRUCT_STATFS_F_FRSIZE +# define STATFS_FRSIZE(S) ((S)->f_frsize) +# else +# define STATFS_FRSIZE(S) 0 +# endif +# endif +#endif + +#ifdef SB_F_NAMEMAX +# define OUT_NAMEMAX out_uint +#else +/* Depending on whether statvfs or statfs is used, + neither f_namemax or f_namelen may be available. */ +# define SB_F_NAMEMAX(S) "?" +# define OUT_NAMEMAX out_string +#endif + +#if HAVE_STRUCT_STATVFS_F_BASETYPE +# define STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME f_basetype +#else +# if HAVE_STRUCT_STATVFS_F_FSTYPENAME || HAVE_STRUCT_STATFS_F_FSTYPENAME +# define STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME f_fstypename +# elif HAVE_OS_H /* BeOS */ +# define STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME fsh_name +# endif +#endif + +#if HAVE_GETATTRAT +# include +# include +#endif + +/* FIXME: these are used by printf.c, too */ +#define isodigit(c) ('0' <= (c) && (c) <= '7') +#define octtobin(c) ((c) - '0') +#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \ + (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0') + +static char const digits[] = "0123456789"; + +/* Flags that are portable for use in printf, for at least one + conversion specifier; make_format removes non-portable flags as + needed for particular specifiers. The glibc 2.2 extension "I" is + listed here; it is removed by make_format because it has undefined + behavior elsewhere and because it is incompatible with + out_epoch_sec. */ +static char const printf_flags[] = "'-+ #0I"; + +/* Formats for the --terse option. */ +static char const fmt_terse_fs[] = "%n %i %l %t %s %S %b %f %a %c %d\n"; +static char const fmt_terse_regular[] = "%n %s %b %f %u %g %D %i %h %t %T" + " %X %Y %Z %W %o\n"; +static char const fmt_terse_selinux[] = "%n %s %b %f %u %g %D %i %h %t %T" + " %X %Y %Z %W %o %C\n"; + +#define PROGRAM_NAME "stat" + +#define AUTHORS proper_name ("Michael Meskes") + +enum +{ + PRINTF_OPTION = CHAR_MAX + 1 +}; + +enum cached_mode +{ + cached_default, + cached_never, + cached_always +}; + +static char const *const cached_args[] = +{ + "default", "never", "always", nullptr +}; + +static enum cached_mode const cached_modes[] = +{ + cached_default, cached_never, cached_always +}; + +static struct option const long_options[] = +{ + {"dereference", no_argument, nullptr, 'L'}, + {"file-system", no_argument, nullptr, 'f'}, + {"format", required_argument, nullptr, 'c'}, + {"printf", required_argument, nullptr, PRINTF_OPTION}, + {"terse", no_argument, nullptr, 't'}, + {"cached", required_argument, nullptr, 0}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Whether to follow symbolic links; True for --dereference (-L). */ +static bool follow_links; + +/* Whether to interpret backslash-escape sequences. + True for --printf=FMT, not for --format=FMT (-c). */ +static bool interpret_backslash_escapes; + +/* The trailing delimiter string: + "" for --printf=FMT, "\n" for --format=FMT (-c). */ +static char const *trailing_delim = ""; + +/* The representation of the decimal point in the current locale. */ +static char const *decimal_point; +static size_t decimal_point_len; + +static bool +print_stat (char *pformat, size_t prefix_len, char mod, char m, + int fd, char const *filename, void const *data); + +/* Return the type of the specified file system. + Some systems have statfvs.f_basetype[FSTYPSZ] (AIX, HP-UX, and Solaris). + Others have statvfs.f_fstypename[_VFS_NAMELEN] (NetBSD 3.0). + Others have statfs.f_fstypename[MFSNAMELEN] (NetBSD 1.5.2). + Still others have neither and have to get by with f_type (GNU/Linux). + But f_type may only exist in statfs (Cygwin). */ +NODISCARD +static char const * +human_fstype (STRUCT_STATVFS const *statfsbuf) +{ +#ifdef STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME + return statfsbuf->STATXFS_FILE_SYSTEM_TYPE_MEMBER_NAME; +#else + switch (statfsbuf->f_type) + { +# if defined __linux__ || defined __ANDROID__ + + /* Compare with what's in libc: + f=/a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h + sed -n '/ADFS_SUPER_MAGIC/,/SYSFS_MAGIC/p' $f \ + | perl -n -e '/#define (.*?)_(?:SUPER_)MAGIC\s+0x(\S+)/' \ + -e 'and print "case S_MAGIC_$1: /\* 0x" . uc($2) . " *\/\n"' \ + | sort > sym_libc + perl -ne '/^\s+(case S_MAGIC_.*?): \/\* 0x(\S+) \*\//' \ + -e 'and do { $v=uc$2; print "$1: /\* 0x$v *\/\n"}' stat.c \ + | sort > sym_stat + diff -u sym_stat sym_libc + */ + + /* Also compare with the list in "man 2 statfs" using the + fs-magic-compare make target. */ + + /* IMPORTANT NOTE: Each of the following 'case S_MAGIC_...:' + statements must be followed by a hexadecimal constant in + a comment. The S_MAGIC_... name and constant are automatically + combined to produce the #define directives in fs.h. */ + + case S_MAGIC_AAFS: /* 0x5A3C69F0 local */ + return "aafs"; + case S_MAGIC_ACFS: /* 0x61636673 remote */ + return "acfs"; + case S_MAGIC_ADFS: /* 0xADF5 local */ + return "adfs"; + case S_MAGIC_AFFS: /* 0xADFF local */ + return "affs"; + case S_MAGIC_AFS: /* 0x5346414F remote */ + return "afs"; + case S_MAGIC_ANON_INODE_FS: /* 0x09041934 local */ + return "anon-inode FS"; + case S_MAGIC_AUFS: /* 0x61756673 remote */ + /* FIXME: change syntax or add an optional attribute like "inotify:no". + The above is labeled as "remote" so that tail always uses polling, + but this isn't really a remote file system type. */ + return "aufs"; + case S_MAGIC_AUTOFS: /* 0x0187 local */ + return "autofs"; + case S_MAGIC_BALLOON_KVM: /* 0x13661366 local */ + return "balloon-kvm-fs"; + case S_MAGIC_BEFS: /* 0x42465331 local */ + return "befs"; + case S_MAGIC_BDEVFS: /* 0x62646576 local */ + return "bdevfs"; + case S_MAGIC_BFS: /* 0x1BADFACE local */ + return "bfs"; + case S_MAGIC_BINDERFS: /* 0x6C6F6F70 local */ + return "binderfs"; + case S_MAGIC_BPF_FS: /* 0xCAFE4A11 local */ + return "bpf_fs"; + case S_MAGIC_BINFMTFS: /* 0x42494E4D local */ + return "binfmt_misc"; + case S_MAGIC_BTRFS: /* 0x9123683E local */ + return "btrfs"; + case S_MAGIC_BTRFS_TEST: /* 0x73727279 local */ + return "btrfs_test"; + case S_MAGIC_CEPH: /* 0x00C36400 remote */ + return "ceph"; + case S_MAGIC_CGROUP: /* 0x0027E0EB local */ + return "cgroupfs"; + case S_MAGIC_CGROUP2: /* 0x63677270 local */ + return "cgroup2fs"; + case S_MAGIC_CIFS: /* 0xFF534D42 remote */ + return "cifs"; + case S_MAGIC_CODA: /* 0x73757245 remote */ + return "coda"; + case S_MAGIC_COH: /* 0x012FF7B7 local */ + return "coh"; + case S_MAGIC_CONFIGFS: /* 0x62656570 local */ + return "configfs"; + case S_MAGIC_CRAMFS: /* 0x28CD3D45 local */ + return "cramfs"; + case S_MAGIC_CRAMFS_WEND: /* 0x453DCD28 local */ + return "cramfs-wend"; + case S_MAGIC_DAXFS: /* 0x64646178 local */ + return "daxfs"; + case S_MAGIC_DEBUGFS: /* 0x64626720 local */ + return "debugfs"; + case S_MAGIC_DEVFS: /* 0x1373 local */ + return "devfs"; + case S_MAGIC_DEVMEM: /* 0x454D444D local */ + return "devmem"; + case S_MAGIC_DEVPTS: /* 0x1CD1 local */ + return "devpts"; + case S_MAGIC_DMA_BUF: /* 0x444D4142 local */ + return "dma-buf-fs"; + case S_MAGIC_ECRYPTFS: /* 0xF15F local */ + return "ecryptfs"; + case S_MAGIC_EFIVARFS: /* 0xDE5E81E4 local */ + return "efivarfs"; + case S_MAGIC_EFS: /* 0x00414A53 local */ + return "efs"; + case S_MAGIC_EROFS_V1: /* 0xE0F5E1E2 local */ + return "erofs"; + case S_MAGIC_EXFAT: /* 0x2011BAB0 local */ + return "exfat"; + case S_MAGIC_EXFS: /* 0x45584653 local */ + return "exfs"; + case S_MAGIC_EXOFS: /* 0x5DF5 local */ + return "exofs"; + case S_MAGIC_EXT: /* 0x137D local */ + return "ext"; + case S_MAGIC_EXT2: /* 0xEF53 local */ + return "ext2/ext3"; + case S_MAGIC_EXT2_OLD: /* 0xEF51 local */ + return "ext2"; + case S_MAGIC_F2FS: /* 0xF2F52010 local */ + return "f2fs"; + case S_MAGIC_FAT: /* 0x4006 local */ + return "fat"; + case S_MAGIC_FHGFS: /* 0x19830326 remote */ + return "fhgfs"; + case S_MAGIC_FUSEBLK: /* 0x65735546 remote */ + return "fuseblk"; + case S_MAGIC_FUSECTL: /* 0x65735543 remote */ + return "fusectl"; + case S_MAGIC_FUTEXFS: /* 0x0BAD1DEA local */ + return "futexfs"; + case S_MAGIC_GFS: /* 0x01161970 remote */ + return "gfs/gfs2"; + case S_MAGIC_GPFS: /* 0x47504653 remote */ + return "gpfs"; + case S_MAGIC_HFS: /* 0x4244 local */ + return "hfs"; + case S_MAGIC_HFS_PLUS: /* 0x482B local */ + return "hfs+"; + case S_MAGIC_HFS_X: /* 0x4858 local */ + return "hfsx"; + case S_MAGIC_HOSTFS: /* 0x00C0FFEE local */ + return "hostfs"; + case S_MAGIC_HPFS: /* 0xF995E849 local */ + return "hpfs"; + case S_MAGIC_HUGETLBFS: /* 0x958458F6 local */ + return "hugetlbfs"; + case S_MAGIC_MTD_INODE_FS: /* 0x11307854 local */ + return "inodefs"; + case S_MAGIC_IBRIX: /* 0x013111A8 remote */ + return "ibrix"; + case S_MAGIC_INOTIFYFS: /* 0x2BAD1DEA local */ + return "inotifyfs"; + case S_MAGIC_ISOFS: /* 0x9660 local */ + return "isofs"; + case S_MAGIC_ISOFS_R_WIN: /* 0x4004 local */ + return "isofs"; + case S_MAGIC_ISOFS_WIN: /* 0x4000 local */ + return "isofs"; + case S_MAGIC_JFFS: /* 0x07C0 local */ + return "jffs"; + case S_MAGIC_JFFS2: /* 0x72B6 local */ + return "jffs2"; + case S_MAGIC_JFS: /* 0x3153464A local */ + return "jfs"; + case S_MAGIC_KAFS: /* 0x6B414653 remote */ + return "k-afs"; + case S_MAGIC_LOGFS: /* 0xC97E8168 local */ + return "logfs"; + case S_MAGIC_LUSTRE: /* 0x0BD00BD0 remote */ + return "lustre"; + case S_MAGIC_M1FS: /* 0x5346314D local */ + return "m1fs"; + case S_MAGIC_MINIX: /* 0x137F local */ + return "minix"; + case S_MAGIC_MINIX_30: /* 0x138F local */ + return "minix (30 char.)"; + case S_MAGIC_MINIX_V2: /* 0x2468 local */ + return "minix v2"; + case S_MAGIC_MINIX_V2_30: /* 0x2478 local */ + return "minix v2 (30 char.)"; + case S_MAGIC_MINIX_V3: /* 0x4D5A local */ + return "minix3"; + case S_MAGIC_MQUEUE: /* 0x19800202 local */ + return "mqueue"; + case S_MAGIC_MSDOS: /* 0x4D44 local */ + return "msdos"; + case S_MAGIC_NCP: /* 0x564C remote */ + return "novell"; + case S_MAGIC_NFS: /* 0x6969 remote */ + return "nfs"; + case S_MAGIC_NFSD: /* 0x6E667364 remote */ + return "nfsd"; + case S_MAGIC_NILFS: /* 0x3434 local */ + return "nilfs"; + case S_MAGIC_NSFS: /* 0x6E736673 local */ + return "nsfs"; + case S_MAGIC_NTFS: /* 0x5346544E local */ + return "ntfs"; + case S_MAGIC_OPENPROM: /* 0x9FA1 local */ + return "openprom"; + case S_MAGIC_OCFS2: /* 0x7461636F remote */ + return "ocfs2"; + case S_MAGIC_OVERLAYFS: /* 0x794C7630 remote */ + /* This may overlay remote file systems. + Also there have been issues reported with inotify and overlayfs, + so mark as "remote" so that polling is used. */ + return "overlayfs"; + case S_MAGIC_PANFS: /* 0xAAD7AAEA remote */ + return "panfs"; + case S_MAGIC_PIPEFS: /* 0x50495045 remote */ + /* FIXME: change syntax or add an optional attribute like "inotify:no". + pipefs and prlfs are labeled as "remote" so that tail always polls, + but these aren't really remote file system types. */ + return "pipefs"; + case S_MAGIC_PPC_CMM: /* 0xC7571590 local */ + return "ppc-cmm-fs"; + case S_MAGIC_PRL_FS: /* 0x7C7C6673 remote */ + return "prl_fs"; + case S_MAGIC_PROC: /* 0x9FA0 local */ + return "proc"; + case S_MAGIC_PSTOREFS: /* 0x6165676C local */ + return "pstorefs"; + case S_MAGIC_QNX4: /* 0x002F local */ + return "qnx4"; + case S_MAGIC_QNX6: /* 0x68191122 local */ + return "qnx6"; + case S_MAGIC_RAMFS: /* 0x858458F6 local */ + return "ramfs"; + case S_MAGIC_RDTGROUP: /* 0x07655821 local */ + return "rdt"; + case S_MAGIC_REISERFS: /* 0x52654973 local */ + return "reiserfs"; + case S_MAGIC_ROMFS: /* 0x7275 local */ + return "romfs"; + case S_MAGIC_RPC_PIPEFS: /* 0x67596969 local */ + return "rpc_pipefs"; + case S_MAGIC_SDCARDFS: /* 0x5DCA2DF5 local */ + return "sdcardfs"; + case S_MAGIC_SECRETMEM: /* 0x5345434D local */ + return "secretmem"; + case S_MAGIC_SECURITYFS: /* 0x73636673 local */ + return "securityfs"; + case S_MAGIC_SELINUX: /* 0xF97CFF8C local */ + return "selinux"; + case S_MAGIC_SMACK: /* 0x43415D53 local */ + return "smackfs"; + case S_MAGIC_SMB: /* 0x517B remote */ + return "smb"; + case S_MAGIC_SMB2: /* 0xFE534D42 remote */ + return "smb2"; + case S_MAGIC_SNFS: /* 0xBEEFDEAD remote */ + return "snfs"; + case S_MAGIC_SOCKFS: /* 0x534F434B local */ + return "sockfs"; + case S_MAGIC_SQUASHFS: /* 0x73717368 local */ + return "squashfs"; + case S_MAGIC_SYSFS: /* 0x62656572 local */ + return "sysfs"; + case S_MAGIC_SYSV2: /* 0x012FF7B6 local */ + return "sysv2"; + case S_MAGIC_SYSV4: /* 0x012FF7B5 local */ + return "sysv4"; + case S_MAGIC_TMPFS: /* 0x01021994 local */ + return "tmpfs"; + case S_MAGIC_TRACEFS: /* 0x74726163 local */ + return "tracefs"; + case S_MAGIC_UBIFS: /* 0x24051905 local */ + return "ubifs"; + case S_MAGIC_UDF: /* 0x15013346 local */ + return "udf"; + case S_MAGIC_UFS: /* 0x00011954 local */ + return "ufs"; + case S_MAGIC_UFS_BYTESWAPPED: /* 0x54190100 local */ + return "ufs"; + case S_MAGIC_USBDEVFS: /* 0x9FA2 local */ + return "usbdevfs"; + case S_MAGIC_V9FS: /* 0x01021997 local */ + return "v9fs"; + case S_MAGIC_VBOXSF: /* 0x786F4256 remote */ + return "vboxsf"; + case S_MAGIC_VMHGFS: /* 0xBACBACBC remote */ + return "vmhgfs"; + case S_MAGIC_VXFS: /* 0xA501FCF5 remote */ + /* Veritas File System can run in single instance or clustered mode, + so mark as remote to cater for the latter case. */ + return "vxfs"; + case S_MAGIC_VZFS: /* 0x565A4653 local */ + return "vzfs"; + case S_MAGIC_WSLFS: /* 0x53464846 local */ + return "wslfs"; + case S_MAGIC_XENFS: /* 0xABBA1974 local */ + return "xenfs"; + case S_MAGIC_XENIX: /* 0x012FF7B4 local */ + return "xenix"; + case S_MAGIC_XFS: /* 0x58465342 local */ + return "xfs"; + case S_MAGIC_XIAFS: /* 0x012FD16D local */ + return "xia"; + case S_MAGIC_Z3FOLD: /* 0x0033 local */ + return "z3fold"; + case S_MAGIC_ZFS: /* 0x2FC12FC1 local */ + return "zfs"; + case S_MAGIC_ZONEFS: /* 0x5A4F4653 local */ + return "zonefs"; + case S_MAGIC_ZSMALLOC: /* 0x58295829 local */ + return "zsmallocfs"; + + +# elif __GNU__ + case FSTYPE_UFS: + return "ufs"; + case FSTYPE_NFS: + return "nfs"; + case FSTYPE_GFS: + return "gfs"; + case FSTYPE_LFS: + return "lfs"; + case FSTYPE_SYSV: + return "sysv"; + case FSTYPE_FTP: + return "ftp"; + case FSTYPE_TAR: + return "tar"; + case FSTYPE_AR: + return "ar"; + case FSTYPE_CPIO: + return "cpio"; + case FSTYPE_MSLOSS: + return "msloss"; + case FSTYPE_CPM: + return "cpm"; + case FSTYPE_HFS: + return "hfs"; + case FSTYPE_DTFS: + return "dtfs"; + case FSTYPE_GRFS: + return "grfs"; + case FSTYPE_TERM: + return "term"; + case FSTYPE_DEV: + return "dev"; + case FSTYPE_PROC: + return "proc"; + case FSTYPE_IFSOCK: + return "ifsock"; + case FSTYPE_AFS: + return "afs"; + case FSTYPE_DFS: + return "dfs"; + case FSTYPE_PROC9: + return "proc9"; + case FSTYPE_SOCKET: + return "socket"; + case FSTYPE_MISC: + return "misc"; + case FSTYPE_EXT2FS: + return "ext2/ext3"; + case FSTYPE_HTTP: + return "http"; + case FSTYPE_MEMFS: + return "memfs"; + case FSTYPE_ISO9660: + return "iso9660"; +# endif + default: + { + unsigned long int type = statfsbuf->f_type; + static char buf[sizeof "UNKNOWN (0x%lx)" - 3 + + (sizeof type * CHAR_BIT + 3) / 4]; + sprintf (buf, "UNKNOWN (0x%lx)", type); + return buf; + } + } +#endif +} + +NODISCARD +static char * +human_access (struct stat const *statbuf) +{ + static char modebuf[12]; + filemodestring (statbuf, modebuf); + modebuf[10] = 0; + return modebuf; +} + +NODISCARD +static char * +human_time (struct timespec t) +{ + /* STR must be at least INT_BUFSIZE_BOUND (intmax_t) big, either + because localtime_rz fails, or because the time zone is truly + outlandish so that %z expands to a long string. */ + static char str[INT_BUFSIZE_BOUND (intmax_t) + + INT_STRLEN_BOUND (int) /* YYYY */ + + 1 /* because YYYY might equal INT_MAX + 1900 */ + + sizeof "-MM-DD HH:MM:SS.NNNNNNNNN +"]; + static timezone_t tz; + if (!tz) + tz = tzalloc (getenv ("TZ")); + struct tm tm; + int ns = t.tv_nsec; + if (localtime_rz (tz, &t.tv_sec, &tm)) + nstrftime (str, sizeof str, "%Y-%m-%d %H:%M:%S.%N %z", &tm, tz, ns); + else + { + char secbuf[INT_BUFSIZE_BOUND (intmax_t)]; + sprintf (str, "%s.%09d", timetostr (t.tv_sec, secbuf), ns); + } + return str; +} + +/* PFORMAT points to a '%' followed by a prefix of a format, all of + size PREFIX_LEN. The flags allowed for this format are + ALLOWED_FLAGS; remove other printf flags from the prefix, then + append SUFFIX. */ +static void +make_format (char *pformat, size_t prefix_len, char const *allowed_flags, + char const *suffix) +{ + char *dst = pformat + 1; + char const *src; + char const *srclim = pformat + prefix_len; + for (src = dst; src < srclim && strchr (printf_flags, *src); src++) + if (strchr (allowed_flags, *src)) + *dst++ = *src; + while (src < srclim) + *dst++ = *src++; + strcpy (dst, suffix); +} + +static void +out_string (char *pformat, size_t prefix_len, char const *arg) +{ + make_format (pformat, prefix_len, "-", "s"); + printf (pformat, arg); +} +static int +out_int (char *pformat, size_t prefix_len, intmax_t arg) +{ + make_format (pformat, prefix_len, "'-+ 0", PRIdMAX); + return printf (pformat, arg); +} +static int +out_uint (char *pformat, size_t prefix_len, uintmax_t arg) +{ + make_format (pformat, prefix_len, "'-0", PRIuMAX); + return printf (pformat, arg); +} +static void +out_uint_o (char *pformat, size_t prefix_len, uintmax_t arg) +{ + make_format (pformat, prefix_len, "-#0", PRIoMAX); + printf (pformat, arg); +} +static void +out_uint_x (char *pformat, size_t prefix_len, uintmax_t arg) +{ + make_format (pformat, prefix_len, "-#0", PRIxMAX); + printf (pformat, arg); +} +static int +out_minus_zero (char *pformat, size_t prefix_len) +{ + make_format (pformat, prefix_len, "'-+ 0", ".0f"); + return printf (pformat, -0.25); +} + +/* Output the number of seconds since the Epoch, using a format that + acts like printf's %f format. */ +static void +out_epoch_sec (char *pformat, size_t prefix_len, + struct timespec arg) +{ + char *dot = memchr (pformat, '.', prefix_len); + size_t sec_prefix_len = prefix_len; + int width = 0; + int precision = 0; + bool frac_left_adjust = false; + + if (dot) + { + sec_prefix_len = dot - pformat; + pformat[prefix_len] = '\0'; + + if (ISDIGIT (dot[1])) + { + long int lprec = strtol (dot + 1, nullptr, 10); + precision = (lprec <= INT_MAX ? lprec : INT_MAX); + } + else + { + precision = 9; + } + + if (precision && ISDIGIT (dot[-1])) + { + /* If a nontrivial width is given, subtract the width of the + decimal point and PRECISION digits that will be output + later. */ + char *p = dot; + *dot = '\0'; + + do + --p; + while (ISDIGIT (p[-1])); + + long int lwidth = strtol (p, nullptr, 10); + width = (lwidth <= INT_MAX ? lwidth : INT_MAX); + if (1 < width) + { + p += (*p == '0'); + sec_prefix_len = p - pformat; + int w_d = (decimal_point_len < width + ? width - decimal_point_len + : 0); + if (1 < w_d) + { + int w = w_d - precision; + if (1 < w) + { + char *dst = pformat; + for (char const *src = dst; src < p; src++) + { + if (*src == '-') + frac_left_adjust = true; + else + *dst++ = *src; + } + sec_prefix_len = + (dst - pformat + + (frac_left_adjust ? 0 : sprintf (dst, "%d", w))); + } + } + } + } + } + + int divisor = 1; + for (int i = precision; i < 9; i++) + divisor *= 10; + int frac_sec = arg.tv_nsec / divisor; + int int_len; + + if (TYPE_SIGNED (time_t)) + { + bool minus_zero = false; + if (arg.tv_sec < 0 && arg.tv_nsec != 0) + { + int frac_sec_modulus = 1000000000 / divisor; + frac_sec = (frac_sec_modulus - frac_sec + - (arg.tv_nsec % divisor != 0)); + arg.tv_sec += (frac_sec != 0); + minus_zero = (arg.tv_sec == 0); + } + int_len = (minus_zero + ? out_minus_zero (pformat, sec_prefix_len) + : out_int (pformat, sec_prefix_len, arg.tv_sec)); + } + else + int_len = out_uint (pformat, sec_prefix_len, arg.tv_sec); + + if (precision) + { + int prec = (precision < 9 ? precision : 9); + int trailing_prec = precision - prec; + int ilen = (int_len < 0 ? 0 : int_len); + int trailing_width = (ilen < width && decimal_point_len < width - ilen + ? width - ilen - decimal_point_len - prec + : 0); + printf ("%s%.*d%-*.*d", decimal_point, prec, frac_sec, + trailing_width, trailing_prec, 0); + } +} + +/* Print the context information of FILENAME, and return true iff the + context could not be obtained. */ +NODISCARD +static bool +out_file_context (char *pformat, size_t prefix_len, char const *filename) +{ + char *scontext; + bool fail = false; + + if ((follow_links + ? getfilecon (filename, &scontext) + : lgetfilecon (filename, &scontext)) < 0) + { + error (0, errno, _("failed to get security context of %s"), + quoteaf (filename)); + scontext = nullptr; + fail = true; + } + strcpy (pformat + prefix_len, "s"); + printf (pformat, (scontext ? scontext : "?")); + if (scontext) + freecon (scontext); + return fail; +} + +/* Print statfs info. Return zero upon success, nonzero upon failure. */ +NODISCARD +static bool +print_statfs (char *pformat, size_t prefix_len, MAYBE_UNUSED char mod, char m, + int fd, char const *filename, + void const *data) +{ + STRUCT_STATVFS const *statfsbuf = data; + bool fail = false; + + switch (m) + { + case 'n': + out_string (pformat, prefix_len, filename); + break; + + case 'i': + { +#if STRUCT_STATXFS_F_FSID_IS_INTEGER + uintmax_t fsid = statfsbuf->f_fsid; +#else + typedef unsigned int fsid_word; + static_assert (alignof (STRUCT_STATVFS) % alignof (fsid_word) == 0); + static_assert (offsetof (STRUCT_STATVFS, f_fsid) % alignof (fsid_word) + == 0); + static_assert (sizeof statfsbuf->f_fsid % alignof (fsid_word) == 0); + fsid_word const *p = (fsid_word *) &statfsbuf->f_fsid; + + /* Assume a little-endian word order, as that is compatible + with glibc's statvfs implementation. */ + uintmax_t fsid = 0; + int words = sizeof statfsbuf->f_fsid / sizeof *p; + for (int i = 0; i < words && i * sizeof *p < sizeof fsid; i++) + { + uintmax_t u = p[words - 1 - i]; + fsid |= u << (i * CHAR_BIT * sizeof *p); + } +#endif + out_uint_x (pformat, prefix_len, fsid); + } + break; + + case 'l': + OUT_NAMEMAX (pformat, prefix_len, SB_F_NAMEMAX (statfsbuf)); + break; + case 't': +#if HAVE_STRUCT_STATXFS_F_TYPE + out_uint_x (pformat, prefix_len, statfsbuf->f_type); +#else + fputc ('?', stdout); +#endif + break; + case 'T': + out_string (pformat, prefix_len, human_fstype (statfsbuf)); + break; + case 'b': + out_int (pformat, prefix_len, statfsbuf->f_blocks); + break; + case 'f': + out_int (pformat, prefix_len, statfsbuf->f_bfree); + break; + case 'a': + out_int (pformat, prefix_len, statfsbuf->f_bavail); + break; + case 's': + out_uint (pformat, prefix_len, statfsbuf->f_bsize); + break; + case 'S': + { + uintmax_t frsize = STATFS_FRSIZE (statfsbuf); + if (! frsize) + frsize = statfsbuf->f_bsize; + out_uint (pformat, prefix_len, frsize); + } + break; + case 'c': + out_uint (pformat, prefix_len, statfsbuf->f_files); + break; + case 'd': + out_int (pformat, prefix_len, statfsbuf->f_ffree); + break; + default: + fputc ('?', stdout); + break; + } + return fail; +} + +/* Return any bind mounted source for a path. + The caller should not free the returned buffer. + Return nullptr if no bind mount found. */ +NODISCARD +static char const * +find_bind_mount (char const * name) +{ + char const * bind_mount = nullptr; + + static struct mount_entry *mount_list; + static bool tried_mount_list = false; + if (!tried_mount_list) /* attempt/warn once per process. */ + { + if (!(mount_list = read_file_system_list (false))) + error (0, errno, "%s", _("cannot read table of mounted file systems")); + tried_mount_list = true; + } + + struct stat name_stats; + if (stat (name, &name_stats) != 0) + return nullptr; + + struct mount_entry *me; + for (me = mount_list; me; me = me->me_next) + { + if (me->me_dummy && me->me_devname[0] == '/' + && STREQ (me->me_mountdir, name)) + { + struct stat dev_stats; + + if (stat (me->me_devname, &dev_stats) == 0 + && SAME_INODE (name_stats, dev_stats)) + { + bind_mount = me->me_devname; + break; + } + } + } + + return bind_mount; +} + +/* Print mount point. Return zero upon success, nonzero upon failure. */ +NODISCARD +static bool +out_mount_point (char const *filename, char *pformat, size_t prefix_len, + const struct stat *statp) +{ + + char const *np = "?", *bp = nullptr; + char *mp = nullptr; + bool fail = true; + + /* Look for bind mounts first. Note we output the immediate alias, + rather than further resolving to a base device mount point. */ + if (follow_links || !S_ISLNK (statp->st_mode)) + { + char *resolved = canonicalize_file_name (filename); + if (!resolved) + { + error (0, errno, _("failed to canonicalize %s"), quoteaf (filename)); + goto print_mount_point; + } + bp = find_bind_mount (resolved); + free (resolved); + if (bp) + { + fail = false; + goto print_mount_point; + } + } + + /* If there is no direct bind mount, then navigate + back up the tree looking for a device change. + Note we don't detect if any of the directory components + are bind mounted to the same device, but that's OK + since we've not directly queried them. */ + if ((mp = find_mount_point (filename, statp))) + { + /* This dir might be bind mounted to another device, + so we resolve the bound source in that case also. */ + bp = find_bind_mount (mp); + fail = false; + } + +print_mount_point: + + out_string (pformat, prefix_len, bp ? bp : mp ? mp : np); + free (mp); + return fail; +} + +/* Map a TS with negative TS.tv_nsec to {0,0}. */ +static inline struct timespec +neg_to_zero (struct timespec ts) +{ + if (0 <= ts.tv_nsec) + return ts; + struct timespec z = {0, 0}; + return z; +} + +/* Set the quoting style default if the environment variable + QUOTING_STYLE is set. */ + +static void +getenv_quoting_style (void) +{ + char const *q_style = getenv ("QUOTING_STYLE"); + if (q_style) + { + int i = ARGMATCH (q_style, quoting_style_args, quoting_style_vals); + if (0 <= i) + set_quoting_style (nullptr, quoting_style_vals[i]); + else + { + set_quoting_style (nullptr, shell_escape_always_quoting_style); + error (0, 0, _("ignoring invalid value of environment " + "variable QUOTING_STYLE: %s"), quote (q_style)); + } + } + else + set_quoting_style (nullptr, shell_escape_always_quoting_style); +} + +/* Equivalent to quotearg(), but explicit to avoid syntax checks. */ +#define quoteN(x) quotearg_style (get_quoting_style (nullptr), x) + +/* Output a single-character \ escape. */ + +static void +print_esc_char (char c) +{ + switch (c) + { + case 'a': /* Alert. */ + c ='\a'; + break; + case 'b': /* Backspace. */ + c ='\b'; + break; + case 'e': /* Escape. */ + c ='\x1B'; + break; + case 'f': /* Form feed. */ + c ='\f'; + break; + case 'n': /* New line. */ + c ='\n'; + break; + case 'r': /* Carriage return. */ + c ='\r'; + break; + case 't': /* Horizontal tab. */ + c ='\t'; + break; + case 'v': /* Vertical tab. */ + c ='\v'; + break; + case '"': + case '\\': + break; + default: + error (0, 0, _("warning: unrecognized escape '\\%c'"), c); + break; + } + putchar (c); +} + +ATTRIBUTE_PURE +static size_t +format_code_offset (char const *directive) +{ + size_t len = strspn (directive + 1, printf_flags); + char const *fmt_char = directive + len + 1; + fmt_char += strspn (fmt_char, digits); + if (*fmt_char == '.') + fmt_char += 1 + strspn (fmt_char + 1, digits); + return fmt_char - directive; +} + +/* Print the information specified by the format string, FORMAT, + calling PRINT_FUNC for each %-directive encountered. + Return zero upon success, nonzero upon failure. */ +NODISCARD +static bool +print_it (char const *format, int fd, char const *filename, + bool (*print_func) (char *, size_t, char, char, + int, char const *, void const *), + void const *data) +{ + bool fail = false; + + /* Add 2 to accommodate our conversion of the stat '%s' format string + to the longer printf '%llu' one. */ + enum + { + MAX_ADDITIONAL_BYTES = + (MAX (sizeof PRIdMAX, + MAX (sizeof PRIoMAX, MAX (sizeof PRIuMAX, sizeof PRIxMAX))) + - 1) + }; + size_t n_alloc = strlen (format) + MAX_ADDITIONAL_BYTES + 1; + char *dest = xmalloc (n_alloc); + char const *b; + for (b = format; *b; b++) + { + switch (*b) + { + case '%': + { + size_t len = format_code_offset (b); + char fmt_char = *(b + len); + char mod_char = 0; + memcpy (dest, b, len); + b += len; + + switch (fmt_char) + { + case '\0': + --b; + FALLTHROUGH; + case '%': + if (1 < len) + { + dest[len] = fmt_char; + dest[len + 1] = '\0'; + error (EXIT_FAILURE, 0, _("%s: invalid directive"), + quote (dest)); + } + putchar ('%'); + break; + case 'H': + case 'L': + mod_char = fmt_char; + fmt_char = *(b + 1); + if (print_func == print_stat + && (fmt_char == 'd' || fmt_char == 'r')) + { + b++; + } + else + { + fmt_char = mod_char; + mod_char = 0; + } + FALLTHROUGH; + default: + fail |= print_func (dest, len, mod_char, fmt_char, + fd, filename, data); + break; + } + break; + } + + case '\\': + if ( ! interpret_backslash_escapes) + { + putchar ('\\'); + break; + } + ++b; + if (isodigit (*b)) + { + int esc_value = octtobin (*b); + int esc_length = 1; /* number of octal digits */ + for (++b; esc_length < 3 && isodigit (*b); + ++esc_length, ++b) + { + esc_value = esc_value * 8 + octtobin (*b); + } + putchar (esc_value); + --b; + } + else if (*b == 'x' && isxdigit (to_uchar (b[1]))) + { + int esc_value = hextobin (b[1]); /* Value of \xhh escape. */ + /* A hexadecimal \xhh escape sequence must have + 1 or 2 hex. digits. */ + ++b; + if (isxdigit (to_uchar (b[1]))) + { + ++b; + esc_value = esc_value * 16 + hextobin (*b); + } + putchar (esc_value); + } + else if (*b == '\0') + { + error (0, 0, _("warning: backslash at end of format")); + putchar ('\\'); + /* Arrange to exit the loop. */ + --b; + } + else + { + print_esc_char (*b); + } + break; + + default: + putchar (*b); + break; + } + } + free (dest); + + fputs (trailing_delim, stdout); + + return fail; +} + +/* Stat the file system and print what we find. */ +NODISCARD +static bool +do_statfs (char const *filename, char const *format) +{ + STRUCT_STATVFS statfsbuf; + + if (STREQ (filename, "-")) + { + error (0, 0, _("using %s to denote standard input does not work" + " in file system mode"), quoteaf (filename)); + return false; + } + + if (STATFS (filename, &statfsbuf) != 0) + { + error (0, errno, _("cannot read file system information for %s"), + quoteaf (filename)); + return false; + } + + bool fail = print_it (format, -1, filename, print_statfs, &statfsbuf); + return ! fail; +} + +struct print_args { + struct stat *st; + struct timespec btime; +}; + +/* Ask statx to avoid syncing? */ +static bool dont_sync; + +/* Ask statx to force sync? */ +static bool force_sync; + +#if USE_STATX +static unsigned int +fmt_to_mask (char fmt) +{ + switch (fmt) + { + case 'N': + return STATX_MODE; + case 'd': + case 'D': + return STATX_MODE; + case 'i': + return STATX_INO; + case 'a': + case 'A': + return STATX_MODE; + case 'f': + return STATX_MODE|STATX_TYPE; + case 'F': + return STATX_TYPE; + case 'h': + return STATX_NLINK; + case 'u': + case 'U': + return STATX_UID; + case 'g': + case 'G': + return STATX_GID; + case 'm': + return STATX_MODE|STATX_INO; + case 's': + return STATX_SIZE; + case 't': + case 'T': + return STATX_MODE; + case 'b': + return STATX_BLOCKS; + case 'w': + case 'W': + return STATX_BTIME; + case 'x': + case 'X': + return STATX_ATIME; + case 'y': + case 'Y': + return STATX_MTIME; + case 'z': + case 'Z': + return STATX_CTIME; + } + return 0; +} + +ATTRIBUTE_PURE +static unsigned int +format_to_mask (char const *format) +{ + unsigned int mask = 0; + char const *b; + + for (b = format; *b; b++) + { + if (*b != '%') + continue; + + b += format_code_offset (b); + if (*b == '\0') + break; + mask |= fmt_to_mask (*b); + } + return mask; +} + +/* statx the file and print what we find */ +NODISCARD +static bool +do_stat (char const *filename, char const *format, char const *format2) +{ + int fd = STREQ (filename, "-") ? 0 : AT_FDCWD; + int flags = 0; + struct stat st; + struct statx stx = { 0, }; + char const *pathname = filename; + struct print_args pa; + pa.st = &st; + pa.btime = (struct timespec) {-1, -1}; + + if (AT_FDCWD != fd) + { + pathname = ""; + flags = AT_EMPTY_PATH; + } + else if (!follow_links) + { + flags = AT_SYMLINK_NOFOLLOW; + } + + if (dont_sync) + flags |= AT_STATX_DONT_SYNC; + else if (force_sync) + flags |= AT_STATX_FORCE_SYNC; + + if (! force_sync) + flags |= AT_NO_AUTOMOUNT; + + fd = statx (fd, pathname, flags, format_to_mask (format), &stx); + if (fd < 0) + { + if (flags & AT_EMPTY_PATH) + error (0, errno, _("cannot stat standard input")); + else + error (0, errno, _("cannot statx %s"), quoteaf (filename)); + return false; + } + + if (S_ISBLK (stx.stx_mode) || S_ISCHR (stx.stx_mode)) + format = format2; + + statx_to_stat (&stx, &st); + if (stx.stx_mask & STATX_BTIME) + pa.btime = statx_timestamp_to_timespec (stx.stx_btime); + + bool fail = print_it (format, fd, filename, print_stat, &pa); + return ! fail; +} + +#else /* USE_STATX */ + +static struct timespec +get_birthtime (int fd, char const *filename, struct stat const *st) +{ + struct timespec ts = get_stat_birthtime (st); + +# if HAVE_GETATTRAT + if (ts.tv_nsec < 0) + { + nvlist_t *response; + if ((fd < 0 + ? getattrat (AT_FDCWD, XATTR_VIEW_READWRITE, filename, &response) + : fgetattr (fd, XATTR_VIEW_READWRITE, &response)) + == 0) + { + uint64_t *val; + uint_t n; + if (nvlist_lookup_uint64_array (response, A_CRTIME, &val, &n) == 0 + && 2 <= n + && val[0] <= TYPE_MAXIMUM (time_t) + && val[1] < 1000000000 * 2 /* for leap seconds */) + { + ts.tv_sec = val[0]; + ts.tv_nsec = val[1]; + } + nvlist_free (response); + } + } +# endif + + return ts; +} + + +/* stat the file and print what we find */ +NODISCARD +static bool +do_stat (char const *filename, char const *format, + char const *format2) +{ + int fd = STREQ (filename, "-") ? 0 : -1; + struct stat statbuf; + struct print_args pa; + pa.st = &statbuf; + pa.btime = (struct timespec) {-1, -1}; + + if (0 <= fd) + { + if (fstat (fd, &statbuf) != 0) + { + error (0, errno, _("cannot stat standard input")); + return false; + } + } + /* We can't use the shorter + (follow_links?stat:lstat) (filename, &statbug) + since stat might be a function-like macro. */ + else if ((follow_links + ? stat (filename, &statbuf) + : lstat (filename, &statbuf)) != 0) + { + error (0, errno, _("cannot stat %s"), quoteaf (filename)); + return false; + } + + if (S_ISBLK (statbuf.st_mode) || S_ISCHR (statbuf.st_mode)) + format = format2; + + bool fail = print_it (format, fd, filename, print_stat, &pa); + return ! fail; +} +#endif /* USE_STATX */ + +/* POSIX requires 'ls' to print file sizes without a sign, even + when negative. Be consistent with that. */ + +static uintmax_t +unsigned_file_size (off_t size) +{ + return size + (size < 0) * ((uintmax_t) OFF_T_MAX - OFF_T_MIN + 1); +} + +/* Print stat info. Return zero upon success, nonzero upon failure. */ +static bool +print_stat (char *pformat, size_t prefix_len, char mod, char m, + int fd, char const *filename, void const *data) +{ + struct print_args *parg = (struct print_args *) data; + struct stat *statbuf = parg->st; + struct timespec btime = parg->btime; + struct passwd *pw_ent; + struct group *gw_ent; + bool fail = false; + + switch (m) + { + case 'n': + out_string (pformat, prefix_len, filename); + break; + case 'N': + out_string (pformat, prefix_len, quoteN (filename)); + if (S_ISLNK (statbuf->st_mode)) + { + char *linkname = areadlink_with_size (filename, statbuf->st_size); + if (linkname == nullptr) + { + error (0, errno, _("cannot read symbolic link %s"), + quoteaf (filename)); + return true; + } + printf (" -> "); + out_string (pformat, prefix_len, quoteN (linkname)); + free (linkname); + } + break; + case 'd': + if (mod == 'H') + out_uint (pformat, prefix_len, major (statbuf->st_dev)); + else if (mod == 'L') + out_uint (pformat, prefix_len, minor (statbuf->st_dev)); + else + out_uint (pformat, prefix_len, statbuf->st_dev); + break; + case 'D': + out_uint_x (pformat, prefix_len, statbuf->st_dev); + break; + case 'i': + out_uint (pformat, prefix_len, statbuf->st_ino); + break; + case 'a': + out_uint_o (pformat, prefix_len, statbuf->st_mode & CHMOD_MODE_BITS); + break; + case 'A': + out_string (pformat, prefix_len, human_access (statbuf)); + break; + case 'f': + out_uint_x (pformat, prefix_len, statbuf->st_mode); + break; + case 'F': + out_string (pformat, prefix_len, file_type (statbuf)); + break; + case 'h': + out_uint (pformat, prefix_len, statbuf->st_nlink); + break; + case 'u': + out_uint (pformat, prefix_len, statbuf->st_uid); + break; + case 'U': + pw_ent = getpwuid (statbuf->st_uid); + out_string (pformat, prefix_len, + pw_ent ? pw_ent->pw_name : "UNKNOWN"); + break; + case 'g': + out_uint (pformat, prefix_len, statbuf->st_gid); + break; + case 'G': + gw_ent = getgrgid (statbuf->st_gid); + out_string (pformat, prefix_len, + gw_ent ? gw_ent->gr_name : "UNKNOWN"); + break; + case 'm': + fail |= out_mount_point (filename, pformat, prefix_len, statbuf); + break; + case 's': + out_uint (pformat, prefix_len, unsigned_file_size (statbuf->st_size)); + break; + case 'r': + if (mod == 'H') + out_uint (pformat, prefix_len, major (statbuf->st_rdev)); + else if (mod == 'L') + out_uint (pformat, prefix_len, minor (statbuf->st_rdev)); + else + out_uint (pformat, prefix_len, statbuf->st_rdev); + break; + case 'R': + out_uint_x (pformat, prefix_len, statbuf->st_rdev); + break; + case 't': + out_uint_x (pformat, prefix_len, major (statbuf->st_rdev)); + break; + case 'T': + out_uint_x (pformat, prefix_len, minor (statbuf->st_rdev)); + break; + case 'B': + out_uint (pformat, prefix_len, ST_NBLOCKSIZE); + break; + case 'b': + out_uint (pformat, prefix_len, ST_NBLOCKS (*statbuf)); + break; + case 'o': + out_uint (pformat, prefix_len, ST_BLKSIZE (*statbuf)); + break; + case 'w': + { +#if ! USE_STATX + btime = get_birthtime (fd, filename, statbuf); +#endif + if (btime.tv_nsec < 0) + out_string (pformat, prefix_len, "-"); + else + out_string (pformat, prefix_len, human_time (btime)); + } + break; + case 'W': + { +#if ! USE_STATX + btime = get_birthtime (fd, filename, statbuf); +#endif + out_epoch_sec (pformat, prefix_len, neg_to_zero (btime)); + } + break; + case 'x': + out_string (pformat, prefix_len, human_time (get_stat_atime (statbuf))); + break; + case 'X': + out_epoch_sec (pformat, prefix_len, get_stat_atime (statbuf)); + break; + case 'y': + out_string (pformat, prefix_len, human_time (get_stat_mtime (statbuf))); + break; + case 'Y': + out_epoch_sec (pformat, prefix_len, get_stat_mtime (statbuf)); + break; + case 'z': + out_string (pformat, prefix_len, human_time (get_stat_ctime (statbuf))); + break; + case 'Z': + out_epoch_sec (pformat, prefix_len, get_stat_ctime (statbuf)); + break; + case 'C': + fail |= out_file_context (pformat, prefix_len, filename); + break; + default: + fputc ('?', stdout); + break; + } + return fail; +} + +/* Return an allocated format string in static storage that + corresponds to whether FS and TERSE options were declared. */ +static char * +default_format (bool fs, bool terse, bool device) +{ + char *format; + if (fs) + { + if (terse) + format = xstrdup (fmt_terse_fs); + else + { + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' with --file-system, and NOT from printf. */ + format = xstrdup (_(" File: \"%n\"\n" + " ID: %-8i Namelen: %-7l Type: %T\n" + "Block size: %-10s Fundamental block size: %S\n" + "Blocks: Total: %-10b Free: %-10f Available: %a\n" + "Inodes: Total: %-10c Free: %d\n")); + } + } + else /* ! fs */ + { + if (terse) + { + if (0 < is_selinux_enabled ()) + format = xstrdup (fmt_terse_selinux); + else + format = xstrdup (fmt_terse_regular); + } + else + { + char *temp; + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' without --file-system, and NOT from printf. */ + format = xstrdup (_("\ + File: %N\n\ + Size: %-10s\tBlocks: %-10b IO Block: %-6o %F\n\ +")); + + temp = format; + if (device) + { + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' without --file-system, and NOT from printf. */ + format = xasprintf ("%s%s", format, _("\ +" "Device: %Hd,%Ld\tInode: %-10i Links: %-5h Device type: %Hr,%Lr\n\ +")); + } + else + { + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' without --file-system, and NOT from printf. */ + format = xasprintf ("%s%s", format, _("\ +" "Device: %Hd,%Ld\tInode: %-10i Links: %h\n\ +")); + } + free (temp); + + temp = format; + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' without --file-system, and NOT from printf. */ + format = xasprintf ("%s%s", format, _("\ +" "Access: (%04a/%10.10A) Uid: (%5u/%8U) Gid: (%5g/%8G)\n\ +")); + free (temp); + + if (0 < is_selinux_enabled ()) + { + temp = format; + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' without --file-system, and NOT from printf. */ + format = xasprintf ("%s%s", format, _("Context: %C\n")); + free (temp); + } + + temp = format; + /* TRANSLATORS: This string uses format specifiers from + 'stat --help' without --file-system, and NOT from printf. */ + format = xasprintf ("%s%s", format, + _("Access: %x\n" + "Modify: %y\n" + "Change: %z\n" + " Birth: %w\n")); + free (temp); + } + } + return format; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... FILE...\n"), program_name); + fputs (_("\ +Display file or file system status.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -L, --dereference follow links\n\ + -f, --file-system display file system status instead of file status\n\ +"), stdout); + fputs (_("\ + --cached=MODE specify how to use cached attributes;\n\ + useful on remote file systems. See MODE below\n\ +"), stdout); + fputs (_("\ + -c --format=FORMAT use the specified FORMAT instead of the default;\n\ + output a newline after each use of FORMAT\n\ + --printf=FORMAT like --format, but interpret backslash escapes,\n\ + and do not output a mandatory trailing newline;\n\ + if you want a newline, include \\n in FORMAT\n\ + -t, --terse print the information in terse form\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + + fputs (_("\n\ +The MODE argument of --cached can be: always, never, or default.\n\ +'always' will use cached attributes if available, while\n\ +'never' will try to synchronize with the latest attributes, and\n\ +'default' will leave it up to the underlying file system.\n\ +"), stdout); + + fputs (_("\n\ +The valid format sequences for files (without --file-system):\n\ +\n\ + %a permission bits in octal (note '#' and '0' printf flags)\n\ + %A permission bits and file type in human readable form\n\ + %b number of blocks allocated (see %B)\n\ + %B the size in bytes of each block reported by %b\n\ + %C SELinux security context string\n\ +"), stdout); + fputs (_("\ + %d device number in decimal (st_dev)\n\ + %D device number in hex (st_dev)\n\ + %Hd major device number in decimal\n\ + %Ld minor device number in decimal\n\ + %f raw mode in hex\n\ + %F file type\n\ + %g group ID of owner\n\ + %G group name of owner\n\ +"), stdout); + fputs (_("\ + %h number of hard links\n\ + %i inode number\n\ + %m mount point\n\ + %n file name\n\ + %N quoted file name with dereference if symbolic link\n\ + %o optimal I/O transfer size hint\n\ + %s total size, in bytes\n\ + %r device type in decimal (st_rdev)\n\ + %R device type in hex (st_rdev)\n\ + %Hr major device type in decimal, for character/block device special files\n\ + %Lr minor device type in decimal, for character/block device special files\n\ + %t major device type in hex, for character/block device special files\n\ + %T minor device type in hex, for character/block device special files\n\ +"), stdout); + fputs (_("\ + %u user ID of owner\n\ + %U user name of owner\n\ + %w time of file birth, human-readable; - if unknown\n\ + %W time of file birth, seconds since Epoch; 0 if unknown\n\ + %x time of last access, human-readable\n\ + %X time of last access, seconds since Epoch\n\ + %y time of last data modification, human-readable\n\ + %Y time of last data modification, seconds since Epoch\n\ + %z time of last status change, human-readable\n\ + %Z time of last status change, seconds since Epoch\n\ +\n\ +"), stdout); + + fputs (_("\ +Valid format sequences for file systems:\n\ +\n\ + %a free blocks available to non-superuser\n\ + %b total data blocks in file system\n\ + %c total file nodes in file system\n\ + %d free file nodes in file system\n\ + %f free blocks in file system\n\ +"), stdout); + fputs (_("\ + %i file system ID in hex\n\ + %l maximum length of filenames\n\ + %n file name\n\ + %s block size (for faster transfers)\n\ + %S fundamental block size (for block counts)\n\ + %t file system type in hex\n\ + %T file system type in human readable form\n\ +"), stdout); + + printf (_("\n\ +--terse is equivalent to the following FORMAT:\n\ + %s\ +"), +#if HAVE_SELINUX_SELINUX_H + fmt_terse_selinux +#else + fmt_terse_regular +#endif + ); + + printf (_("\ +--terse --file-system is equivalent to the following FORMAT:\n\ + %s\ +"), fmt_terse_fs); + + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char *argv[]) +{ + int c; + bool fs = false; + bool terse = false; + char *format = nullptr; + char *format2; + bool ok = true; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + struct lconv const *locale = localeconv (); + decimal_point = (locale->decimal_point[0] ? locale->decimal_point : "."); + decimal_point_len = strlen (decimal_point); + + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "c:fLt", long_options, nullptr)) != -1) + { + switch (c) + { + case PRINTF_OPTION: + format = optarg; + interpret_backslash_escapes = true; + trailing_delim = ""; + break; + + case 'c': + format = optarg; + interpret_backslash_escapes = false; + trailing_delim = "\n"; + break; + + case 'L': + follow_links = true; + break; + + case 'f': + fs = true; + break; + + case 't': + terse = true; + break; + + case 0: + switch (XARGMATCH ("--cached", optarg, cached_args, cached_modes)) + { + case cached_never: + force_sync = true; + dont_sync = false; + break; + case cached_always: + force_sync = false; + dont_sync = true; + break; + case cached_default: + force_sync = false; + dont_sync = false; + } + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (argc == optind) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (format) + { + if (strstr (format, "%N")) + getenv_quoting_style (); + format2 = format; + } + else + { + format = default_format (fs, terse, /* device= */ false); + format2 = default_format (fs, terse, /* device= */ true); + } + + for (int i = optind; i < argc; i++) + ok &= (fs + ? do_statfs (argv[i], format) + : do_stat (argv[i], format, format2)); + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/statx.h b/src/statx.h new file mode 100644 index 0000000..dd8cb32 --- /dev/null +++ b/src/statx.h @@ -0,0 +1,52 @@ +/* statx -> stat conversion functions for coreutils + Copyright (C) 2019-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef COREUTILS_STATX_H +# define COREUTILS_STATX_H + +# if HAVE_STATX && defined STATX_INO +/* Much of the format printing requires a struct stat or timespec */ +static inline struct timespec +statx_timestamp_to_timespec (struct statx_timestamp tsx) +{ + struct timespec ts; + + ts.tv_sec = tsx.tv_sec; + ts.tv_nsec = tsx.tv_nsec; + return ts; +} + +static inline void +statx_to_stat (struct statx *stx, struct stat *stat) +{ + stat->st_dev = makedev (stx->stx_dev_major, stx->stx_dev_minor); + stat->st_ino = stx->stx_ino; + stat->st_mode = stx->stx_mode; + stat->st_nlink = stx->stx_nlink; + stat->st_uid = stx->stx_uid; + stat->st_gid = stx->stx_gid; + stat->st_rdev = makedev (stx->stx_rdev_major, stx->stx_rdev_minor); + stat->st_size = stx->stx_size; + stat->st_blksize = stx->stx_blksize; +/* define to avoid sc_prohibit_stat_st_blocks. */ +# define SC_ST_BLOCKS st_blocks + stat->SC_ST_BLOCKS = stx->stx_blocks; + stat->st_atim = statx_timestamp_to_timespec (stx->stx_atime); + stat->st_mtim = statx_timestamp_to_timespec (stx->stx_mtime); + stat->st_ctim = statx_timestamp_to_timespec (stx->stx_ctime); +} +# endif /* HAVE_STATX && defined STATX_INO */ +#endif /* COREUTILS_STATX_H */ diff --git a/src/stdbuf.c b/src/stdbuf.c new file mode 100644 index 0000000..2a58c36 --- /dev/null +++ b/src/stdbuf.c @@ -0,0 +1,391 @@ +/* stdbuf -- setup the standard streams for a command + Copyright (C) 2009-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady. */ + +#include +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "filenamecat.h" +#include "quote.h" +#include "xreadlink.h" +#include "xstrtol.h" +#include "c-ctype.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "stdbuf" +#define LIB_NAME "libstdbuf.so" /* FIXME: don't hardcode */ + +#define AUTHORS proper_name_lite ("Padraig Brady", "P\303\241draig Brady") + +static char *program_path; + +static struct +{ + size_t size; + int optc; + char *optarg; +} stdbuf[3]; + +static struct option const longopts[] = +{ + {"input", required_argument, nullptr, 'i'}, + {"output", required_argument, nullptr, 'o'}, + {"error", required_argument, nullptr, 'e'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Set size to the value of STR, interpreted as a decimal integer, + optionally multiplied by various values. + Return -1 on error, 0 on success. + + This supports dd BLOCK size suffixes. + Note we don't support dd's b=512, c=1, w=2 or 21x512MiB formats. */ +static int +parse_size (char const *str, size_t *size) +{ + uintmax_t tmp_size; + enum strtol_error e = xstrtoumax (str, nullptr, 10, + &tmp_size, "EGkKMPQRTYZ0"); + if (e == LONGINT_OK && SIZE_MAX < tmp_size) + e = LONGINT_OVERFLOW; + + if (e == LONGINT_OK) + { + errno = 0; + *size = tmp_size; + return 0; + } + + errno = (e == LONGINT_OVERFLOW ? EOVERFLOW : errno); + return -1; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s OPTION... COMMAND\n"), program_name); + fputs (_("\ +Run COMMAND, with modified buffering operations for its standard streams.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -i, --input=MODE adjust standard input stream buffering\n\ + -o, --output=MODE adjust standard output stream buffering\n\ + -e, --error=MODE adjust standard error stream buffering\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\n\ +If MODE is 'L' the corresponding stream will be line buffered.\n\ +This option is invalid with standard input.\n"), stdout); + fputs (_("\n\ +If MODE is '0' the corresponding stream will be unbuffered.\n\ +"), stdout); + fputs (_("\n\ +Otherwise MODE is a number which may be followed by one of the following:\n\ +KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G,T,P,E,Z,Y,R,Q.\n\ +Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\ +In this case the corresponding stream will be fully buffered with the buffer\n\ +size set to MODE bytes.\n\ +"), stdout); + fputs (_("\n\ +NOTE: If COMMAND adjusts the buffering of its standard streams ('tee' does\n\ +for example) then that will override corresponding changes by 'stdbuf'.\n\ +Also some filters (like 'dd' and 'cat' etc.) don't use streams for I/O,\n\ +and are thus unaffected by 'stdbuf' settings.\n\ +"), stdout); + emit_exec_status (PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* argv[0] can be anything really, but generally it contains + the path to the executable or just a name if it was executed + using $PATH. In the latter case to get the path we can: + search getenv("PATH"), readlink("/prof/self/exe"), getenv("_"), + dladdr(), pstat_getpathname(), etc. */ + +static void +set_program_path (char const *arg) +{ + if (strchr (arg, '/')) /* Use absolute or relative paths directly. */ + { + program_path = dir_name (arg); + } + else + { + char *path = xreadlink ("/proc/self/exe"); + if (path) + program_path = dir_name (path); + else if ((path = getenv ("PATH"))) + { + char *dir; + path = xstrdup (path); + for (dir = strtok (path, ":"); dir != nullptr; + dir = strtok (nullptr, ":")) + { + char *candidate = file_name_concat (dir, arg, nullptr); + if (access (candidate, X_OK) == 0) + { + program_path = dir_name (candidate); + free (candidate); + break; + } + free (candidate); + } + } + free (path); + } +} + +static int +optc_to_fileno (int c) +{ + int ret = -1; + + switch (c) + { + case 'e': + ret = STDERR_FILENO; + break; + case 'i': + ret = STDIN_FILENO; + break; + case 'o': + ret = STDOUT_FILENO; + break; + } + + return ret; +} + +static void +set_LD_PRELOAD (void) +{ + int ret; +#ifdef __APPLE__ + char const *preload_env = "DYLD_INSERT_LIBRARIES"; +#else + char const *preload_env = "LD_PRELOAD"; +#endif + char *old_libs = getenv (preload_env); + char *LD_PRELOAD; + + /* Note this would auto add the appropriate search path for "libstdbuf.so": + gcc stdbuf.c -Wl,-rpath,'$ORIGIN' -Wl,-rpath,$PKGLIBEXECDIR + However we want the lookup done for the exec'd command not stdbuf. + + Since we don't link against libstdbuf.so add it to PKGLIBEXECDIR + rather than to LIBDIR. + + Note we could add "" as the penultimate item in the following list + to enable searching for libstdbuf.so in the default system lib paths. + However that would not indicate an error if libstdbuf.so was not found. + Also while this could support auto selecting the right arch in a multilib + environment, what we really want is to auto select based on the arch of the + command being run, rather than that of stdbuf itself. This is currently + not supported due to the unusual need for controlling the stdio buffering + of programs that are a different architecture to the default on the + system (and that of stdbuf itself). */ + char const *const search_path[] = { + program_path, + PKGLIBEXECDIR, + nullptr + }; + + char const *const *path = search_path; + char *libstdbuf; + + while (true) + { + struct stat sb; + + if (!**path) /* system default */ + { + libstdbuf = xstrdup (LIB_NAME); + break; + } + ret = asprintf (&libstdbuf, "%s/%s", *path, LIB_NAME); + if (ret < 0) + xalloc_die (); + if (stat (libstdbuf, &sb) == 0) /* file_exists */ + break; + free (libstdbuf); + + ++path; + if ( ! *path) + error (EXIT_CANCELED, 0, _("failed to find %s"), quote (LIB_NAME)); + } + + /* FIXME: Do we need to support libstdbuf.dll, c:, '\' separators etc? */ + + if (old_libs) + ret = asprintf (&LD_PRELOAD, "%s=%s:%s", preload_env, old_libs, libstdbuf); + else + ret = asprintf (&LD_PRELOAD, "%s=%s", preload_env, libstdbuf); + + if (ret < 0) + xalloc_die (); + + free (libstdbuf); + + ret = putenv (LD_PRELOAD); +#ifdef __APPLE__ + if (ret == 0) + ret = setenv ("DYLD_FORCE_FLAT_NAMESPACE", "y", 1); +#endif + + if (ret != 0) + error (EXIT_CANCELED, errno, + _("failed to update the environment with %s"), + quote (LD_PRELOAD)); +} + +/* Populate environ with _STDBUF_I=$MODE _STDBUF_O=$MODE _STDBUF_E=$MODE. + Return TRUE if any environment variables set. */ + +static bool +set_libstdbuf_options (void) +{ + bool env_set = false; + + for (size_t i = 0; i < ARRAY_CARDINALITY (stdbuf); i++) + { + if (stdbuf[i].optarg) + { + char *var; + int ret; + + if (*stdbuf[i].optarg == 'L') + ret = asprintf (&var, "%s%c=L", "_STDBUF_", + toupper (stdbuf[i].optc)); + else + ret = asprintf (&var, "%s%c=%" PRIuMAX, "_STDBUF_", + toupper (stdbuf[i].optc), + (uintmax_t) stdbuf[i].size); + if (ret < 0) + xalloc_die (); + + if (putenv (var) != 0) + error (EXIT_CANCELED, errno, + _("failed to update the environment with %s"), + quote (var)); + + env_set = true; + } + } + + return env_set; +} + +int +main (int argc, char **argv) +{ + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXIT_CANCELED); + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "+i:o:e:", longopts, nullptr)) != -1) + { + int opt_fileno; + + switch (c) + { + /* Old McDonald had a farm ei... */ + case 'e': + case 'i': + case 'o': + opt_fileno = optc_to_fileno (c); + affirm (0 <= opt_fileno && opt_fileno < ARRAY_CARDINALITY (stdbuf)); + stdbuf[opt_fileno].optc = c; + while (c_isspace (*optarg)) + optarg++; + stdbuf[opt_fileno].optarg = optarg; + if (c == 'i' && *optarg == 'L') + { + /* -oL will be by far the most common use of this utility, + but one could easily think -iL might have the same affect, + so disallow it as it could be confusing. */ + error (0, 0, _("line buffering stdin is meaningless")); + usage (EXIT_CANCELED); + } + + if (!STREQ (optarg, "L") + && parse_size (optarg, &stdbuf[opt_fileno].size) == -1) + error (EXIT_CANCELED, errno, _("invalid mode %s"), quote (optarg)); + + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_CANCELED); + } + } + + argv += optind; + argc -= optind; + + /* must specify at least 1 command. */ + if (argc < 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_CANCELED); + } + + if (! set_libstdbuf_options ()) + { + error (0, 0, _("you must specify a buffering mode option")); + usage (EXIT_CANCELED); + } + + /* Try to preload libstdbuf first from the same path as + stdbuf is running from. */ + set_program_path (program_name); + if (!program_path) + program_path = xstrdup (PKGLIBDIR); /* Need to init to non-null. */ + set_LD_PRELOAD (); + free (program_path); + + execvp (*argv, argv); + + int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; + error (0, errno, _("failed to run command %s"), quote (argv[0])); + return exit_status; +} diff --git a/src/stty.c b/src/stty.c new file mode 100644 index 0000000..a95a735 --- /dev/null +++ b/src/stty.c @@ -0,0 +1,2366 @@ +/* stty -- change and print terminal line settings + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Usage: stty [-ag] [--all] [--save] [-F device] [--file=device] [setting...] + + Options: + -a, --all Write all current settings to stdout in human-readable form. + -g, --save Write all current settings to stdout in stty-readable form. + -F, --file Open and use the specified device instead of stdin + + If no args are given, write to stdout the baud rate and settings that + have been changed from their defaults. Mode reading and changes + are done on the specified device, or stdin if none was specified. + + David MacKenzie */ + +#include + +#ifdef TERMIOS_NEEDS_XOPEN_SOURCE +# define _XOPEN_SOURCE +#endif + +#include +#include + +#include +#if HAVE_STROPTS_H +# include +#endif +#include + +#ifdef WINSIZE_IN_PTEM +# include +# include +#endif +#ifdef GWINSZ_IN_SYS_PTY +# include +# include +#endif +#include +#include + +#include "system.h" +#include "assure.h" +#include "fd-reopen.h" +#include "quote.h" +#include "xdectoint.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "stty" + +#define AUTHORS proper_name ("David MacKenzie") + +#ifndef _POSIX_VDISABLE +# define _POSIX_VDISABLE 0 +#endif + +#define Control(c) ((c) & 0x1f) +/* Canonical values for control characters. */ +#ifndef CINTR +# define CINTR Control ('c') +#endif +#ifndef CQUIT +# define CQUIT 28 +#endif +#ifndef CERASE +# define CERASE 127 +#endif +#ifndef CKILL +# define CKILL Control ('u') +#endif +#ifndef CEOF +# define CEOF Control ('d') +#endif +#ifndef CEOL +# define CEOL _POSIX_VDISABLE +#endif +#ifndef CSTART +# define CSTART Control ('q') +#endif +#ifndef CSTOP +# define CSTOP Control ('s') +#endif +#ifndef CSUSP +# define CSUSP Control ('z') +#endif +#if defined VEOL2 && !defined CEOL2 +# define CEOL2 _POSIX_VDISABLE +#endif +/* Some platforms have VSWTC, others VSWTCH. In both cases, this control + character is initialized by CSWTCH, if present. */ +#if defined VSWTC && !defined VSWTCH +# define VSWTCH VSWTC +#endif +/* ISC renamed swtch to susp for termios, but we'll accept either name. */ +#if defined VSUSP && !defined VSWTCH +# define VSWTCH VSUSP +# if defined CSUSP && !defined CSWTCH +# define CSWTCH CSUSP +# endif +#endif +#if defined VSWTCH && !defined CSWTCH +# define CSWTCH _POSIX_VDISABLE +#endif + +/* SunOS >= 5.3 loses (^Z doesn't work) if 'swtch' is the same as 'susp'. + So the default is to disable 'swtch.' */ +#if defined __sun +# undef CSWTCH +# define CSWTCH _POSIX_VDISABLE +#endif + +#if defined VWERSE && !defined VWERASE /* AIX-3.2.5 */ +# define VWERASE VWERSE +#endif +#if defined VDSUSP && !defined CDSUSP +# define CDSUSP Control ('y') +#endif +#if !defined VREPRINT && defined VRPRNT /* Irix 4.0.5 */ +# define VREPRINT VRPRNT +#endif +#if defined VREPRINT && !defined CRPRNT +# define CRPRNT Control ('r') +#endif +#if defined CREPRINT && !defined CRPRNT +# define CRPRNT Control ('r') +#endif +#if defined VWERASE && !defined CWERASE +# define CWERASE Control ('w') +#endif +#if defined VLNEXT && !defined CLNEXT +# define CLNEXT Control ('v') +#endif +#if defined VDISCARD && !defined VFLUSHO +# define VFLUSHO VDISCARD +#endif +#if defined VFLUSH && !defined VFLUSHO /* Ultrix 4.2 */ +# define VFLUSHO VFLUSH +#endif +#if defined CTLECH && !defined ECHOCTL /* Ultrix 4.3 */ +# define ECHOCTL CTLECH +#endif +#if defined TCTLECH && !defined ECHOCTL /* Ultrix 4.2 */ +# define ECHOCTL TCTLECH +#endif +#if defined CRTKIL && !defined ECHOKE /* Ultrix 4.2 and 4.3 */ +# define ECHOKE CRTKIL +#endif +#if defined VFLUSHO && !defined CFLUSHO +# define CFLUSHO Control ('o') +#endif +#if defined VSTATUS && !defined CSTATUS +# define CSTATUS Control ('t') +#endif + +/* Which speeds to set. */ +enum speed_setting + { + input_speed, output_speed, both_speeds + }; + +/* What to output and how. */ +enum output_type + { + changed, all, recoverable /* Default, -a, -g. */ + }; + +/* Which member(s) of 'struct termios' a mode uses. */ +enum mode_type + { + control, input, output, local, combination + }; + +/* Flags for 'struct mode_info'. */ +#define SANE_SET 1 /* Set in 'sane' mode. */ +#define SANE_UNSET 2 /* Unset in 'sane' mode. */ +#define REV 4 /* Can be turned off by prepending '-'. */ +#define OMIT 8 /* Don't display value. */ +#define NO_SETATTR 16 /* tcsetattr not used to set mode bits. */ + +/* Each mode. */ +struct mode_info + { + char const *name; /* Name given on command line. */ + enum mode_type type; /* Which structure element to change. */ + char flags; /* Setting and display options. */ + unsigned long bits; /* Bits to set for this mode. */ + unsigned long mask; /* Other bits to turn off for this mode. */ + }; + +static struct mode_info const mode_info[] = +{ + {"parenb", control, REV, PARENB, 0}, + {"parodd", control, REV, PARODD, 0}, +#ifdef CMSPAR + {"cmspar", control, REV, CMSPAR, 0}, +#endif + {"cs5", control, 0, CS5, CSIZE}, + {"cs6", control, 0, CS6, CSIZE}, + {"cs7", control, 0, CS7, CSIZE}, + {"cs8", control, 0, CS8, CSIZE}, + {"hupcl", control, REV, HUPCL, 0}, + {"hup", control, REV | OMIT, HUPCL, 0}, + {"cstopb", control, REV, CSTOPB, 0}, + {"cread", control, SANE_SET | REV, CREAD, 0}, + {"clocal", control, REV, CLOCAL, 0}, +#ifdef CRTSCTS + {"crtscts", control, REV, CRTSCTS, 0}, +#endif +#ifdef CDTRDSR + {"cdtrdsr", control, REV, CDTRDSR, 0}, +#endif + + {"ignbrk", input, SANE_UNSET | REV, IGNBRK, 0}, + {"brkint", input, SANE_SET | REV, BRKINT, 0}, + {"ignpar", input, REV, IGNPAR, 0}, + {"parmrk", input, REV, PARMRK, 0}, + {"inpck", input, REV, INPCK, 0}, + {"istrip", input, REV, ISTRIP, 0}, + {"inlcr", input, SANE_UNSET | REV, INLCR, 0}, + {"igncr", input, SANE_UNSET | REV, IGNCR, 0}, + {"icrnl", input, SANE_SET | REV, ICRNL, 0}, + {"ixon", input, REV, IXON, 0}, + {"ixoff", input, SANE_UNSET | REV, IXOFF, 0}, + {"tandem", input, REV | OMIT, IXOFF, 0}, +#ifdef IUCLC + {"iuclc", input, SANE_UNSET | REV, IUCLC, 0}, +#endif +#ifdef IXANY + {"ixany", input, SANE_UNSET | REV, IXANY, 0}, +#endif +#ifdef IMAXBEL + {"imaxbel", input, SANE_SET | REV, IMAXBEL, 0}, +#endif +#ifdef IUTF8 + {"iutf8", input, SANE_UNSET | REV, IUTF8, 0}, +#endif + + {"opost", output, SANE_SET | REV, OPOST, 0}, +#ifdef OLCUC + {"olcuc", output, SANE_UNSET | REV, OLCUC, 0}, +#endif +#ifdef OCRNL + {"ocrnl", output, SANE_UNSET | REV, OCRNL, 0}, +#endif +#ifdef ONLCR + {"onlcr", output, SANE_SET | REV, ONLCR, 0}, +#endif +#ifdef ONOCR + {"onocr", output, SANE_UNSET | REV, ONOCR, 0}, +#endif +#ifdef ONLRET + {"onlret", output, SANE_UNSET | REV, ONLRET, 0}, +#endif +#ifdef OFILL + {"ofill", output, SANE_UNSET | REV, OFILL, 0}, +#endif +#ifdef OFDEL + {"ofdel", output, SANE_UNSET | REV, OFDEL, 0}, +#endif +#ifdef NLDLY + {"nl1", output, SANE_UNSET, NL1, NLDLY}, + {"nl0", output, SANE_SET, NL0, NLDLY}, +#endif +#ifdef CRDLY + {"cr3", output, SANE_UNSET, CR3, CRDLY}, + {"cr2", output, SANE_UNSET, CR2, CRDLY}, + {"cr1", output, SANE_UNSET, CR1, CRDLY}, + {"cr0", output, SANE_SET, CR0, CRDLY}, +#endif +#ifdef TABDLY +# ifdef TAB3 + {"tab3", output, SANE_UNSET, TAB3, TABDLY}, +# endif +# ifdef TAB2 + {"tab2", output, SANE_UNSET, TAB2, TABDLY}, +# endif +# ifdef TAB1 + {"tab1", output, SANE_UNSET, TAB1, TABDLY}, +# endif +# ifdef TAB0 + {"tab0", output, SANE_SET, TAB0, TABDLY}, +# endif +#else +# ifdef OXTABS + {"tab3", output, SANE_UNSET, OXTABS, 0}, +# endif +#endif +#ifdef BSDLY + {"bs1", output, SANE_UNSET, BS1, BSDLY}, + {"bs0", output, SANE_SET, BS0, BSDLY}, +#endif +#ifdef VTDLY + {"vt1", output, SANE_UNSET, VT1, VTDLY}, + {"vt0", output, SANE_SET, VT0, VTDLY}, +#endif +#ifdef FFDLY + {"ff1", output, SANE_UNSET, FF1, FFDLY}, + {"ff0", output, SANE_SET, FF0, FFDLY}, +#endif + + {"isig", local, SANE_SET | REV, ISIG, 0}, + {"icanon", local, SANE_SET | REV, ICANON, 0}, +#ifdef IEXTEN + {"iexten", local, SANE_SET | REV, IEXTEN, 0}, +#endif + {"echo", local, SANE_SET | REV, ECHO, 0}, + {"echoe", local, SANE_SET | REV, ECHOE, 0}, + {"crterase", local, REV | OMIT, ECHOE, 0}, + {"echok", local, SANE_SET | REV, ECHOK, 0}, + {"echonl", local, SANE_UNSET | REV, ECHONL, 0}, + {"noflsh", local, SANE_UNSET | REV, NOFLSH, 0}, +#ifdef XCASE + {"xcase", local, SANE_UNSET | REV, XCASE, 0}, +#endif +#ifdef TOSTOP + {"tostop", local, SANE_UNSET | REV, TOSTOP, 0}, +#endif +#ifdef ECHOPRT + {"echoprt", local, SANE_UNSET | REV, ECHOPRT, 0}, + {"prterase", local, REV | OMIT, ECHOPRT, 0}, +#endif +#ifdef ECHOCTL + {"echoctl", local, SANE_SET | REV, ECHOCTL, 0}, + {"ctlecho", local, REV | OMIT, ECHOCTL, 0}, +#endif +#ifdef ECHOKE + {"echoke", local, SANE_SET | REV, ECHOKE, 0}, + {"crtkill", local, REV | OMIT, ECHOKE, 0}, +#endif +#ifdef FLUSHO + {"flusho", local, SANE_UNSET | REV, FLUSHO, 0}, +#endif +#if defined TIOCEXT + {"extproc", local, SANE_UNSET | REV | NO_SETATTR, EXTPROC, 0}, +#elif defined EXTPROC + {"extproc", local, SANE_UNSET | REV, EXTPROC, 0}, +#endif + + {"evenp", combination, REV | OMIT, 0, 0}, + {"parity", combination, REV | OMIT, 0, 0}, + {"oddp", combination, REV | OMIT, 0, 0}, + {"nl", combination, REV | OMIT, 0, 0}, + {"ek", combination, OMIT, 0, 0}, + {"sane", combination, OMIT, 0, 0}, + {"cooked", combination, REV | OMIT, 0, 0}, + {"raw", combination, REV | OMIT, 0, 0}, + {"pass8", combination, REV | OMIT, 0, 0}, + {"litout", combination, REV | OMIT, 0, 0}, + {"cbreak", combination, REV | OMIT, 0, 0}, +#ifdef IXANY + {"decctlq", combination, REV | OMIT, 0, 0}, +#endif +#if defined TABDLY || defined OXTABS + {"tabs", combination, REV | OMIT, 0, 0}, +#endif +#if defined XCASE && defined IUCLC && defined OLCUC + {"lcase", combination, REV | OMIT, 0, 0}, + {"LCASE", combination, REV | OMIT, 0, 0}, +#endif + {"crt", combination, OMIT, 0, 0}, + {"dec", combination, OMIT, 0, 0}, + + {nullptr, control, 0, 0, 0} +}; + +/* Control character settings. */ +struct control_info + { + char const *name; /* Name given on command line. */ + cc_t saneval; /* Value to set for 'stty sane'. */ + size_t offset; /* Offset in c_cc. */ + }; + +/* Control characters. */ + +static struct control_info const control_info[] = +{ + {"intr", CINTR, VINTR}, + {"quit", CQUIT, VQUIT}, + {"erase", CERASE, VERASE}, + {"kill", CKILL, VKILL}, + {"eof", CEOF, VEOF}, + {"eol", CEOL, VEOL}, +#ifdef VEOL2 + {"eol2", CEOL2, VEOL2}, +#endif +#ifdef VSWTCH + {"swtch", CSWTCH, VSWTCH}, +#endif + {"start", CSTART, VSTART}, + {"stop", CSTOP, VSTOP}, + {"susp", CSUSP, VSUSP}, +#ifdef VDSUSP + {"dsusp", CDSUSP, VDSUSP}, +#endif +#ifdef VREPRINT + {"rprnt", CRPRNT, VREPRINT}, +#else +# ifdef CREPRINT /* HPUX 10.20 needs this */ + {"rprnt", CRPRNT, CREPRINT}, +# endif +#endif +#ifdef VWERASE + {"werase", CWERASE, VWERASE}, +#endif +#ifdef VLNEXT + {"lnext", CLNEXT, VLNEXT}, +#endif +#ifdef VFLUSHO + {"flush", CFLUSHO, VFLUSHO}, /* deprecated compat option. */ + {"discard", CFLUSHO, VFLUSHO}, +#endif +#ifdef VSTATUS + {"status", CSTATUS, VSTATUS}, +#endif + + /* These must be last because of the display routines. */ + {"min", 1, VMIN}, + {"time", 0, VTIME}, + {nullptr, 0, 0} +}; + +static char const *visible (cc_t ch); +static unsigned long int baud_to_value (speed_t speed); +static bool recover_mode (char const *arg, struct termios *mode); +static int screen_columns (void); +static bool set_mode (struct mode_info const *info, bool reversed, + struct termios *mode); +static bool eq_mode (struct termios *mode1, struct termios *mode2); +static unsigned long int integer_arg (char const *s, unsigned long int max); +static speed_t string_to_baud (char const *arg); +static tcflag_t *mode_type_flag (enum mode_type type, struct termios *mode); +static void display_all (struct termios *mode, char const *device_name); +static void display_changed (struct termios *mode); +static void display_recoverable (struct termios *mode); +static void display_settings (enum output_type output_type, + struct termios *mode, + char const *device_name); +static void check_speed (struct termios *mode); +static void display_speed (struct termios *mode, bool fancy); +static void display_window_size (bool fancy, char const *device_name); +static void sane_mode (struct termios *mode); +static void set_control_char (struct control_info const *info, + char const *arg, + struct termios *mode); +static void set_speed (enum speed_setting type, char const *arg, + struct termios *mode); +static void set_window_size (int rows, int cols, char const *device_name); + +/* The width of the screen, for output wrapping. */ +static int max_col; + +/* Current position, to know when to wrap. */ +static int current_col; + +/* Default "drain" mode for tcsetattr. */ +static int tcsetattr_options = TCSADRAIN; + +/* Extra info to aid stty development. */ +static bool dev_debug; + +/* Record last speed set for correlation. */ +static speed_t last_ibaud = (speed_t) -1; +static speed_t last_obaud = (speed_t) -1; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEV_DEBUG_OPTION = CHAR_MAX + 1, +}; + +static struct option const longopts[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"save", no_argument, nullptr, 'g'}, + {"file", required_argument, nullptr, 'F'}, + {"-debug", no_argument, nullptr, DEV_DEBUG_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Print format string MESSAGE and optional args. + Wrap to next line first if it won't fit. + Print a space first unless MESSAGE will start a new line. */ + +ATTRIBUTE_FORMAT ((printf, 1, 2)) +static void +wrapf (char const *message,...) +{ + va_list args; + char *buf; + int buflen; + + va_start (args, message); + buflen = vasprintf (&buf, message, args); + va_end (args); + + if (buflen < 0) + xalloc_die (); + + if (0 < current_col) + { + if (max_col - current_col <= buflen) + { + putchar ('\n'); + current_col = 0; + } + else + { + putchar (' '); + current_col++; + } + } + + fputs (buf, stdout); + free (buf); + current_col += buflen; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [-F DEVICE | --file=DEVICE] [SETTING]...\n\ + or: %s [-F DEVICE | --file=DEVICE] [-a|--all]\n\ + or: %s [-F DEVICE | --file=DEVICE] [-g|--save]\n\ +"), + program_name, program_name, program_name); + fputs (_("\ +Print or change terminal characteristics.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -a, --all print all current settings in human-readable form\n\ + -g, --save print all current settings in a stty-readable form\n\ + -F, --file=DEVICE open and use the specified DEVICE instead of stdin\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Optional - before SETTING indicates negation. An * marks non-POSIX\n\ +settings. The underlying system defines which settings are available.\n\ +"), stdout); + fputs (_("\ +\n\ +Special characters:\n"), stdout); +#ifdef VFLUSHO + fputs (_("\ + * discard CHAR CHAR will toggle discarding of output\n\ +"), stdout); +#endif +#ifdef VDSUSP + fputs (_("\ + * dsusp CHAR CHAR will send a terminal stop signal once input flushed\n\ +"), stdout); +#endif + fputs (_("\ + eof CHAR CHAR will send an end of file (terminate the input)\n\ + eol CHAR CHAR will end the line\n\ +"), stdout); +#ifdef VEOL2 + fputs (_("\ + * eol2 CHAR alternate CHAR for ending the line\n\ +"), stdout); +#endif + fputs (_("\ + erase CHAR CHAR will erase the last character typed\n\ + intr CHAR CHAR will send an interrupt signal\n\ + kill CHAR CHAR will erase the current line\n\ +"), stdout); +#ifdef VLNEXT + fputs (_("\ + * lnext CHAR CHAR will enter the next character quoted\n\ +"), stdout); +#endif +#ifdef VSTATUS + fputs (_("\ + * status CHAR CHAR will send an info signal\n\ +"), stdout); +#endif + fputs (_("\ + quit CHAR CHAR will send a quit signal\n\ +"), stdout); +#if defined CREPRINT || defined VREPRINT + fputs (_("\ + * rprnt CHAR CHAR will redraw the current line\n\ +"), stdout); +#endif + fputs (_("\ + start CHAR CHAR will restart the output after stopping it\n\ + stop CHAR CHAR will stop the output\n\ + susp CHAR CHAR will send a terminal stop signal\n\ +"), stdout); +#ifdef VSWTCH + fputs (_("\ + * swtch CHAR CHAR will switch to a different shell layer\n\ +"), stdout); +#endif +#ifdef VWERASE + fputs (_("\ + * werase CHAR CHAR will erase the last word typed\n\ +"), stdout); +#endif + fputs (_("\ +\n\ +Special settings:\n\ + N set the input and output speeds to N bauds\n\ +"), stdout); +#ifdef TIOCGWINSZ + fputs (_("\ + * cols N tell the kernel that the terminal has N columns\n\ + * columns N same as cols N\n\ +"), stdout); +#endif + printf (_("\ + * [-]drain wait for transmission before applying settings (%s by default)\ +\n"), tcsetattr_options == TCSADRAIN ? _("on") : _("off")); + fputs (_("\ + ispeed N set the input speed to N\n\ +"), stdout); +#ifdef HAVE_C_LINE + fputs (_("\ + * line N use line discipline N\n\ +"), stdout); +#endif + fputs (_("\ + min N with -icanon, set N characters minimum for a completed read\n\ + ospeed N set the output speed to N\n\ +"), stdout); +#ifdef TIOCGWINSZ + fputs (_("\ + * rows N tell the kernel that the terminal has N rows\n\ + * size print the number of rows and columns according to the kernel\n\ +"), stdout); +#endif + fputs (_("\ + speed print the terminal speed\n\ + time N with -icanon, set read timeout of N tenths of a second\n\ +"), stdout); + fputs (_("\ +\n\ +Control settings:\n\ + [-]clocal disable modem control signals\n\ + [-]cread allow input to be received\n\ +"), stdout); +#ifdef CRTSCTS + fputs (_("\ + * [-]crtscts enable RTS/CTS handshaking\n\ +"), stdout); +#endif +#ifdef CDTRDSR + fputs (_("\ + * [-]cdtrdsr enable DTR/DSR handshaking\n\ +"), stdout); +#endif + fputs (_("\ + csN set character size to N bits, N in [5..8]\n\ +"), stdout); + fputs (_("\ + [-]cstopb use two stop bits per character (one with '-')\n\ + [-]hup send a hangup signal when the last process closes the tty\n\ + [-]hupcl same as [-]hup\n\ + [-]parenb generate parity bit in output and expect parity bit in input\n\ + [-]parodd set odd parity (or even parity with '-')\n\ +"), stdout); +#ifdef CMSPAR + fputs (_("\ + * [-]cmspar use \"stick\" (mark/space) parity\n\ +"), stdout); +#endif + fputs (_("\ +\n\ +Input settings:\n\ + [-]brkint breaks cause an interrupt signal\n\ + [-]icrnl translate carriage return to newline\n\ + [-]ignbrk ignore break characters\n\ + [-]igncr ignore carriage return\n\ + [-]ignpar ignore characters with parity errors\n\ +"), stdout); +#ifdef IMAXBEL + fputs (_("\ + * [-]imaxbel beep and do not flush a full input buffer on a character\n\ +"), stdout); +#endif + fputs (_("\ + [-]inlcr translate newline to carriage return\n\ + [-]inpck enable input parity checking\n\ + [-]istrip clear high (8th) bit of input characters\n\ +"), stdout); +#ifdef IUTF8 + fputs (_("\ + * [-]iutf8 assume input characters are UTF-8 encoded\n\ +"), stdout); +#endif +#ifdef IUCLC + fputs (_("\ + * [-]iuclc translate uppercase characters to lowercase\n\ +"), stdout); +#endif +#ifdef IXANY + fputs (_("\ + * [-]ixany let any character restart output, not only start character\n\ +"), stdout); +#endif + fputs (_("\ + [-]ixoff enable sending of start/stop characters\n\ + [-]ixon enable XON/XOFF flow control\n\ + [-]parmrk mark parity errors (with a 255-0-character sequence)\n\ + [-]tandem same as [-]ixoff\n\ +"), stdout); + fputs (_("\ +\n\ +Output settings:\n\ +"), stdout); +#ifdef BSDLY + fputs (_("\ + * bsN backspace delay style, N in [0..1]\n\ +"), stdout); +#endif +#ifdef CRDLY + fputs (_("\ + * crN carriage return delay style, N in [0..3]\n\ +"), stdout); +#endif +#ifdef FFDLY + fputs (_("\ + * ffN form feed delay style, N in [0..1]\n\ +"), stdout); +#endif +#ifdef NLDLY + fputs (_("\ + * nlN newline delay style, N in [0..1]\n\ +"), stdout); +#endif +#ifdef OCRNL + fputs (_("\ + * [-]ocrnl translate carriage return to newline\n\ +"), stdout); +#endif +#ifdef OFDEL + fputs (_("\ + * [-]ofdel use delete characters for fill instead of NUL characters\n\ +"), stdout); +#endif +#ifdef OFILL + fputs (_("\ + * [-]ofill use fill (padding) characters instead of timing for delays\n\ +"), stdout); +#endif +#ifdef OLCUC + fputs (_("\ + * [-]olcuc translate lowercase characters to uppercase\n\ +"), stdout); +#endif +#ifdef ONLCR + fputs (_("\ + * [-]onlcr translate newline to carriage return-newline\n\ +"), stdout); +#endif +#ifdef ONLRET + fputs (_("\ + * [-]onlret newline performs a carriage return\n\ +"), stdout); +#endif +#ifdef ONOCR + fputs (_("\ + * [-]onocr do not print carriage returns in the first column\n\ +"), stdout); +#endif + fputs (_("\ + [-]opost postprocess output\n\ +"), stdout); +#if defined TABDLY || defined OXTABS + fputs (_("\ + * tabN horizontal tab delay style, N in [0..3]\n\ + * tabs same as tab0\n\ + * -tabs same as tab3\n\ +"), stdout); +#endif +#ifdef VTDLY + fputs (_("\ + * vtN vertical tab delay style, N in [0..1]\n\ +"), stdout); +#endif + fputs (_("\ +\n\ +Local settings:\n\ + [-]crterase echo erase characters as backspace-space-backspace\n\ +"), stdout); +#ifdef ECHOKE + fputs (_("\ + * crtkill kill all line by obeying the echoprt and echoe settings\n\ + * -crtkill kill all line by obeying the echoctl and echok settings\n\ +"), stdout); +#endif +#ifdef ECHOCTL + fputs (_("\ + * [-]ctlecho echo control characters in hat notation ('^c')\n\ +"), stdout); +#endif + fputs (_("\ + [-]echo echo input characters\n\ +"), stdout); +#ifdef ECHOCTL + fputs (_("\ + * [-]echoctl same as [-]ctlecho\n\ +"), stdout); +#endif + fputs (_("\ + [-]echoe same as [-]crterase\n\ + [-]echok echo a newline after a kill character\n\ +"), stdout); +#ifdef ECHOKE + fputs (_("\ + * [-]echoke same as [-]crtkill\n\ +"), stdout); +#endif + fputs (_("\ + [-]echonl echo newline even if not echoing other characters\n\ +"), stdout); +#ifdef ECHOPRT + fputs (_("\ + * [-]echoprt echo erased characters backward, between '\\' and '/'\n\ +"), stdout); +#endif +#if defined EXTPROC || defined TIOCEXT + fputs (_("\ + * [-]extproc enable \"LINEMODE\"; useful with high latency links\n\ +"), stdout); +#endif +#if defined FLUSHO + fputs (_("\ + * [-]flusho discard output\n\ +"), stdout); +#endif + printf (_("\ + [-]icanon enable special characters: %s\n\ + [-]iexten enable non-POSIX special characters\n\ +"), "erase, kill" +#ifdef VWERASE + ", werase" +#endif +#if defined CREPRINT || defined VREPRINT + ", rprnt" +#endif +); + fputs (_("\ + [-]isig enable interrupt, quit, and suspend special characters\n\ + [-]noflsh disable flushing after interrupt and quit special characters\n\ +"), stdout); +#ifdef ECHOPRT + fputs (_("\ + * [-]prterase same as [-]echoprt\n\ +"), stdout); +#endif +#ifdef TOSTOP + fputs (_("\ + * [-]tostop stop background jobs that try to write to the terminal\n\ +"), stdout); +#endif +#ifdef XCASE + fputs (_("\ + * [-]xcase with icanon, escape with '\\' for uppercase characters\n\ +"), stdout); +#endif + fputs (_("\ +\n\ +Combination settings:\n\ +"), stdout); +#if defined XCASE && defined IUCLC && defined OLCUC + fputs (_("\ + * [-]LCASE same as [-]lcase\n\ +"), stdout); +#endif + fputs (_("\ + cbreak same as -icanon\n\ + -cbreak same as icanon\n\ +"), stdout); + fputs (_("\ + cooked same as brkint ignpar istrip icrnl ixon opost isig\n\ + icanon, eof and eol characters to their default values\n\ + -cooked same as raw\n\ +"), stdout); + printf (_("\ + crt same as %s\n\ +"), "echoe" +#ifdef ECHOCTL + " echoctl" +#endif +#ifdef ECHOKE + " echoke" +#endif +); + printf (_("\ + dec same as %s intr ^c erase 0177\n\ + kill ^u\n\ +"), "echoe" +#ifdef ECHOCTL + " echoctl" +#endif +#ifdef ECHOKE + " echoke" +#endif +#ifdef IXANY + " -ixany" +#endif +); +#ifdef IXANY + fputs (_("\ + * [-]decctlq same as [-]ixany\n\ +"), stdout); +#endif + fputs (_("\ + ek erase and kill characters to their default values\n\ + evenp same as parenb -parodd cs7\n\ + -evenp same as -parenb cs8\n\ +"), stdout); +#if defined XCASE && defined IUCLC && defined OLCUC + fputs (_("\ + * [-]lcase same as xcase iuclc olcuc\n\ +"), stdout); +#endif + fputs (_("\ + litout same as -parenb -istrip -opost cs8\n\ + -litout same as parenb istrip opost cs7\n\ +"), stdout); + printf (_("\ + nl same as %s\n\ + -nl same as %s\n\ +"), "-icrnl" +#ifdef ONLCR + " -onlcr" +#endif + , "icrnl -inlcr -igncr" +#ifdef ONLCR + " onlcr" +#endif +#ifdef OCRNL + " -ocrnl" +#endif +#ifdef ONLRET + " -onlret" +#endif +); + fputs (_("\ + oddp same as parenb parodd cs7\n\ + -oddp same as -parenb cs8\n\ + [-]parity same as [-]evenp\n\ + pass8 same as -parenb -istrip cs8\n\ + -pass8 same as parenb istrip cs7\n\ +"), stdout); + printf (_("\ + raw same as -ignbrk -brkint -ignpar -parmrk -inpck -istrip\n\ + -inlcr -igncr -icrnl -ixon -ixoff -icanon -opost\n\ + -isig%s min 1 time 0\n\ + -raw same as cooked\n\ +"), +#ifdef IUCLC + " -iuclc" +#endif +#ifdef IXANY + " -ixany" +#endif +#ifdef IMAXBEL + " -imaxbel" +#endif +#ifdef XCASE + " -xcase" +#endif +); + printf (_("\ + sane same as cread -ignbrk brkint -inlcr -igncr icrnl\n\ + icanon iexten echo echoe echok -echonl -noflsh\n\ + %s\n\ + %s\n\ + %s,\n\ + all special characters to their default values\n\ +"), + "-ixoff" +#ifdef IUTF8 + " -iutf8" +#endif +#ifdef IUCLC + " -iuclc" +#endif +#ifdef IXANY + " -ixany" +#endif +#ifdef IMAXBEL + " imaxbel" +#endif +#ifdef XCASE + " -xcase" +#endif +#ifdef OLCUC + " -olcuc" +#endif +#ifdef OCRNL + " -ocrnl" +#endif + + , "opost" +#ifdef OFILL + " -ofill" +#endif +#ifdef ONLCR + " onlcr" +#endif +#ifdef ONOCR + " -onocr" +#endif +#ifdef ONLRET + " -onlret" +#endif +#ifdef NLDLY + " nl0" +#endif +#ifdef CRDLY + " cr0" +#endif +#ifdef TAB0 + " tab0" +#endif +#ifdef BSDLY + " bs0" +#endif +#ifdef VTDLY + " vt0" +#endif +#ifdef FFDLY + " ff0" +#endif + + , "isig" +#ifdef TOSTOP + " -tostop" +#endif +#ifdef OFDEL + " -ofdel" +#endif +#ifdef ECHOPRT + " -echoprt" +#endif +#ifdef ECHOCTL + " echoctl" +#endif +#ifdef ECHOKE + " echoke" +#endif +#ifdef EXTPROC + " -extproc" +#endif +#ifdef FLUSHO + " -flusho" +#endif +); + fputs (_("\ +\n\ +Handle the tty line connected to standard input. Without arguments,\n\ +prints baud rate, line discipline, and deviations from stty sane. In\n\ +settings, CHAR is taken literally, or coded as in ^c, 0x37, 0177 or\n\ +127; special values ^- or undef used to disable special characters.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + + +/* Apply specified settings to MODE and REQUIRE_SET_ATTR as required. + If CHECKING is true, this function doesn't interact + with a device, and only validates specified settings. */ + +static void +apply_settings (bool checking, char const *device_name, + char * const *settings, int n_settings, + struct termios *mode, bool *require_set_attr) +{ +#define check_argument(arg) \ + do \ + { \ + if (k == n_settings - 1 || ! settings[k + 1]) \ + { \ + error (0, 0, _("missing argument to %s"), quote (arg)); \ + usage (EXIT_FAILURE); \ + } \ + } \ + while (0) + + for (int k = 1; k < n_settings; k++) + { + char const *arg = settings[k]; + bool match_found = false; + bool not_set_attr = false; + bool reversed = false; + int i; + + if (! arg) + continue; + + if (arg[0] == '-') + { + ++arg; + reversed = true; + } + if (STREQ (arg, "drain")) + { + tcsetattr_options = reversed ? TCSANOW : TCSADRAIN; + continue; + } + for (i = 0; mode_info[i].name != nullptr; ++i) + { + if (STREQ (arg, mode_info[i].name)) + { + if ((mode_info[i].flags & NO_SETATTR) == 0) + { + match_found = set_mode (&mode_info[i], reversed, mode); + *require_set_attr = true; + } + else + match_found = not_set_attr = true; + break; + } + } + if (!match_found && reversed) + { + error (0, 0, _("invalid argument %s"), quote (arg - 1)); + usage (EXIT_FAILURE); + } + if (!match_found) + { + for (i = 0; control_info[i].name != nullptr; ++i) + { + if (STREQ (arg, control_info[i].name)) + { + check_argument (arg); + match_found = true; + ++k; + set_control_char (&control_info[i], settings[k], mode); + *require_set_attr = true; + break; + } + } + } + if (!match_found || not_set_attr) + { + if (STREQ (arg, "ispeed")) + { + check_argument (arg); + ++k; + if (string_to_baud (settings[k]) == (speed_t) -1) + { + error (0, 0, _("invalid ispeed %s"), quote (settings[k])); + usage (EXIT_FAILURE); + } + set_speed (input_speed, settings[k], mode); + if (checking) + continue; + *require_set_attr = true; + } + else if (STREQ (arg, "ospeed")) + { + check_argument (arg); + ++k; + if (string_to_baud (settings[k]) == (speed_t) -1) + { + error (0, 0, _("invalid ospeed %s"), quote (settings[k])); + usage (EXIT_FAILURE); + } + set_speed (output_speed, settings[k], mode); + if (checking) + continue; + *require_set_attr = true; + } +#ifdef TIOCEXT + /* This is the BSD interface to "extproc". + Even though it's an lflag, an ioctl is used to set it. */ + else if (STREQ (arg, "extproc")) + { + int val = ! reversed; + + if (checking) + continue; + + if (ioctl (STDIN_FILENO, TIOCEXT, &val) != 0) + error (EXIT_FAILURE, errno, _("%s: error setting %s"), + quotef_n (0, device_name), quote_n (1, arg)); + } +#endif +#ifdef TIOCGWINSZ + else if (STREQ (arg, "rows")) + { + check_argument (arg); + ++k; + if (checking) + continue; + set_window_size (integer_arg (settings[k], INT_MAX), -1, + device_name); + } + else if (STREQ (arg, "cols") + || STREQ (arg, "columns")) + { + check_argument (arg); + ++k; + if (checking) + continue; + set_window_size (-1, integer_arg (settings[k], INT_MAX), + device_name); + } + else if (STREQ (arg, "size")) + { + if (checking) + continue; + max_col = screen_columns (); + current_col = 0; + display_window_size (false, device_name); + } +#endif +#ifdef HAVE_C_LINE + else if (STREQ (arg, "line")) + { + unsigned long int value; + check_argument (arg); + ++k; + mode->c_line = value = integer_arg (settings[k], ULONG_MAX); + if (mode->c_line != value) + error (0, 0, _("invalid line discipline %s"), + quote (settings[k])); + *require_set_attr = true; + } +#endif + else if (STREQ (arg, "speed")) + { + if (checking) + continue; + max_col = screen_columns (); + display_speed (mode, false); + } + else if (string_to_baud (arg) != (speed_t) -1) + { + set_speed (both_speeds, arg, mode); + if (checking) + continue; + *require_set_attr = true; + } + else + { + if (! recover_mode (arg, mode)) + { + error (0, 0, _("invalid argument %s"), quote (arg)); + usage (EXIT_FAILURE); + } + *require_set_attr = true; + } + } + } + + if (checking) + check_speed (mode); +} + +int +main (int argc, char **argv) +{ + /* Initialize to all zeroes so there is no risk memcmp will report a + spurious difference in an uninitialized portion of the structure. */ + static struct termios mode; + + enum output_type output_type; + int optc; + int argi = 0; + int opti = 1; + bool require_set_attr; + bool verbose_output; + bool recoverable_output; + bool noargs = true; + char *file_name = nullptr; + char const *device_name; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + output_type = changed; + verbose_output = false; + recoverable_output = false; + + /* Don't print error messages for unrecognized options. */ + opterr = 0; + + /* If any new options are ever added to stty, the short options MUST + NOT allow any ambiguity with the stty settings. For example, the + stty setting "-gagFork" would not be feasible, since it will be + parsed as "-g -a -g -F ork". If you change anything about how + stty parses options, be sure it still works with combinations of + short and long options, --, POSIXLY_CORRECT, etc. */ + + while ((optc = getopt_long (argc - argi, argv + argi, "-agF:", + longopts, nullptr)) + != -1) + { + switch (optc) + { + case 'a': + verbose_output = true; + output_type = all; + break; + + case 'g': + recoverable_output = true; + output_type = recoverable; + break; + + case 'F': + if (file_name) + error (EXIT_FAILURE, 0, _("only one device may be specified")); + file_name = optarg; + break; + + case DEV_DEBUG_OPTION: + dev_debug = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + /* Consider "drain" as an option rather than a setting, + to support: alias stty='stty -drain' etc. */ + if (! STREQ (argv[argi + opti], "-drain") + && ! STREQ (argv[argi + opti], "drain")) + noargs = false; + + /* Skip the argument containing this unrecognized option; + the 2nd pass will analyze it. */ + argi += opti; + + /* Restart getopt_long from the first unskipped argument. */ + opti = 1; + optind = 0; + + break; + } + + /* Clear fully-parsed arguments, so they don't confuse the 2nd pass. */ + while (opti < optind) + argv[argi + opti++] = nullptr; + } + + /* Specifying both -a and -g gets an error. */ + if (verbose_output && recoverable_output) + error (EXIT_FAILURE, 0, + _("the options for verbose and stty-readable output styles are\n" + "mutually exclusive")); + + /* Specifying any other arguments with -a or -g gets an error. */ + if (!noargs && (verbose_output || recoverable_output)) + error (EXIT_FAILURE, 0, + _("when specifying an output style, modes may not be set")); + + device_name = file_name ? file_name : _("standard input"); + + if (!noargs && !verbose_output && !recoverable_output) + { + static struct termios check_mode; + apply_settings (/* checking= */ true, device_name, argv, argc, + &check_mode, &require_set_attr); + } + + if (file_name) + { + int fdflags; + if (fd_reopen (STDIN_FILENO, device_name, O_RDONLY | O_NONBLOCK, 0) < 0) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + if ((fdflags = fcntl (STDIN_FILENO, F_GETFL)) == -1 + || fcntl (STDIN_FILENO, F_SETFL, fdflags & ~O_NONBLOCK) < 0) + error (EXIT_FAILURE, errno, _("%s: couldn't reset non-blocking mode"), + quotef (device_name)); + } + + if (tcgetattr (STDIN_FILENO, &mode)) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + + if (verbose_output || recoverable_output || noargs) + { + max_col = screen_columns (); + current_col = 0; + display_settings (output_type, &mode, device_name); + return EXIT_SUCCESS; + } + + require_set_attr = false; + apply_settings (/* checking= */ false, device_name, argv, argc, + &mode, &require_set_attr); + + if (require_set_attr) + { + /* Initialize to all zeroes so there is no risk memcmp will report a + spurious difference in an uninitialized portion of the structure. */ + static struct termios new_mode; + + if (tcsetattr (STDIN_FILENO, tcsetattr_options, &mode)) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + + /* POSIX (according to Zlotnick's book) tcsetattr returns zero if + it performs *any* of the requested operations. This means it + can report 'success' when it has actually failed to perform + some proper subset of the requested operations. To detect + this partial failure, get the current terminal attributes and + compare them to the requested ones. */ + + if (tcgetattr (STDIN_FILENO, &new_mode)) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + + if (! eq_mode (&mode, &new_mode)) + { + if (dev_debug) + { + error (0, 0, _("indx: mode: actual mode")); + for (unsigned int i = 0; i < sizeof (new_mode); i++) + { + unsigned int newc = *(((unsigned char *) &new_mode) + i); + unsigned int oldc = *(((unsigned char *) &mode) + i); + error (0, 0, "0x%02x, 0x%02x: 0x%02x%s", i, oldc, newc, + newc == oldc ? "" : " *"); + } + } + + error (EXIT_FAILURE, 0, + _("%s: unable to perform all requested operations"), + quotef (device_name)); + } + } + + return EXIT_SUCCESS; +} + +/* Return true if modes are equivalent. */ + +static bool +eq_mode (struct termios *mode1, struct termios *mode2) +{ + return mode1->c_iflag == mode2->c_iflag + && mode1->c_oflag == mode2->c_oflag + && mode1->c_cflag == mode2->c_cflag + && mode1->c_lflag == mode2->c_lflag +#ifdef HAVE_C_LINE + && mode1->c_line == mode2->c_line +#endif + && memcmp (mode1->c_cc, mode2->c_cc, sizeof (mode1->c_cc)) == 0 + && cfgetispeed (mode1) == cfgetispeed (mode2) + && cfgetospeed (mode1) == cfgetospeed (mode2); +} + +/* Return false if not applied because not reversible; otherwise + return true. */ + +static bool +set_mode (struct mode_info const *info, bool reversed, struct termios *mode) +{ + tcflag_t *bitsp; + + if (reversed && (info->flags & REV) == 0) + return false; + + bitsp = mode_type_flag (info->type, mode); + + if (bitsp == nullptr) + { + /* Combination mode. */ + if (STREQ (info->name, "evenp") || STREQ (info->name, "parity")) + { + if (reversed) + mode->c_cflag = (mode->c_cflag & ~PARENB & ~CSIZE) | CS8; + else + mode->c_cflag = (mode->c_cflag & ~PARODD & ~CSIZE) | PARENB | CS7; + } + else if (STREQ (info->name, "oddp")) + { + if (reversed) + mode->c_cflag = (mode->c_cflag & ~PARENB & ~CSIZE) | CS8; + else + mode->c_cflag = (mode->c_cflag & ~CSIZE) | CS7 | PARODD | PARENB; + } + else if (STREQ (info->name, "nl")) + { + if (reversed) + { + mode->c_iflag = (mode->c_iflag | ICRNL) & ~INLCR & ~IGNCR; + mode->c_oflag = (mode->c_oflag +#ifdef ONLCR + | ONLCR +#endif + ) +#ifdef OCRNL + & ~OCRNL +#endif +#ifdef ONLRET + & ~ONLRET +#endif + ; + } + else + { + mode->c_iflag = mode->c_iflag & ~ICRNL; +#ifdef ONLCR + mode->c_oflag = mode->c_oflag & ~ONLCR; +#endif + } + } + else if (STREQ (info->name, "ek")) + { + mode->c_cc[VERASE] = CERASE; + mode->c_cc[VKILL] = CKILL; + } + else if (STREQ (info->name, "sane")) + sane_mode (mode); + else if (STREQ (info->name, "cbreak")) + { + if (reversed) + mode->c_lflag |= ICANON; + else + mode->c_lflag &= ~ICANON; + } + else if (STREQ (info->name, "pass8")) + { + if (reversed) + { + mode->c_cflag = (mode->c_cflag & ~CSIZE) | CS7 | PARENB; + mode->c_iflag |= ISTRIP; + } + else + { + mode->c_cflag = (mode->c_cflag & ~PARENB & ~CSIZE) | CS8; + mode->c_iflag &= ~ISTRIP; + } + } + else if (STREQ (info->name, "litout")) + { + if (reversed) + { + mode->c_cflag = (mode->c_cflag & ~CSIZE) | CS7 | PARENB; + mode->c_iflag |= ISTRIP; + mode->c_oflag |= OPOST; + } + else + { + mode->c_cflag = (mode->c_cflag & ~PARENB & ~CSIZE) | CS8; + mode->c_iflag &= ~ISTRIP; + mode->c_oflag &= ~OPOST; + } + } + else if (STREQ (info->name, "raw") || STREQ (info->name, "cooked")) + { + if ((info->name[0] == 'r' && reversed) + || (info->name[0] == 'c' && !reversed)) + { + /* Cooked mode. */ + mode->c_iflag |= BRKINT | IGNPAR | ISTRIP | ICRNL | IXON; + mode->c_oflag |= OPOST; + mode->c_lflag |= ISIG | ICANON; +#if VMIN == VEOF + mode->c_cc[VEOF] = CEOF; +#endif +#if VTIME == VEOL + mode->c_cc[VEOL] = CEOL; +#endif + } + else + { + /* Raw mode. */ + mode->c_iflag = 0; + mode->c_oflag &= ~OPOST; + mode->c_lflag &= ~(ISIG | ICANON +#ifdef XCASE + | XCASE +#endif + ); + mode->c_cc[VMIN] = 1; + mode->c_cc[VTIME] = 0; + } + } +#ifdef IXANY + else if (STREQ (info->name, "decctlq")) + { + if (reversed) + mode->c_iflag |= IXANY; + else + mode->c_iflag &= ~IXANY; + } +#endif +#ifdef TABDLY + else if (STREQ (info->name, "tabs")) + { + if (reversed) + mode->c_oflag = (mode->c_oflag & ~TABDLY) | TAB3; + else + mode->c_oflag = (mode->c_oflag & ~TABDLY) | TAB0; + } +#else +# ifdef OXTABS + else if (STREQ (info->name, "tabs")) + { + if (reversed) + mode->c_oflag = mode->c_oflag | OXTABS; + else + mode->c_oflag = mode->c_oflag & ~OXTABS; + } +# endif +#endif +#if defined XCASE && defined IUCLC && defined OLCUC + else if (STREQ (info->name, "lcase") + || STREQ (info->name, "LCASE")) + { + if (reversed) + { + mode->c_lflag &= ~XCASE; + mode->c_iflag &= ~IUCLC; + mode->c_oflag &= ~OLCUC; + } + else + { + mode->c_lflag |= XCASE; + mode->c_iflag |= IUCLC; + mode->c_oflag |= OLCUC; + } + } +#endif + else if (STREQ (info->name, "crt")) + mode->c_lflag |= ECHOE +#ifdef ECHOCTL + | ECHOCTL +#endif +#ifdef ECHOKE + | ECHOKE +#endif + ; + else if (STREQ (info->name, "dec")) + { + mode->c_cc[VINTR] = 3; /* ^C */ + mode->c_cc[VERASE] = 127; /* DEL */ + mode->c_cc[VKILL] = 21; /* ^U */ + mode->c_lflag |= ECHOE +#ifdef ECHOCTL + | ECHOCTL +#endif +#ifdef ECHOKE + | ECHOKE +#endif + ; +#ifdef IXANY + mode->c_iflag &= ~IXANY; +#endif + } + } + else if (reversed) + *bitsp = *bitsp & ~info->mask & ~info->bits; + else + *bitsp = (*bitsp & ~info->mask) | info->bits; + + return true; +} + +static void +set_control_char (struct control_info const *info, char const *arg, + struct termios *mode) +{ + unsigned long int value; + + if (STREQ (info->name, "min") || STREQ (info->name, "time")) + value = integer_arg (arg, TYPE_MAXIMUM (cc_t)); + else if (arg[0] == '\0' || arg[1] == '\0') + value = to_uchar (arg[0]); + else if (STREQ (arg, "^-") || STREQ (arg, "undef")) + value = _POSIX_VDISABLE; + else if (arg[0] == '^' && arg[1] != '\0') /* Ignore any trailing junk. */ + { + if (arg[1] == '?') + value = 127; + else + value = to_uchar (arg[1]) & ~0140; /* Non-letters get weird results. */ + } + else + value = integer_arg (arg, TYPE_MAXIMUM (cc_t)); + mode->c_cc[info->offset] = value; +} + +static void +set_speed (enum speed_setting type, char const *arg, struct termios *mode) +{ + /* Note cfset[io]speed(), do not check with the device, + and only check whether the system logic supports the specified speed. + Therefore we don't report the device name in any errors. */ + + speed_t baud = string_to_baud (arg); + affirm (baud != (speed_t) -1); + + if (type == input_speed || type == both_speeds) + { + last_ibaud = baud; + if (cfsetispeed (mode, baud)) + error (EXIT_FAILURE, 0, _("unsupported ispeed %s"), quoteaf (arg)); + } + if (type == output_speed || type == both_speeds) + { + last_obaud = baud; + if (cfsetospeed (mode, baud)) + error (EXIT_FAILURE, 0, _("unsupported ospeed %s"), quoteaf (arg)); + } +} + +#ifdef TIOCGWINSZ + +static int +get_win_size (int fd, struct winsize *win) +{ + int err = ioctl (fd, TIOCGWINSZ, (char *) win); + return err; +} + +static void +set_window_size (int rows, int cols, char const *device_name) +{ + struct winsize win; + + if (get_win_size (STDIN_FILENO, &win)) + { + if (errno != EINVAL) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + memset (&win, 0, sizeof (win)); + } + + if (rows >= 0) + win.ws_row = rows; + if (cols >= 0) + win.ws_col = cols; + +# ifdef TIOCSSIZE + /* Alexander Dupuy wrote: + The following code deals with a bug in the SunOS 4.x (and 3.x?) kernel. + This comment from sys/ttold.h describes Sun's twisted logic - a better + test would have been (ts_lines > 64k || ts_cols > 64k || ts_cols == 0). + At any rate, the problem is gone in Solaris 2.x. + + Unfortunately, the old TIOCSSIZE code does collide with TIOCSWINSZ, + but they can be disambiguated by checking whether a "struct ttysize" + structure's "ts_lines" field is greater than 64K or not. If so, + it's almost certainly a "struct winsize" instead. + + At any rate, the bug manifests itself when ws_row == 0; the symptom is + that ws_row is set to ws_col, and ws_col is set to (ws_xpixel<<16) + + ws_ypixel. Since GNU stty sets rows and columns separately, this bug + caused "stty rows 0 cols 0" to set rows to cols and cols to 0, while + "stty cols 0 rows 0" would do the right thing. On a little-endian + machine like the sun386i, the problem is the same, but for ws_col == 0. + + The workaround is to do the ioctl once with row and col = 1 to set the + pixel info, and then do it again using a TIOCSSIZE to set rows/cols. */ + + if (win.ws_row == 0 || win.ws_col == 0) + { + struct ttysize ttysz; + + ttysz.ts_lines = win.ws_row; + ttysz.ts_cols = win.ws_col; + + win.ws_row = 1; + win.ws_col = 1; + + if (ioctl (STDIN_FILENO, TIOCSWINSZ, (char *) &win)) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + + if (ioctl (STDIN_FILENO, TIOCSSIZE, (char *) &ttysz)) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + return; + } +# endif + + if (ioctl (STDIN_FILENO, TIOCSWINSZ, (char *) &win)) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); +} + +static void +display_window_size (bool fancy, char const *device_name) +{ + struct winsize win; + + if (get_win_size (STDIN_FILENO, &win)) + { + if (errno != EINVAL) + error (EXIT_FAILURE, errno, "%s", quotef (device_name)); + if (!fancy) + error (EXIT_FAILURE, 0, + _("%s: no size information for this device"), + quotef (device_name)); + } + else + { + wrapf (fancy ? "rows %d; columns %d;" : "%d %d\n", + win.ws_row, win.ws_col); + if (!fancy) + current_col = 0; + } +} +#endif + +static int +screen_columns (void) +{ +#ifdef TIOCGWINSZ + struct winsize win; + + /* With Solaris 2.[123], this ioctl fails and errno is set to + EINVAL for telnet (but not rlogin) sessions. + On ISC 3.0, it fails for the console and the serial port + (but it works for ptys). + It can also fail on any system when stdout isn't a tty. + In case of any failure, just use the default. */ + if (get_win_size (STDOUT_FILENO, &win) == 0 && 0 < win.ws_col) + return win.ws_col; +#endif + { + /* Use $COLUMNS if it's in [1..INT_MAX]. */ + char *col_string = getenv ("COLUMNS"); + long int n_columns; + if (!(col_string != nullptr + && xstrtol (col_string, nullptr, 0, &n_columns, "") == LONGINT_OK + && 0 < n_columns + && n_columns <= INT_MAX)) + n_columns = 80; + return n_columns; + } +} + +ATTRIBUTE_PURE +static tcflag_t * +mode_type_flag (enum mode_type type, struct termios *mode) +{ + switch (type) + { + case control: + return &mode->c_cflag; + + case input: + return &mode->c_iflag; + + case output: + return &mode->c_oflag; + + case local: + return &mode->c_lflag; + + case combination: + return nullptr; + + default: + unreachable (); + } +} + +static void +display_settings (enum output_type output_type, struct termios *mode, + char const *device_name) +{ + switch (output_type) + { + case changed: + display_changed (mode); + break; + + case all: + display_all (mode, device_name); + break; + + case recoverable: + display_recoverable (mode); + break; + } +} + +static void +display_changed (struct termios *mode) +{ + int i; + bool empty_line; + tcflag_t *bitsp; + unsigned long mask; + enum mode_type prev_type = control; + + display_speed (mode, true); +#ifdef HAVE_C_LINE + wrapf ("line = %d;", mode->c_line); +#endif + putchar ('\n'); + current_col = 0; + + empty_line = true; + for (i = 0; !STREQ (control_info[i].name, "min"); ++i) + { + if (mode->c_cc[control_info[i].offset] == control_info[i].saneval) + continue; + +#ifdef VFLUSHO + /* 'flush' is the deprecated equivalent of 'discard'. */ + if (STREQ (control_info[i].name, "flush")) + continue; +#endif + /* If swtch is the same as susp, don't print both. */ +#if VSWTCH == VSUSP + if (STREQ (control_info[i].name, "swtch")) + continue; +#endif + /* If eof uses the same slot as min, only print whichever applies. */ +#if VEOF == VMIN + if ((mode->c_lflag & ICANON) == 0 + && (STREQ (control_info[i].name, "eof") + || STREQ (control_info[i].name, "eol"))) + continue; +#endif + + empty_line = false; + wrapf ("%s = %s;", control_info[i].name, + visible (mode->c_cc[control_info[i].offset])); + } + if ((mode->c_lflag & ICANON) == 0) + { + wrapf ("min = %lu; time = %lu;\n", + (unsigned long int) mode->c_cc[VMIN], + (unsigned long int) mode->c_cc[VTIME]); + } + else if (!empty_line) + putchar ('\n'); + current_col = 0; + + empty_line = true; + for (i = 0; mode_info[i].name != nullptr; ++i) + { + if (mode_info[i].flags & OMIT) + continue; + if (mode_info[i].type != prev_type) + { + if (!empty_line) + { + putchar ('\n'); + current_col = 0; + empty_line = true; + } + prev_type = mode_info[i].type; + } + + bitsp = mode_type_flag (mode_info[i].type, mode); + mask = mode_info[i].mask ? mode_info[i].mask : mode_info[i].bits; + if ((*bitsp & mask) == mode_info[i].bits) + { + if (mode_info[i].flags & SANE_UNSET) + { + wrapf ("%s", mode_info[i].name); + empty_line = false; + } + } + else if ((mode_info[i].flags & (SANE_SET | REV)) == (SANE_SET | REV)) + { + wrapf ("-%s", mode_info[i].name); + empty_line = false; + } + } + if (!empty_line) + putchar ('\n'); + current_col = 0; +} + +static void +display_all (struct termios *mode, char const *device_name) +{ + int i; + tcflag_t *bitsp; + unsigned long mask; + enum mode_type prev_type = control; + + display_speed (mode, true); +#ifdef TIOCGWINSZ + display_window_size (true, device_name); +#endif +#ifdef HAVE_C_LINE + wrapf ("line = %d;", mode->c_line); +#endif + putchar ('\n'); + current_col = 0; + + for (i = 0; ! STREQ (control_info[i].name, "min"); ++i) + { +#ifdef VFLUSHO + /* 'flush' is the deprecated equivalent of 'discard'. */ + if (STREQ (control_info[i].name, "flush")) + continue; +#endif + /* If swtch is the same as susp, don't print both. */ +#if VSWTCH == VSUSP + if (STREQ (control_info[i].name, "swtch")) + continue; +#endif + /* If eof uses the same slot as min, only print whichever applies. */ +#if VEOF == VMIN + if ((mode->c_lflag & ICANON) == 0 + && (STREQ (control_info[i].name, "eof") + || STREQ (control_info[i].name, "eol"))) + continue; +#endif + wrapf ("%s = %s;", control_info[i].name, + visible (mode->c_cc[control_info[i].offset])); + } +#if VEOF == VMIN + if ((mode->c_lflag & ICANON) == 0) +#endif + wrapf ("min = %lu; time = %lu;", + (unsigned long int) mode->c_cc[VMIN], + (unsigned long int) mode->c_cc[VTIME]); + if (current_col != 0) + putchar ('\n'); + current_col = 0; + + for (i = 0; mode_info[i].name != nullptr; ++i) + { + if (mode_info[i].flags & OMIT) + continue; + if (mode_info[i].type != prev_type) + { + putchar ('\n'); + current_col = 0; + prev_type = mode_info[i].type; + } + + bitsp = mode_type_flag (mode_info[i].type, mode); + mask = mode_info[i].mask ? mode_info[i].mask : mode_info[i].bits; + if ((*bitsp & mask) == mode_info[i].bits) + wrapf ("%s", mode_info[i].name); + else if (mode_info[i].flags & REV) + wrapf ("-%s", mode_info[i].name); + } + putchar ('\n'); + current_col = 0; +} + +/* Verify requested asymmetric speeds are supported. + Note we don't flag the case where only ispeed or + ospeed is set, when that would set both. */ + +static void +check_speed (struct termios *mode) +{ + if (last_ibaud != -1 && last_obaud != -1) + { + if (cfgetispeed (mode) != last_ibaud + || cfgetospeed (mode) != last_obaud) + error (EXIT_FAILURE, 0, + _("asymmetric input (%lu), output (%lu) speeds not supported"), + baud_to_value (last_ibaud), baud_to_value (last_obaud)); + } +} + +static void +display_speed (struct termios *mode, bool fancy) +{ + if (cfgetispeed (mode) == 0 || cfgetispeed (mode) == cfgetospeed (mode)) + wrapf (fancy ? "speed %lu baud;" : "%lu\n", + baud_to_value (cfgetospeed (mode))); + else + wrapf (fancy ? "ispeed %lu baud; ospeed %lu baud;" : "%lu %lu\n", + baud_to_value (cfgetispeed (mode)), + baud_to_value (cfgetospeed (mode))); + if (!fancy) + current_col = 0; +} + +static void +display_recoverable (struct termios *mode) +{ + printf ("%lx:%lx:%lx:%lx", + (unsigned long int) mode->c_iflag, + (unsigned long int) mode->c_oflag, + (unsigned long int) mode->c_cflag, + (unsigned long int) mode->c_lflag); + for (size_t i = 0; i < NCCS; ++i) + printf (":%lx", (unsigned long int) mode->c_cc[i]); + putchar ('\n'); +} + +/* NOTE: identical to below, modulo use of tcflag_t */ +static int +strtoul_tcflag_t (char const *s, int base, char **p, tcflag_t *result, + char delim) +{ + unsigned long ul; + errno = 0; + ul = strtoul (s, p, base); + if (errno || **p != delim || *p == s || (tcflag_t) ul != ul) + return -1; + *result = ul; + return 0; +} + +/* NOTE: identical to above, modulo use of cc_t */ +static int +strtoul_cc_t (char const *s, int base, char **p, cc_t *result, char delim) +{ + unsigned long ul; + errno = 0; + ul = strtoul (s, p, base); + if (errno || **p != delim || *p == s || (cc_t) ul != ul) + return -1; + *result = ul; + return 0; +} + +/* Parse the output of display_recoverable. + Return false if any part of it is invalid. */ +static bool +recover_mode (char const *arg, struct termios *mode) +{ + tcflag_t flag[4]; + char const *s = arg; + size_t i; + for (i = 0; i < 4; i++) + { + char *p; + if (strtoul_tcflag_t (s, 16, &p, flag + i, ':') != 0) + return false; + s = p + 1; + } + mode->c_iflag = flag[0]; + mode->c_oflag = flag[1]; + mode->c_cflag = flag[2]; + mode->c_lflag = flag[3]; + + for (i = 0; i < NCCS; ++i) + { + char *p; + char delim = i < NCCS - 1 ? ':' : '\0'; + if (strtoul_cc_t (s, 16, &p, mode->c_cc + i, delim) != 0) + return false; + s = p + 1; + } + + return true; +} + +struct speed_map +{ + char const *string; /* ASCII representation. */ + speed_t speed; /* Internal form. */ + unsigned long int value; /* Numeric value. */ +}; + +static struct speed_map const speeds[] = +{ + {"0", B0, 0}, + {"50", B50, 50}, + {"75", B75, 75}, + {"110", B110, 110}, + {"134", B134, 134}, + {"134.5", B134, 134}, + {"150", B150, 150}, + {"200", B200, 200}, + {"300", B300, 300}, + {"600", B600, 600}, + {"1200", B1200, 1200}, + {"1800", B1800, 1800}, + {"2400", B2400, 2400}, + {"4800", B4800, 4800}, + {"9600", B9600, 9600}, + {"19200", B19200, 19200}, + {"38400", B38400, 38400}, + {"exta", B19200, 19200}, + {"extb", B38400, 38400}, +#ifdef B57600 + {"57600", B57600, 57600}, +#endif +#ifdef B115200 + {"115200", B115200, 115200}, +#endif +#ifdef B230400 + {"230400", B230400, 230400}, +#endif +#ifdef B460800 + {"460800", B460800, 460800}, +#endif +#ifdef B500000 + {"500000", B500000, 500000}, +#endif +#ifdef B576000 + {"576000", B576000, 576000}, +#endif +#ifdef B921600 + {"921600", B921600, 921600}, +#endif +#ifdef B1000000 + {"1000000", B1000000, 1000000}, +#endif +#ifdef B1152000 + {"1152000", B1152000, 1152000}, +#endif +#ifdef B1500000 + {"1500000", B1500000, 1500000}, +#endif +#ifdef B2000000 + {"2000000", B2000000, 2000000}, +#endif +#ifdef B2500000 + {"2500000", B2500000, 2500000}, +#endif +#ifdef B3000000 + {"3000000", B3000000, 3000000}, +#endif +#ifdef B3500000 + {"3500000", B3500000, 3500000}, +#endif +#ifdef B4000000 + {"4000000", B4000000, 4000000}, +#endif + {nullptr, 0, 0} +}; + +ATTRIBUTE_PURE +static speed_t +string_to_baud (char const *arg) +{ + for (int i = 0; speeds[i].string != nullptr; ++i) + if (STREQ (arg, speeds[i].string)) + return speeds[i].speed; + return (speed_t) -1; +} + +ATTRIBUTE_PURE +static unsigned long int +baud_to_value (speed_t speed) +{ + for (int i = 0; speeds[i].string != nullptr; ++i) + if (speed == speeds[i].speed) + return speeds[i].value; + return 0; +} + +static void +sane_mode (struct termios *mode) +{ + int i; + tcflag_t *bitsp; + + for (i = 0; control_info[i].name; ++i) + { +#if VMIN == VEOF + if (STREQ (control_info[i].name, "min")) + break; +#endif + mode->c_cc[control_info[i].offset] = control_info[i].saneval; + } + + for (i = 0; mode_info[i].name != nullptr; ++i) + { + if (mode_info[i].flags & NO_SETATTR) + continue; + + if (mode_info[i].flags & SANE_SET) + { + bitsp = mode_type_flag (mode_info[i].type, mode); + assume (bitsp); /* combination modes will not have SANE_SET. */ + *bitsp = (*bitsp & ~mode_info[i].mask) | mode_info[i].bits; + } + else if (mode_info[i].flags & SANE_UNSET) + { + bitsp = mode_type_flag (mode_info[i].type, mode); + assume (bitsp); /* combination modes will not have SANE_UNSET. */ + *bitsp = *bitsp & ~mode_info[i].mask & ~mode_info[i].bits; + } + } +} + +/* Return a string that is the printable representation of character CH. */ +/* Adapted from 'cat' by Torbjörn Granlund. */ + +static char const * +visible (cc_t ch) +{ + static char buf[10]; + char *bpout = buf; + + if (ch == _POSIX_VDISABLE) + return ""; + + if (ch >= 32) + { + if (ch < 127) + *bpout++ = ch; + else if (ch == 127) + { + *bpout++ = '^'; + *bpout++ = '?'; + } + else + { + *bpout++ = 'M'; + *bpout++ = '-'; + if (ch >= 128 + 32) + { + if (ch < 128 + 127) + *bpout++ = ch - 128; + else + { + *bpout++ = '^'; + *bpout++ = '?'; + } + } + else + { + *bpout++ = '^'; + *bpout++ = ch - 128 + 64; + } + } + } + else + { + *bpout++ = '^'; + *bpout++ = ch + 64; + } + *bpout = '\0'; + return (char const *) buf; +} + +/* Parse string S as an integer, using decimal radix by default, + but allowing octal and hex numbers as in C. Reject values + larger than MAXVAL. */ + +static unsigned long int +integer_arg (char const *s, unsigned long int maxval) +{ + return xnumtoumax (s, 0, 0, maxval, "bB", _("invalid integer argument"), 0); +} diff --git a/src/sum.c b/src/sum.c new file mode 100644 index 0000000..36464ca --- /dev/null +++ b/src/sum.c @@ -0,0 +1,237 @@ +/* sum -- checksum and count the blocks in a file + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Like BSD sum or SysV sum -r, except like SysV sum if -s option is given. */ + +/* Written by Kayvan Aghaiepour and David MacKenzie. */ + +#include + +#include +#include +#include "system.h" +#include "human.h" +#include "sum.h" + +#include +#ifdef WORDS_BIGENDIAN +# define SWAP(n) (n) +#else +# define SWAP(n) bswap_16 (n) +#endif + +/* Calculate the checksum and the size in bytes of stream STREAM. + Return -1 on error, 0 on success. */ + +int +bsd_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + int ret = -1; + size_t sum, n; + int checksum = 0; /* The checksum mod 2^16. */ + uintmax_t total_bytes = 0; /* The number of bytes. */ + static const size_t buffer_length = 32768; + uint8_t *buffer = malloc (buffer_length); + + if (! buffer) + return -1; + + /* Process file */ + while (true) + { + sum = 0; + + /* Read block */ + while (true) + { + n = fread (buffer + sum, 1, buffer_length - sum, stream); + sum += n; + + if (buffer_length == sum) + break; + + if (n == 0) + { + if (ferror (stream)) + goto cleanup_buffer; + goto final_process; + } + + if (feof (stream)) + goto final_process; + } + + for (size_t i = 0; i < sum; i++) + { + checksum = (checksum >> 1) + ((checksum & 1) << 15); + checksum += buffer[i]; + checksum &= 0xffff; /* Keep it within bounds. */ + } + if (total_bytes + sum < total_bytes) + { + errno = EOVERFLOW; + goto cleanup_buffer; + } + total_bytes += sum; + } + +final_process:; + + for (size_t i = 0; i < sum; i++) + { + checksum = (checksum >> 1) + ((checksum & 1) << 15); + checksum += buffer[i]; + checksum &= 0xffff; /* Keep it within bounds. */ + } + if (total_bytes + sum < total_bytes) + { + errno = EOVERFLOW; + goto cleanup_buffer; + } + total_bytes += sum; + + memcpy (resstream, &checksum, sizeof checksum); + *length = total_bytes; + ret = 0; +cleanup_buffer: + free (buffer); + return ret; +} + +/* Calculate the checksum and the size in bytes of stream STREAM. + Return -1 on error, 0 on success. */ + +int +sysv_sum_stream (FILE *stream, void *resstream, uintmax_t *length) +{ + int ret = -1; + size_t sum, n; + uintmax_t total_bytes = 0; + static const size_t buffer_length = 32768; + uint8_t *buffer = malloc (buffer_length); + + if (! buffer) + return -1; + + /* The sum of all the input bytes, modulo (UINT_MAX + 1). */ + unsigned int s = 0; + + /* Process file */ + while (true) + { + sum = 0; + + /* Read block */ + while (true) + { + n = fread (buffer + sum, 1, buffer_length - sum, stream); + sum += n; + + if (buffer_length == sum) + break; + + if (n == 0) + { + if (ferror (stream)) + goto cleanup_buffer; + goto final_process; + } + + if (feof (stream)) + goto final_process; + } + + for (size_t i = 0; i < sum; i++) + s += buffer[i]; + if (total_bytes + sum < total_bytes) + { + errno = EOVERFLOW; + goto cleanup_buffer; + } + total_bytes += sum; + } + +final_process:; + + for (size_t i = 0; i < sum; i++) + s += buffer[i]; + if (total_bytes + sum < total_bytes) + { + errno = EOVERFLOW; + goto cleanup_buffer; + } + total_bytes += sum; + + int r = (s & 0xffff) + ((s & 0xffffffff) >> 16); + int checksum = (r & 0xffff) + (r >> 16); + + memcpy (resstream, &checksum, sizeof checksum); + *length = total_bytes; + ret = 0; +cleanup_buffer: + free (buffer); + return ret; +} + +/* Print the checksum and size (in 1024 byte blocks) to stdout. + If ARGS is true, also print the FILE name. */ + +void +output_bsd (char const *file, int binary_file, void const *digest, + bool raw, bool tagged, unsigned char delim, bool args, + uintmax_t length) +{ + if (raw) + { + /* Output in network byte order (big endian). */ + uint16_t out_int = *(int *)digest; + out_int = SWAP (out_int); + fwrite (&out_int, 1, 16/8, stdout); + return; + } + + char hbuf[LONGEST_HUMAN_READABLE + 1]; + printf ("%05d %5s", *(int *)digest, + human_readable (length, hbuf, human_ceiling, 1, 1024)); + if (args) + printf (" %s", file); + putchar (delim); +} + +/* Print the checksum and size (in 512 byte blocks) to stdout. + If ARGS is true, also print the FILE name. */ + +void +output_sysv (char const *file, int binary_file, void const *digest, + bool raw, bool tagged, unsigned char delim, bool args, + uintmax_t length) +{ + if (raw) + { + /* Output in network byte order (big endian). */ + uint16_t out_int = *(int *)digest; + out_int = SWAP (out_int); + fwrite (&out_int, 1, 16/8, stdout); + return; + } + + char hbuf[LONGEST_HUMAN_READABLE + 1]; + printf ("%d %s", *(int *)digest, + human_readable (length, hbuf, human_ceiling, 1, 512)); + if (args) + printf (" %s", file); + putchar (delim); +} diff --git a/src/sum.h b/src/sum.h new file mode 100644 index 0000000..bd251a0 --- /dev/null +++ b/src/sum.h @@ -0,0 +1,18 @@ +extern int +bsd_sum_stream (FILE *stream, void *resstream, uintmax_t *length); + +extern int +sysv_sum_stream (FILE *stream, void *resstream, uintmax_t *length); + +typedef int (*sumfn)(FILE *, void *, uintmax_t *); + + +extern void +output_bsd (char const *file, int binary_file, void const *digest, + bool raw, bool tagged, unsigned char delim, bool args, + uintmax_t length); + +extern void +output_sysv (char const *file, int binary_file, void const *digest, + bool raw, bool tagged, unsigned char delim, bool args, + uintmax_t length); diff --git a/src/sync.c b/src/sync.c new file mode 100644 index 0000000..22337bc --- /dev/null +++ b/src/sync.c @@ -0,0 +1,234 @@ +/* sync - update the super block + Copyright (C) 1994-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering */ + +#include +#include +#include +#include + +#include "system.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "sync" + +#define AUTHORS \ + proper_name ("Jim Meyering"), \ + proper_name ("Giuseppe Scrivano") + +#ifndef HAVE_SYNCFS +# define HAVE_SYNCFS 0 +#endif + +enum sync_mode +{ + MODE_FILE, + MODE_DATA, + MODE_FILE_SYSTEM, + MODE_SYNC +}; + +static struct option const long_options[] = +{ + {"data", no_argument, nullptr, 'd'}, + {"file-system", no_argument, nullptr, 'f'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION] [FILE]...\n"), program_name); + fputs (_("\ +Synchronize cached writes to persistent storage\n\ +\n\ +If one or more files are specified, sync only them,\n\ +or their containing file systems.\n\ +\n\ +"), stdout); + + fputs (_("\ + -d, --data sync only file data, no unneeded metadata\n\ +"), stdout); + fputs (_("\ + -f, --file-system sync the file systems that contain the files\n\ +"), stdout); + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Sync the specified FILE, or file systems associated with FILE. + Return 1 on success. */ + +static bool +sync_arg (enum sync_mode mode, char const *file) +{ + bool ret = true; + int open_flags = O_RDONLY | O_NONBLOCK; + int fd; + +#if defined _AIX || defined __CYGWIN__ + /* AIX 7.1, CYGWIN 2.9.0, fsync requires write access to file. */ + if (mode == MODE_FILE) + open_flags = O_WRONLY | O_NONBLOCK; +#endif + + /* Note O_PATH might be supported with syncfs(), + though as of Linux 3.18 is not. */ + fd = open (file, open_flags); + if (fd < 0) + { + /* Use the O_RDONLY errno, which is significant + with directories for example. */ + int rd_errno = errno; + if (open_flags != (O_WRONLY | O_NONBLOCK)) + fd = open (file, O_WRONLY | O_NONBLOCK); + if (fd < 0) + { + error (0, rd_errno, _("error opening %s"), quoteaf (file)); + return false; + } + } + + /* We used O_NONBLOCK above to not hang with fifos, + so reset that here. */ + int fdflags = fcntl (fd, F_GETFL); + if (fdflags == -1 + || fcntl (fd, F_SETFL, fdflags & ~O_NONBLOCK) < 0) + { + error (0, errno, _("couldn't reset non-blocking mode %s"), + quoteaf (file)); + ret = false; + } + + if (ret == true) + { + int sync_status = -1; + + switch (mode) + { + case MODE_DATA: + sync_status = fdatasync (fd); + break; + + case MODE_FILE: + sync_status = fsync (fd); + break; + +#if HAVE_SYNCFS + case MODE_FILE_SYSTEM: + sync_status = syncfs (fd); + break; +#endif + + default: + unreachable (); + } + + if (sync_status < 0) + { + error (0, errno, _("error syncing %s"), quoteaf (file)); + ret = false; + } + } + + if (close (fd) < 0) + { + error (0, errno, _("failed to close %s"), quoteaf (file)); + ret = false; + } + + return ret; +} + +int +main (int argc, char **argv) +{ + int c; + bool args_specified; + bool arg_data = false, arg_file_system = false; + enum sync_mode mode; + bool ok = true; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "df", long_options, nullptr)) + != -1) + { + switch (c) + { + case 'd': + arg_data = true; + break; + + case 'f': + arg_file_system = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + args_specified = optind < argc; + + if (arg_data && arg_file_system) + error (EXIT_FAILURE, 0, + _("cannot specify both --data and --file-system")); + + if (!args_specified && arg_data) + error (EXIT_FAILURE, 0, _("--data needs at least one argument")); + + if (! args_specified || (arg_file_system && ! HAVE_SYNCFS)) + mode = MODE_SYNC; + else if (arg_file_system) + mode = MODE_FILE_SYSTEM; + else if (! arg_data) + mode = MODE_FILE; + else + mode = MODE_DATA; + + if (mode == MODE_SYNC) + sync (); + else + { + for (; optind < argc; optind++) + ok &= sync_arg (mode, argv[optind]); + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/system.h b/src/system.h new file mode 100644 index 0000000..21b1583 --- /dev/null +++ b/src/system.h @@ -0,0 +1,823 @@ +/* system-dependent definitions for coreutils + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Include this file _after_ system headers if possible. */ + +#include + +#include + +#include + +/* Commonly used file permission combination. */ +#define MODE_RW_UGO (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) + +#if HAVE_SYS_PARAM_H +# include +#endif + +#include + +#include + +#include "pathmax.h" +#ifndef PATH_MAX +# define PATH_MAX 8192 +#endif + +#include "configmake.h" + +#include +#include + +/* Since major is a function on SVR4, we can't use 'ifndef major'. */ +#if MAJOR_IN_MKDEV +# include +# define HAVE_MAJOR +#endif +#if MAJOR_IN_SYSMACROS +# include +# define HAVE_MAJOR +#endif +#ifdef major /* Might be defined in sys/types.h. */ +# define HAVE_MAJOR +#endif + +#ifndef HAVE_MAJOR +# define major(dev) (((dev) >> 8) & 0xff) +# define minor(dev) ((dev) & 0xff) +# define makedev(maj, min) (((maj) << 8) | (min)) +#endif +#undef HAVE_MAJOR + +#if ! defined makedev && defined mkdev +# define makedev(maj, min) mkdev (maj, min) +#endif + +#include +#include +#include + +/* Some systems don't define this; POSIX mentions it but says it is + obsolete. gnulib defines it, but only on native Windows systems, + and there only because MSVC 10 does. */ +#ifndef ENODATA +# define ENODATA (-1) +#endif + +#include +#include "version.h" + +/* Exit statuses for programs like 'env' that exec other programs. */ +enum +{ + EXIT_TIMEDOUT = 124, /* Time expired before child completed. */ + EXIT_CANCELED = 125, /* Internal error prior to exec attempt. */ + EXIT_CANNOT_INVOKE = 126, /* Program located, but not usable. */ + EXIT_ENOENT = 127 /* Could not find program to exec. */ +}; + +#include "exitfail.h" + +/* Set exit_failure to STATUS if that's not the default already. */ +static inline void +initialize_exit_failure (int status) +{ + if (status != EXIT_FAILURE) + exit_failure = status; +} + +#include +#ifdef O_PATH +enum { O_PATHSEARCH = O_PATH }; +#else +enum { O_PATHSEARCH = O_SEARCH }; +#endif + +#include +#ifndef _D_EXACT_NAMLEN +# define _D_EXACT_NAMLEN(dp) strlen ((dp)->d_name) +#endif + +enum +{ + NOT_AN_INODE_NUMBER = 0 +}; + +#ifdef D_INO_IN_DIRENT +# define D_INO(dp) (dp)->d_ino +#else +/* Some systems don't have inodes, so fake them to avoid lots of ifdefs. */ +# define D_INO(dp) NOT_AN_INODE_NUMBER +#endif + +/* include here for SIZE_MAX. */ +#include + +/* Redirection and wildcarding when done by the utility itself. + Generally a noop, but used in particular for OS/2. */ +#ifndef initialize_main +# ifndef __OS2__ +# define initialize_main(ac, av) +# else +# define initialize_main(ac, av) \ + do { _wildcard (ac, av); _response (ac, av); } while (0) +# endif +#endif + +#include "stat-macros.h" + +#include "timespec.h" + +#include + +/* ISDIGIT differs from isdigit, as follows: + - Its arg may be any int or unsigned int; it need not be an unsigned char + or EOF. + - It's typically faster. + POSIX says that only '0' through '9' are digits. Prefer ISDIGIT to + isdigit unless it's important to use the locale's definition + of 'digit' even when the host does not conform to POSIX. */ +#define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9) + +/* Convert a possibly-signed character to an unsigned character. This is + a bit safer than casting to unsigned char, since it catches some type + errors that the cast doesn't. */ +static inline unsigned char to_uchar (char ch) { return ch; } + +/* '\n' is considered a field separator with --zero-terminated. */ +static inline bool +field_sep (unsigned char ch) +{ + return isblank (ch) || ch == '\n'; +} + +#include + +/* Take care of NLS matters. */ + +#include "gettext.h" +#if ! ENABLE_NLS +# undef textdomain +# define textdomain(Domainname) /* empty */ +# undef bindtextdomain +# define bindtextdomain(Domainname, Dirname) /* empty */ +#endif + +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +/* Return a value that pluralizes the same way that N does, in all + languages we know of. */ +static inline unsigned long int +select_plural (uintmax_t n) +{ + /* Reduce by a power of ten, but keep it away from zero. The + gettext manual says 1000000 should be safe. */ + enum { PLURAL_REDUCER = 1000000 }; + return (n <= ULONG_MAX ? n : n % PLURAL_REDUCER + PLURAL_REDUCER); +} + +#define STREQ(a, b) (strcmp (a, b) == 0) +#define STREQ_LEN(a, b, n) (strncmp (a, b, n) == 0) /* n==-1 means unbounded */ +#define STRPREFIX(a, b) (strncmp (a, b, strlen (b)) == 0) + +/* Just like strncmp, but the second argument must be a literal string + and you don't specify the length; that comes from the literal. */ +#define STRNCMP_LIT(s, lit) strncmp (s, "" lit "", sizeof (lit) - 1) + +#if !HAVE_DECL_GETLOGIN +char *getlogin (void); +#endif + +#if !HAVE_DECL_TTYNAME +char *ttyname (int); +#endif + +#if !HAVE_DECL_GETEUID +uid_t geteuid (void); +#endif + +#if !HAVE_DECL_GETPWUID +struct passwd *getpwuid (uid_t); +#endif + +#if !HAVE_DECL_GETGRGID +struct group *getgrgid (gid_t); +#endif + +/* Interix has replacements for getgr{gid,nam,ent}, that don't + query the domain controller for group members when not required. + This speeds up the calls tremendously (<1 ms vs. >3 s). */ +/* To protect any system that could provide _nomembers functions + other than interix, check for HAVE_SETGROUPS, as interix is + one of the very few (the only?) platform that lacks it */ +#if ! HAVE_SETGROUPS +# if HAVE_GETGRGID_NOMEMBERS +# define getgrgid(gid) getgrgid_nomembers(gid) +# endif +# if HAVE_GETGRNAM_NOMEMBERS +# define getgrnam(nam) getgrnam_nomembers(nam) +# endif +# if HAVE_GETGRENT_NOMEMBERS +# define getgrent() getgrent_nomembers() +# endif +#endif + +#if !HAVE_DECL_GETUID +uid_t getuid (void); +#endif + +#include "idx.h" +#include "xalloc.h" +#include "verify.h" + +/* This is simply a shorthand for the common case in which + the third argument to x2nrealloc would be 'sizeof *(P)'. + Ensure that sizeof *(P) is *not* 1. In that case, it'd be + better to use X2REALLOC, although not strictly necessary. */ +#define X2NREALLOC(P, PN) verify_expr (sizeof *(P) != 1, \ + x2nrealloc (P, PN, sizeof *(P))) + +/* Using x2realloc (when appropriate) usually makes your code more + readable than using x2nrealloc, but it also makes it so your + code will malfunction if sizeof *(P) ever becomes 2 or greater. + So use this macro instead of using x2realloc directly. */ +#define X2REALLOC(P, PN) verify_expr (sizeof *(P) == 1, \ + x2realloc (P, PN)) + +#include "unlocked-io.h" +#include "same-inode.h" + +#include "dirname.h" +#include "openat.h" + +static inline bool +dot_or_dotdot (char const *file_name) +{ + if (file_name[0] == '.') + { + char sep = file_name[(file_name[1] == '.') + 1]; + return (! sep || ISSLASH (sep)); + } + else + return false; +} + +/* A wrapper for readdir so that callers don't see entries for '.' or '..'. */ +static inline struct dirent const * +readdir_ignoring_dot_and_dotdot (DIR *dirp) +{ + while (true) + { + struct dirent const *dp = readdir (dirp); + if (dp == nullptr || ! dot_or_dotdot (dp->d_name)) + return dp; + } +} + +/* Return -1 if DIR is an empty directory, + 0 if DIR is a nonempty directory, + and a positive error number if there was trouble determining + whether DIR is an empty or nonempty directory. */ +enum { + DS_UNKNOWN = -2, + DS_EMPTY = -1, + DS_NONEMPTY = 0, +}; +static inline int +directory_status (int fd_cwd, char const *dir) +{ + DIR *dirp; + bool no_direntries; + int saved_errno; + int fd = openat (fd_cwd, dir, + (O_RDONLY | O_DIRECTORY + | O_NOCTTY | O_NOFOLLOW | O_NONBLOCK)); + + if (fd < 0) + return errno; + + dirp = fdopendir (fd); + if (dirp == nullptr) + { + saved_errno = errno; + close (fd); + return saved_errno; + } + + errno = 0; + no_direntries = !readdir_ignoring_dot_and_dotdot (dirp); + saved_errno = errno; + closedir (dirp); + return no_direntries && saved_errno == 0 ? DS_EMPTY : saved_errno; +} + +/* Factor out some of the common --help and --version processing code. */ + +/* These enum values cannot possibly conflict with the option values + ordinarily used by commands, including CHAR_MAX + 1, etc. Avoid + CHAR_MIN - 1, as it may equal -1, the getopt end-of-options value. */ +enum +{ + GETOPT_HELP_CHAR = (CHAR_MIN - 2), + GETOPT_VERSION_CHAR = (CHAR_MIN - 3) +}; + +#define GETOPT_HELP_OPTION_DECL \ + "help", no_argument, nullptr, GETOPT_HELP_CHAR +#define GETOPT_VERSION_OPTION_DECL \ + "version", no_argument, nullptr, GETOPT_VERSION_CHAR +#define GETOPT_SELINUX_CONTEXT_OPTION_DECL \ + "context", optional_argument, nullptr, 'Z' + +#define case_GETOPT_HELP_CHAR \ + case GETOPT_HELP_CHAR: \ + usage (EXIT_SUCCESS); \ + break; + +/* Program_name must be a literal string. + Usually it is just PROGRAM_NAME. */ +#define USAGE_BUILTIN_WARNING \ + _("\n" \ +"NOTE: your shell may have its own version of %s, which usually supersedes\n" \ +"the version described here. Please refer to your shell's documentation\n" \ +"for details about the options it supports.\n") + +#define HELP_OPTION_DESCRIPTION \ + _(" --help display this help and exit\n") +#define VERSION_OPTION_DESCRIPTION \ + _(" --version output version information and exit\n") + +#include "closein.h" +#include "closeout.h" + +#define emit_bug_reporting_address unused__emit_bug_reporting_address +#include "version-etc.h" +#undef emit_bug_reporting_address + +#include "propername.h" +/* Define away proper_name, since it's not worth the cost of adding ~17KB to + the x86_64 text size of every single program. This avoids a 40% + (almost ~2MB) increase in the file system space utilization for the set + of the 100 binaries. */ +#define proper_name(x) proper_name_lite (x, x) + +#include "progname.h" + +#define case_GETOPT_VERSION_CHAR(Program_name, Authors) \ + case GETOPT_VERSION_CHAR: \ + version_etc (stdout, Program_name, PACKAGE_NAME, Version, Authors, \ + (char *) nullptr); \ + exit (EXIT_SUCCESS); \ + break; + +#include "minmax.h" +#include "intprops.h" + +#ifndef SSIZE_MAX +# define SSIZE_MAX TYPE_MAXIMUM (ssize_t) +#endif + +#ifndef OFF_T_MIN +# define OFF_T_MIN TYPE_MINIMUM (off_t) +#endif + +#ifndef OFF_T_MAX +# define OFF_T_MAX TYPE_MAXIMUM (off_t) +#endif + +#ifndef UID_T_MAX +# define UID_T_MAX TYPE_MAXIMUM (uid_t) +#endif + +#ifndef GID_T_MAX +# define GID_T_MAX TYPE_MAXIMUM (gid_t) +#endif + +#ifndef PID_T_MAX +# define PID_T_MAX TYPE_MAXIMUM (pid_t) +#endif + +/* Use this to suppress gcc warnings. */ +#ifdef lint +# define IF_LINT(Code) Code +#else +# define IF_LINT(Code) /* empty */ +#endif + +/* main_exit should be called only from the main function. It is + equivalent to 'exit'. When checking for lint it calls 'exit', to + pacify gcc -fsanitize=lint which would otherwise have false alarms + for pointers in the main function's activation record. Otherwise + it simply returns from 'main'; this used to be what gcc's static + checking preferred and may yet be again. */ +#ifdef lint +# define main_exit(status) exit (status) +#else +# define main_exit(status) return status +#endif + +#ifdef __GNUC__ +# define LIKELY(cond) __builtin_expect ((cond), 1) +# define UNLIKELY(cond) __builtin_expect ((cond), 0) +#else +# define LIKELY(cond) (cond) +# define UNLIKELY(cond) (cond) +#endif + + +#if defined strdupa +# define ASSIGN_STRDUPA(DEST, S) \ + do { DEST = strdupa (S); } while (0) +#else +# define ASSIGN_STRDUPA(DEST, S) \ + do \ + { \ + char const *s_ = (S); \ + size_t len_ = strlen (s_) + 1; \ + char *tmp_dest_ = alloca (len_); \ + DEST = memcpy (tmp_dest_, s_, len_); \ + } \ + while (0) +#endif + +#if ! HAVE_SYNC +# define sync() /* empty */ +#endif + +/* Compute the greatest common divisor of U and V using Euclid's + algorithm. U and V must be nonzero. */ + +ATTRIBUTE_CONST +static inline size_t +gcd (size_t u, size_t v) +{ + do + { + size_t t = u % v; + u = v; + v = t; + } + while (v); + + return u; +} + +/* Compute the least common multiple of U and V. U and V must be + nonzero. There is no overflow checking, so callers should not + specify outlandish sizes. */ + +ATTRIBUTE_CONST +static inline size_t +lcm (size_t u, size_t v) +{ + return u * (v / gcd (u, v)); +} + +/* Return PTR, aligned upward to the next multiple of ALIGNMENT. + ALIGNMENT must be nonzero. The caller must arrange for ((char *) + PTR) through ((char *) PTR + ALIGNMENT - 1) to be addressable + locations. */ + +static inline void * +ptr_align (void const *ptr, size_t alignment) +{ + char const *p0 = ptr; + char const *p1 = p0 + alignment - 1; + return (void *) (p1 - (size_t) p1 % alignment); +} + +/* Return whether the buffer consists entirely of NULs. + Based on memeqzero in CCAN by Rusty Russell under CC0 (Public domain). */ + +ATTRIBUTE_PURE +static inline bool +is_nul (void const *buf, size_t length) +{ + const unsigned char *p = buf; +/* Using possibly unaligned access for the first 16 bytes + saves about 30-40 cycles, though it is strictly undefined behavior + and so would need __attribute__ ((__no_sanitize_undefined__)) + to avoid -fsanitize=undefined warnings. + Considering coreutils is mainly concerned with relatively + large buffers, we'll just use the defined behavior. */ +#if 0 && (_STRING_ARCH_unaligned || _STRING_INLINE_unaligned) + unsigned long word; +#else + unsigned char word; +#endif + + if (! length) + return true; + + /* Check len bytes not aligned on a word. */ + while (UNLIKELY (length & (sizeof word - 1))) + { + if (*p) + return false; + p++; + length--; + if (! length) + return true; + } + + /* Check up to 16 bytes a word at a time. */ + for (;;) + { + memcpy (&word, p, sizeof word); + if (word) + return false; + p += sizeof word; + length -= sizeof word; + if (! length) + return true; + if (UNLIKELY (length & 15) == 0) + break; + } + + /* Now we know first 16 bytes are NUL, memcmp with self. */ + return memcmp (buf, p, length) == 0; +} + +/* If 10*Accum + Digit_val is larger than the maximum value for Type, + then don't update Accum and return false to indicate it would + overflow. Otherwise, set Accum to that new value and return true. + Verify at compile-time that Type is Accum's type, and that Type is + unsigned. Accum must be an object, so that we can take its + address. Accum and Digit_val may be evaluated multiple times. + + The "Added check" below is not strictly required, but it causes GCC + to return a nonzero exit status instead of merely a warning + diagnostic, and that is more useful. */ + +#define DECIMAL_DIGIT_ACCUMULATE(Accum, Digit_val, Type) \ + ( \ + (void) (&(Accum) == (Type *) nullptr), /* The type matches. */ \ + verify_expr (! TYPE_SIGNED (Type), /* The type is unsigned. */ \ + (((Type) -1 / 10 < (Accum) \ + || (Type) ((Accum) * 10 + (Digit_val)) < (Accum)) \ + ? false \ + : (((Accum) = (Accum) * 10 + (Digit_val)), true))) \ + ) + +static inline void +emit_stdin_note (void) +{ + fputs (_("\n\ +With no FILE, or when FILE is -, read standard input.\n\ +"), stdout); +} +static inline void +emit_mandatory_arg_note (void) +{ + fputs (_("\n\ +Mandatory arguments to long options are mandatory for short options too.\n\ +"), stdout); +} + +static inline void +emit_size_note (void) +{ + fputs (_("\n\ +The SIZE argument is an integer and optional unit (example: 10K is 10*1024).\n\ +Units are K,M,G,T,P,E,Z,Y,R,Q (powers of 1024) or KB,MB,... (powers of 1000).\n\ +Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\ +"), stdout); +} + +static inline void +emit_blocksize_note (char const *program) +{ + printf (_("\n\ +Display values are in units of the first available SIZE from --block-size,\n\ +and the %s_BLOCK_SIZE, BLOCK_SIZE and BLOCKSIZE environment variables.\n\ +Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set).\n\ +"), program); +} + +static inline void +emit_update_parameters_note (void) +{ + fputs (_("\ +\n\ +UPDATE controls which existing files in the destination are replaced.\n\ +'all' is the default operation when an --update option is not specified,\n\ +and results in all existing files in the destination being replaced.\n\ +'none' is similar to the --no-clobber option, in that no files in the\n\ +destination are replaced, but also skipped files do not induce a failure.\n\ +'older' is the default operation when --update is specified, and results\n\ +in files being replaced if they're older than the corresponding source file.\n\ +"), stdout); +} + +static inline void +emit_backup_suffix_note (void) +{ + fputs (_("\ +\n\ +The backup suffix is '~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX.\n\ +The version control method may be selected via the --backup option or through\n\ +the VERSION_CONTROL environment variable. Here are the values:\n\ +\n\ +"), stdout); + fputs (_("\ + none, off never make backups (even if --backup is given)\n\ + numbered, t make numbered backups\n\ + existing, nil numbered if numbered backups exist, simple otherwise\n\ + simple, never always make simple backups\n\ +"), stdout); +} + +static inline void +emit_exec_status (char const *program) +{ + printf (_("\n\ +Exit status:\n\ + 125 if the %s command itself fails\n\ + 126 if COMMAND is found but cannot be invoked\n\ + 127 if COMMAND cannot be found\n\ + - the exit status of COMMAND otherwise\n\ +"), program); +} + +static inline void +emit_ancillary_info (char const *program) +{ + struct infomap { char const *program; char const *node; } const infomap[] = { + { "[", "test invocation" }, + { "coreutils", "Multi-call invocation" }, + { "sha224sum", "sha2 utilities" }, + { "sha256sum", "sha2 utilities" }, + { "sha384sum", "sha2 utilities" }, + { "sha512sum", "sha2 utilities" }, + { nullptr, nullptr } + }; + + char const *node = program; + struct infomap const *map_prog = infomap; + + while (map_prog->program && ! STREQ (program, map_prog->program)) + map_prog++; + + if (map_prog->node) + node = map_prog->node; + + printf (_("\n%s online help: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); + + /* Don't output this redundant message for English locales. + Note we still output for 'C' so that it gets included in the man page. */ + char const *lc_messages = setlocale (LC_MESSAGES, nullptr); + if (lc_messages && STRNCMP_LIT (lc_messages, "en_")) + { + /* TRANSLATORS: Replace LANG_CODE in this URL with your language code + to form one of + the URLs at https://translationproject.org/team/. Otherwise, replace + the entire URL with your translation team's email address. */ + fputs (_("Report any translation bugs to " + "\n"), stdout); + } + /* .htaccess on the coreutils web site maps programs to the appropriate page, + however we explicitly handle "[" -> "test" here as the "[" is not + recognized as part of a URL by default in terminals. */ + char const *url_program = STREQ (program, "[") ? "test" : program; + printf (_("Full documentation <%s%s>\n"), + PACKAGE_URL, url_program); + printf (_("or available locally via: info '(coreutils) %s%s'\n"), + node, node == program ? " invocation" : ""); +} + +/* Use a macro rather than an inline function, as this references + the global program_name, which causes dynamic linking issues + in libstdbuf.so on some systems where unused functions + are not removed by the linker. */ +#define emit_try_help() \ + do \ + { \ + fprintf (stderr, _("Try '%s --help' for more information.\n"), \ + program_name); \ + } \ + while (0) + +#include "inttostr.h" + +static inline char * +timetostr (time_t t, char *buf) +{ + return (TYPE_SIGNED (time_t) + ? imaxtostr (t, buf) + : umaxtostr (t, buf)); +} + +static inline char * +bad_cast (char const *s) +{ + return (char *) s; +} + +/* Return a boolean indicating whether SB->st_size is defined. */ +static inline bool +usable_st_size (struct stat const *sb) +{ + return (S_ISREG (sb->st_mode) || S_ISLNK (sb->st_mode) + || S_TYPEISSHM (sb) || S_TYPEISTMO (sb)); +} + +_Noreturn void usage (int status); + +#include "error.h" + +/* Like error(0, 0, ...), but without an implicit newline. + Also a noop unless the global DEV_DEBUG is set. */ +#define devmsg(...) \ + do \ + { \ + if (dev_debug) \ + fprintf (stderr, __VA_ARGS__); \ + } \ + while (0) + +#define emit_cycle_warning(file_name) \ + do \ + { \ + error (0, 0, _("\ +WARNING: Circular directory structure.\n\ +This almost certainly means that you have a corrupted file system.\n\ +NOTIFY YOUR SYSTEM MANAGER.\n\ +The following directory is part of the cycle:\n %s\n"), \ + quotef (file_name)); \ + } \ + while (0) + +/* exit with a _single_ "write error" diagnostic. */ + +static inline void +write_error (void) +{ + int saved_errno = errno; + fflush (stdout); /* Last attempt to write any buffered data. */ + fpurge (stdout); /* Ensure nothing buffered that might induce an error. */ + clearerr (stdout); /* Avoid extraneous diagnostic from close_stdout. */ + error (EXIT_FAILURE, saved_errno, _("write error")); +} + +/* Like stpncpy, but do ensure that the result is NUL-terminated, + and do not NUL-pad out to LEN. I.e., when strnlen (src, len) == len, + this function writes a NUL byte into dest[len]. Thus, the length + of the destination buffer must be at least LEN + 1. + The DEST and SRC buffers must not overlap. */ +static inline char * +stzncpy (char *restrict dest, char const *restrict src, size_t len) +{ + size_t i; + for (i = 0; i < len && *src; i++) + *dest++ = *src++; + *dest = 0; + return dest; +} + +#ifndef ARRAY_CARDINALITY +# define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array)) +#endif + +/* Return true if ERR is ENOTSUP or EOPNOTSUPP, otherwise false. + This wrapper function avoids the redundant 'or'd comparison on + systems like Linux for which they have the same value. It also + avoids the gcc warning to that effect. */ +static inline bool +is_ENOTSUP (int err) +{ + return err == EOPNOTSUPP || (ENOTSUP != EOPNOTSUPP && err == ENOTSUP); +} + + +/* How coreutils quotes filenames, to minimize use of outer quotes, + but also provide better support for copy and paste when used. */ +#include "quotearg.h" + +/* Use these to shell quote only when necessary, + when the quoted item is already delimited with colons. */ +#define quotef(arg) \ + quotearg_n_style_colon (0, shell_escape_quoting_style, arg) +#define quotef_n(n, arg) \ + quotearg_n_style_colon (n, shell_escape_quoting_style, arg) + +/* Use these when there are spaces around the file name, + in the error message. */ +#define quoteaf(arg) \ + quotearg_style (shell_escape_always_quoting_style, arg) +#define quoteaf_n(n, arg) \ + quotearg_n_style (n, shell_escape_always_quoting_style, arg) diff --git a/src/tac-pipe.c b/src/tac-pipe.c new file mode 100644 index 0000000..decf666 --- /dev/null +++ b/src/tac-pipe.c @@ -0,0 +1,260 @@ +/* tac from a pipe. + + Copyright (C) 1997-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* FIXME */ +#include "assure.h" + +/* FIXME: this is small for testing */ +#define BUFFER_SIZE (8) + +#define LEN(X, I) ((X)->p[(I)].one_past_end - (X)->p[(I)].start) +#define EMPTY(X) ((X)->n_bufs == 1 && LEN (X, 0) == 0) + +#define ONE_PAST_END(X, I) ((X)->p[(I)].one_past_end) + +struct Line_ptr +{ + size_t i; + char *ptr; +}; +typedef struct Line_ptr Line_ptr; + +struct B_pair +{ + char *start; + char *one_past_end; +}; + +struct Buf +{ + size_t n_bufs; + struct obstack obs; + struct B_pair *p; +}; +typedef struct Buf Buf; + +static bool +buf_init_from_stdin (Buf *x, char eol_byte) +{ + bool last_byte_is_eol_byte = true; + bool ok = true; + +#define OBS (&(x->obs)) + obstack_init (OBS); + + while (true) + { + char *buf = (char *) malloc (BUFFER_SIZE); + size_t bytes_read; + + if (buf == nullptr) + { + /* Fall back on the code that relies on a temporary file. + Write all buffers to that file and free them. */ + /* FIXME */ + ok = false; + break; + } + bytes_read = full_read (STDIN_FILENO, buf, BUFFER_SIZE); + if (bytes_read != buffer_size && errno != 0) + error (EXIT_FAILURE, errno, _("read error")); + + { + struct B_pair bp; + bp.start = buf; + bp.one_past_end = buf + bytes_read; + obstack_grow (OBS, &bp, sizeof (bp)); + } + + if (bytes_read != 0) + last_byte_is_eol_byte = (buf[bytes_read - 1] == eol_byte); + + if (bytes_read < BUFFER_SIZE) + break; + } + + if (ok) + { + /* If the file was non-empty and lacked an EOL_BYTE at its end, + then add a buffer containing just that one byte. */ + if (!last_byte_is_eol_byte) + { + char *buf = malloc (1); + if (buf == nullptr) + { + /* FIXME: just like above */ + ok = false; + } + else + { + struct B_pair bp; + *buf = eol_byte; + bp.start = buf; + bp.one_past_end = buf + 1; + obstack_grow (OBS, &bp, sizeof (bp)); + } + } + } + + x->n_bufs = obstack_object_size (OBS) / sizeof (x->p[0]); + x->p = (struct B_pair *) obstack_finish (OBS); + + /* If there are two or more buffers and the last buffer is empty, + then free the last one and decrement the buffer count. */ + if (x->n_bufs >= 2 + && x->p[x->n_bufs - 1].start == x->p[x->n_bufs - 1].one_past_end) + free (x->p[--(x->n_bufs)].start); + + return ok; +} + +static void +buf_free (Buf *x) +{ + for (size_t i = 0; i < x->n_bufs; i++) + free (x->p[i].start); + obstack_free (OBS, nullptr); +} + +Line_ptr +line_ptr_decrement (const Buf *x, const Line_ptr *lp) +{ + Line_ptr lp_new; + + if (lp->ptr > x->p[lp->i].start) + { + lp_new.i = lp->i; + lp_new.ptr = lp->ptr - 1; + } + else + { + affirm (lp->i > 0); + lp_new.i = lp->i - 1; + lp_new.ptr = ONE_PAST_END (x, lp->i - 1) - 1; + } + return lp_new; +} + +Line_ptr +line_ptr_increment (const Buf *x, const Line_ptr *lp) +{ + Line_ptr lp_new; + + affirm (lp->ptr <= ONE_PAST_END (x, lp->i) - 1); + if (lp->ptr < ONE_PAST_END (x, lp->i) - 1) + { + lp_new.i = lp->i; + lp_new.ptr = lp->ptr + 1; + } + else + { + affirm (lp->i < x->n_bufs - 1); + lp_new.i = lp->i + 1; + lp_new.ptr = x->p[lp->i + 1].start; + } + return lp_new; +} + +static bool +find_bol (const Buf *x, + const Line_ptr *last_bol, Line_ptr *new_bol, char eol_byte) +{ + size_t i; + Line_ptr tmp; + char *last_bol_ptr; + + if (last_bol->ptr == x->p[0].start) + return false; + + tmp = line_ptr_decrement (x, last_bol); + last_bol_ptr = tmp.ptr; + i = tmp.i; + while (true) + { + char *nl = memrchr (x->p[i].start, last_bol_ptr, eol_byte); + if (nl) + { + Line_ptr nl_pos; + nl_pos.i = i; + nl_pos.ptr = nl; + *new_bol = line_ptr_increment (x, &nl_pos); + return true; + } + + if (i == 0) + break; + + --i; + last_bol_ptr = ONE_PAST_END (x, i); + } + + /* If last_bol->ptr didn't point at the first byte of X, then reaching + this point means that we're about to return the line that is at the + beginning of X. */ + if (last_bol->ptr != x->p[0].start) + { + new_bol->i = 0; + new_bol->ptr = x->p[0].start; + return true; + } + + return false; +} + +static void +print_line (FILE *out_stream, const Buf *x, + const Line_ptr *bol, const Line_ptr *bol_next) +{ + for (size_t i = bol->i; i <= bol_next->i; i++) + { + char *a = (i == bol->i ? bol->ptr : x->p[i].start); + char *b = (i == bol_next->i ? bol_next->ptr : ONE_PAST_END (x, i)); + fwrite (a, 1, b - a, out_stream); + } +} + +static bool +tac_mem () +{ + Buf x; + Line_ptr bol; + char eol_byte = '\n'; + + if (! buf_init_from_stdin (&x, eol_byte)) + { + buf_free (&x); + return false; + } + + /* Special case the empty file. */ + if (EMPTY (&x)) + return true; + + /* Initially, point at one past the last byte of the file. */ + bol.i = x.n_bufs - 1; + bol.ptr = ONE_PAST_END (&x, bol.i); + + while (true) + { + Line_ptr new_bol; + if (! find_bol (&x, &bol, &new_bol, eol_byte)) + break; + print_line (stdout, &x, &new_bol, &bol); + bol = new_bol; + } + return true; +} diff --git a/src/tac.c b/src/tac.c new file mode 100644 index 0000000..4c36558 --- /dev/null +++ b/src/tac.c @@ -0,0 +1,586 @@ +/* tac - concatenate and print files in reverse + Copyright (C) 1988-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jay Lepreau (lepreau@cs.utah.edu). + GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */ + +/* Copy each FILE, or the standard input if none are given or when a + FILE name of "-" is encountered, to the standard output with the + order of the records reversed. The records are separated by + instances of a string, or a newline if none is given. By default, the + separator string is attached to the end of the record that it + follows in the file. + + Options: + -b, --before The separator is attached to the beginning + of the record that it precedes in the file. + -r, --regex The separator is a regular expression. + -s, --separator=separator Use SEPARATOR as the record separator. + + To reverse a file byte by byte, use (in bash, ksh, or sh): +tac -r -s '.\| +' file */ + +#include + +#include +#include +#include +#include "system.h" + +#include + +#include "filenamecat.h" +#include "full-read.h" +#include "safe-read.h" +#include "temp-stream.h" +#include "xbinary-io.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tac" + +#define AUTHORS \ + proper_name ("Jay Lepreau"), \ + proper_name ("David MacKenzie") + + +/* The number of bytes per atomic read. */ +#define INITIAL_READSIZE 8192 + +/* The number of bytes per atomic write. */ +#define WRITESIZE 8192 + +/* The string that separates the records of the file. */ +static char const *separator; + +/* True if we have ever read standard input. */ +static bool have_read_stdin = false; + +/* If true, print 'separator' along with the record preceding it + in the file; otherwise with the record following it. */ +static bool separator_ends_record; + +/* 0 if 'separator' is to be matched as a regular expression; + otherwise, the length of 'separator', used as a sentinel to + stop the search. */ +static size_t sentinel_length; + +/* The length of a match with 'separator'. If 'sentinel_length' is 0, + 'match_length' is computed every time a match succeeds; + otherwise, it is simply the length of 'separator'. */ +static size_t match_length; + +/* The input buffer. */ +static char *G_buffer; + +/* The number of bytes to read at once into 'buffer'. */ +static size_t read_size; + +/* The size of 'buffer'. This is read_size * 2 + sentinel_length + 2. + The extra 2 bytes allow 'past_end' to have a value beyond the + end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */ +static size_t G_buffer_size; + +/* The compiled regular expression representing 'separator'. */ +static struct re_pattern_buffer compiled_separator; +static char compiled_separator_fastmap[UCHAR_MAX + 1]; +static struct re_registers regs; + +static struct option const longopts[] = +{ + {"before", no_argument, nullptr, 'b'}, + {"regex", no_argument, nullptr, 'r'}, + {"separator", required_argument, nullptr, 's'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Write each FILE to standard output, last line first.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -b, --before attach the separator before instead of after\n\ + -r, --regex interpret the separator as a regular expression\n\ + -s, --separator=STRING use STRING as the separator instead of newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Print the characters from START to PAST_END - 1. + If START is null, just flush the buffer. */ + +static void +output (char const *start, char const *past_end) +{ + static char buffer[WRITESIZE]; + static size_t bytes_in_buffer = 0; + size_t bytes_to_add = past_end - start; + size_t bytes_available = WRITESIZE - bytes_in_buffer; + + if (start == 0) + { + fwrite (buffer, 1, bytes_in_buffer, stdout); + bytes_in_buffer = 0; + return; + } + + /* Write out as many full buffers as possible. */ + while (bytes_to_add >= bytes_available) + { + memcpy (buffer + bytes_in_buffer, start, bytes_available); + bytes_to_add -= bytes_available; + start += bytes_available; + fwrite (buffer, 1, WRITESIZE, stdout); + bytes_in_buffer = 0; + bytes_available = WRITESIZE; + } + + memcpy (buffer + bytes_in_buffer, start, bytes_to_add); + bytes_in_buffer += bytes_to_add; +} + +/* Print in reverse the file open on descriptor FD for reading FILE. + The file is already positioned at FILE_POS, which should be near its end. + Return true if successful. */ + +static bool +tac_seekable (int input_fd, char const *file, off_t file_pos) +{ + /* Pointer to the location in 'G_buffer' where the search for + the next separator will begin. */ + char *match_start; + + /* Pointer to one past the rightmost character in 'G_buffer' that + has not been printed yet. */ + char *past_end; + + /* Length of the record growing in 'G_buffer'. */ + size_t saved_record_size; + + /* True if 'output' has not been called yet for any file. + Only used when the separator is attached to the preceding record. */ + bool first_time = true; + char first_char = *separator; /* Speed optimization, non-regexp. */ + char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */ + size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */ + + /* Arrange for the first read to lop off enough to leave the rest of the + file a multiple of 'read_size'. Since 'read_size' can change, this may + not always hold during the program run, but since it usually will, leave + it here for i/o efficiency (page/sector boundaries and all that). + Note: the efficiency gain has not been verified. */ + size_t remainder = file_pos % read_size; + if (remainder != 0) + { + file_pos -= remainder; + if (lseek (input_fd, file_pos, SEEK_SET) < 0) + error (0, errno, _("%s: seek failed"), quotef (file)); + } + + /* Scan backward, looking for end of file. This caters to proc-like + file systems where the file size is just an estimate. */ + while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0 + && file_pos != 0) + { + off_t rsize = read_size; + if (lseek (input_fd, -rsize, SEEK_CUR) < 0) + error (0, errno, _("%s: seek failed"), quotef (file)); + file_pos -= read_size; + } + + /* Now scan forward, looking for end of file. */ + while (saved_record_size == read_size) + { + size_t nread = safe_read (input_fd, G_buffer, read_size); + if (nread == 0) + break; + saved_record_size = nread; + if (saved_record_size == SAFE_READ_ERROR) + break; + file_pos += nread; + } + + if (saved_record_size == SAFE_READ_ERROR) + { + error (0, errno, _("%s: read error"), quotef (file)); + return false; + } + + match_start = past_end = G_buffer + saved_record_size; + /* For non-regexp search, move past impossible positions for a match. */ + if (sentinel_length) + match_start -= match_length1; + + while (true) + { + /* Search backward from 'match_start' - 1 to 'G_buffer' for a match + with 'separator'; for speed, use strncmp if 'separator' contains no + metacharacters. + If the match succeeds, set 'match_start' to point to the start of + the match and 'match_length' to the length of the match. + Otherwise, make 'match_start' < 'G_buffer'. */ + if (sentinel_length == 0) + { + size_t i = match_start - G_buffer; + regoff_t ri = i; + regoff_t range = 1 - ri; + regoff_t ret; + + if (1 < range) + error (EXIT_FAILURE, 0, _("record too large")); + + if (range == 1 + || ((ret = re_search (&compiled_separator, G_buffer, + i, i - 1, range, ®s)) + == -1)) + match_start = G_buffer - 1; + else if (ret == -2) + error (EXIT_FAILURE, 0, + _("error in regular expression search")); + else + { + match_start = G_buffer + regs.start[0]; + match_length = regs.end[0] - regs.start[0]; + } + } + else + { + /* 'match_length' is constant for non-regexp boundaries. */ + while (*--match_start != first_char + || (match_length1 && !STREQ_LEN (match_start + 1, separator1, + match_length1))) + /* Do nothing. */ ; + } + + /* Check whether we backed off the front of 'G_buffer' without finding + a match for 'separator'. */ + if (match_start < G_buffer) + { + if (file_pos == 0) + { + /* Hit the beginning of the file; print the remaining record. */ + output (G_buffer, past_end); + return true; + } + + saved_record_size = past_end - G_buffer; + if (saved_record_size > read_size) + { + /* 'G_buffer_size' is about twice 'read_size', so since + we want to read in another 'read_size' bytes before + the data already in 'G_buffer', we need to increase + 'G_buffer_size'. */ + char *newbuffer; + size_t offset = sentinel_length ? sentinel_length : 1; + size_t old_G_buffer_size = G_buffer_size; + + read_size *= 2; + G_buffer_size = read_size * 2 + sentinel_length + 2; + if (G_buffer_size < old_G_buffer_size) + xalloc_die (); + newbuffer = xrealloc (G_buffer - offset, G_buffer_size); + newbuffer += offset; + G_buffer = newbuffer; + } + + /* Back up to the start of the next bufferfull of the file. */ + if (file_pos >= read_size) + file_pos -= read_size; + else + { + read_size = file_pos; + file_pos = 0; + } + if (lseek (input_fd, file_pos, SEEK_SET) < 0) + error (0, errno, _("%s: seek failed"), quotef (file)); + + /* Shift the pending record data right to make room for the new. + The source and destination regions probably overlap. */ + memmove (G_buffer + read_size, G_buffer, saved_record_size); + past_end = G_buffer + read_size + saved_record_size; + /* For non-regexp searches, avoid unnecessary scanning. */ + if (sentinel_length) + match_start = G_buffer + read_size; + else + match_start = past_end; + + if (full_read (input_fd, G_buffer, read_size) != read_size) + { + error (0, errno, _("%s: read error"), quotef (file)); + return false; + } + } + else + { + /* Found a match of 'separator'. */ + if (separator_ends_record) + { + char *match_end = match_start + match_length; + + /* If this match of 'separator' isn't at the end of the + file, print the record. */ + if (!first_time || match_end != past_end) + output (match_end, past_end); + past_end = match_end; + first_time = false; + } + else + { + output (match_start, past_end); + past_end = match_start; + } + + /* For non-regex matching, we can back up. */ + if (sentinel_length > 0) + match_start -= match_length - 1; + } + } +} + +/* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to + a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream + and file name. Return the number of bytes copied, or -1 on error. */ + +static off_t +copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file) +{ + FILE *fp; + char *file_name; + uintmax_t bytes_copied = 0; + if (!temp_stream (&fp, &file_name)) + return -1; + + while (true) + { + size_t bytes_read = safe_read (input_fd, G_buffer, read_size); + if (bytes_read == 0) + break; + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("%s: read error"), quotef (file)); + return -1; + } + + if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read) + { + error (0, errno, _("%s: write error"), quotef (file_name)); + return -1; + } + + /* Implicitly <= OFF_T_MAX due to preceding fwrite(), + but unsigned type used to avoid compiler warnings + not aware of this fact. */ + bytes_copied += bytes_read; + } + + if (fflush (fp) != 0) + { + error (0, errno, _("%s: write error"), quotef (file_name)); + return -1; + } + + *g_tmp = fp; + *g_tempfile = file_name; + return bytes_copied; +} + +/* Copy INPUT_FD to a temporary, then tac that file. + Return true if successful. */ + +static bool +tac_nonseekable (int input_fd, char const *file) +{ + FILE *tmp_stream; + char *tmp_file; + off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file); + if (bytes_copied < 0) + return false; + + bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied); + return ok; +} + +/* Print FILE in reverse, copying it to a temporary + file first if it is not seekable. + Return true if successful. */ + +static bool +tac_file (char const *filename) +{ + bool ok; + off_t file_size; + int fd; + bool is_stdin = STREQ (filename, "-"); + + if (is_stdin) + { + have_read_stdin = true; + fd = STDIN_FILENO; + filename = _("standard input"); + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + else + { + fd = open (filename, O_RDONLY | O_BINARY); + if (fd < 0) + { + error (0, errno, _("failed to open %s for reading"), + quoteaf (filename)); + return false; + } + } + + file_size = lseek (fd, 0, SEEK_END); + + ok = (file_size < 0 || isatty (fd) + ? tac_nonseekable (fd, filename) + : tac_seekable (fd, filename, file_size)); + + if (!is_stdin && close (fd) != 0) + { + error (0, errno, _("%s: read error"), quotef (filename)); + ok = false; + } + return ok; +} + +int +main (int argc, char **argv) +{ + char const *error_message; /* Return value from re_compile_pattern. */ + int optc; + bool ok; + size_t half_buffer_size; + + /* Initializer for file_list if no file-arguments + were specified on the command line. */ + static char const *const default_file_list[] = {"-", nullptr}; + char const *const *file; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + separator = "\n"; + sentinel_length = 1; + separator_ends_record = true; + + while ((optc = getopt_long (argc, argv, "brs:", longopts, nullptr)) != -1) + { + switch (optc) + { + case 'b': + separator_ends_record = false; + break; + case 'r': + sentinel_length = 0; + break; + case 's': + separator = optarg; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + usage (EXIT_FAILURE); + } + } + + if (sentinel_length == 0) + { + if (*separator == 0) + error (EXIT_FAILURE, 0, _("separator cannot be empty")); + + compiled_separator.buffer = nullptr; + compiled_separator.allocated = 0; + compiled_separator.fastmap = compiled_separator_fastmap; + compiled_separator.translate = nullptr; + error_message = re_compile_pattern (separator, strlen (separator), + &compiled_separator); + if (error_message) + error (EXIT_FAILURE, 0, "%s", (error_message)); + } + else + match_length = sentinel_length = *separator ? strlen (separator) : 1; + + read_size = INITIAL_READSIZE; + while (sentinel_length >= read_size / 2) + { + if (SIZE_MAX / 2 < read_size) + xalloc_die (); + read_size *= 2; + } + half_buffer_size = read_size + sentinel_length + 1; + G_buffer_size = 2 * half_buffer_size; + if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size)) + xalloc_die (); + G_buffer = xmalloc (G_buffer_size); + if (sentinel_length) + { + memcpy (G_buffer, separator, sentinel_length + 1); + G_buffer += sentinel_length; + } + else + { + ++G_buffer; + } + + file = (optind < argc + ? (char const *const *) &argv[optind] + : default_file_list); + + xset_binary_mode (STDOUT_FILENO, O_BINARY); + + { + ok = true; + for (size_t i = 0; file[i]; ++i) + ok &= tac_file (file[i]); + } + + /* Flush the output buffer. */ + output ((char *) nullptr, (char *) nullptr); + + if (have_read_stdin && close (STDIN_FILENO) < 0) + { + error (0, errno, "-"); + ok = false; + } + + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/tail.c b/src/tail.c new file mode 100644 index 0000000..f293551 --- /dev/null +++ b/src/tail.c @@ -0,0 +1,2478 @@ +/* tail -- output the last part of file(s) + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Can display any amount of data, unlike the Unix version, which uses + a fixed size buffer and therefore can only deliver a limited number + of lines. + + Original version by Paul Rubin . + Extensions by David MacKenzie . + tail -f for multiple files by Ian Lance Taylor . + inotify back-end by Giuseppe Scrivano . */ + +#include + +#include +#include +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "cl-strtod.h" +#include "fcntl--.h" +#include "iopoll.h" +#include "isapipe.h" +#include "posixver.h" +#include "quote.h" +#include "safe-read.h" +#include "stat-size.h" +#include "stat-time.h" +#include "xbinary-io.h" +#include "xdectoint.h" +#include "xnanosleep.h" +#include "xstrtol.h" +#include "xstrtod.h" + +#if HAVE_INOTIFY +# include "hash.h" +# include +# include +#endif + +/* Linux can optimize the handling of local files. */ +#if defined __linux__ || defined __ANDROID__ +# include "fs.h" +# include "fs-is-local.h" +# if HAVE_SYS_STATFS_H +# include +# elif HAVE_SYS_VFS_H +# include +# endif +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tail" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Ian Lance Taylor"), \ + proper_name ("Jim Meyering") + +/* Number of items to tail. */ +#define DEFAULT_N_LINES 10 + +/* Special values for dump_remainder's N_BYTES parameter. */ +#define COPY_TO_EOF UINTMAX_MAX +#define COPY_A_BUFFER (UINTMAX_MAX - 1) + +/* FIXME: make Follow_name the default? */ +#define DEFAULT_FOLLOW_MODE Follow_descriptor + +enum Follow_mode +{ + /* Follow the name of each file: if the file is renamed, try to reopen + that name and track the end of the new file if/when it's recreated. + This is useful for tracking logs that are occasionally rotated. */ + Follow_name = 1, + + /* Follow each descriptor obtained upon opening a file. + That means we'll continue to follow the end of a file even after + it has been renamed or unlinked. */ + Follow_descriptor = 2 +}; + +/* The types of files for which tail works. */ +#define IS_TAILABLE_FILE_TYPE(Mode) \ + (S_ISREG (Mode) || S_ISFIFO (Mode) || S_ISSOCK (Mode) || S_ISCHR (Mode)) + +static char const *const follow_mode_string[] = +{ + "descriptor", "name", nullptr +}; + +static enum Follow_mode const follow_mode_map[] = +{ + Follow_descriptor, Follow_name, +}; + +struct File_spec +{ + /* The actual file name, or "-" for stdin. */ + char *name; + + /* Attributes of the file the last time we checked. */ + off_t size; + struct timespec mtime; + dev_t dev; + ino_t ino; + mode_t mode; + + /* The specified name initially referred to a directory or some other + type for which tail isn't meaningful. Unlike for a permission problem + (tailable, below) once this is set, the name is not checked ever again. */ + bool ignore; + + /* See the description of fremote. */ + bool remote; + + /* A file is tailable if it exists, is readable, and is of type + IS_TAILABLE_FILE_TYPE. */ + bool tailable; + + /* File descriptor on which the file is open; -1 if it's not open. */ + int fd; + + /* The value of errno seen last time we checked this file. */ + int errnum; + + /* 1 if O_NONBLOCK is clear, 0 if set, -1 if not known. */ + int blocking; + +#if HAVE_INOTIFY + /* The watch descriptor used by inotify. */ + int wd; + + /* The parent directory watch descriptor. It is used only + * when Follow_name is used. */ + int parent_wd; + + /* Offset in NAME of the basename part. */ + size_t basename_start; +#endif + + /* See description of DEFAULT_MAX_N_... below. */ + uintmax_t n_unchanged_stats; +}; + +/* Keep trying to open a file even if it is inaccessible when tail starts + or if it becomes inaccessible later -- useful only with -f. */ +static bool reopen_inaccessible_files; + +/* If true, interpret the numeric argument as the number of lines. + Otherwise, interpret it as the number of bytes. */ +static bool count_lines; + +/* Whether we follow the name of each file or the file descriptor + that is initially associated with each name. */ +static enum Follow_mode follow_mode = Follow_descriptor; + +/* If true, read from the ends of all specified files until killed. */ +static bool forever; + +/* If true, monitor output so we exit if pipe reader terminates. */ +static bool monitor_output; + +/* If true, count from start of file instead of end. */ +static bool from_start; + +/* If true, print filename headers. */ +static bool print_headers; + +/* Character to split lines by. */ +static char line_end; + +/* When to print the filename banners. */ +enum header_mode +{ + multiple_files, always, never +}; + +/* When tailing a file by name, if there have been this many consecutive + iterations for which the file has not changed, then open/fstat + the file to determine if that file name is still associated with the + same device/inode-number pair as before. This option is meaningful only + when following by name. --max-unchanged-stats=N */ +#define DEFAULT_MAX_N_UNCHANGED_STATS_BETWEEN_OPENS 5 +static uintmax_t max_n_unchanged_stats_between_opens = + DEFAULT_MAX_N_UNCHANGED_STATS_BETWEEN_OPENS; + +/* The process ID of the process (presumably on the current host) + that is writing to all followed files. */ +static pid_t pid; + +/* True if we have ever read standard input. */ +static bool have_read_stdin; + +/* If nonzero, skip the is-regular-file test used to determine whether + to use the lseek optimization. Instead, use the more general (and + more expensive) code unconditionally. Intended solely for testing. */ +static bool presume_input_pipe; + +/* If nonzero then don't use inotify even if available. */ +static bool disable_inotify; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + RETRY_OPTION = CHAR_MAX + 1, + MAX_UNCHANGED_STATS_OPTION, + PID_OPTION, + PRESUME_INPUT_PIPE_OPTION, + LONG_FOLLOW_OPTION, + DISABLE_INOTIFY_OPTION +}; + +static struct option const long_options[] = +{ + {"bytes", required_argument, nullptr, 'c'}, + {"follow", optional_argument, nullptr, LONG_FOLLOW_OPTION}, + {"lines", required_argument, nullptr, 'n'}, + {"max-unchanged-stats", required_argument, nullptr, + MAX_UNCHANGED_STATS_OPTION}, + {"-disable-inotify", no_argument, nullptr, + DISABLE_INOTIFY_OPTION}, /* do not document */ + {"pid", required_argument, nullptr, PID_OPTION}, + {"-presume-input-pipe", no_argument, nullptr, + PRESUME_INPUT_PIPE_OPTION}, /* do not document */ + {"quiet", no_argument, nullptr, 'q'}, + {"retry", no_argument, nullptr, RETRY_OPTION}, + {"silent", no_argument, nullptr, 'q'}, + {"sleep-interval", required_argument, nullptr, 's'}, + {"verbose", no_argument, nullptr, 'v'}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + printf (_("\ +Print the last %d lines of each FILE to standard output.\n\ +With more than one FILE, precede each with a header giving the file name.\n\ +"), DEFAULT_N_LINES); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -c, --bytes=[+]NUM output the last NUM bytes; or use -c +NUM to\n\ + output starting with byte NUM of each file\n\ +"), stdout); + fputs (_("\ + -f, --follow[={name|descriptor}]\n\ + output appended data as the file grows;\n\ + an absent option argument means 'descriptor'\n\ + -F same as --follow=name --retry\n\ +"), stdout); + printf (_("\ + -n, --lines=[+]NUM output the last NUM lines, instead of the last %d;\n\ + or use -n +NUM to skip NUM-1 lines at the start\n\ +"), + DEFAULT_N_LINES + ); + printf (_("\ + --max-unchanged-stats=N\n\ + with --follow=name, reopen a FILE which has not\n\ + changed size after N (default %d) iterations\n\ + to see if it has been unlinked or renamed\n\ + (this is the usual case of rotated log files);\n\ + with inotify, this option is rarely useful\n\ +"), + DEFAULT_MAX_N_UNCHANGED_STATS_BETWEEN_OPENS + ); + fputs (_("\ + --pid=PID with -f, terminate after process ID, PID dies\n\ + -q, --quiet, --silent never output headers giving file names\n\ + --retry keep trying to open a file if it is inaccessible\n\ +"), stdout); + fputs (_("\ + -s, --sleep-interval=N with -f, sleep for approximately N seconds\n\ + (default 1.0) between iterations;\n\ + with inotify and --pid=P, check process P at\n\ + least once every N seconds\n\ + -v, --verbose always output headers giving file names\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +NUM may have a multiplier suffix:\n\ +b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\ +GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y, R, Q.\n\ +Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\ +\n\ +"), stdout); + fputs (_("\ +With --follow (-f), tail defaults to following the file descriptor, which\n\ +means that even if a tail'ed file is renamed, tail will continue to track\n\ +its end. This default behavior is not desirable when you really want to\n\ +track the actual name of the file, not the file descriptor (e.g., log\n\ +rotation). Use --follow=name in that case. That causes tail to track the\n\ +named file in a way that accommodates renaming, removal and creation.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Ensure exit, either with SIGPIPE or EXIT_FAILURE status. */ +static void +die_pipe (void) +{ + raise (SIGPIPE); + exit (EXIT_FAILURE); +} + +/* If the output has gone away, then terminate + as we would if we had written to this output. */ +static void +check_output_alive (void) +{ + if (! monitor_output) + return; + + if (iopoll (-1, STDOUT_FILENO, false) == IOPOLL_BROKEN_OUTPUT) + die_pipe (); +} + +MAYBE_UNUSED static bool +valid_file_spec (struct File_spec const *f) +{ + /* Exactly one of the following subexpressions must be true. */ + return ((f->fd == -1) ^ (f->errnum == 0)); +} + +static char const * +pretty_name (struct File_spec const *f) +{ + return (STREQ (f->name, "-") ? _("standard input") : f->name); +} + +/* Record a file F with descriptor FD, size SIZE, status ST, and + blocking status BLOCKING. */ + +static void +record_open_fd (struct File_spec *f, int fd, + off_t size, struct stat const *st, + int blocking) +{ + f->fd = fd; + f->size = size; + f->mtime = get_stat_mtime (st); + f->dev = st->st_dev; + f->ino = st->st_ino; + f->mode = st->st_mode; + f->blocking = blocking; + f->n_unchanged_stats = 0; + f->ignore = false; +} + +/* Close the file with descriptor FD and name FILENAME. */ + +static void +close_fd (int fd, char const *filename) +{ + if (fd != -1 && fd != STDIN_FILENO && close (fd)) + { + error (0, errno, _("closing %s (fd=%d)"), quoteaf (filename), fd); + } +} + +static void +write_header (char const *pretty_filename) +{ + static bool first_file = true; + + printf ("%s==> %s <==\n", (first_file ? "" : "\n"), pretty_filename); + first_file = false; +} + +/* Write N_BYTES from BUFFER to stdout. + Exit immediately on error with a single diagnostic. */ + +static void +xwrite_stdout (char const *buffer, size_t n_bytes) +{ + if (n_bytes > 0 && fwrite (buffer, 1, n_bytes, stdout) < n_bytes) + { + clearerr (stdout); /* To avoid redundant close_stdout diagnostic. */ + error (EXIT_FAILURE, errno, _("error writing %s"), + quoteaf ("standard output")); + } +} + +/* Read and output N_BYTES of file PRETTY_FILENAME starting at the current + position in FD. If N_BYTES is COPY_TO_EOF, then copy until end of file. + If N_BYTES is COPY_A_BUFFER, then copy at most one buffer's worth. + Return the number of bytes read from the file. */ + +static uintmax_t +dump_remainder (bool want_header, char const *pretty_filename, int fd, + uintmax_t n_bytes) +{ + uintmax_t n_written; + uintmax_t n_remaining = n_bytes; + + n_written = 0; + while (true) + { + char buffer[BUFSIZ]; + size_t n = MIN (n_remaining, BUFSIZ); + size_t bytes_read = safe_read (fd, buffer, n); + if (bytes_read == SAFE_READ_ERROR) + { + if (errno != EAGAIN) + error (EXIT_FAILURE, errno, _("error reading %s"), + quoteaf (pretty_filename)); + break; + } + if (bytes_read == 0) + break; + if (want_header) + { + write_header (pretty_filename); + want_header = false; + } + xwrite_stdout (buffer, bytes_read); + n_written += bytes_read; + if (n_bytes != COPY_TO_EOF) + { + n_remaining -= bytes_read; + if (n_remaining == 0 || n_bytes == COPY_A_BUFFER) + break; + } + } + + return n_written; +} + +/* Call lseek with the specified arguments, where file descriptor FD + corresponds to the file, FILENAME. + Give a diagnostic and exit nonzero if lseek fails. + Otherwise, return the resulting offset. */ + +static off_t +xlseek (int fd, off_t offset, int whence, char const *filename) +{ + off_t new_offset = lseek (fd, offset, whence); + char buf[INT_BUFSIZE_BOUND (offset)]; + char *s; + + if (0 <= new_offset) + return new_offset; + + s = offtostr (offset, buf); + switch (whence) + { + case SEEK_SET: + error (EXIT_FAILURE, errno, _("%s: cannot seek to offset %s"), + quotef (filename), s); + break; + case SEEK_CUR: + error (EXIT_FAILURE, errno, _("%s: cannot seek to relative offset %s"), + quotef (filename), s); + break; + case SEEK_END: + error (EXIT_FAILURE, errno, + _("%s: cannot seek to end-relative offset %s"), + quotef (filename), s); + break; + default: + unreachable (); + } +} + +/* Print the last N_LINES lines from the end of file FD. + Go backward through the file, reading 'BUFSIZ' bytes at a time (except + probably the first), until we hit the start of the file or have + read NUMBER newlines. + START_POS is the starting position of the read pointer for the file + associated with FD (may be nonzero). + END_POS is the file offset of EOF (one larger than offset of last byte). + Return true if successful. */ + +static bool +file_lines (char const *pretty_filename, int fd, uintmax_t n_lines, + off_t start_pos, off_t end_pos, uintmax_t *read_pos) +{ + char buffer[BUFSIZ]; + size_t bytes_read; + off_t pos = end_pos; + + if (n_lines == 0) + return true; + + /* Set 'bytes_read' to the size of the last, probably partial, buffer; + 0 < 'bytes_read' <= 'BUFSIZ'. */ + bytes_read = (pos - start_pos) % BUFSIZ; + if (bytes_read == 0) + bytes_read = BUFSIZ; + /* Make 'pos' a multiple of 'BUFSIZ' (0 if the file is short), so that all + reads will be on block boundaries, which might increase efficiency. */ + pos -= bytes_read; + xlseek (fd, pos, SEEK_SET, pretty_filename); + bytes_read = safe_read (fd, buffer, bytes_read); + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + return false; + } + *read_pos = pos + bytes_read; + + /* Count the incomplete line on files that don't end with a newline. */ + if (bytes_read && buffer[bytes_read - 1] != line_end) + --n_lines; + + do + { + /* Scan backward, counting the newlines in this bufferfull. */ + + size_t n = bytes_read; + while (n) + { + char const *nl; + nl = memrchr (buffer, line_end, n); + if (nl == nullptr) + break; + n = nl - buffer; + if (n_lines-- == 0) + { + /* If this newline isn't the last character in the buffer, + output the part that is after it. */ + xwrite_stdout (nl + 1, bytes_read - (n + 1)); + *read_pos += dump_remainder (false, pretty_filename, fd, + end_pos - (pos + bytes_read)); + return true; + } + } + + /* Not enough newlines in that bufferfull. */ + if (pos == start_pos) + { + /* Not enough lines in the file; print everything from + start_pos to the end. */ + xlseek (fd, start_pos, SEEK_SET, pretty_filename); + *read_pos = start_pos + dump_remainder (false, pretty_filename, fd, + end_pos); + return true; + } + pos -= BUFSIZ; + xlseek (fd, pos, SEEK_SET, pretty_filename); + + bytes_read = safe_read (fd, buffer, BUFSIZ); + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + return false; + } + + *read_pos = pos + bytes_read; + } + while (bytes_read > 0); + + return true; +} + +/* Print the last N_LINES lines from the end of the standard input, + open for reading as pipe FD. + Buffer the text as a linked list of LBUFFERs, adding them as needed. + Return true if successful. */ + +static bool +pipe_lines (char const *pretty_filename, int fd, uintmax_t n_lines, + uintmax_t *read_pos) +{ + struct linebuffer + { + char buffer[BUFSIZ]; + size_t nbytes; + size_t nlines; + struct linebuffer *next; + }; + typedef struct linebuffer LBUFFER; + LBUFFER *first, *last, *tmp; + size_t total_lines = 0; /* Total number of newlines in all buffers. */ + bool ok = true; + size_t n_read; /* Size in bytes of most recent read */ + + first = last = xmalloc (sizeof (LBUFFER)); + first->nbytes = first->nlines = 0; + first->next = nullptr; + tmp = xmalloc (sizeof (LBUFFER)); + + /* Input is always read into a fresh buffer. */ + while (true) + { + n_read = safe_read (fd, tmp->buffer, BUFSIZ); + if (n_read == 0 || n_read == SAFE_READ_ERROR) + break; + tmp->nbytes = n_read; + *read_pos += n_read; + tmp->nlines = 0; + tmp->next = nullptr; + + /* Count the number of newlines just read. */ + { + char const *buffer_end = tmp->buffer + n_read; + char const *p = tmp->buffer; + while ((p = memchr (p, line_end, buffer_end - p))) + { + ++p; + ++tmp->nlines; + } + } + total_lines += tmp->nlines; + + /* If there is enough room in the last buffer read, just append the new + one to it. This is because when reading from a pipe, 'n_read' can + often be very small. */ + if (tmp->nbytes + last->nbytes < BUFSIZ) + { + memcpy (&last->buffer[last->nbytes], tmp->buffer, tmp->nbytes); + last->nbytes += tmp->nbytes; + last->nlines += tmp->nlines; + } + else + { + /* If there's not enough room, link the new buffer onto the end of + the list, then either free up the oldest buffer for the next + read if that would leave enough lines, or else malloc a new one. + Some compaction mechanism is possible but probably not + worthwhile. */ + last = last->next = tmp; + if (total_lines - first->nlines > n_lines) + { + tmp = first; + total_lines -= first->nlines; + first = first->next; + } + else + tmp = xmalloc (sizeof (LBUFFER)); + } + } + + free (tmp); + + if (n_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + ok = false; + goto free_lbuffers; + } + + /* If the file is empty, then bail out. */ + if (last->nbytes == 0) + goto free_lbuffers; + + /* This prevents a core dump when the pipe contains no newlines. */ + if (n_lines == 0) + goto free_lbuffers; + + /* Count the incomplete line on files that don't end with a newline. */ + if (last->buffer[last->nbytes - 1] != line_end) + { + ++last->nlines; + ++total_lines; + } + + /* Run through the list, printing lines. First, skip over unneeded + buffers. */ + for (tmp = first; total_lines - tmp->nlines > n_lines; tmp = tmp->next) + total_lines -= tmp->nlines; + + /* Find the correct beginning, then print the rest of the file. */ + { + char const *beg = tmp->buffer; + char const *buffer_end = tmp->buffer + tmp->nbytes; + if (total_lines > n_lines) + { + /* Skip 'total_lines' - 'n_lines' newlines. We made sure that + 'total_lines' - 'n_lines' <= 'tmp->nlines'. */ + size_t j; + for (j = total_lines - n_lines; j; --j) + { + beg = rawmemchr (beg, line_end); + ++beg; + } + } + + xwrite_stdout (beg, buffer_end - beg); + } + + for (tmp = tmp->next; tmp; tmp = tmp->next) + xwrite_stdout (tmp->buffer, tmp->nbytes); + +free_lbuffers: + while (first) + { + tmp = first->next; + free (first); + first = tmp; + } + return ok; +} + +/* Print the last N_BYTES characters from the end of pipe FD. + This is a stripped down version of pipe_lines. + Return true if successful. */ + +static bool +pipe_bytes (char const *pretty_filename, int fd, uintmax_t n_bytes, + uintmax_t *read_pos) +{ + struct charbuffer + { + char buffer[BUFSIZ]; + size_t nbytes; + struct charbuffer *next; + }; + typedef struct charbuffer CBUFFER; + CBUFFER *first, *last, *tmp; + size_t i; /* Index into buffers. */ + size_t total_bytes = 0; /* Total characters in all buffers. */ + bool ok = true; + size_t n_read; + + first = last = xmalloc (sizeof (CBUFFER)); + first->nbytes = 0; + first->next = nullptr; + tmp = xmalloc (sizeof (CBUFFER)); + + /* Input is always read into a fresh buffer. */ + while (true) + { + n_read = safe_read (fd, tmp->buffer, BUFSIZ); + if (n_read == 0 || n_read == SAFE_READ_ERROR) + break; + *read_pos += n_read; + tmp->nbytes = n_read; + tmp->next = nullptr; + + total_bytes += tmp->nbytes; + /* If there is enough room in the last buffer read, just append the new + one to it. This is because when reading from a pipe, 'nbytes' can + often be very small. */ + if (tmp->nbytes + last->nbytes < BUFSIZ) + { + memcpy (&last->buffer[last->nbytes], tmp->buffer, tmp->nbytes); + last->nbytes += tmp->nbytes; + } + else + { + /* If there's not enough room, link the new buffer onto the end of + the list, then either free up the oldest buffer for the next + read if that would leave enough characters, or else malloc a new + one. Some compaction mechanism is possible but probably not + worthwhile. */ + last = last->next = tmp; + if (total_bytes - first->nbytes > n_bytes) + { + tmp = first; + total_bytes -= first->nbytes; + first = first->next; + } + else + { + tmp = xmalloc (sizeof (CBUFFER)); + } + } + } + + free (tmp); + + if (n_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + ok = false; + goto free_cbuffers; + } + + /* Run through the list, printing characters. First, skip over unneeded + buffers. */ + for (tmp = first; total_bytes - tmp->nbytes > n_bytes; tmp = tmp->next) + total_bytes -= tmp->nbytes; + + /* Find the correct beginning, then print the rest of the file. + We made sure that 'total_bytes' - 'n_bytes' <= 'tmp->nbytes'. */ + if (total_bytes > n_bytes) + i = total_bytes - n_bytes; + else + i = 0; + xwrite_stdout (&tmp->buffer[i], tmp->nbytes - i); + + for (tmp = tmp->next; tmp; tmp = tmp->next) + xwrite_stdout (tmp->buffer, tmp->nbytes); + +free_cbuffers: + while (first) + { + tmp = first->next; + free (first); + first = tmp; + } + return ok; +} + +/* Skip N_BYTES characters from the start of pipe FD, and print + any extra characters that were read beyond that. + Return 1 on error, 0 if ok, -1 if EOF. */ + +static int +start_bytes (char const *pretty_filename, int fd, uintmax_t n_bytes, + uintmax_t *read_pos) +{ + char buffer[BUFSIZ]; + + while (0 < n_bytes) + { + size_t bytes_read = safe_read (fd, buffer, BUFSIZ); + if (bytes_read == 0) + return -1; + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + return 1; + } + *read_pos += bytes_read; + if (bytes_read <= n_bytes) + n_bytes -= bytes_read; + else + { + size_t n_remaining = bytes_read - n_bytes; + /* Print extra characters if there are any. */ + xwrite_stdout (&buffer[n_bytes], n_remaining); + break; + } + } + + return 0; +} + +/* Skip N_LINES lines at the start of file or pipe FD, and print + any extra characters that were read beyond that. + Return 1 on error, 0 if ok, -1 if EOF. */ + +static int +start_lines (char const *pretty_filename, int fd, uintmax_t n_lines, + uintmax_t *read_pos) +{ + if (n_lines == 0) + return 0; + + while (true) + { + char buffer[BUFSIZ]; + size_t bytes_read = safe_read (fd, buffer, BUFSIZ); + if (bytes_read == 0) /* EOF */ + return -1; + if (bytes_read == SAFE_READ_ERROR) /* error */ + { + error (0, errno, _("error reading %s"), quoteaf (pretty_filename)); + return 1; + } + + char *buffer_end = buffer + bytes_read; + + *read_pos += bytes_read; + + char *p = buffer; + while ((p = memchr (p, line_end, buffer_end - p))) + { + ++p; + if (--n_lines == 0) + { + if (p < buffer_end) + xwrite_stdout (p, buffer_end - p); + return 0; + } + } + } +} + +/* Return false when FD is open on a file residing on a local file system. + If fstatfs fails, give a diagnostic and return true. + If fstatfs cannot be called, return true. */ +static bool +fremote (int fd, char const *name) +{ + bool remote = true; /* be conservative (poll by default). */ + +#if HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE \ + && (defined __linux__ || defined __ANDROID__) + struct statfs buf; + int err = fstatfs (fd, &buf); + if (err != 0) + { + /* On at least linux-2.6.38, fstatfs fails with ENOSYS when FD + is open on a pipe. Treat that like a remote file. */ + if (errno != ENOSYS) + error (0, errno, _("cannot determine location of %s. " + "reverting to polling"), quoteaf (name)); + } + else + { + /* Treat unrecognized file systems as "remote", so caller polls. + Note README-release has instructions for syncing the internal + list with the latest Linux kernel file system constants. */ + remote = is_local_fs_type (buf.f_type) <= 0; + } +#endif + + return remote; +} + +/* open/fstat F->name and handle changes. */ +static void +recheck (struct File_spec *f, bool blocking) +{ + struct stat new_stats; + bool ok = true; + bool is_stdin = (STREQ (f->name, "-")); + bool was_tailable = f->tailable; + int prev_errnum = f->errnum; + bool new_file; + int fd = (is_stdin + ? STDIN_FILENO + : open (f->name, O_RDONLY | (blocking ? 0 : O_NONBLOCK))); + + affirm (valid_file_spec (f)); + + /* If the open fails because the file doesn't exist, + then mark the file as not tailable. */ + f->tailable = !(reopen_inaccessible_files && fd == -1); + + if (! disable_inotify && ! lstat (f->name, &new_stats) + && S_ISLNK (new_stats.st_mode)) + { + /* Diagnose the edge case where a regular file is changed + to a symlink. We avoid inotify with symlinks since + it's awkward to match between symlink name and target. */ + ok = false; + f->errnum = -1; + f->ignore = true; + + error (0, 0, _("%s has been replaced with an untailable symbolic link"), + quoteaf (pretty_name (f))); + } + else if (fd == -1 || fstat (fd, &new_stats) < 0) + { + ok = false; + f->errnum = errno; + if (!f->tailable) + { + if (was_tailable) + { + /* FIXME-maybe: detect the case in which the file first becomes + unreadable (perms), and later becomes readable again and can + be seen to be the same file (dev/ino). Otherwise, tail prints + the entire contents of the file when it becomes readable. */ + error (0, f->errnum, _("%s has become inaccessible"), + quoteaf (pretty_name (f))); + } + else + { + /* say nothing... it's still not tailable */ + } + } + else if (prev_errnum != errno) + error (0, errno, "%s", quotef (pretty_name (f))); + } + else if (!IS_TAILABLE_FILE_TYPE (new_stats.st_mode)) + { + ok = false; + f->errnum = -1; + f->tailable = false; + f->ignore = ! (reopen_inaccessible_files && follow_mode == Follow_name); + if (was_tailable || prev_errnum != f->errnum) + error (0, 0, _("%s has been replaced with an untailable file%s"), + quoteaf (pretty_name (f)), + f->ignore ? _("; giving up on this name") : ""); + } + else if ((f->remote = fremote (fd, pretty_name (f))) && ! disable_inotify) + { + ok = false; + f->errnum = -1; + error (0, 0, _("%s has been replaced with an untailable remote file"), + quoteaf (pretty_name (f))); + f->ignore = true; + f->remote = true; + } + else + { + f->errnum = 0; + } + + new_file = false; + if (!ok) + { + close_fd (fd, pretty_name (f)); + close_fd (f->fd, pretty_name (f)); + f->fd = -1; + } + else if (prev_errnum && prev_errnum != ENOENT) + { + new_file = true; + affirm (f->fd == -1); + error (0, 0, _("%s has become accessible"), quoteaf (pretty_name (f))); + } + else if (f->fd == -1) + { + /* A new file even when inodes haven't changed as + pairs can be reused, and we know the file was missing + on the previous iteration. Note this also means the file + is redisplayed in --follow=name mode if renamed away from + and back to a monitored name. */ + new_file = true; + + error (0, 0, + _("%s has appeared; following new file"), + quoteaf (pretty_name (f))); + } + else if (f->ino != new_stats.st_ino || f->dev != new_stats.st_dev) + { + /* File has been replaced (e.g., via log rotation) -- + tail the new one. */ + new_file = true; + + error (0, 0, + _("%s has been replaced; following new file"), + quoteaf (pretty_name (f))); + + /* Close the old one. */ + close_fd (f->fd, pretty_name (f)); + + } + else + { + /* No changes detected, so close new fd. */ + close_fd (fd, pretty_name (f)); + } + + /* FIXME: When a log is rotated, daemons tend to log to the + old file descriptor until the new file is present and + the daemon is sent a signal. Therefore tail may miss entries + being written to the old file. Perhaps we should keep + the older file open and continue to monitor it until + data is written to a new file. */ + if (new_file) + { + /* Start at the beginning of the file. */ + record_open_fd (f, fd, 0, &new_stats, (is_stdin ? -1 : blocking)); + if (S_ISREG (new_stats.st_mode)) + xlseek (fd, 0, SEEK_SET, pretty_name (f)); + } +} + +/* Return true if any of the N_FILES files in F are live, i.e., have + open file descriptors, or should be checked again (see --retry). + When following descriptors, checking should only continue when any + of the files is not yet ignored. */ + +static bool +any_live_files (const struct File_spec *f, size_t n_files) +{ + /* In inotify mode, ignore may be set for files + which may later be replaced with new files. + So always consider files live in -F mode. */ + if (reopen_inaccessible_files && follow_mode == Follow_name) + return true; + + for (size_t i = 0; i < n_files; i++) + { + if (0 <= f[i].fd) + return true; + else + { + if (! f[i].ignore && reopen_inaccessible_files) + return true; + } + } + + return false; +} + +/* Tail N_FILES files forever, or until killed. + The pertinent information for each file is stored in an entry of F. + Loop over each of them, doing an fstat to see if they have changed size, + and an occasional open/fstat to see if any dev/ino pair has changed. + If none of them have changed size in one iteration, sleep for a + while and try again. Continue until the user interrupts us. */ + +static void +tail_forever (struct File_spec *f, size_t n_files, double sleep_interval) +{ + /* Use blocking I/O as an optimization, when it's easy. */ + bool blocking = (pid == 0 && follow_mode == Follow_descriptor + && n_files == 1 && f[0].fd != -1 && ! S_ISREG (f[0].mode)); + size_t last; + bool writer_is_dead = false; + + last = n_files - 1; + + while (true) + { + size_t i; + bool any_input = false; + + for (i = 0; i < n_files; i++) + { + int fd; + char const *name; + mode_t mode; + struct stat stats; + uintmax_t bytes_read; + + if (f[i].ignore) + continue; + + if (f[i].fd < 0) + { + recheck (&f[i], blocking); + continue; + } + + fd = f[i].fd; + name = pretty_name (&f[i]); + mode = f[i].mode; + + if (f[i].blocking != blocking) + { + int old_flags = fcntl (fd, F_GETFL); + int new_flags = old_flags | (blocking ? 0 : O_NONBLOCK); + if (old_flags < 0 + || (new_flags != old_flags + && fcntl (fd, F_SETFL, new_flags) == -1)) + { + /* Don't update f[i].blocking if fcntl fails. */ + if (S_ISREG (f[i].mode) && errno == EPERM) + { + /* This happens when using tail -f on a file with + the append-only attribute. */ + } + else + error (EXIT_FAILURE, errno, + _("%s: cannot change nonblocking mode"), + quotef (name)); + } + else + f[i].blocking = blocking; + } + + bool read_unchanged = false; + if (!f[i].blocking) + { + if (fstat (fd, &stats) != 0) + { + f[i].fd = -1; + f[i].errnum = errno; + error (0, errno, "%s", quotef (name)); + close (fd); /* ignore failure */ + continue; + } + + if (f[i].mode == stats.st_mode + && (! S_ISREG (stats.st_mode) || f[i].size == stats.st_size) + && timespec_cmp (f[i].mtime, get_stat_mtime (&stats)) == 0) + { + if ((max_n_unchanged_stats_between_opens + <= f[i].n_unchanged_stats++) + && follow_mode == Follow_name) + { + recheck (&f[i], f[i].blocking); + f[i].n_unchanged_stats = 0; + } + if (fd != f[i].fd || S_ISREG (stats.st_mode) || 1 < n_files) + continue; + else + read_unchanged = true; + } + + affirm (fd == f[i].fd); + + /* This file has changed. Print out what we can, and + then keep looping. */ + + f[i].mtime = get_stat_mtime (&stats); + f[i].mode = stats.st_mode; + + /* reset counter */ + if (! read_unchanged) + f[i].n_unchanged_stats = 0; + + /* XXX: This is only a heuristic, as the file may have also + been truncated and written to if st_size >= size + (in which case we ignore new data <= size). */ + if (S_ISREG (mode) && stats.st_size < f[i].size) + { + error (0, 0, _("%s: file truncated"), quotef (name)); + /* Assume the file was truncated to 0, + and therefore output all "new" data. */ + xlseek (fd, 0, SEEK_SET, name); + f[i].size = 0; + } + + if (i != last) + { + if (print_headers) + write_header (name); + last = i; + } + } + + /* Don't read more than st_size on networked file systems + because it was seen on glusterfs at least, that st_size + may be smaller than the data read on a _subsequent_ stat call. */ + uintmax_t bytes_to_read; + if (f[i].blocking) + bytes_to_read = COPY_A_BUFFER; + else if (S_ISREG (mode) && f[i].remote) + bytes_to_read = stats.st_size - f[i].size; + else + bytes_to_read = COPY_TO_EOF; + + bytes_read = dump_remainder (false, name, fd, bytes_to_read); + + if (read_unchanged && bytes_read) + f[i].n_unchanged_stats = 0; + + any_input |= (bytes_read != 0); + f[i].size += bytes_read; + } + + if (! any_live_files (f, n_files)) + { + error (0, 0, _("no files remaining")); + break; + } + + if ((!any_input || blocking) && fflush (stdout) != 0) + write_error (); + + check_output_alive (); + + /* If nothing was read, sleep and/or check for dead writers. */ + if (!any_input) + { + if (writer_is_dead) + break; + + /* Once the writer is dead, read the files once more to + avoid a race condition. */ + writer_is_dead = (pid != 0 + && kill (pid, 0) != 0 + /* Handle the case in which you cannot send a + signal to the writer, so kill fails and sets + errno to EPERM. */ + && errno != EPERM); + + if (!writer_is_dead && xnanosleep (sleep_interval)) + error (EXIT_FAILURE, errno, _("cannot read realtime clock")); + + } + } +} + +#if HAVE_INOTIFY + +/* Return true if any of the N_FILES files in F is remote, i.e., has + an open file descriptor and is on a network file system. */ + +static bool +any_remote_file (const struct File_spec *f, size_t n_files) +{ + for (size_t i = 0; i < n_files; i++) + if (0 <= f[i].fd && f[i].remote) + return true; + return false; +} + +/* Return true if any of the N_FILES files in F is non remote, i.e., has + an open file descriptor and is not on a network file system. */ + +static bool +any_non_remote_file (const struct File_spec *f, size_t n_files) +{ + for (size_t i = 0; i < n_files; i++) + if (0 <= f[i].fd && ! f[i].remote) + return true; + return false; +} + +/* Return true if any of the N_FILES files in F is a symlink. + Note we don't worry about the edge case where "-" exists, + since that will have the same consequences for inotify, + which is the only context this function is currently used. */ + +static bool +any_symlinks (const struct File_spec *f, size_t n_files) +{ + struct stat st; + for (size_t i = 0; i < n_files; i++) + if (lstat (f[i].name, &st) == 0 && S_ISLNK (st.st_mode)) + return true; + return false; +} + +/* Return true if any of the N_FILES files in F is not + a regular file or fifo. This is used to avoid adding inotify + watches on a device file for example, which inotify + will accept, but not give any events for. */ + +static bool +any_non_regular_fifo (const struct File_spec *f, size_t n_files) +{ + for (size_t i = 0; i < n_files; i++) + if (0 <= f[i].fd && ! S_ISREG (f[i].mode) && ! S_ISFIFO (f[i].mode)) + return true; + return false; +} + +/* Return true if any of the N_FILES files in F represents + stdin and is tailable. */ + +static bool +tailable_stdin (const struct File_spec *f, size_t n_files) +{ + for (size_t i = 0; i < n_files; i++) + if (!f[i].ignore && STREQ (f[i].name, "-")) + return true; + return false; +} + +static size_t +wd_hasher (const void *entry, size_t tabsize) +{ + const struct File_spec *spec = entry; + return spec->wd % tabsize; +} + +static bool +wd_comparator (const void *e1, const void *e2) +{ + const struct File_spec *spec1 = e1; + const struct File_spec *spec2 = e2; + return spec1->wd == spec2->wd; +} + +/* Output (new) data for FSPEC->fd. + PREV_FSPEC records the last File_spec for which we output. */ +static void +check_fspec (struct File_spec *fspec, struct File_spec **prev_fspec) +{ + struct stat stats; + char const *name; + + if (fspec->fd == -1) + return; + + name = pretty_name (fspec); + + if (fstat (fspec->fd, &stats) != 0) + { + fspec->errnum = errno; + close_fd (fspec->fd, name); + fspec->fd = -1; + return; + } + + /* XXX: This is only a heuristic, as the file may have also + been truncated and written to if st_size >= size + (in which case we ignore new data <= size). + Though in the inotify case it's more likely we'll get + separate events for truncate() and write(). */ + if (S_ISREG (fspec->mode) && stats.st_size < fspec->size) + { + error (0, 0, _("%s: file truncated"), quotef (name)); + xlseek (fspec->fd, 0, SEEK_SET, name); + fspec->size = 0; + } + else if (S_ISREG (fspec->mode) && stats.st_size == fspec->size + && timespec_cmp (fspec->mtime, get_stat_mtime (&stats)) == 0) + return; + + bool want_header = print_headers && (fspec != *prev_fspec); + + uintmax_t bytes_read = dump_remainder (want_header, name, fspec->fd, + COPY_TO_EOF); + fspec->size += bytes_read; + + if (bytes_read) + { + *prev_fspec = fspec; + if (fflush (stdout) != 0) + write_error (); + } +} + +/* Attempt to tail N_FILES files forever, or until killed. + Check modifications using the inotify events system. + Exit if finished or on fatal error; return to revert to polling. */ +static void +tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, + double sleep_interval, Hash_table **wd_to_namep) +{ +# if TAIL_TEST_SLEEP + /* Delay between open() and inotify_add_watch() + to help trigger different cases. */ + xnanosleep (1000000); +# endif + unsigned int max_realloc = 3; + + /* Map an inotify watch descriptor to the name of the file it's watching. */ + Hash_table *wd_to_name; + + bool found_watchable_file = false; + bool tailed_but_unwatchable = false; + bool found_unwatchable_dir = false; + bool no_inotify_resources = false; + bool writer_is_dead = false; + struct File_spec *prev_fspec; + size_t evlen = 0; + char *evbuf; + size_t evbuf_off = 0; + size_t len = 0; + + wd_to_name = hash_initialize (n_files, nullptr, wd_hasher, wd_comparator, + nullptr); + if (! wd_to_name) + xalloc_die (); + *wd_to_namep = wd_to_name; + + /* The events mask used with inotify on files (not directories). */ + uint32_t inotify_wd_mask = IN_MODIFY; + /* TODO: Perhaps monitor these events in Follow_descriptor mode also, + to tag reported file names with "deleted", "moved" etc. */ + if (follow_mode == Follow_name) + inotify_wd_mask |= (IN_ATTRIB | IN_DELETE_SELF | IN_MOVE_SELF); + + /* Add an inotify watch for each watched file. If -F is specified then watch + its parent directory too, in this way when they re-appear we can add them + again to the watch list. */ + size_t i; + for (i = 0; i < n_files; i++) + { + if (!f[i].ignore) + { + size_t fnlen = strlen (f[i].name); + if (evlen < fnlen) + evlen = fnlen; + + f[i].wd = -1; + + if (follow_mode == Follow_name) + { + size_t dirlen = dir_len (f[i].name); + char prev = f[i].name[dirlen]; + f[i].basename_start = last_component (f[i].name) - f[i].name; + + f[i].name[dirlen] = '\0'; + + /* It's fine to add the same directory more than once. + In that case the same watch descriptor is returned. */ + f[i].parent_wd = inotify_add_watch (wd, dirlen ? f[i].name : ".", + (IN_CREATE | IN_DELETE + | IN_MOVED_TO | IN_ATTRIB + | IN_DELETE_SELF)); + + f[i].name[dirlen] = prev; + + if (f[i].parent_wd < 0) + { + if (errno != ENOSPC) /* suppress confusing error. */ + error (0, errno, _("cannot watch parent directory of %s"), + quoteaf (f[i].name)); + else + error (0, 0, _("inotify resources exhausted")); + found_unwatchable_dir = true; + /* We revert to polling below. Note invalid uses + of the inotify API will still be diagnosed. */ + break; + } + } + + f[i].wd = inotify_add_watch (wd, f[i].name, inotify_wd_mask); + + if (f[i].wd < 0) + { + if (f[i].fd != -1) /* already tailed. */ + tailed_but_unwatchable = true; + if (errno == ENOSPC || errno == ENOMEM) + { + no_inotify_resources = true; + error (0, 0, _("inotify resources exhausted")); + break; + } + else if (errno != f[i].errnum) + error (0, errno, _("cannot watch %s"), quoteaf (f[i].name)); + continue; + } + + if (hash_insert (wd_to_name, &(f[i])) == nullptr) + xalloc_die (); + + found_watchable_file = true; + } + } + + /* Linux kernel 2.6.24 at least has a bug where eventually, ENOSPC is always + returned by inotify_add_watch. In any case we should revert to polling + when there are no inotify resources. Also a specified directory may not + be currently present or accessible, so revert to polling. Also an already + tailed but unwatchable due rename/unlink race, should also revert. */ + if (no_inotify_resources || found_unwatchable_dir + || (follow_mode == Follow_descriptor && tailed_but_unwatchable)) + return; + if (follow_mode == Follow_descriptor && !found_watchable_file) + exit (EXIT_FAILURE); + + prev_fspec = &(f[n_files - 1]); + + /* Check files again. New files or data can be available since last time we + checked and before they are watched by inotify. */ + for (i = 0; i < n_files; i++) + { + if (! f[i].ignore) + { + /* check for new files. */ + if (follow_mode == Follow_name) + recheck (&(f[i]), false); + else if (f[i].fd != -1) + { + /* If the file was replaced in the small window since we tailed, + then assume the watch is on the wrong item (different to + that we've already produced output for), and so revert to + polling the original descriptor. */ + struct stat stats; + + if (stat (f[i].name, &stats) == 0 + && (f[i].dev != stats.st_dev || f[i].ino != stats.st_ino)) + { + error (0, errno, _("%s was replaced"), + quoteaf (pretty_name (&(f[i])))); + return; + } + } + + /* check for new data. */ + check_fspec (&f[i], &prev_fspec); + } + } + + evlen += sizeof (struct inotify_event) + 1; + evbuf = xmalloc (evlen); + + /* Wait for inotify events and handle them. Events on directories + ensure that watched files can be re-added when following by name. + This loop blocks on the 'safe_read' call until a new event is notified. + But when --pid=P is specified, tail usually waits via poll. */ + while (true) + { + struct File_spec *fspec; + struct inotify_event *ev; + void *void_ev; + + /* When following by name without --retry, and the last file has + been unlinked or renamed-away, diagnose it and return. */ + if (follow_mode == Follow_name + && ! reopen_inaccessible_files + && hash_get_n_entries (wd_to_name) == 0) + error (EXIT_FAILURE, 0, _("no files remaining")); + + if (len <= evbuf_off) + { + /* Poll for inotify events. When watching a PID, ensure + that a read from WD will not block indefinitely. + If MONITOR_OUTPUT, also poll for a broken output pipe. */ + + int file_change; + struct pollfd pfd[2]; + do + { + /* How many ms to wait for changes. -1 means wait forever. */ + int delay = -1; + + if (pid) + { + if (writer_is_dead) + exit (EXIT_SUCCESS); + + writer_is_dead = (kill (pid, 0) != 0 && errno != EPERM); + + if (writer_is_dead || sleep_interval <= 0) + delay = 0; + else if (sleep_interval < INT_MAX / 1000 - 1) + { + /* delay = ceil (sleep_interval * 1000), sans libm. */ + double ddelay = sleep_interval * 1000; + delay = ddelay; + delay += delay < ddelay; + } + } + + pfd[0].fd = wd; + pfd[0].events = POLLIN; + pfd[1].fd = STDOUT_FILENO; + pfd[1].events = pfd[1].revents = 0; + file_change = poll (pfd, monitor_output + 1, delay); + } + while (file_change == 0); + + if (file_change < 0) + error (EXIT_FAILURE, errno, + _("error waiting for inotify and output events")); + if (pfd[1].revents) + die_pipe (); + + len = safe_read (wd, evbuf, evlen); + evbuf_off = 0; + + /* For kernels prior to 2.6.21, read returns 0 when the buffer + is too small. */ + if ((len == 0 || (len == SAFE_READ_ERROR && errno == EINVAL)) + && max_realloc--) + { + len = 0; + evlen *= 2; + evbuf = xrealloc (evbuf, evlen); + continue; + } + + if (len == 0 || len == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, _("error reading inotify event")); + } + + void_ev = evbuf + evbuf_off; + ev = void_ev; + evbuf_off += sizeof (*ev) + ev->len; + + /* If a directory is deleted, IN_DELETE_SELF is emitted + with ev->name of length 0. + We need to catch it, otherwise it would wait forever, + as wd for directory becomes inactive. Revert to polling now. */ + if ((ev->mask & IN_DELETE_SELF) && ! ev->len) + { + for (i = 0; i < n_files; i++) + { + if (ev->wd == f[i].parent_wd) + { + error (0, 0, + _("directory containing watched file was removed")); + return; + } + } + } + + if (ev->len) /* event on ev->name in watched directory. */ + { + size_t j; + for (j = 0; j < n_files; j++) + { + /* With N=hundreds of frequently-changing files, this O(N^2) + process might be a problem. FIXME: use a hash table? */ + if (f[j].parent_wd == ev->wd + && STREQ (ev->name, f[j].name + f[j].basename_start)) + break; + } + + /* It is not a watched file. */ + if (j == n_files) + continue; + + fspec = &(f[j]); + + int new_wd = -1; + bool deleting = !! (ev->mask & IN_DELETE); + + if (! deleting) + { + /* Adding the same inode again will look up any existing wd. */ + new_wd = inotify_add_watch (wd, f[j].name, inotify_wd_mask); + } + + if (! deleting && new_wd < 0) + { + if (errno == ENOSPC || errno == ENOMEM) + { + error (0, 0, _("inotify resources exhausted")); + return; /* revert to polling. */ + } + else + { + /* Can get ENOENT for a dangling symlink for example. */ + error (0, errno, _("cannot watch %s"), quoteaf (f[j].name)); + } + /* We'll continue below after removing the existing watch. */ + } + + /* This will be false if only attributes of file change. */ + bool new_watch; + new_watch = (! deleting) && (fspec->wd < 0 || new_wd != fspec->wd); + + if (new_watch) + { + if (0 <= fspec->wd) + { + inotify_rm_watch (wd, fspec->wd); + hash_remove (wd_to_name, fspec); + } + + fspec->wd = new_wd; + + if (new_wd == -1) + continue; + + /* If the file was moved then inotify will use the source file wd + for the destination file. Make sure the key is not present in + the table. */ + struct File_spec *prev = hash_remove (wd_to_name, fspec); + if (prev && prev != fspec) + { + if (follow_mode == Follow_name) + recheck (prev, false); + prev->wd = -1; + close_fd (prev->fd, pretty_name (prev)); + } + + if (hash_insert (wd_to_name, fspec) == nullptr) + xalloc_die (); + } + + if (follow_mode == Follow_name) + recheck (fspec, false); + } + else + { + struct File_spec key; + key.wd = ev->wd; + fspec = hash_lookup (wd_to_name, &key); + } + + if (! fspec) + continue; + + if (ev->mask & (IN_ATTRIB | IN_DELETE | IN_DELETE_SELF | IN_MOVE_SELF)) + { + /* Note for IN_MOVE_SELF (the file we're watching has + been clobbered via a rename) we leave the watch + in place since it may still be part of the set + of watched names. */ + if (ev->mask & IN_DELETE_SELF) + { + inotify_rm_watch (wd, fspec->wd); + hash_remove (wd_to_name, fspec); + } + + /* Note we get IN_ATTRIB for unlink() as st_nlink decrements. + The usual path is a close() done in recheck() triggers + an IN_DELETE_SELF event as the inode is removed. + However sometimes open() will succeed as even though + st_nlink is decremented, the dentry (cache) is not updated. + Thus we depend on the IN_DELETE event on the directory + to trigger processing for the removed file. */ + + recheck (fspec, false); + + continue; + } + check_fspec (fspec, &prev_fspec); + } +} +#endif + +/* Output the last N_BYTES bytes of file FILENAME open for reading in FD. + Return true if successful. */ + +static bool +tail_bytes (char const *pretty_filename, int fd, uintmax_t n_bytes, + uintmax_t *read_pos) +{ + struct stat stats; + + if (fstat (fd, &stats)) + { + error (0, errno, _("cannot fstat %s"), quoteaf (pretty_filename)); + return false; + } + + if (from_start) + { + if (! presume_input_pipe && n_bytes <= OFF_T_MAX + && ((S_ISREG (stats.st_mode) + && xlseek (fd, n_bytes, SEEK_CUR, pretty_filename) >= 0) + || lseek (fd, n_bytes, SEEK_CUR) != -1)) + *read_pos += n_bytes; + else + { + int t = start_bytes (pretty_filename, fd, n_bytes, read_pos); + if (t) + return t < 0; + } + n_bytes = COPY_TO_EOF; + } + else + { + off_t end_pos = -1; + off_t current_pos = -1; + + if (! presume_input_pipe && n_bytes <= OFF_T_MAX) + { + if (usable_st_size (&stats)) + end_pos = stats.st_size; + else if ((current_pos = lseek (fd, -n_bytes, SEEK_END)) != -1) + end_pos = current_pos + n_bytes; + } + if (end_pos <= (off_t) ST_BLKSIZE (stats)) + return pipe_bytes (pretty_filename, fd, n_bytes, read_pos); + if (current_pos == -1) + current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename); + if (current_pos < end_pos) + { + off_t bytes_remaining = end_pos - current_pos; + + if (n_bytes < bytes_remaining) + { + current_pos = end_pos - n_bytes; + xlseek (fd, current_pos, SEEK_SET, pretty_filename); + } + } + *read_pos = current_pos; + } + + *read_pos += dump_remainder (false, pretty_filename, fd, n_bytes); + return true; +} + +/* Output the last N_LINES lines of file FILENAME open for reading in FD. + Return true if successful. */ + +static bool +tail_lines (char const *pretty_filename, int fd, uintmax_t n_lines, + uintmax_t *read_pos) +{ + struct stat stats; + + if (fstat (fd, &stats)) + { + error (0, errno, _("cannot fstat %s"), quoteaf (pretty_filename)); + return false; + } + + if (from_start) + { + int t = start_lines (pretty_filename, fd, n_lines, read_pos); + if (t) + return t < 0; + *read_pos += dump_remainder (false, pretty_filename, fd, COPY_TO_EOF); + } + else + { + off_t start_pos = -1; + off_t end_pos; + + /* Use file_lines only if FD refers to a regular file for + which lseek (... SEEK_END) works. */ + if ( ! presume_input_pipe + && S_ISREG (stats.st_mode) + && (start_pos = lseek (fd, 0, SEEK_CUR)) != -1 + && start_pos < (end_pos = lseek (fd, 0, SEEK_END))) + { + *read_pos = end_pos; + if (end_pos != 0 + && ! file_lines (pretty_filename, fd, n_lines, + start_pos, end_pos, read_pos)) + return false; + } + else + { + /* Under very unlikely circumstances, it is possible to reach + this point after positioning the file pointer to end of file + via the 'lseek (...SEEK_END)' above. In that case, reposition + the file pointer back to start_pos before calling pipe_lines. */ + if (start_pos != -1) + xlseek (fd, start_pos, SEEK_SET, pretty_filename); + + return pipe_lines (pretty_filename, fd, n_lines, read_pos); + } + } + return true; +} + +/* Display the last N_UNITS units of file FILENAME, open for reading + via FD. Set *READ_POS to the position of the input stream pointer. + *READ_POS is usually the number of bytes read and corresponds to an + offset from the beginning of a file. However, it may be larger than + OFF_T_MAX (as for an input pipe), and may also be larger than the + number of bytes read (when an input pointer is initially not at + beginning of file), and may be far greater than the number of bytes + actually read for an input file that is seekable. + Return true if successful. */ + +static bool +tail (char const *filename, int fd, uintmax_t n_units, + uintmax_t *read_pos) +{ + *read_pos = 0; + if (count_lines) + return tail_lines (filename, fd, n_units, read_pos); + else + return tail_bytes (filename, fd, n_units, read_pos); +} + +/* Display the last N_UNITS units of the file described by F. + Return true if successful. */ + +static bool +tail_file (struct File_spec *f, uintmax_t n_units) +{ + int fd; + bool ok; + + bool is_stdin = (STREQ (f->name, "-")); + + if (is_stdin) + { + have_read_stdin = true; + fd = STDIN_FILENO; + xset_binary_mode (STDIN_FILENO, O_BINARY); + } + else + fd = open (f->name, O_RDONLY | O_BINARY); + + f->tailable = !(reopen_inaccessible_files && fd == -1); + + if (fd == -1) + { + if (forever) + { + f->fd = -1; + f->errnum = errno; + f->ignore = ! reopen_inaccessible_files; + f->ino = 0; + f->dev = 0; + } + error (0, errno, _("cannot open %s for reading"), + quoteaf (pretty_name (f))); + ok = false; + } + else + { + uintmax_t read_pos; + + if (print_headers) + write_header (pretty_name (f)); + ok = tail (pretty_name (f), fd, n_units, &read_pos); + if (forever) + { + struct stat stats; + +#if TAIL_TEST_SLEEP + /* Before the tail function provided 'read_pos', there was + a race condition described in the URL below. This sleep + call made the window big enough to exercise the problem. */ + xnanosleep (1); +#endif + f->errnum = ok - 1; + if (fstat (fd, &stats) < 0) + { + ok = false; + f->errnum = errno; + error (0, errno, _("error reading %s"), + quoteaf (pretty_name (f))); + } + else if (!IS_TAILABLE_FILE_TYPE (stats.st_mode)) + { + ok = false; + f->errnum = -1; + f->tailable = false; + f->ignore = ! reopen_inaccessible_files; + error (0, 0, _("%s: cannot follow end of this type of file%s"), + quotef (pretty_name (f)), + f->ignore ? _("; giving up on this name") : ""); + } + + if (!ok) + { + f->ignore = ! reopen_inaccessible_files; + close_fd (fd, pretty_name (f)); + f->fd = -1; + } + else + { + /* Note: we must use read_pos here, not stats.st_size, + to avoid a race condition described by Ken Raeburn: + https://lists.gnu.org/r/bug-textutils/2003-05/msg00007.html */ + record_open_fd (f, fd, read_pos, &stats, (is_stdin ? -1 : 1)); + f->remote = fremote (fd, pretty_name (f)); + } + } + else + { + if (!is_stdin && close (fd)) + { + error (0, errno, _("error reading %s"), + quoteaf (pretty_name (f))); + ok = false; + } + } + } + + return ok; +} + +/* If obsolete usage is allowed, and the command line arguments are of + the obsolete form and the option string is well-formed, set + *N_UNITS, the globals COUNT_LINES, FOREVER, and FROM_START, and + return true. If the command line arguments are obviously incorrect + (e.g., because obsolete usage is not allowed and the arguments are + incorrect for non-obsolete usage), report an error and exit. + Otherwise, return false and don't modify any parameter or global + variable. */ + +static bool +parse_obsolete_option (int argc, char * const *argv, uintmax_t *n_units) +{ + char const *p; + char const *n_string; + char const *n_string_end; + int default_count = DEFAULT_N_LINES; + bool t_from_start; + bool t_count_lines = true; + bool t_forever = false; + + /* With the obsolete form, there is one option string and at most + one file argument. Watch out for "-" and "--", though. */ + if (! (argc == 2 + || (argc == 3 && ! (argv[2][0] == '-' && argv[2][1])) + || (3 <= argc && argc <= 4 && STREQ (argv[2], "--")))) + return false; + + int posix_ver = posix2_version (); + bool obsolete_usage = posix_ver < 200112; + bool traditional_usage = obsolete_usage || 200809 <= posix_ver; + p = argv[1]; + + switch (*p++) + { + default: + return false; + + case '+': + /* Leading "+" is a file name in the standard form. */ + if (!traditional_usage) + return false; + + t_from_start = true; + break; + + case '-': + /* In the non-obsolete form, "-" is standard input and "-c" + requires an option-argument. The obsolete multidigit options + are supported as a GNU extension even when conforming to + POSIX 1003.1-2001 or later, so don't complain about them. */ + if (!obsolete_usage && !p[p[0] == 'c']) + return false; + + t_from_start = false; + break; + } + + n_string = p; + while (ISDIGIT (*p)) + p++; + n_string_end = p; + + switch (*p) + { + case 'b': default_count *= 512; FALLTHROUGH; + case 'c': t_count_lines = false; FALLTHROUGH; + case 'l': p++; break; + } + + if (*p == 'f') + { + t_forever = true; + ++p; + } + + if (*p) + return false; + + if (n_string == n_string_end) + *n_units = default_count; + else if ((xstrtoumax (n_string, nullptr, 10, n_units, "b") + & ~LONGINT_INVALID_SUFFIX_CHAR) + != LONGINT_OK) + error (EXIT_FAILURE, errno, "%s: %s", _("invalid number"), + quote (argv[1])); + + /* Set globals. */ + from_start = t_from_start; + count_lines = t_count_lines; + forever = t_forever; + + return true; +} + +static void +parse_options (int argc, char **argv, + uintmax_t *n_units, enum header_mode *header_mode, + double *sleep_interval) +{ + int c; + + while ((c = getopt_long (argc, argv, "c:n:fFqs:vz0123456789", + long_options, nullptr)) + != -1) + { + switch (c) + { + case 'F': + forever = true; + follow_mode = Follow_name; + reopen_inaccessible_files = true; + break; + + case 'c': + case 'n': + count_lines = (c == 'n'); + if (*optarg == '+') + from_start = true; + else if (*optarg == '-') + ++optarg; + + *n_units = xdectoumax (optarg, 0, UINTMAX_MAX, "bkKmMGTPEZYRQ0", + count_lines + ? _("invalid number of lines") + : _("invalid number of bytes"), 0); + break; + + case 'f': + case LONG_FOLLOW_OPTION: + forever = true; + if (optarg == nullptr) + follow_mode = DEFAULT_FOLLOW_MODE; + else + follow_mode = XARGMATCH ("--follow", optarg, + follow_mode_string, follow_mode_map); + break; + + case RETRY_OPTION: + reopen_inaccessible_files = true; + break; + + case MAX_UNCHANGED_STATS_OPTION: + /* --max-unchanged-stats=N */ + max_n_unchanged_stats_between_opens = + xdectoumax (optarg, 0, UINTMAX_MAX, "", + _("invalid maximum number of unchanged stats between opens"), 0); + break; + + case DISABLE_INOTIFY_OPTION: + disable_inotify = true; + break; + + case PID_OPTION: + pid = xdectoumax (optarg, 0, PID_T_MAX, "", _("invalid PID"), 0); + break; + + case PRESUME_INPUT_PIPE_OPTION: + presume_input_pipe = true; + break; + + case 'q': + *header_mode = never; + break; + + case 's': + { + double s; + if (! (xstrtod (optarg, nullptr, &s, cl_strtod) && 0 <= s)) + error (EXIT_FAILURE, 0, + _("invalid number of seconds: %s"), quote (optarg)); + *sleep_interval = s; + } + break; + + case 'v': + *header_mode = always; + break; + + case 'z': + line_end = '\0'; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + error (EXIT_FAILURE, 0, _("option used in invalid context -- %c"), c); + + default: + usage (EXIT_FAILURE); + } + } + + if (reopen_inaccessible_files) + { + if (!forever) + { + reopen_inaccessible_files = false; + error (0, 0, _("warning: --retry ignored; --retry is useful" + " only when following")); + } + else if (follow_mode == Follow_descriptor) + error (0, 0, _("warning: --retry only effective for the initial open")); + } + + if (pid && !forever) + error (0, 0, + _("warning: PID ignored; --pid=PID is useful only when following")); + else if (pid && kill (pid, 0) != 0 && errno == ENOSYS) + { + error (0, 0, _("warning: --pid=PID is not supported on this system")); + pid = 0; + } +} + +/* Mark as '.ignore'd each member of F that corresponds to a + pipe or fifo, and return the number of non-ignored members. */ +static size_t +ignore_fifo_and_pipe (struct File_spec *f, size_t n_files) +{ + /* When there is no FILE operand and stdin is a pipe or FIFO + POSIX requires that tail ignore the -f option. + Since we allow multiple FILE operands, we extend that to say: with -f, + ignore any "-" operand that corresponds to a pipe or FIFO. */ + size_t n_viable = 0; + + for (size_t i = 0; i < n_files; i++) + { + bool is_a_fifo_or_pipe = + (STREQ (f[i].name, "-") + && !f[i].ignore + && 0 <= f[i].fd + && (S_ISFIFO (f[i].mode) + || (HAVE_FIFO_PIPES != 1 && isapipe (f[i].fd)))); + if (is_a_fifo_or_pipe) + { + f[i].fd = -1; + f[i].ignore = true; + } + else + ++n_viable; + } + + return n_viable; +} + +int +main (int argc, char **argv) +{ + enum header_mode header_mode = multiple_files; + bool ok = true; + /* If from_start, the number of items to skip before printing; otherwise, + the number of items at the end of the file to print. Although the type + is signed, the value is never negative. */ + uintmax_t n_units = DEFAULT_N_LINES; + size_t n_files; + char **file; + struct File_spec *F; + size_t i; + bool obsolete_option; + + /* The number of seconds to sleep between iterations. + During one iteration, every file name or descriptor is checked to + see if it has changed. */ + double sleep_interval = 1.0; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + have_read_stdin = false; + + count_lines = true; + forever = from_start = print_headers = false; + line_end = '\n'; + obsolete_option = parse_obsolete_option (argc, argv, &n_units); + argc -= obsolete_option; + argv += obsolete_option; + parse_options (argc, argv, &n_units, &header_mode, &sleep_interval); + + /* To start printing with item N_UNITS from the start of the file, skip + N_UNITS - 1 items. 'tail -n +0' is actually meaningless, but for Unix + compatibility it's treated the same as 'tail -n +1'. */ + if (from_start) + { + if (n_units) + --n_units; + } + + if (optind < argc) + { + n_files = argc - optind; + file = argv + optind; + } + else + { + static char *dummy_stdin = (char *) "-"; + n_files = 1; + file = &dummy_stdin; + } + + { + bool found_hyphen = false; + + for (i = 0; i < n_files; i++) + if (STREQ (file[i], "-")) + found_hyphen = true; + + /* When following by name, there must be a name. */ + if (found_hyphen && follow_mode == Follow_name) + error (EXIT_FAILURE, 0, _("cannot follow %s by name"), quoteaf ("-")); + + /* When following forever, and not using simple blocking, warn if + any file is '-' as the stats() used to check for input are ineffective. + This is only a warning, since tail's output (before a failing seek, + and that from any non-stdin files) might still be useful. */ + if (forever && found_hyphen) + { + struct stat in_stat; + bool blocking_stdin; + blocking_stdin = (pid == 0 && follow_mode == Follow_descriptor + && n_files == 1 && ! fstat (STDIN_FILENO, &in_stat) + && ! S_ISREG (in_stat.st_mode)); + + if (! blocking_stdin && isatty (STDIN_FILENO)) + error (0, 0, _("warning: following standard input" + " indefinitely is ineffective")); + } + } + + /* Don't read anything if we'll never output anything. */ + if (! n_units && ! forever && ! from_start) + return EXIT_SUCCESS; + + F = xnmalloc (n_files, sizeof *F); + for (i = 0; i < n_files; i++) + F[i].name = file[i]; + + if (header_mode == always + || (header_mode == multiple_files && n_files > 1)) + print_headers = true; + + xset_binary_mode (STDOUT_FILENO, O_BINARY); + + for (i = 0; i < n_files; i++) + ok &= tail_file (&F[i], n_units); + + if (forever && ignore_fifo_and_pipe (F, n_files)) + { + /* If stdout is a fifo or pipe, then monitor it + so that we exit if the reader goes away. */ + struct stat out_stat; + if (fstat (STDOUT_FILENO, &out_stat) < 0) + error (EXIT_FAILURE, errno, _("standard output")); + monitor_output = (S_ISFIFO (out_stat.st_mode) + || (HAVE_FIFO_PIPES != 1 && isapipe (STDOUT_FILENO))); + +#if HAVE_INOTIFY + /* tailable_stdin() checks if the user specifies stdin via "-", + or implicitly by providing no arguments. If so, we won't use inotify. + Technically, on systems with a working /dev/stdin, we *could*, + but would it be worth it? Verifying that it's a real device + and hooked up to stdin is not trivial, while reverting to + non-inotify-based tail_forever is easy and portable. + + any_remote_file() checks if the user has specified any + files that reside on remote file systems. inotify is not used + in this case because it would miss any updates to the file + that were not initiated from the local system. + + any_non_remote_file() checks if the user has specified any + files that don't reside on remote file systems. inotify is not used + if there are no open files, as we can't determine if those file + will be on a remote file system. + + any_symlinks() checks if the user has specified any symbolic links. + inotify is not used in this case because it returns updated _targets_ + which would not match the specified names. If we tried to always + use the target names, then we would miss changes to the symlink itself. + + ok is false when one of the files specified could not be opened for + reading. In this case and when following by descriptor, + tail_forever_inotify() cannot be used (in its current implementation). + + FIXME: inotify doesn't give any notification when a new + (remote) file or directory is mounted on top a watched file. + When follow_mode == Follow_name we would ideally like to detect that. + Note if there is a change to the original file then we'll + recheck it and follow the new file, or ignore it if the + file has changed to being remote. + + FIXME-maybe: inotify has a watch descriptor per inode, and hence with + our current hash implementation will only --follow data for one + of the names when multiple hardlinked files are specified, or + for one name when a name is specified multiple times. */ + if (!disable_inotify && (tailable_stdin (F, n_files) + || any_remote_file (F, n_files) + || ! any_non_remote_file (F, n_files) + || any_symlinks (F, n_files) + || any_non_regular_fifo (F, n_files) + || (!ok && follow_mode == Follow_descriptor))) + disable_inotify = true; + + if (!disable_inotify) + { + int wd = inotify_init (); + if (0 <= wd) + { + /* Flush any output from tail_file, now, since + tail_forever_inotify flushes only after writing, + not before reading. */ + if (fflush (stdout) != 0) + write_error (); + + Hash_table *ht; + tail_forever_inotify (wd, F, n_files, sleep_interval, &ht); + hash_free (ht); + close (wd); + errno = 0; + } + error (0, errno, _("inotify cannot be used, reverting to polling")); + } +#endif + disable_inotify = true; + tail_forever (F, n_files, sleep_interval); + } + + if (have_read_stdin && close (STDIN_FILENO) < 0) + error (EXIT_FAILURE, errno, "-"); + main_exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} diff --git a/src/tee.c b/src/tee.c new file mode 100644 index 0000000..27323a3 --- /dev/null +++ b/src/tee.c @@ -0,0 +1,344 @@ +/* tee - read from standard input and write to standard output and files. + Copyright (C) 1985-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Mike Parker, Richard M. Stallman, and David MacKenzie */ + +#include +#include +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "fadvise.h" +#include "iopoll.h" +#include "stdio--.h" +#include "xbinary-io.h" +#include "iopoll.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tee" + +#define AUTHORS \ + proper_name ("Mike Parker"), \ + proper_name ("Richard M. Stallman"), \ + proper_name ("David MacKenzie") + +static bool tee_files (int nfiles, char **files, bool); + +/* If true, append to output files rather than truncating them. */ +static bool append; + +/* If true, ignore interrupts. */ +static bool ignore_interrupts; + +enum output_error + { + output_error_sigpipe, /* traditional behavior, sigpipe enabled. */ + output_error_warn, /* warn on EPIPE, but continue. */ + output_error_warn_nopipe, /* ignore EPIPE, continue. */ + output_error_exit, /* exit on any output error. */ + output_error_exit_nopipe /* exit on any output error except EPIPE. */ + }; + +static enum output_error output_error; + +static struct option const long_options[] = +{ + {"append", no_argument, nullptr, 'a'}, + {"ignore-interrupts", no_argument, nullptr, 'i'}, + {"output-error", optional_argument, nullptr, 'p'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static char const *const output_error_args[] = +{ + "warn", "warn-nopipe", "exit", "exit-nopipe", nullptr +}; +static enum output_error const output_error_types[] = +{ + output_error_warn, output_error_warn_nopipe, + output_error_exit, output_error_exit_nopipe +}; +ARGMATCH_VERIFY (output_error_args, output_error_types); + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name); + fputs (_("\ +Copy standard input to each FILE, and also to standard output.\n\ +\n\ + -a, --append append to the given FILEs, do not overwrite\n\ + -i, --ignore-interrupts ignore interrupt signals\n\ +"), stdout); + fputs (_("\ + -p operate in a more appropriate MODE with pipes.\n\ + --output-error[=MODE] set behavior on write error. See MODE below\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +MODE determines behavior with write errors on the outputs:\n\ + warn diagnose errors writing to any output\n\ + warn-nopipe diagnose errors writing to any output not a pipe\n\ + exit exit on error writing to any output\n\ + exit-nopipe exit on error writing to any output not a pipe\n\ +The default MODE for the -p option is 'warn-nopipe'.\n\ +With \"nopipe\" MODEs, exit immediately if all outputs become broken pipes.\n\ +The default operation when --output-error is not specified, is to\n\ +exit immediately on error writing to a pipe, and diagnose errors\n\ +writing to non pipe outputs.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + append = false; + ignore_interrupts = false; + + int optc; + while ((optc = getopt_long (argc, argv, "aip", long_options, nullptr)) != -1) + { + switch (optc) + { + case 'a': + append = true; + break; + + case 'i': + ignore_interrupts = true; + break; + + case 'p': + if (optarg) + output_error = XARGMATCH ("--output-error", optarg, + output_error_args, output_error_types); + else + output_error = output_error_warn_nopipe; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (ignore_interrupts) + signal (SIGINT, SIG_IGN); + + if (output_error != output_error_sigpipe) + signal (SIGPIPE, SIG_IGN); + + /* Whether to detect and close a broken pipe output. + There is no need if the input is always ready for reading. */ + bool pipe_check = ((output_error == output_error_warn_nopipe + || output_error == output_error_exit_nopipe) + && iopoll_input_ok (STDIN_FILENO)); + + /* Do *not* warn if tee is given no file arguments. + POSIX requires that it work when given no arguments. */ + + bool ok = tee_files (argc - optind, &argv[optind], pipe_check); + if (close (STDIN_FILENO) != 0) + error (EXIT_FAILURE, errno, "%s", _("standard input")); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} + + +/* Return the index of the first non-null descriptor after idx, + or -1 if all are null. */ + +static int +get_next_out (FILE **descriptors, int nfiles, int idx) +{ + for (idx++; idx <= nfiles; idx++) + if (descriptors[idx]) + return idx; + return -1; /* no outputs remaining */ +} + +/* Remove descriptors[i] due to write failure or broken pipe. + Return true if this indicates a reportable error. */ + +static bool +fail_output (FILE **descriptors, char **files, int i) +{ + int w_errno = errno; + bool fail = errno != EPIPE + || output_error == output_error_exit + || output_error == output_error_warn; + if (descriptors[i] == stdout) + clearerr (stdout); /* Avoid redundant close_stdout diagnostic. */ + if (fail) + { + error (output_error == output_error_exit + || output_error == output_error_exit_nopipe, + w_errno, "%s", quotef (files[i])); + } + descriptors[i] = nullptr; + return fail; +} + + +/* Copy the standard input into each of the NFILES files in FILES + and into the standard output. As a side effect, modify FILES[-1]. + Return true if successful. */ + +static bool +tee_files (int nfiles, char **files, bool pipe_check) +{ + size_t n_outputs = 0; + FILE **descriptors; + bool *out_pollable IF_LINT ( = nullptr); + char buffer[BUFSIZ]; + ssize_t bytes_read = 0; + int i; + int first_out = 0; /* idx of first non-null output in descriptors */ + bool ok = true; + char const *mode_string = + (O_BINARY + ? (append ? "ab" : "wb") + : (append ? "a" : "w")); + + xset_binary_mode (STDIN_FILENO, O_BINARY); + xset_binary_mode (STDOUT_FILENO, O_BINARY); + fadvise (stdin, FADVISE_SEQUENTIAL); + + /* Set up FILES[0 .. NFILES] and DESCRIPTORS[0 .. NFILES]. + In both arrays, entry 0 corresponds to standard output. */ + + descriptors = xnmalloc (nfiles + 1, sizeof *descriptors); + if (pipe_check) + out_pollable = xnmalloc (nfiles + 1, sizeof *out_pollable); + files--; + descriptors[0] = stdout; + if (pipe_check) + out_pollable[0] = iopoll_output_ok (fileno (descriptors[0])); + files[0] = bad_cast (_("standard output")); + setvbuf (stdout, nullptr, _IONBF, 0); + n_outputs++; + + for (i = 1; i <= nfiles; i++) + { + /* Do not treat "-" specially - as mandated by POSIX. */ + descriptors[i] = fopen (files[i], mode_string); + if (descriptors[i] == nullptr) + { + if (pipe_check) + out_pollable[i] = false; + error (output_error == output_error_exit + || output_error == output_error_exit_nopipe, + errno, "%s", quotef (files[i])); + ok = false; + } + else + { + if (pipe_check) + out_pollable[i] = iopoll_output_ok (fileno (descriptors[i])); + setvbuf (descriptors[i], nullptr, _IONBF, 0); + n_outputs++; + } + } + + while (n_outputs) + { + if (pipe_check && out_pollable[first_out]) + { + /* Monitor for input, or errors on first valid output. */ + int err = iopoll (STDIN_FILENO, fileno (descriptors[first_out]), + true); + + /* Close the output if it became a broken pipe. */ + if (err == IOPOLL_BROKEN_OUTPUT) + { + errno = EPIPE; /* behave like write produced EPIPE */ + if (fail_output (descriptors, files, first_out)) + ok = false; + n_outputs--; + first_out = get_next_out (descriptors, nfiles, first_out); + continue; + } + else if (err == IOPOLL_ERROR) + { + error (0, errno, _("iopoll error")); + ok = false; + } + } + + bytes_read = read (STDIN_FILENO, buffer, sizeof buffer); + if (bytes_read < 0 && errno == EINTR) + continue; + if (bytes_read <= 0) + break; + + /* Write to all NFILES + 1 descriptors. + Standard output is the first one. */ + for (i = 0; i <= nfiles; i++) + if (descriptors[i] + && ! fwrite_wait (buffer, bytes_read, descriptors[i])) + { + if (fail_output (descriptors, files, i)) + ok = false; + n_outputs--; + if (i == first_out) + first_out = get_next_out (descriptors, nfiles, first_out); + } + } + + if (bytes_read == -1) + { + error (0, errno, _("read error")); + ok = false; + } + + /* Close the files, but not standard output. */ + for (i = 1; i <= nfiles; i++) + if (descriptors[i] && ! fclose_wait (descriptors[i])) + { + error (0, errno, "%s", quotef (files[i])); + ok = false; + } + + free (descriptors); + if (pipe_check) + free (out_pollable); + + return ok; +} diff --git a/src/temp-stream.c b/src/temp-stream.c new file mode 100644 index 0000000..9150304 --- /dev/null +++ b/src/temp-stream.c @@ -0,0 +1,165 @@ +/* temp-stream.c -- provide a stream to a per process temp file + + Copyright (C) 2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include +#include + +#include "stdlib--.h" /* For mkstemp that returns safer FDs. */ +#include "system.h" +#include "tmpdir.h" + +#include "temp-stream.h" + + +#if defined __MSDOS__ || defined _WIN32 +/* Define this to non-zero on systems for which the regular mechanism + (of unlinking an open file and expecting to be able to write, seek + back to the beginning, then reread it) doesn't work. E.g., on Windows + and DOS systems. */ +# define DONT_UNLINK_WHILE_OPEN 1 +#endif + +#if DONT_UNLINK_WHILE_OPEN + +/* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk. + Using atexit like this is wrong, since it can fail + when called e.g. 32 or more times. + But this isn't a big deal, since the code is used only on WOE/DOS + systems, and few people invoke tac on that many nonseekable files. */ + +static char const *file_to_remove; +static FILE *fp_to_close; + +static void +unlink_tempfile (void) +{ + fclose (fp_to_close); + unlink (file_to_remove); +} + +static void +record_or_unlink_tempfile (char const *fn, FILE *fp) +{ + if (!file_to_remove) + { + file_to_remove = fn; + fp_to_close = fp; + atexit (unlink_tempfile); + } +} + +#else + +static void +record_or_unlink_tempfile (char const *fn, MAYBE_UNUSED FILE *fp) +{ + unlink (fn); +} + +#endif + +/* A wrapper around mkstemp that gives us both an open stream pointer, + FP, and the corresponding FILE_NAME. Always return the same FP/name + pair, rewinding/truncating it upon each reuse. + + Note this honors $TMPDIR, unlike the standard defined tmpfile(). + + Returns TRUE on success. */ +bool +temp_stream (FILE **fp, char **file_name) +{ + static char *tempfile = nullptr; + static FILE *tmp_fp; + if (tempfile == nullptr) + { + char *tempbuf = nullptr; + size_t tempbuf_len = 128; + + while (true) + { + if (! (tempbuf = realloc (tempbuf, tempbuf_len))) + { + error (0, errno, _("failed to make temporary file name")); + return false; + } + + if (path_search (tempbuf, tempbuf_len, nullptr, "cutmp", true) == 0) + break; + + if (errno != EINVAL || PATH_MAX / 2 < tempbuf_len) + { + error (0, errno == EINVAL ? ENAMETOOLONG : errno, + _("failed to make temporary file name")); + return false; + } + + tempbuf_len *= 2; + } + + tempfile = tempbuf; + + /* FIXME: there's a small window between a successful mkstemp call + and the unlink that's performed by record_or_unlink_tempfile. + If we're interrupted in that interval, this code fails to remove + the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN, + the window is much larger -- it extends to the atexit-called + unlink_tempfile. + FIXME: clean up upon fatal signal. Don't block them, in case + $TMPDIR is a remote file system. */ + + int fd = mkstemp (tempfile); + if (fd < 0) + { + error (0, errno, _("failed to create temporary file %s"), + quoteaf (tempfile)); + goto Reset; + } + + tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+")); + if (! tmp_fp) + { + error (0, errno, _("failed to open %s for writing"), + quoteaf (tempfile)); + close (fd); + unlink (tempfile); + Reset: + free (tempfile); + tempfile = nullptr; + return false; + } + + record_or_unlink_tempfile (tempfile, tmp_fp); + } + else + { + clearerr (tmp_fp); + if (fseeko (tmp_fp, 0, SEEK_SET) < 0 + || ftruncate (fileno (tmp_fp), 0) < 0) + { + error (0, errno, _("failed to rewind stream for %s"), + quoteaf (tempfile)); + return false; + } + } + + *fp = tmp_fp; + if (file_name) + *file_name = tempfile; + return true; +} diff --git a/src/temp-stream.h b/src/temp-stream.h new file mode 100644 index 0000000..6c32e0c --- /dev/null +++ b/src/temp-stream.h @@ -0,0 +1,6 @@ +/* A wrapper around mkstemp that gives us both an open stream pointer, + FP, and the corresponding FILE_NAME. Always return the same FP/name + pair, rewinding/truncating it upon each reuse. + + Note this honors $TMPDIR, unlike the standard defined tmpfile(). */ +extern bool temp_stream (FILE **fp, char **file_name); diff --git a/src/test.c b/src/test.c new file mode 100644 index 0000000..a4eb40a --- /dev/null +++ b/src/test.c @@ -0,0 +1,862 @@ +/* GNU test program (ksb and mjb) */ + +/* Modified to run with the GNU shell by bfox. */ + +/* Copyright (C) 1987-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Define TEST_STANDALONE to get the /bin/test version. Otherwise, you get + the shell builtin version. */ + +/* Without this pragma, gcc 4.6.2 20111027 mistakenly suggests that + the advance function might be candidate for attribute 'pure'. */ +#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ +# pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" +#endif + +#include +#include +#include + +#define TEST_STANDALONE 1 + +#ifndef LBRACKET +# define LBRACKET 0 +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#if LBRACKET +# define PROGRAM_NAME "[" +#else +# define PROGRAM_NAME "test" +#endif + +#include "system.h" +#include "assure.h" +#include "quote.h" +#include "stat-time.h" +#include "strnumcmp.h" + +#include +#include "verror.h" + +#if HAVE_SYS_PARAM_H +# include +#endif + +/* Exit status for syntax errors, etc. */ +enum { TEST_TRUE, TEST_FALSE, TEST_FAILURE }; + +#if defined TEST_STANDALONE +# define test_exit(val) exit (val) +# define test_main_return(val) return val +#else + static jmp_buf test_exit_buf; + static int test_error_return = 0; +# define test_exit(val) test_error_return = val, longjmp (test_exit_buf, 1) +# define test_main_return(val) test_exit (val) +#endif /* !TEST_STANDALONE */ + +static int pos; /* The offset of the current argument in ARGV. */ +static int argc; /* The number of arguments present in ARGV. */ +static char **argv; /* The argument list. */ + +static bool unary_operator (void); +static bool binary_operator (bool); +static bool two_arguments (void); +static bool three_arguments (void); +static bool posixtest (int); + +static bool expr (void); +static bool term (void); +static bool and (void); +static bool or (void); + +static void beyond (void); + +ATTRIBUTE_FORMAT ((printf, 1, 2)) +static _Noreturn void +test_syntax_error (char const *format, ...) +{ + va_list ap; + va_start (ap, format); + verror (0, 0, format, ap); + test_exit (TEST_FAILURE); +} + +/* Increment our position in the argument list. Check that we're not + past the end of the argument list. This check is suppressed if the + argument is false. */ + +static void +advance (bool f) +{ + ++pos; + + if (f && pos >= argc) + beyond (); +} + +static void +unary_advance (void) +{ + advance (true); + ++pos; +} + +/* + * beyond - call when we're beyond the end of the argument list (an + * error condition) + */ +static _Noreturn void +beyond (void) +{ + test_syntax_error (_("missing argument after %s"), quote (argv[argc - 1])); +} + +/* If the characters pointed to by STRING constitute a valid number, + return a pointer to the start of the number, skipping any blanks or + leading '+'. Otherwise, report an error and exit. */ +static char const * +find_int (char const *string) +{ + char const *p; + char const *number_start; + + for (p = string; isblank (to_uchar (*p)); p++) + continue; + + if (*p == '+') + { + p++; + number_start = p; + } + else + { + number_start = p; + p += (*p == '-'); + } + + if (ISDIGIT (*p++)) + { + while (ISDIGIT (*p)) + p++; + while (isblank (to_uchar (*p))) + p++; + if (!*p) + return number_start; + } + + test_syntax_error (_("invalid integer %s"), quote (string)); +} + +/* Find the modification time of FILE, and stuff it into *MTIME. + Return true if successful. */ +static bool +get_mtime (char const *filename, struct timespec *mtime) +{ + struct stat finfo; + bool ok = (stat (filename, &finfo) == 0); + if (ok) + *mtime = get_stat_mtime (&finfo); + return ok; +} + +/* Return true if S is one of the test command's binary operators. */ +static bool +binop (char const *s) +{ + return ((STREQ (s, "=")) || (STREQ (s, "!=")) || (STREQ (s, "==")) || + (STREQ (s, "-nt")) || + (STREQ (s, "-ot")) || (STREQ (s, "-ef")) || (STREQ (s, "-eq")) || + (STREQ (s, "-ne")) || (STREQ (s, "-lt")) || (STREQ (s, "-le")) || + (STREQ (s, "-gt")) || (STREQ (s, "-ge"))); +} + +/* + * term - parse a term and return 1 or 0 depending on whether the term + * evaluates to true or false, respectively. + * + * term ::= + * '-'('h'|'d'|'f'|'r'|'s'|'w'|'c'|'b'|'p'|'u'|'g'|'k') filename + * '-'('L'|'x') filename + * '-t' int + * '-'('z'|'n') string + * string + * string ('!='|'=') string + * '-'(eq|ne|le|lt|ge|gt) + * file '-'(nt|ot|ef) file + * '(' ')' + * int ::= + * '-l' string + * positive and negative integers + */ +static bool +term (void) +{ + bool value; + bool negated = false; + + /* Deal with leading 'not's. */ + while (pos < argc && argv[pos][0] == '!' && argv[pos][1] == '\0') + { + advance (true); + negated = !negated; + } + + if (pos >= argc) + beyond (); + + /* A paren-bracketed argument. */ + if (argv[pos][0] == '(' && argv[pos][1] == '\0') + { + int nargs; + + advance (true); + + for (nargs = 1; + pos + nargs < argc && ! STREQ (argv[pos + nargs], ")"); + nargs++) + if (nargs == 4) + { + nargs = argc - pos; + break; + } + + value = posixtest (nargs); + if (argv[pos] == 0) + test_syntax_error (_("%s expected"), quote (")")); + else + if (argv[pos][0] != ')' || argv[pos][1]) + test_syntax_error (_("%s expected, found %s"), + quote_n (0, ")"), quote_n (1, argv[pos])); + advance (false); + } + + /* Are there enough arguments left that this could be dyadic? */ + else if (4 <= argc - pos && STREQ (argv[pos], "-l") && binop (argv[pos + 2])) + value = binary_operator (true); + else if (3 <= argc - pos && binop (argv[pos + 1])) + value = binary_operator (false); + + /* It might be a switch type argument. */ + else if (argv[pos][0] == '-' && argv[pos][1] && argv[pos][2] == '\0') + value = unary_operator (); + else + { + value = (argv[pos][0] != '\0'); + advance (false); + } + + return negated ^ value; +} + +static bool +binary_operator (bool l_is_l) +{ + int op; + struct stat stat_buf, stat_spare; + /* Is the right integer expression of the form '-l string'? */ + bool r_is_l; + + if (l_is_l) + advance (false); + op = pos + 1; + + if ((op < argc - 2) && STREQ (argv[op + 1], "-l")) + { + r_is_l = true; + advance (false); + } + else + r_is_l = false; + + if (argv[op][0] == '-') + { + /* check for eq, nt, and stuff */ + if ((((argv[op][1] == 'l' || argv[op][1] == 'g') + && (argv[op][2] == 'e' || argv[op][2] == 't')) + || (argv[op][1] == 'e' && argv[op][2] == 'q') + || (argv[op][1] == 'n' && argv[op][2] == 'e')) + && !argv[op][3]) + { + char lbuf[INT_BUFSIZE_BOUND (uintmax_t)]; + char rbuf[INT_BUFSIZE_BOUND (uintmax_t)]; + char const *l = (l_is_l + ? umaxtostr (strlen (argv[op - 1]), lbuf) + : find_int (argv[op - 1])); + char const *r = (r_is_l + ? umaxtostr (strlen (argv[op + 2]), rbuf) + : find_int (argv[op + 1])); + int cmp = strintcmp (l, r); + bool xe_operator = (argv[op][2] == 'e'); + pos += 3; + return (argv[op][1] == 'l' ? cmp < xe_operator + : argv[op][1] == 'g' ? cmp > - xe_operator + : (cmp != 0) == xe_operator); + } + + switch (argv[op][1]) + { + default: + break; + + case 'n': + if (argv[op][2] == 't' && !argv[op][3]) + { + /* nt - newer than */ + struct timespec lt, rt; + bool le, re; + pos += 3; + if (l_is_l || r_is_l) + test_syntax_error (_("-nt does not accept -l")); + le = get_mtime (argv[op - 1], <); + re = get_mtime (argv[op + 1], &rt); + return le && (!re || timespec_cmp (lt, rt) > 0); + } + break; + + case 'e': + if (argv[op][2] == 'f' && !argv[op][3]) + { + /* ef - hard link? */ + pos += 3; + if (l_is_l || r_is_l) + test_syntax_error (_("-ef does not accept -l")); + return (stat (argv[op - 1], &stat_buf) == 0 + && stat (argv[op + 1], &stat_spare) == 0 + && stat_buf.st_dev == stat_spare.st_dev + && stat_buf.st_ino == stat_spare.st_ino); + } + break; + + case 'o': + if ('t' == argv[op][2] && '\000' == argv[op][3]) + { + /* ot - older than */ + struct timespec lt, rt; + bool le, re; + pos += 3; + if (l_is_l || r_is_l) + test_syntax_error (_("-ot does not accept -l")); + le = get_mtime (argv[op - 1], <); + re = get_mtime (argv[op + 1], &rt); + return re && (!le || timespec_cmp (lt, rt) < 0); + } + break; + } + + /* FIXME: is this dead code? */ + test_syntax_error (_("%s: unknown binary operator"), quote (argv[op])); + } + + if (argv[op][0] == '=' + && (!argv[op][1] || ((argv[op][1] == '=') && !argv[op][2]))) + { + bool value = STREQ (argv[pos], argv[pos + 2]); + pos += 3; + return value; + } + + if (STREQ (argv[op], "!=")) + { + bool value = !STREQ (argv[pos], argv[pos + 2]); + pos += 3; + return value; + } + + /* Not reached. */ + affirm (false); +} + +static bool +unary_operator (void) +{ + struct stat stat_buf; + + switch (argv[pos][1]) + { + default: + test_syntax_error (_("%s: unary operator expected"), quote (argv[pos])); + + /* All of the following unary operators use unary_advance (), which + checks to make sure that there is an argument, and then advances + pos right past it. This means that pos - 1 is the location of the + argument. */ + + case 'e': /* file exists in the file system? */ + unary_advance (); + return stat (argv[pos - 1], &stat_buf) == 0; + + case 'r': /* file is readable? */ + unary_advance (); + return euidaccess (argv[pos - 1], R_OK) == 0; + + case 'w': /* File is writable? */ + unary_advance (); + return euidaccess (argv[pos - 1], W_OK) == 0; + + case 'x': /* File is executable? */ + unary_advance (); + return euidaccess (argv[pos - 1], X_OK) == 0; + + case 'N': /* File exists and has been modified since it was last read? */ + { + unary_advance (); + if (stat (argv[pos - 1], &stat_buf) != 0) + return false; + struct timespec atime = get_stat_atime (&stat_buf); + struct timespec mtime = get_stat_mtime (&stat_buf); + return (timespec_cmp (mtime, atime) > 0); + } + + case 'O': /* File is owned by you? */ + { + unary_advance (); + if (stat (argv[pos - 1], &stat_buf) != 0) + return false; + errno = 0; + uid_t euid = geteuid (); + uid_t NO_UID = -1; + return ! (euid == NO_UID && errno) && euid == stat_buf.st_uid; + } + + case 'G': /* File is owned by your group? */ + { + unary_advance (); + if (stat (argv[pos - 1], &stat_buf) != 0) + return false; + errno = 0; + gid_t egid = getegid (); + gid_t NO_GID = -1; + return ! (egid == NO_GID && errno) && egid == stat_buf.st_gid; + } + + case 'f': /* File is a file? */ + unary_advance (); + /* Under POSIX, -f is true if the given file exists + and is a regular file. */ + return (stat (argv[pos - 1], &stat_buf) == 0 + && S_ISREG (stat_buf.st_mode)); + + case 'd': /* File is a directory? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && S_ISDIR (stat_buf.st_mode)); + + case 's': /* File has something in it? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && 0 < stat_buf.st_size); + + case 'S': /* File is a socket? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && S_ISSOCK (stat_buf.st_mode)); + + case 'c': /* File is character special? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && S_ISCHR (stat_buf.st_mode)); + + case 'b': /* File is block special? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && S_ISBLK (stat_buf.st_mode)); + + case 'p': /* File is a named pipe? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && S_ISFIFO (stat_buf.st_mode)); + + case 'L': /* Same as -h */ + /*FALLTHROUGH*/ + + case 'h': /* File is a symbolic link? */ + unary_advance (); + return (lstat (argv[pos - 1], &stat_buf) == 0 + && S_ISLNK (stat_buf.st_mode)); + + case 'u': /* File is setuid? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && (stat_buf.st_mode & S_ISUID)); + + case 'g': /* File is setgid? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && (stat_buf.st_mode & S_ISGID)); + + case 'k': /* File has sticky bit set? */ + unary_advance (); + return (stat (argv[pos - 1], &stat_buf) == 0 + && (stat_buf.st_mode & S_ISVTX)); + + case 't': /* File (fd) is a terminal? */ + { + long int fd; + char const *arg; + unary_advance (); + arg = find_int (argv[pos - 1]); + errno = 0; + fd = strtol (arg, nullptr, 10); + return (errno != ERANGE && 0 <= fd && fd <= INT_MAX && isatty (fd)); + } + + case 'n': /* True if arg has some length. */ + unary_advance (); + return argv[pos - 1][0] != 0; + + case 'z': /* True if arg has no length. */ + unary_advance (); + return argv[pos - 1][0] == '\0'; + } +} + +/* + * and: + * term + * term '-a' and + */ +static bool +and (void) +{ + bool value = true; + + while (true) + { + value &= term (); + if (! (pos < argc && STREQ (argv[pos], "-a"))) + return value; + advance (false); + } +} + +/* + * or: + * and + * and '-o' or + */ +static bool +or (void) +{ + bool value = false; + + while (true) + { + value |= and (); + if (! (pos < argc && STREQ (argv[pos], "-o"))) + return value; + advance (false); + } +} + +/* + * expr: + * or + */ +static bool +expr (void) +{ + if (pos >= argc) + beyond (); + + return or (); /* Same with this. */ +} + +static bool +one_argument (void) +{ + return argv[pos++][0] != '\0'; +} + +static bool +two_arguments (void) +{ + bool value; + + if (STREQ (argv[pos], "!")) + { + advance (false); + value = ! one_argument (); + } + else if (argv[pos][0] == '-' + && argv[pos][1] != '\0' + && argv[pos][2] == '\0') + { + value = unary_operator (); + } + else + beyond (); + return (value); +} + +static bool +three_arguments (void) +{ + bool value; + + if (binop (argv[pos + 1])) + value = binary_operator (false); + else if (STREQ (argv[pos], "!")) + { + advance (true); + value = !two_arguments (); + } + else if (STREQ (argv[pos], "(") && STREQ (argv[pos + 2], ")")) + { + advance (false); + value = one_argument (); + advance (false); + } + else if (STREQ (argv[pos + 1], "-a") || STREQ (argv[pos + 1], "-o")) + value = expr (); + else + test_syntax_error (_("%s: binary operator expected"), + quote (argv[pos + 1])); + return (value); +} + +/* This is an implementation of a Posix.2 proposal by David Korn. */ +static bool +posixtest (int nargs) +{ + bool value; + + switch (nargs) + { + case 1: + value = one_argument (); + break; + + case 2: + value = two_arguments (); + break; + + case 3: + value = three_arguments (); + break; + + case 4: + if (STREQ (argv[pos], "!")) + { + advance (true); + value = !three_arguments (); + break; + } + if (STREQ (argv[pos], "(") && STREQ (argv[pos + 3], ")")) + { + advance (false); + value = two_arguments (); + advance (false); + break; + } + FALLTHROUGH; + case 5: + default: + affirm (0 < nargs); + value = expr (); + } + + return (value); +} + +#if defined TEST_STANDALONE + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + fputs (_("\ +Usage: test EXPRESSION\n\ + or: test\n\ + or: [ EXPRESSION ]\n\ + or: [ ]\n\ + or: [ OPTION\n\ +"), stdout); + fputs (_("\ +Exit with the status determined by EXPRESSION.\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +An omitted EXPRESSION defaults to false. Otherwise,\n\ +EXPRESSION is true or false and sets exit status. It is one of:\n\ +"), stdout); + fputs (_("\ +\n\ + ( EXPRESSION ) EXPRESSION is true\n\ + ! EXPRESSION EXPRESSION is false\n\ + EXPRESSION1 -a EXPRESSION2 both EXPRESSION1 and EXPRESSION2 are true\n\ + EXPRESSION1 -o EXPRESSION2 either EXPRESSION1 or EXPRESSION2 is true\n\ +"), stdout); + fputs (_("\ +\n\ + -n STRING the length of STRING is nonzero\n\ + STRING equivalent to -n STRING\n\ + -z STRING the length of STRING is zero\n\ + STRING1 = STRING2 the strings are equal\n\ + STRING1 != STRING2 the strings are not equal\n\ +"), stdout); + fputs (_("\ +\n\ + INTEGER1 -eq INTEGER2 INTEGER1 is equal to INTEGER2\n\ + INTEGER1 -ge INTEGER2 INTEGER1 is greater than or equal to INTEGER2\n\ + INTEGER1 -gt INTEGER2 INTEGER1 is greater than INTEGER2\n\ + INTEGER1 -le INTEGER2 INTEGER1 is less than or equal to INTEGER2\n\ + INTEGER1 -lt INTEGER2 INTEGER1 is less than INTEGER2\n\ + INTEGER1 -ne INTEGER2 INTEGER1 is not equal to INTEGER2\n\ +"), stdout); + fputs (_("\ +\n\ + FILE1 -ef FILE2 FILE1 and FILE2 have the same device and inode numbers\n\ + FILE1 -nt FILE2 FILE1 is newer (modification date) than FILE2\n\ + FILE1 -ot FILE2 FILE1 is older than FILE2\n\ +"), stdout); + fputs (_("\ +\n\ + -b FILE FILE exists and is block special\n\ + -c FILE FILE exists and is character special\n\ + -d FILE FILE exists and is a directory\n\ + -e FILE FILE exists\n\ +"), stdout); + fputs (_("\ + -f FILE FILE exists and is a regular file\n\ + -g FILE FILE exists and is set-group-ID\n\ + -G FILE FILE exists and is owned by the effective group ID\n\ + -h FILE FILE exists and is a symbolic link (same as -L)\n\ + -k FILE FILE exists and has its sticky bit set\n\ +"), stdout); + fputs (_("\ + -L FILE FILE exists and is a symbolic link (same as -h)\n\ + -N FILE FILE exists and has been modified since it was last read\n\ + -O FILE FILE exists and is owned by the effective user ID\n\ + -p FILE FILE exists and is a named pipe\n\ + -r FILE FILE exists and the user has read access\n\ + -s FILE FILE exists and has a size greater than zero\n\ +"), stdout); + fputs (_("\ + -S FILE FILE exists and is a socket\n\ + -t FD file descriptor FD is opened on a terminal\n\ + -u FILE FILE exists and its set-user-ID bit is set\n\ + -w FILE FILE exists and the user has write access\n\ + -x FILE FILE exists and the user has execute (or search) access\n\ +"), stdout); + fputs (_("\ +\n\ +Except for -h and -L, all FILE-related tests dereference symbolic links.\n\ +Beware that parentheses need to be escaped (e.g., by backslashes) for shells.\n\ +INTEGER may also be -l STRING, which evaluates to the length of STRING.\n\ +"), stdout); + fputs (_("\ +\n\ +NOTE: Binary -a and -o are inherently ambiguous. Use 'test EXPR1 && test\n\ +EXPR2' or 'test EXPR1 || test EXPR2' instead.\n\ +"), stdout); + fputs (_("\ +\n\ +NOTE: [ honors the --help and --version options, but test does not.\n\ +test treats each of those as it treats any other nonempty STRING.\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, _("test and/or [")); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} +#endif /* TEST_STANDALONE */ + +#if !defined TEST_STANDALONE +# define main test_command +#endif + +#define AUTHORS \ + proper_name ("Kevin Braunsdorf"), \ + proper_name ("Matthew Bradburn") + +/* + * [: + * '[' expr ']' + * test: + * test expr + */ +int +main (int margc, char **margv) +{ + bool value; + +#if !defined TEST_STANDALONE + int code; + + code = setjmp (test_exit_buf); + + if (code) + return (test_error_return); +#else /* TEST_STANDALONE */ + initialize_main (&margc, &margv); + set_program_name (margv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (TEST_FAILURE); + atexit (close_stdout); +#endif /* TEST_STANDALONE */ + + argv = margv; + + if (LBRACKET) + { + /* Recognize --help or --version, but only when invoked in the + "[" form, when the last argument is not "]". Use direct + parsing, rather than parse_long_options, to avoid accepting + abbreviations. POSIX allows "[ --help" and "[ --version" to + have the usual GNU behavior, but it requires "test --help" + and "test --version" to exit silently with status 0. */ + if (margc == 2) + { + if (STREQ (margv[1], "--help")) + usage (EXIT_SUCCESS); + + if (STREQ (margv[1], "--version")) + { + version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS, + (char *) nullptr); + test_main_return (EXIT_SUCCESS); + } + } + if (margc < 2 || !STREQ (margv[margc - 1], "]")) + test_syntax_error (_("missing %s"), quote ("]")); + + --margc; + } + + argc = margc; + pos = 1; + + if (pos >= argc) + test_main_return (TEST_FALSE); + + value = posixtest (argc - 1); + + if (pos != argc) + test_syntax_error (_("extra argument %s"), quote (argv[pos])); + + test_main_return (value ? TEST_TRUE : TEST_FALSE); +} diff --git a/src/timeout.c b/src/timeout.c new file mode 100644 index 0000000..d2eedc6 --- /dev/null +++ b/src/timeout.c @@ -0,0 +1,614 @@ +/* timeout -- run a command with bounded time + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + +/* timeout - Start a command, and kill it if the specified timeout expires + + We try to behave like a shell starting a single (foreground) job, + and will kill the job if we receive the alarm signal we setup. + The exit status of the job is returned, or one of these errors: + EXIT_TIMEDOUT 124 job timed out + EXIT_CANCELED 125 internal error + EXIT_CANNOT_INVOKE 126 error executing job + EXIT_ENOENT 127 couldn't find job to exec + + Caveats: + If user specifies the KILL (9) signal is to be sent on timeout, + the monitor is killed and so exits with 128+9 rather than 124. + + If you start a command in the background, which reads from the tty + and so is immediately sent SIGTTIN to stop, then the timeout + process will ignore this so it can timeout the command as expected. + This can be seen with 'timeout 10 dd&' for example. + However if one brings this group to the foreground with the 'fg' + command before the timer expires, the command will remain + in the stop state as the shell doesn't send a SIGCONT + because the timeout process (group leader) is already running. + To get the command running again one can Ctrl-Z, and do fg again. + Note one can Ctrl-C the whole job when in this state. + I think this could be fixed but I'm not sure the extra + complication is justified for this scenario. + + Written by Pádraig Brady. */ + +#include +#include +#include +#include +#include +#if HAVE_PRCTL +# include +#endif +#include + +#include "system.h" +#include "cl-strtod.h" +#include "xstrtod.h" +#include "sig2str.h" +#include "operand2sig.h" +#include "quote.h" + +#if HAVE_SETRLIMIT +/* FreeBSD 5.0 at least needs and included + before . Currently "system.h" includes . */ +# include +#endif + +/* NonStop circa 2011 lacks both SA_RESTART and siginterrupt. */ +#ifndef SA_RESTART +# define SA_RESTART 0 +#endif + +#define PROGRAM_NAME "timeout" + +#define AUTHORS proper_name_lite ("Padraig Brady", "P\303\241draig Brady") + +static int timed_out; +static int term_signal = SIGTERM; /* same default as kill command. */ +static pid_t monitored_pid; +static double kill_after; +static bool foreground; /* whether to use another program group. */ +static bool preserve_status; /* whether to use a timeout status or not. */ +static bool verbose; /* whether to diagnose timeouts or not. */ +static char const *command; + +/* for long options with no corresponding short option, use enum */ +enum +{ + FOREGROUND_OPTION = CHAR_MAX + 1, + PRESERVE_STATUS_OPTION +}; + +static struct option const long_options[] = +{ + {"kill-after", required_argument, nullptr, 'k'}, + {"signal", required_argument, nullptr, 's'}, + {"verbose", no_argument, nullptr, 'v'}, + {"foreground", no_argument, nullptr, FOREGROUND_OPTION}, + {"preserve-status", no_argument, nullptr, PRESERVE_STATUS_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Start the timeout after which we'll receive a SIGALRM. + Round DURATION up to the next representable value. + Treat out-of-range values as if they were maximal, + as that's more useful in practice than reporting an error. + '0' means don't timeout. */ +static void +settimeout (double duration, bool warn) +{ + +#if HAVE_TIMER_SETTIME + /* timer_settime() provides potentially nanosecond resolution. */ + + struct timespec ts = dtotimespec (duration); + struct itimerspec its = { {0, 0}, ts }; + timer_t timerid; + if (timer_create (CLOCK_REALTIME, nullptr, &timerid) == 0) + { + if (timer_settime (timerid, 0, &its, nullptr) == 0) + return; + else + { + if (warn) + error (0, errno, _("warning: timer_settime")); + timer_delete (timerid); + } + } + else if (warn && errno != ENOSYS) + error (0, errno, _("warning: timer_create")); + +#elif HAVE_SETITIMER + /* setitimer() is more portable (to Darwin for example), + but only provides microsecond resolution. */ + + struct timeval tv; + struct timespec ts = dtotimespec (duration); + tv.tv_sec = ts.tv_sec; + tv.tv_usec = (ts.tv_nsec + 999) / 1000; + if (tv.tv_usec == 1000 * 1000) + { + if (tv.tv_sec != TYPE_MAXIMUM (time_t)) + { + tv.tv_sec++; + tv.tv_usec = 0; + } + else + tv.tv_usec--; + } + struct itimerval it = { {0, 0}, tv }; + if (setitimer (ITIMER_REAL, &it, nullptr) == 0) + return; + else + { + if (warn && errno != ENOSYS) + error (0, errno, _("warning: setitimer")); + } +#endif + + /* fallback to single second resolution provided by alarm(). */ + + unsigned int timeint; + if (UINT_MAX <= duration) + timeint = UINT_MAX; + else + { + unsigned int duration_floor = duration; + timeint = duration_floor + (duration_floor < duration); + } + alarm (timeint); +} + +/* send SIG avoiding the current process. */ + +static int +send_sig (pid_t where, int sig) +{ + /* If sending to the group, then ignore the signal, + so we don't go into a signal loop. Note that this will ignore any of the + signals registered in install_cleanup(), that are sent after we + propagate the first one, which hopefully won't be an issue. Note this + process can be implicitly multithreaded due to some timer_settime() + implementations, therefore a signal sent to the group, can be sent + multiple times to this process. */ + if (where == 0) + signal (sig, SIG_IGN); + return kill (where, sig); +} + +/* Signal handler which is required for sigsuspend() to be interrupted + whenever SIGCHLD is received. */ +static void +chld (int sig) +{ +} + + +static void +cleanup (int sig) +{ + if (sig == SIGALRM) + { + timed_out = 1; + sig = term_signal; + } + if (monitored_pid) + { + if (kill_after) + { + int saved_errno = errno; /* settimeout may reset. */ + /* Start a new timeout after which we'll send SIGKILL. */ + term_signal = SIGKILL; + settimeout (kill_after, false); + kill_after = 0; /* Don't let later signals reset kill alarm. */ + errno = saved_errno; + } + + /* Send the signal directly to the monitored child, + in case it has itself become group leader, + or is not running in a separate group. */ + if (verbose) + { + char signame[MAX (SIG2STR_MAX, INT_BUFSIZE_BOUND (int))]; + if (sig2str (sig, signame) != 0) + snprintf (signame, sizeof signame, "%d", sig); + error (0, 0, _("sending signal %s to command %s"), + signame, quote (command)); + } + send_sig (monitored_pid, sig); + + /* The normal case is the job has remained in our + newly created process group, so send to all processes in that. */ + if (!foreground) + { + send_sig (0, sig); + if (sig != SIGKILL && sig != SIGCONT) + { + send_sig (monitored_pid, SIGCONT); + send_sig (0, SIGCONT); + } + } + } + else /* we're the child or the child is not exec'd yet. */ + _exit (128 + sig); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION] DURATION COMMAND [ARG]...\n\ + or: %s [OPTION]\n"), program_name, program_name); + + fputs (_("\ +Start COMMAND, and kill it if still running after DURATION.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + --preserve-status\n\ + exit with the same status as COMMAND, even when the\n\ + command times out\n\ + --foreground\n\ + when not running timeout directly from a shell prompt,\n\ + allow COMMAND to read from the TTY and get TTY signals;\n\ + in this mode, children of COMMAND will not be timed out\n\ + -k, --kill-after=DURATION\n\ + also send a KILL signal if COMMAND is still running\n\ + this long after the initial signal was sent\n\ + -s, --signal=SIGNAL\n\ + specify the signal to be sent on timeout;\n\ + SIGNAL may be a name like 'HUP' or a number;\n\ + see 'kill -l' for a list of signals\n"), stdout); + fputs (_("\ + -v, --verbose diagnose to stderr any signal sent upon timeout\n"), stdout); + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + + fputs (_("\n\ +DURATION is a floating point number with an optional suffix:\n\ +'s' for seconds (the default), 'm' for minutes, 'h' for hours or \ +'d' for days.\nA duration of 0 disables the associated timeout.\n"), stdout); + + fputs (_("\n\ +Upon timeout, send the TERM signal to COMMAND, if no other SIGNAL specified.\n\ +The TERM signal kills any process that does not block or catch that signal.\n\ +It may be necessary to use the KILL signal, since this signal can't be caught.\ +\n"), stdout); + + fputs (_("\n\ +Exit status:\n\ + 124 if COMMAND times out, and --preserve-status is not specified\n\ + 125 if the timeout command itself fails\n\ + 126 if COMMAND is found but cannot be invoked\n\ + 127 if COMMAND cannot be found\n\ + 137 if COMMAND (or timeout itself) is sent the KILL (9) signal (128+9)\n\ + - the exit status of COMMAND otherwise\n\ +"), stdout); + + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Given a floating point value *X, and a suffix character, SUFFIX_CHAR, + scale *X by the multiplier implied by SUFFIX_CHAR. SUFFIX_CHAR may + be the NUL byte or 's' to denote seconds, 'm' for minutes, 'h' for + hours, or 'd' for days. If SUFFIX_CHAR is invalid, don't modify *X + and return false. Otherwise return true. */ + +static bool +apply_time_suffix (double *x, char suffix_char) +{ + int multiplier; + + switch (suffix_char) + { + case 0: + case 's': + multiplier = 1; + break; + case 'm': + multiplier = 60; + break; + case 'h': + multiplier = 60 * 60; + break; + case 'd': + multiplier = 60 * 60 * 24; + break; + default: + return false; + } + + *x *= multiplier; + + return true; +} + +static double +parse_duration (char const *str) +{ + double duration; + char const *ep; + + if (! (xstrtod (str, &ep, &duration, cl_strtod) || errno == ERANGE) + /* Nonnegative interval. */ + || ! (0 <= duration) + /* No extra chars after the number and an optional s,m,h,d char. */ + || (*ep && *(ep + 1)) + /* Check any suffix char and update timeout based on the suffix. */ + || !apply_time_suffix (&duration, *ep)) + { + error (0, 0, _("invalid time interval %s"), quote (str)); + usage (EXIT_CANCELED); + } + + return duration; +} + +static void +unblock_signal (int sig) +{ + sigset_t unblock_set; + sigemptyset (&unblock_set); + sigaddset (&unblock_set, sig); + if (sigprocmask (SIG_UNBLOCK, &unblock_set, nullptr) != 0) + error (0, errno, _("warning: sigprocmask")); +} + +static void +install_sigchld (void) +{ + struct sigaction sa; + sigemptyset (&sa.sa_mask); /* Allow concurrent calls to handler */ + sa.sa_handler = chld; + sa.sa_flags = SA_RESTART; /* Restart syscalls if possible, as that's + more likely to work cleanly. */ + + sigaction (SIGCHLD, &sa, nullptr); + + /* We inherit the signal mask from our parent process, + so ensure SIGCHLD is not blocked. */ + unblock_signal (SIGCHLD); +} + +static void +install_cleanup (int sigterm) +{ + struct sigaction sa; + sigemptyset (&sa.sa_mask); /* Allow concurrent calls to handler */ + sa.sa_handler = cleanup; + sa.sa_flags = SA_RESTART; /* Restart syscalls if possible, as that's + more likely to work cleanly. */ + + sigaction (SIGALRM, &sa, nullptr); /* our timeout. */ + sigaction (SIGINT, &sa, nullptr); /* Ctrl-C at terminal for example. */ + sigaction (SIGQUIT, &sa, nullptr); /* Ctrl-\ at terminal for example. */ + sigaction (SIGHUP, &sa, nullptr); /* terminal closed for example. */ + sigaction (SIGTERM, &sa, nullptr); /* if killed, stop monitored proc. */ + sigaction (sigterm, &sa, nullptr); /* user specified termination signal. */ +} + +/* Block all signals which were registered with cleanup() as the signal + handler, so we never kill processes after waitpid() returns. + Also block SIGCHLD to ensure it doesn't fire between + waitpid() polling and sigsuspend() waiting for a signal. + Return original mask in OLD_SET. */ +static void +block_cleanup_and_chld (int sigterm, sigset_t *old_set) +{ + sigset_t block_set; + sigemptyset (&block_set); + + sigaddset (&block_set, SIGALRM); + sigaddset (&block_set, SIGINT); + sigaddset (&block_set, SIGQUIT); + sigaddset (&block_set, SIGHUP); + sigaddset (&block_set, SIGTERM); + sigaddset (&block_set, sigterm); + + sigaddset (&block_set, SIGCHLD); + + if (sigprocmask (SIG_BLOCK, &block_set, old_set) != 0) + error (0, errno, _("warning: sigprocmask")); +} + +/* Try to disable core dumps for this process. + Return TRUE if successful, FALSE otherwise. */ +static bool +disable_core_dumps (void) +{ +#if HAVE_PRCTL && defined PR_SET_DUMPABLE + if (prctl (PR_SET_DUMPABLE, 0) == 0) + return true; + +#elif HAVE_SETRLIMIT && defined RLIMIT_CORE + /* Note this doesn't disable processing by a filter in + /proc/sys/kernel/core_pattern on Linux. */ + if (setrlimit (RLIMIT_CORE, &(struct rlimit) {0,0}) == 0) + return true; + +#else + return false; +#endif + + error (0, errno, _("warning: disabling core dumps failed")); + return false; +} + +int +main (int argc, char **argv) +{ + double timeout; + char signame[SIG2STR_MAX]; + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (EXIT_CANCELED); + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "+k:s:v", long_options, nullptr)) != -1) + { + switch (c) + { + case 'k': + kill_after = parse_duration (optarg); + break; + + case 's': + term_signal = operand2sig (optarg, signame); + if (term_signal == -1) + usage (EXIT_CANCELED); + break; + + case 'v': + verbose = true; + break; + + case FOREGROUND_OPTION: + foreground = true; + break; + + case PRESERVE_STATUS_OPTION: + preserve_status = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_CANCELED); + break; + } + } + + if (argc - optind < 2) + usage (EXIT_CANCELED); + + timeout = parse_duration (argv[optind++]); + + argv += optind; + command = argv[0]; + + /* Ensure we're in our own group so all subprocesses can be killed. + Note we don't just put the child in a separate group as + then we would need to worry about foreground and background groups + and propagating signals between them. */ + if (!foreground) + setpgid (0, 0); + + /* Setup handlers before fork() so that we + handle any signals caused by child, without races. */ + install_cleanup (term_signal); + signal (SIGTTIN, SIG_IGN); /* Don't stop if background child needs tty. */ + signal (SIGTTOU, SIG_IGN); /* Don't stop if background child needs tty. */ + install_sigchld (); /* Interrupt sigsuspend() when child exits. */ + + monitored_pid = fork (); + if (monitored_pid == -1) + { + error (0, errno, _("fork system call failed")); + return EXIT_CANCELED; + } + else if (monitored_pid == 0) + { /* child */ + /* exec doesn't reset SIG_IGN -> SIG_DFL. */ + signal (SIGTTIN, SIG_DFL); + signal (SIGTTOU, SIG_DFL); + + execvp (argv[0], argv); + + /* exit like sh, env, nohup, ... */ + int exit_status = errno == ENOENT ? EXIT_ENOENT : EXIT_CANNOT_INVOKE; + error (0, errno, _("failed to run command %s"), quote (command)); + return exit_status; + } + else + { + pid_t wait_result; + int status; + + /* We configure timers so that SIGALRM is sent on expiry. + Therefore ensure we don't inherit a mask blocking SIGALRM. */ + unblock_signal (SIGALRM); + + settimeout (timeout, true); + + /* Ensure we don't cleanup() after waitpid() reaps the child, + to avoid sending signals to a possibly different process. */ + sigset_t cleanup_set; + block_cleanup_and_chld (term_signal, &cleanup_set); + + while ((wait_result = waitpid (monitored_pid, &status, WNOHANG)) == 0) + sigsuspend (&cleanup_set); /* Wait with cleanup signals unblocked. */ + + if (wait_result < 0) + { + /* shouldn't happen. */ + error (0, errno, _("error waiting for command")); + status = EXIT_CANCELED; + } + else + { + if (WIFEXITED (status)) + status = WEXITSTATUS (status); + else if (WIFSIGNALED (status)) + { + int sig = WTERMSIG (status); + if (WCOREDUMP (status)) + error (0, 0, _("the monitored command dumped core")); + if (!timed_out && disable_core_dumps ()) + { + /* exit with the signal flag set. */ + signal (sig, SIG_DFL); + unblock_signal (sig); + raise (sig); + } + /* Allow users to distinguish if command was forcibly killed. + Needed with --foreground where we don't send SIGKILL to + the timeout process itself. */ + if (timed_out && sig == SIGKILL) + preserve_status = true; + status = sig + 128; /* what sh returns for signaled processes. */ + } + else + { + /* shouldn't happen. */ + error (0, 0, _("unknown status from command (%d)"), status); + status = EXIT_FAILURE; + } + } + + if (timed_out && !preserve_status) + status = EXIT_TIMEDOUT; + return status; + } +} diff --git a/src/touch.c b/src/touch.c new file mode 100644 index 0000000..ee1977f --- /dev/null +++ b/src/touch.c @@ -0,0 +1,438 @@ +/* touch -- change modification and access times of files + Copyright (C) 1987-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Rubin, Arnold Robbins, Jim Kingdon, David MacKenzie, + and Randy Smith. */ + +#include +#include +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "assure.h" +#include "fd-reopen.h" +#include "parse-datetime.h" +#include "posixtm.h" +#include "posixver.h" +#include "quote.h" +#include "stat-time.h" +#include "utimens.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "touch" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("Arnold Robbins"), \ + proper_name ("Jim Kingdon"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Randy Smith") + +/* Bitmasks for 'change_times'. */ +#define CH_ATIME 1 +#define CH_MTIME 2 + +/* Which timestamps to change. */ +static int change_times; + +/* (-c) If true, don't create if not already there. */ +static bool no_create; + +/* (-r) If true, use times from a reference file. */ +static bool use_ref; + +/* (-h) If true, change the times of an existing symlink, if possible. */ +static bool no_dereference; + +/* If true, the only thing we have to do is change both the + modification and access time to the current time, so we don't + have to own the file, just be able to read and write it. + On some systems, we can do this if we own the file, even though + we have neither read nor write access to it. */ +static bool amtime_now; + +/* New access and modification times to use when setting time. */ +static struct timespec newtime[2]; + +/* File to use for -r. */ +static char *ref_file; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + TIME_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + {"time", required_argument, nullptr, TIME_OPTION}, + {"no-create", no_argument, nullptr, 'c'}, + {"date", required_argument, nullptr, 'd'}, + {"reference", required_argument, nullptr, 'r'}, + {"no-dereference", no_argument, nullptr, 'h'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Valid arguments to the '--time' option. */ +static char const *const time_args[] = +{ + "atime", "access", "use", "mtime", "modify", nullptr +}; + +/* The bits in 'change_times' that those arguments set. */ +static int const time_masks[] = +{ + CH_ATIME, CH_ATIME, CH_ATIME, CH_MTIME, CH_MTIME +}; + +/* The interpretation of FLEX_DATE as a date, relative to NOW. */ + +static struct timespec +date_relative (char const *flex_date, struct timespec now) +{ + struct timespec result; + if (! parse_datetime (&result, flex_date, &now)) + error (EXIT_FAILURE, 0, _("invalid date format %s"), quote (flex_date)); + return result; +} + +/* Update the time of file FILE according to the options given. + Return true if successful. */ + +static bool +touch (char const *file) +{ + int fd = -1; + int open_errno = 0; + struct timespec const *t = newtime; + + if (STREQ (file, "-")) + fd = STDOUT_FILENO; + else if (! (no_create || no_dereference)) + { + /* Try to open FILE, creating it if necessary. */ + fd = fd_reopen (STDIN_FILENO, file, + O_WRONLY | O_CREAT | O_NONBLOCK | O_NOCTTY, MODE_RW_UGO); + if (fd < 0) + open_errno = errno; + } + + if (change_times != (CH_ATIME | CH_MTIME)) + { + /* We're setting only one of the time values. */ + if (change_times == CH_MTIME) + newtime[0].tv_nsec = UTIME_OMIT; + else + { + affirm (change_times == CH_ATIME); + newtime[1].tv_nsec = UTIME_OMIT; + } + } + + if (amtime_now) + { + /* Pass nullptr to futimens so it will not fail if we have + write access to the file, but don't own it. */ + t = nullptr; + } + + char const *file_opt = fd == STDOUT_FILENO ? nullptr : file; + int atflag = no_dereference ? AT_SYMLINK_NOFOLLOW : 0; + int utime_errno = (fdutimensat (fd, AT_FDCWD, file_opt, t, atflag) == 0 + ? 0 : errno); + + if (fd == STDIN_FILENO) + { + if (close (STDIN_FILENO) != 0) + { + error (0, errno, _("failed to close %s"), quoteaf (file)); + return false; + } + } + else if (fd == STDOUT_FILENO) + { + /* Do not diagnose "touch -c - >&-". */ + if (utime_errno == EBADF && no_create) + return true; + } + + if (utime_errno != 0) + { + /* Don't diagnose with open_errno if FILE is a directory, as that + would give a bogus diagnostic for e.g., 'touch /' (assuming we + don't own / or have write access). On Solaris 10 and probably + other systems, opening a directory like "." fails with EINVAL. + (On SunOS 4 it was EPERM but that's obsolete.) */ + struct stat st; + if (open_errno + && ! (open_errno == EISDIR + || (open_errno == EINVAL + && stat (file, &st) == 0 && S_ISDIR (st.st_mode)))) + { + /* The wording of this diagnostic should cover at least two cases: + - the file does not exist, but the parent directory is unwritable + - the file exists, but it isn't writable + I think it's not worth trying to distinguish them. */ + error (0, open_errno, _("cannot touch %s"), quoteaf (file)); + } + else + { + if (no_create && utime_errno == ENOENT) + return true; + error (0, utime_errno, _("setting times of %s"), quoteaf (file)); + } + return false; + } + + return true; +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... FILE...\n"), program_name); + fputs (_("\ +Update the access and modification times of each FILE to the current time.\n\ +\n\ +A FILE argument that does not exist is created empty, unless -c or -h\n\ +is supplied.\n\ +\n\ +A FILE argument string of - is handled specially and causes touch to\n\ +change the times of the file associated with standard output.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -a change only the access time\n\ + -c, --no-create do not create any files\n\ + -d, --date=STRING parse STRING and use it instead of current time\n\ + -f (ignored)\n\ +"), stdout); + fputs (_("\ + -h, --no-dereference affect each symbolic link instead of any referenced\n\ + file (useful only on systems that can change the\n\ + timestamps of a symlink)\n\ + -m change only the modification time\n\ +"), stdout); + fputs (_("\ + -r, --reference=FILE use this file's times instead of current time\n\ + -t STAMP use [[CC]YY]MMDDhhmm[.ss] instead of current time\n\ + --time=WORD change the specified time:\n\ + WORD is access, atime, or use: equivalent to -a\n\ + WORD is modify or mtime: equivalent to -m\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +Note that the -d and -t options accept different time-date formats.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int c; + bool date_set = false; + bool ok = true; + char const *flex_date = nullptr; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + change_times = 0; + no_create = use_ref = false; + + while ((c = getopt_long (argc, argv, "acd:fhmr:t:", longopts, nullptr)) != -1) + { + switch (c) + { + case 'a': + change_times |= CH_ATIME; + break; + + case 'c': + no_create = true; + break; + + case 'd': + flex_date = optarg; + break; + + case 'f': + break; + + case 'h': + no_dereference = true; + break; + + case 'm': + change_times |= CH_MTIME; + break; + + case 'r': + use_ref = true; + ref_file = optarg; + break; + + case 't': + if (! posixtime (&newtime[0].tv_sec, optarg, + PDS_LEADING_YEAR | PDS_CENTURY | PDS_SECONDS)) + error (EXIT_FAILURE, 0, _("invalid date format %s"), + quote (optarg)); + newtime[0].tv_nsec = 0; + newtime[1] = newtime[0]; + date_set = true; + break; + + case TIME_OPTION: /* --time */ + change_times |= XARGMATCH ("--time", optarg, + time_args, time_masks); + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (change_times == 0) + change_times = CH_ATIME | CH_MTIME; + + if (date_set && (use_ref || flex_date)) + { + error (0, 0, _("cannot specify times from more than one source")); + usage (EXIT_FAILURE); + } + + if (use_ref) + { + struct stat ref_stats; + /* Don't use (no_dereference?lstat:stat) (args), since stat + might be an object-like macro. */ + if (no_dereference ? lstat (ref_file, &ref_stats) + : stat (ref_file, &ref_stats)) + error (EXIT_FAILURE, errno, + _("failed to get attributes of %s"), quoteaf (ref_file)); + newtime[0] = get_stat_atime (&ref_stats); + newtime[1] = get_stat_mtime (&ref_stats); + date_set = true; + if (flex_date) + { + if (change_times & CH_ATIME) + newtime[0] = date_relative (flex_date, newtime[0]); + if (change_times & CH_MTIME) + newtime[1] = date_relative (flex_date, newtime[1]); + } + } + else + { + if (flex_date) + { + struct timespec now = current_timespec (); + newtime[1] = newtime[0] = date_relative (flex_date, now); + date_set = true; + + /* If neither -a nor -m is specified, treat "-d now" as if + it were absent; this lets "touch" succeed more often in + the presence of restrictive permissions. */ + if (change_times == (CH_ATIME | CH_MTIME) + && newtime[0].tv_sec == now.tv_sec + && newtime[0].tv_nsec == now.tv_nsec) + { + /* Check that it really was "-d now", and not a timestamp + that just happens to be the current time. */ + struct timespec notnow, notnow1; + notnow.tv_sec = now.tv_sec ^ 1; + notnow.tv_nsec = now.tv_nsec; + notnow1 = date_relative (flex_date, notnow); + if (notnow1.tv_sec == notnow.tv_sec + && notnow1.tv_nsec == notnow.tv_nsec) + date_set = false; + } + } + } + + /* The obsolete 'MMDDhhmm[YY]' form is valid IFF there are + two or more non-option arguments. */ + if (!date_set && 2 <= argc - optind && posix2_version () < 200112 + && posixtime (&newtime[0].tv_sec, argv[optind], + PDS_TRAILING_YEAR | PDS_PRE_2000)) + { + newtime[0].tv_nsec = 0; + newtime[1] = newtime[0]; + date_set = true; + + if (! getenv ("POSIXLY_CORRECT")) + { + struct tm const *tm = localtime (&newtime[0].tv_sec); + + /* Technically, it appears that even a deliberate attempt to cause + the above localtime to return nullptr will always fail because our + posixtime implementation rejects all dates for which localtime + would fail. However, skip the warning if it ever fails. */ + if (tm) + error (0, 0, + _("warning: 'touch %s' is obsolete; use " + "'touch -t %04ld%02d%02d%02d%02d.%02d'"), + argv[optind], + tm->tm_year + 1900L, tm->tm_mon + 1, tm->tm_mday, + tm->tm_hour, tm->tm_min, tm->tm_sec); + } + + optind++; + } + + if (!date_set) + { + if (change_times == (CH_ATIME | CH_MTIME)) + amtime_now = true; + else + newtime[1].tv_nsec = newtime[0].tv_nsec = UTIME_NOW; + } + + if (optind == argc) + { + error (0, 0, _("missing file operand")); + usage (EXIT_FAILURE); + } + + for (; optind < argc; ++optind) + ok &= touch (argv[optind]); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/tr.c b/src/tr.c new file mode 100644 index 0000000..db91eb2 --- /dev/null +++ b/src/tr.c @@ -0,0 +1,1901 @@ +/* tr -- a filter to translate characters + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Jim Meyering */ + +#include + +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "fadvise.h" +#include "quote.h" +#include "safe-read.h" +#include "xbinary-io.h" +#include "xstrtol.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tr" + +#define AUTHORS proper_name ("Jim Meyering") + +enum { N_CHARS = UCHAR_MAX + 1 }; + +/* An unsigned integer type big enough to hold a repeat count or an + unsigned character. POSIX requires support for repeat counts as + high as 2**31 - 1. Since repeat counts might need to expand to + match the length of an argument string, we need at least size_t to + avoid arbitrary internal limits. It doesn't cost much to use + uintmax_t, though. */ +typedef uintmax_t count; + +/* The value for Spec_list->state that indicates to + get_next that it should initialize the tail pointer. + Its value should be as large as possible to avoid conflict + a valid value for the state field -- and that may be as + large as any valid repeat_count. */ +#define BEGIN_STATE (UINTMAX_MAX - 1) + +/* The value for Spec_list->state that indicates to + get_next that the element pointed to by Spec_list->tail is + being considered for the first time on this pass through the + list -- it indicates that get_next should make any necessary + initializations. */ +#define NEW_ELEMENT (BEGIN_STATE + 1) + +/* The maximum possible repeat count. Due to how the states are + implemented, it can be as much as BEGIN_STATE. */ +#define REPEAT_COUNT_MAXIMUM BEGIN_STATE + +/* The following (but not CC_NO_CLASS) are indices into the array of + valid character class strings. */ +enum Char_class + { + CC_ALNUM = 0, CC_ALPHA = 1, CC_BLANK = 2, CC_CNTRL = 3, + CC_DIGIT = 4, CC_GRAPH = 5, CC_LOWER = 6, CC_PRINT = 7, + CC_PUNCT = 8, CC_SPACE = 9, CC_UPPER = 10, CC_XDIGIT = 11, + CC_NO_CLASS = 9999 + }; + +/* Character class to which a character (returned by get_next) belonged; + but it is set only if the construct from which the character was obtained + was one of the character classes [:upper:] or [:lower:]. The value + is used only when translating and then, only to make sure that upper + and lower class constructs have the same relative positions in string1 + and string2. */ +enum Upper_Lower_class + { + UL_LOWER, + UL_UPPER, + UL_NONE + }; + +/* The type of a List_element. See build_spec_list for more details. */ +enum Range_element_type + { + RE_NORMAL_CHAR, + RE_RANGE, + RE_CHAR_CLASS, + RE_EQUIV_CLASS, + RE_REPEATED_CHAR + }; + +/* One construct in one of tr's argument strings. + For example, consider the POSIX version of the classic tr command: + tr -cs 'a-zA-Z_' '[\n*]' + String1 has 3 constructs, two of which are ranges (a-z and A-Z), + and a single normal character, '_'. String2 has one construct. */ +struct List_element + { + enum Range_element_type type; + struct List_element *next; + union + { + unsigned char normal_char; + struct /* unnamed */ + { + unsigned char first_char; + unsigned char last_char; + } + range; + enum Char_class char_class; + unsigned char equiv_code; + struct /* unnamed */ + { + unsigned char the_repeated_char; + count repeat_count; + } + repeated_char; + } + u; + }; + +/* Each of tr's argument strings is parsed into a form that is easier + to work with: a linked list of constructs (struct List_element). + Each Spec_list structure also encapsulates various attributes of + the corresponding argument string. The attributes are used mainly + to verify that the strings are valid in the context of any options + specified (like -s, -d, or -c). The main exception is the member + 'tail', which is first used to construct the list. After construction, + it is used by get_next to save its state when traversing the list. + The member 'state' serves a similar function. */ +struct Spec_list + { + /* Points to the head of the list of range elements. + The first struct is a dummy; its members are never used. */ + struct List_element *head; + + /* When appending, points to the last element. When traversing via + get_next(), points to the element to process next. Setting + Spec_list.state to the value BEGIN_STATE before calling get_next + signals get_next to initialize tail to point to head->next. */ + struct List_element *tail; + + /* Used to save state between calls to get_next. */ + count state; + + /* Length, in the sense that length ('a-z[:digit:]123abc') + is 42 ( = 26 + 10 + 6). */ + count length; + + /* The number of [c*] and [c*0] constructs that appear in this spec. */ + size_t n_indefinite_repeats; + + /* If n_indefinite_repeats is nonzero, this points to the List_element + corresponding to the last [c*] or [c*0] construct encountered in + this spec. Otherwise it is undefined. */ + struct List_element *indefinite_repeat_element; + + /* True if this spec contains at least one equivalence + class construct e.g. [=c=]. */ + bool has_equiv_class; + + /* True if this spec contains at least one character class + construct. E.g. [:digit:]. */ + bool has_char_class; + + /* True if this spec contains at least one of the character class + constructs (all but upper and lower) that aren't allowed in s2. */ + bool has_restricted_char_class; + }; + +/* A representation for escaped string1 or string2. As a string is parsed, + any backslash-escaped characters (other than octal or \a, \b, \f, \n, + etc.) are marked as such in this structure by setting the corresponding + entry in the ESCAPED vector. */ +struct E_string +{ + char *s; + bool *escaped; + size_t len; +}; + +/* Return nonzero if the Ith character of escaped string ES matches C + and is not escaped itself. */ +static inline bool +es_match (struct E_string const *es, size_t i, char c) +{ + return es->s[i] == c && !es->escaped[i]; +} + +/* When true, each sequence in the input of a repeated character + (call it c) is replaced (in the output) by a single occurrence of c + for every c in the squeeze set. */ +static bool squeeze_repeats = false; + +/* When true, removes characters in the delete set from input. */ +static bool delete = false; + +/* Use the complement of set1 in place of set1. */ +static bool complement = false; + +/* When tr is performing translation and string1 is longer than string2, + POSIX says that the result is unspecified. That gives the implementer + of a POSIX conforming version of tr two reasonable choices for the + semantics of this case. + + * The BSD tr pads string2 to the length of string1 by + repeating the last character in string2. + + * System V tr ignores characters in string1 that have no + corresponding character in string2. That is, string1 is effectively + truncated to the length of string2. + + When nonzero, this flag causes GNU tr to imitate the behavior + of System V tr when translating with string1 longer than string2. + The default is to emulate BSD tr. This flag is ignored in modes where + no translation is performed. Emulating the System V tr + in this exceptional case causes the relatively common BSD idiom: + + tr -cs A-Za-z0-9 '\012' + + to break (it would convert only zero bytes, rather than all + non-alphanumerics, to newlines). + + WARNING: This switch does not provide general BSD or System V + compatibility. For example, it doesn't disable the interpretation + of the POSIX constructs [:alpha:], [=c=], and [c*10], so if by + some unfortunate coincidence you use such constructs in scripts + expecting to use some other version of tr, the scripts will break. */ +static bool truncate_set1 = false; + +/* An alias for (!delete && non_option_args == 2). + It is set in main and used there and in validate(). */ +static bool translating; + +static char io_buf[BUFSIZ]; + +static char const *const char_class_name[] = +{ + "alnum", "alpha", "blank", "cntrl", "digit", "graph", + "lower", "print", "punct", "space", "upper", "xdigit" +}; + +/* Array of boolean values. A character 'c' is a member of the + squeeze set if and only if in_squeeze_set[c] is true. The squeeze + set is defined by the last (possibly, the only) string argument + on the command line when the squeeze option is given. */ +static bool in_squeeze_set[N_CHARS]; + +/* Array of boolean values. A character 'c' is a member of the + delete set if and only if in_delete_set[c] is true. The delete + set is defined by the first (or only) string argument on the + command line when the delete option is given. */ +static bool in_delete_set[N_CHARS]; + +/* Array of character values defining the translation (if any) that + tr is to perform. Translation is performed only when there are + two specification strings and the delete switch is not given. */ +static char xlate[N_CHARS]; + +static struct option const long_options[] = +{ + {"complement", no_argument, nullptr, 'c'}, + {"delete", no_argument, nullptr, 'd'}, + {"squeeze-repeats", no_argument, nullptr, 's'}, + {"truncate-set1", no_argument, nullptr, 't'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... STRING1 [STRING2]\n\ +"), + program_name); + fputs (_("\ +Translate, squeeze, and/or delete characters from standard input,\n\ +writing to standard output. STRING1 and STRING2 specify arrays of\n\ +characters ARRAY1 and ARRAY2 that control the action.\n\ +\n\ + -c, -C, --complement use the complement of ARRAY1\n\ + -d, --delete delete characters in ARRAY1, do not translate\n\ + -s, --squeeze-repeats replace each sequence of a repeated character\n\ + that is listed in the last specified ARRAY,\n\ + with a single occurrence of that character\n\ + -t, --truncate-set1 first truncate ARRAY1 to length of ARRAY2\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +ARRAYs are specified as strings of characters. Most represent themselves.\n\ +Interpreted sequences are:\n\ +\n\ + \\NNN character with octal value NNN (1 to 3 octal digits)\n\ + \\\\ backslash\n\ + \\a audible BEL\n\ + \\b backspace\n\ + \\f form feed\n\ + \\n new line\n\ + \\r return\n\ + \\t horizontal tab\n\ +"), stdout); + fputs (_("\ + \\v vertical tab\n\ + CHAR1-CHAR2 all characters from CHAR1 to CHAR2 in ascending order\n\ + [CHAR*] in ARRAY2, copies of CHAR until length of ARRAY1\n\ + [CHAR*REPEAT] REPEAT copies of CHAR, REPEAT octal if starting with 0\n\ + [:alnum:] all letters and digits\n\ + [:alpha:] all letters\n\ + [:blank:] all horizontal whitespace\n\ + [:cntrl:] all control characters\n\ + [:digit:] all digits\n\ +"), stdout); + fputs (_("\ + [:graph:] all printable characters, not including space\n\ + [:lower:] all lower case letters\n\ + [:print:] all printable characters, including space\n\ + [:punct:] all punctuation characters\n\ + [:space:] all horizontal or vertical whitespace\n\ + [:upper:] all upper case letters\n\ + [:xdigit:] all hexadecimal digits\n\ + [=CHAR=] all characters which are equivalent to CHAR\n\ +"), stdout); + fputs (_("\ +\n\ +Translation occurs if -d is not given and both STRING1 and STRING2 appear.\n\ +-t is only significant when translating. ARRAY2 is extended to length of\n\ +ARRAY1 by repeating its last character as necessary. Excess characters\n\ +of ARRAY2 are ignored. Character classes expand in unspecified order;\n\ +while translating, [:lower:] and [:upper:] may be used in pairs to\n\ +specify case conversion. Squeezing occurs after translation or deletion.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Return nonzero if the character C is a member of the + equivalence class containing the character EQUIV_CLASS. */ + +static inline bool +is_equiv_class_member (unsigned char equiv_class, unsigned char c) +{ + return (equiv_class == c); +} + +/* Return true if the character C is a member of the + character class CHAR_CLASS. */ + +ATTRIBUTE_PURE +static bool +is_char_class_member (enum Char_class char_class, unsigned char c) +{ + int result; + + switch (char_class) + { + case CC_ALNUM: + result = isalnum (c); + break; + case CC_ALPHA: + result = isalpha (c); + break; + case CC_BLANK: + result = isblank (c); + break; + case CC_CNTRL: + result = iscntrl (c); + break; + case CC_DIGIT: + result = isdigit (c); + break; + case CC_GRAPH: + result = isgraph (c); + break; + case CC_LOWER: + result = islower (c); + break; + case CC_PRINT: + result = isprint (c); + break; + case CC_PUNCT: + result = ispunct (c); + break; + case CC_SPACE: + result = isspace (c); + break; + case CC_UPPER: + result = isupper (c); + break; + case CC_XDIGIT: + result = isxdigit (c); + break; + default: + unreachable (); + } + + return !! result; +} + +static void +es_free (struct E_string *es) +{ + free (es->s); + free (es->escaped); +} + +/* Perform the first pass over each range-spec argument S, converting all + \c and \ddd escapes to their one-byte representations. If an invalid + quote sequence is found print an error message and return false; + Otherwise set *ES to the resulting string and return true. + The resulting array of characters may contain zero-bytes; + however, on input, S is assumed to be null-terminated, and hence + cannot contain actual (non-escaped) zero bytes. */ + +static bool +unquote (char const *s, struct E_string *es) +{ + size_t len = strlen (s); + + es->s = xmalloc (len); + es->escaped = xcalloc (len, sizeof es->escaped[0]); + + unsigned int j = 0; + for (unsigned int i = 0; s[i]; i++) + { + unsigned char c; + int oct_digit; + + switch (s[i]) + { + case '\\': + es->escaped[j] = true; + switch (s[i + 1]) + { + case '\\': + c = '\\'; + break; + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + c = s[i + 1] - '0'; + oct_digit = s[i + 2] - '0'; + if (0 <= oct_digit && oct_digit <= 7) + { + c = 8 * c + oct_digit; + ++i; + oct_digit = s[i + 2] - '0'; + if (0 <= oct_digit && oct_digit <= 7) + { + if (8 * c + oct_digit < N_CHARS) + { + c = 8 * c + oct_digit; + ++i; + } + else + { + /* A 3-digit octal number larger than \377 won't + fit in 8 bits. So we stop when adding the + next digit would put us over the limit and + give a warning about the ambiguity. POSIX + isn't clear on this, and we interpret this + lack of clarity as meaning the resulting behavior + is undefined, which means we're allowed to issue + a warning. */ + error (0, 0, _("warning: the ambiguous octal escape\ + \\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, %c"), + s[i], s[i + 1], s[i + 2], + s[i], s[i + 1], s[i + 2]); + } + } + } + break; + case '\0': + error (0, 0, _("warning: an unescaped backslash " + "at end of string is not portable")); + /* POSIX is not clear about this. */ + es->escaped[j] = false; + i--; + c = '\\'; + break; + default: + c = s[i + 1]; + break; + } + ++i; + es->s[j++] = c; + break; + default: + es->s[j++] = s[i]; + break; + } + } + es->len = j; + return true; +} + +/* If CLASS_STR is a valid character class string, return its index + in the global char_class_name array. Otherwise, return CC_NO_CLASS. */ + +ATTRIBUTE_PURE +static enum Char_class +look_up_char_class (char const *class_str, size_t len) +{ + enum Char_class i; + + for (i = 0; i < ARRAY_CARDINALITY (char_class_name); i++) + if (STREQ_LEN (class_str, char_class_name[i], len) + && strlen (char_class_name[i]) == len) + return i; + return CC_NO_CLASS; +} + +/* Return a newly allocated string with a printable version of C. + This function is used solely for formatting error messages. */ + +static char * +make_printable_char (unsigned char c) +{ + char *buf = xmalloc (5); + + if (isprint (c)) + { + buf[0] = c; + buf[1] = '\0'; + } + else + { + sprintf (buf, "\\%03o", c); + } + return buf; +} + +/* Return a newly allocated copy of S which is suitable for printing. + LEN is the number of characters in S. Most non-printing + (isprint) characters are represented by a backslash followed by + 3 octal digits. However, the characters represented by \c escapes + where c is one of [abfnrtv] are represented by their 2-character \c + sequences. This function is used solely for printing error messages. */ + +static char * +make_printable_str (char const *s, size_t len) +{ + /* Worst case is that every character expands to a backslash + followed by a 3-character octal escape sequence. */ + char *printable_buf = xnmalloc (len + 1, 4); + char *p = printable_buf; + + for (size_t i = 0; i < len; i++) + { + char buf[5]; + char const *tmp = nullptr; + unsigned char c = s[i]; + + switch (c) + { + case '\\': + tmp = "\\"; + break; + case '\a': + tmp = "\\a"; + break; + case '\b': + tmp = "\\b"; + break; + case '\f': + tmp = "\\f"; + break; + case '\n': + tmp = "\\n"; + break; + case '\r': + tmp = "\\r"; + break; + case '\t': + tmp = "\\t"; + break; + case '\v': + tmp = "\\v"; + break; + default: + if (isprint (c)) + { + buf[0] = c; + buf[1] = '\0'; + } + else + sprintf (buf, "\\%03o", c); + tmp = buf; + break; + } + p = stpcpy (p, tmp); + } + return printable_buf; +} + +/* Append a newly allocated structure representing a + character C to the specification list LIST. */ + +static void +append_normal_char (struct Spec_list *list, unsigned char c) +{ + struct List_element *new = xmalloc (sizeof *new); + new->next = nullptr; + new->type = RE_NORMAL_CHAR; + new->u.normal_char = c; + list->tail->next = new; + list->tail = new; +} + +/* Append a newly allocated structure representing the range + of characters from FIRST to LAST to the specification list LIST. + Return false if LAST precedes FIRST in the collating sequence, + true otherwise. This means that '[c-c]' is acceptable. */ + +static bool +append_range (struct Spec_list *list, unsigned char first, unsigned char last) +{ + if (last < first) + { + char *tmp1 = make_printable_char (first); + char *tmp2 = make_printable_char (last); + + error (0, 0, + _("range-endpoints of '%s-%s' are in reverse collating sequence order"), + tmp1, tmp2); + free (tmp1); + free (tmp2); + return false; + } + struct List_element *new = xmalloc (sizeof *new); + new->next = nullptr; + new->type = RE_RANGE; + new->u.range.first_char = first; + new->u.range.last_char = last; + list->tail->next = new; + list->tail = new; + return true; +} + +/* If CHAR_CLASS_STR is a valid character class string, append a + newly allocated structure representing that character class to the end + of the specification list LIST and return true. If CHAR_CLASS_STR is not + a valid string return false. */ + +static bool +append_char_class (struct Spec_list *list, + char const *char_class_str, size_t len) +{ + enum Char_class char_class = look_up_char_class (char_class_str, len); + if (char_class == CC_NO_CLASS) + return false; + struct List_element *new = xmalloc (sizeof *new); + new->next = nullptr; + new->type = RE_CHAR_CLASS; + new->u.char_class = char_class; + list->tail->next = new; + list->tail = new; + return true; +} + +/* Append a newly allocated structure representing a [c*n] + repeated character construct to the specification list LIST. + THE_CHAR is the single character to be repeated, and REPEAT_COUNT + is a non-negative repeat count. */ + +static void +append_repeated_char (struct Spec_list *list, unsigned char the_char, + count repeat_count) +{ + struct List_element *new = xmalloc (sizeof *new); + new->next = nullptr; + new->type = RE_REPEATED_CHAR; + new->u.repeated_char.the_repeated_char = the_char; + new->u.repeated_char.repeat_count = repeat_count; + list->tail->next = new; + list->tail = new; +} + +/* Given a string, EQUIV_CLASS_STR, from a [=str=] context and + the length of that string, LEN, if LEN is exactly one, append + a newly allocated structure representing the specified + equivalence class to the specification list, LIST and return true. + If LEN is not 1, return false. */ + +static bool +append_equiv_class (struct Spec_list *list, + char const *equiv_class_str, size_t len) +{ + if (len != 1) + return false; + + struct List_element *new = xmalloc (sizeof *new); + new->next = nullptr; + new->type = RE_EQUIV_CLASS; + new->u.equiv_code = *equiv_class_str; + list->tail->next = new; + list->tail = new; + return true; +} + +/* Search forward starting at START_IDX for the 2-char sequence + (PRE_BRACKET_CHAR,']') in the string P of length P_LEN. If such + a sequence is found, set *RESULT_IDX to the index of the first + character and return true. Otherwise return false. P may contain + zero bytes. */ + +static bool +find_closing_delim (const struct E_string *es, size_t start_idx, + char pre_bracket_char, size_t *result_idx) +{ + for (size_t i = start_idx; i < es->len - 1; i++) + if (es->s[i] == pre_bracket_char && es->s[i + 1] == ']' + && !es->escaped[i] && !es->escaped[i + 1]) + { + *result_idx = i; + return true; + } + return false; +} + +/* Parse the bracketed repeat-char syntax. If the P_LEN characters + beginning with P[ START_IDX ] comprise a valid [c*n] construct, + then set *CHAR_TO_REPEAT, *REPEAT_COUNT, and *CLOSING_BRACKET_IDX + and return zero. If the second character following + the opening bracket is not '*' or if no closing bracket can be + found, return -1. If a closing bracket is found and the + second char is '*', but the string between the '*' and ']' isn't + empty, an octal number, or a decimal number, print an error message + and return -2. */ + +static int +find_bracketed_repeat (const struct E_string *es, size_t start_idx, + unsigned char *char_to_repeat, count *repeat_count, + size_t *closing_bracket_idx) +{ + affirm (start_idx + 1 < es->len); + if (!es_match (es, start_idx + 1, '*')) + return -1; + + for (size_t i = start_idx + 2; i < es->len && !es->escaped[i]; i++) + { + if (es->s[i] == ']') + { + size_t digit_str_len = i - start_idx - 2; + + *char_to_repeat = es->s[start_idx]; + if (digit_str_len == 0) + { + /* We've matched [c*] -- no explicit repeat count. */ + *repeat_count = 0; + } + else + { + /* Here, we have found [c*s] where s should be a string + of octal (if it starts with '0') or decimal digits. */ + char const *digit_str = &es->s[start_idx + 2]; + char *d_end; + if ((xstrtoumax (digit_str, &d_end, *digit_str == '0' ? 8 : 10, + repeat_count, nullptr) + != LONGINT_OK) + || REPEAT_COUNT_MAXIMUM < *repeat_count + || digit_str + digit_str_len != d_end) + { + char *tmp = make_printable_str (digit_str, digit_str_len); + error (0, 0, + _("invalid repeat count %s in [c*n] construct"), + quote (tmp)); + free (tmp); + return -2; + } + } + *closing_bracket_idx = i; + return 0; + } + } + return -1; /* No bracket found. */ +} + +/* Return true if the string at ES->s[IDX] matches the regular + expression '\*[0-9]*]', false otherwise. The string does not + match if any of its characters are escaped. */ + +ATTRIBUTE_PURE +static bool +star_digits_closebracket (const struct E_string *es, size_t idx) +{ + if (!es_match (es, idx, '*')) + return false; + + for (size_t i = idx + 1; i < es->len; i++) + if (!ISDIGIT (to_uchar (es->s[i])) || es->escaped[i]) + return es_match (es, i, ']'); + return false; +} + +/* Convert string UNESCAPED_STRING (which has been preprocessed to + convert backslash-escape sequences) of length LEN characters into + a linked list of the following 5 types of constructs: + - [:str:] Character class where 'str' is one of the 12 valid strings. + - [=c=] Equivalence class where 'c' is any single character. + - [c*n] Repeat the single character 'c' 'n' times. n may be omitted. + However, if 'n' is present, it must be a non-negative octal or + decimal integer. + - r-s Range of characters from 'r' to 's'. The second endpoint must + not precede the first in the current collating sequence. + - c Any other character is interpreted as itself. */ + +static bool +build_spec_list (const struct E_string *es, struct Spec_list *result) +{ + char const *p = es->s; + + /* The main for-loop below recognizes the 4 multi-character constructs. + A character that matches (in its context) none of the multi-character + constructs is classified as 'normal'. Since all multi-character + constructs have at least 3 characters, any strings of length 2 or + less are composed solely of normal characters. Hence, the index of + the outer for-loop runs only as far as LEN-2. */ + size_t i; + for (i = 0; i + 2 < es->len; /* empty */) + { + if (es_match (es, i, '[')) + { + bool matched_multi_char_construct; + size_t closing_bracket_idx; + unsigned char char_to_repeat; + count repeat_count; + int err; + + matched_multi_char_construct = true; + if (es_match (es, i + 1, ':') || es_match (es, i + 1, '=')) + { + size_t closing_delim_idx; + + if (find_closing_delim (es, i + 2, p[i + 1], &closing_delim_idx)) + { + size_t opnd_str_len = closing_delim_idx - 1 - (i + 2) + 1; + char const *opnd_str = p + i + 2; + + if (opnd_str_len == 0) + { + if (p[i + 1] == ':') + error (0, 0, _("missing character class name '[::]'")); + else + error (0, 0, + _("missing equivalence class character '[==]'")); + return false; + } + + if (p[i + 1] == ':') + { + /* FIXME: big comment. */ + if (!append_char_class (result, opnd_str, opnd_str_len)) + { + if (star_digits_closebracket (es, i + 2)) + goto try_bracketed_repeat; + else + { + char *tmp = make_printable_str (opnd_str, + opnd_str_len); + error (0, 0, _("invalid character class %s"), + quote (tmp)); + free (tmp); + return false; + } + } + } + else + { + /* FIXME: big comment. */ + if (!append_equiv_class (result, opnd_str, opnd_str_len)) + { + if (star_digits_closebracket (es, i + 2)) + goto try_bracketed_repeat; + else + { + char *tmp = make_printable_str (opnd_str, + opnd_str_len); + error (0, 0, + _("%s: equivalence class operand must be a single character"), + tmp); + free (tmp); + return false; + } + } + } + + i = closing_delim_idx + 2; + continue; + } + /* Else fall through. This could be [:*] or [=*]. */ + } + + try_bracketed_repeat: + + /* Determine whether this is a bracketed repeat range + matching the RE \[.\*(dec_or_oct_number)?]. */ + err = find_bracketed_repeat (es, i + 1, &char_to_repeat, + &repeat_count, + &closing_bracket_idx); + if (err == 0) + { + append_repeated_char (result, char_to_repeat, repeat_count); + i = closing_bracket_idx + 1; + } + else if (err == -1) + { + matched_multi_char_construct = false; + } + else + { + /* Found a string that looked like [c*n] but the + numeric part was invalid. */ + return false; + } + + if (matched_multi_char_construct) + continue; + + /* We reach this point if P does not match [:str:], [=c=], + [c*n], or [c*]. Now, see if P looks like a range '[-c' + (from '[' to 'c'). */ + } + + /* Look ahead one char for ranges like a-z. */ + if (es_match (es, i + 1, '-')) + { + if (!append_range (result, p[i], p[i + 2])) + return false; + i += 3; + } + else + { + append_normal_char (result, p[i]); + ++i; + } + } + + /* Now handle the (2 or fewer) remaining characters p[i]..p[es->len - 1]. */ + for (; i < es->len; i++) + append_normal_char (result, p[i]); + + return true; +} + +/* Advance past the current construct. + S->tail must be non-null. */ +static void +skip_construct (struct Spec_list *s) +{ + s->tail = s->tail->next; + s->state = NEW_ELEMENT; +} + +/* Given a Spec_list S (with its saved state implicit in the values + of its members 'tail' and 'state'), return the next single character + in the expansion of S's constructs. If the last character of S was + returned on the previous call or if S was empty, this function + returns -1. For example, successive calls to get_next where S + represents the spec-string 'a-d[y*3]' will return the sequence + of values a, b, c, d, y, y, y, -1. Finally, if the construct from + which the returned character comes is [:upper:] or [:lower:], the + parameter CLASS is given a value to indicate which it was. Otherwise + CLASS is set to UL_NONE. This value is used only when constructing + the translation table to verify that any occurrences of upper and + lower class constructs in the spec-strings appear in the same relative + positions. */ + +static int +get_next (struct Spec_list *s, enum Upper_Lower_class *class) +{ + struct List_element *p; + int return_val; + int i; + + if (class) + *class = UL_NONE; + + if (s->state == BEGIN_STATE) + { + s->tail = s->head->next; + s->state = NEW_ELEMENT; + } + + p = s->tail; + if (p == nullptr) + return -1; + + switch (p->type) + { + case RE_NORMAL_CHAR: + return_val = p->u.normal_char; + s->state = NEW_ELEMENT; + s->tail = p->next; + break; + + case RE_RANGE: + if (s->state == NEW_ELEMENT) + s->state = p->u.range.first_char; + else + ++(s->state); + return_val = s->state; + if (s->state == p->u.range.last_char) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + break; + + case RE_CHAR_CLASS: + if (class) + { + switch (p->u.char_class) + { + case CC_LOWER: + *class = UL_LOWER; + break; + case CC_UPPER: + *class = UL_UPPER; + break; + default: + break; + } + } + + if (s->state == NEW_ELEMENT) + { + for (i = 0; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + break; + affirm (i < N_CHARS); + s->state = i; + } + assure (is_char_class_member (p->u.char_class, s->state)); + return_val = s->state; + for (i = s->state + 1; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + break; + if (i < N_CHARS) + s->state = i; + else + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + break; + + case RE_EQUIV_CLASS: + /* FIXME: this assumes that each character is alone in its own + equivalence class (which appears to be correct for my + LC_COLLATE. But I don't know of any function that allows + one to determine a character's equivalence class. */ + + return_val = p->u.equiv_code; + s->state = NEW_ELEMENT; + s->tail = p->next; + break; + + case RE_REPEATED_CHAR: + /* Here, a repeat count of n == 0 means don't repeat at all. */ + if (p->u.repeated_char.repeat_count == 0) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + return_val = get_next (s, class); + } + else + { + if (s->state == NEW_ELEMENT) + { + s->state = 0; + } + ++(s->state); + return_val = p->u.repeated_char.the_repeated_char; + if (s->state == p->u.repeated_char.repeat_count) + { + s->tail = p->next; + s->state = NEW_ELEMENT; + } + } + break; + + default: + unreachable (); + } + + return return_val; +} + +/* This is a minor kludge. This function is called from + get_spec_stats to determine the cardinality of a set derived + from a complemented string. It's a kludge in that some of the + same operations are (duplicated) performed in set_initialize. */ + +static int +card_of_complement (struct Spec_list *s) +{ + int c; + int cardinality = N_CHARS; + bool in_set[N_CHARS] = { 0, }; + + s->state = BEGIN_STATE; + while ((c = get_next (s, nullptr)) != -1) + { + cardinality -= (!in_set[c]); + in_set[c] = true; + } + return cardinality; +} + +/* Discard the lengths associated with a case conversion, + as using the actual number of upper or lower case characters + is problematic when they don't match in some locales. + Also ensure the case conversion classes in string2 are + aligned correctly with those in string1. + Note POSIX says the behavior of 'tr "[:upper:]" "[:upper:]"' + is undefined. Therefore we allow it (unlike Solaris) + and treat it as a no-op. */ + +static void +validate_case_classes (struct Spec_list *s1, struct Spec_list *s2) +{ + size_t n_upper = 0; + size_t n_lower = 0; + int c1 = 0; + int c2 = 0; + MAYBE_UNUSED count old_s1_len = s1->length, old_s2_len = s2->length; + struct List_element *s1_tail = s1->tail; + struct List_element *s2_tail = s2->tail; + bool s1_new_element = true; + bool s2_new_element = true; + + if (complement || !s2->has_char_class) + return; + + for (int i = 0; i < N_CHARS; i++) + { + if (isupper (i)) + n_upper++; + if (islower (i)) + n_lower++; + } + + s1->state = BEGIN_STATE; + s2->state = BEGIN_STATE; + + while (c1 != -1 && c2 != -1) + { + enum Upper_Lower_class class_s1, class_s2; + + c1 = get_next (s1, &class_s1); + c2 = get_next (s2, &class_s2); + + /* If c2 transitions to a new case class, then + c1 must also transition at the same time. */ + if (s2_new_element && class_s2 != UL_NONE + && !(s1_new_element && class_s1 != UL_NONE)) + error (EXIT_FAILURE, 0, + _("misaligned [:upper:] and/or [:lower:] construct")); + + /* If case converting, quickly skip over the elements. */ + if (class_s2 != UL_NONE) + { + skip_construct (s1); + skip_construct (s2); + /* Discount insignificant/problematic lengths. */ + s1->length -= (class_s1 == UL_UPPER ? n_upper : n_lower) - 1; + s2->length -= (class_s2 == UL_UPPER ? n_upper : n_lower) - 1; + } + + s1_new_element = s1->state == NEW_ELEMENT; /* Next element is new. */ + s2_new_element = s2->state == NEW_ELEMENT; /* Next element is new. */ + } + + affirm (old_s1_len >= s1->length && old_s2_len >= s2->length); + + s1->tail = s1_tail; + s2->tail = s2_tail; +} + +/* Gather statistics about the spec-list S in preparation for the tests + in validate that determine the consistency of the specs. This function + is called at most twice; once for string1, and again for any string2. + LEN_S1 < 0 indicates that this is the first call and that S represents + string1. When LEN_S1 >= 0, it is the length of the expansion of the + constructs in string1, and we can use its value to resolve any + indefinite repeat construct in S (which represents string2). Hence, + this function has the side-effect that it converts a valid [c*] + construct in string2 to [c*n] where n is large enough (or 0) to give + string2 the same length as string1. For example, with the command + tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would + be 26 and S (representing string2) would be converted to 'A[\n*24]Z'. */ + +static void +get_spec_stats (struct Spec_list *s) +{ + struct List_element *p; + count length = 0; + + s->n_indefinite_repeats = 0; + s->has_equiv_class = false; + s->has_restricted_char_class = false; + s->has_char_class = false; + for (p = s->head->next; p; p = p->next) + { + count len = 0; + count new_length; + + switch (p->type) + { + case RE_NORMAL_CHAR: + len = 1; + break; + + case RE_RANGE: + affirm (p->u.range.last_char >= p->u.range.first_char); + len = p->u.range.last_char - p->u.range.first_char + 1; + break; + + case RE_CHAR_CLASS: + s->has_char_class = true; + for (int i = 0; i < N_CHARS; i++) + if (is_char_class_member (p->u.char_class, i)) + ++len; + switch (p->u.char_class) + { + case CC_UPPER: + case CC_LOWER: + break; + default: + s->has_restricted_char_class = true; + break; + } + break; + + case RE_EQUIV_CLASS: + for (int i = 0; i < N_CHARS; i++) + if (is_equiv_class_member (p->u.equiv_code, i)) + ++len; + s->has_equiv_class = true; + break; + + case RE_REPEATED_CHAR: + if (p->u.repeated_char.repeat_count > 0) + len = p->u.repeated_char.repeat_count; + else + { + s->indefinite_repeat_element = p; + ++(s->n_indefinite_repeats); + } + break; + + default: + unreachable (); + } + + /* Check for arithmetic overflow in computing length. Also, reject + any length greater than the maximum repeat count, in case the + length is later used to compute the repeat count for an + indefinite element. */ + new_length = length + len; + if (! (length <= new_length && new_length <= REPEAT_COUNT_MAXIMUM)) + error (EXIT_FAILURE, 0, _("too many characters in set")); + length = new_length; + } + + s->length = length; +} + +static void +get_s1_spec_stats (struct Spec_list *s1) +{ + get_spec_stats (s1); + if (complement) + s1->length = card_of_complement (s1); +} + +static void +get_s2_spec_stats (struct Spec_list *s2, count len_s1) +{ + get_spec_stats (s2); + if (len_s1 >= s2->length && s2->n_indefinite_repeats == 1) + { + s2->indefinite_repeat_element->u.repeated_char.repeat_count = + len_s1 - s2->length; + s2->length = len_s1; + } +} + +static void +spec_init (struct Spec_list *spec_list) +{ + struct List_element *new = xmalloc (sizeof *new); + spec_list->head = spec_list->tail = new; + spec_list->head->next = nullptr; +} + +/* This function makes two passes over the argument string S. The first + one converts all \c and \ddd escapes to their one-byte representations. + The second constructs a linked specification list, SPEC_LIST, of the + characters and constructs that comprise the argument string. If either + of these passes detects an error, this function returns false. */ + +static bool +parse_str (char const *s, struct Spec_list *spec_list) +{ + struct E_string es; + bool ok = unquote (s, &es) && build_spec_list (&es, spec_list); + es_free (&es); + return ok; +} + +/* Given two specification lists, S1 and S2, and assuming that + S1->length > S2->length, append a single [c*n] element to S2 where c + is the last character in the expansion of S2 and n is the difference + between the two lengths. + Upon successful completion, S2->length is set to S1->length. The only + way this function can fail to make S2 as long as S1 is when S2 has + zero-length, since in that case, there is no last character to repeat. + So S2->length is required to be at least 1. */ + +static void +string2_extend (const struct Spec_list *s1, struct Spec_list *s2) +{ + struct List_element *p; + unsigned char char_to_repeat; + + affirm (translating); + affirm (s1->length > s2->length); + affirm (s2->length > 0); + + p = s2->tail; + switch (p->type) + { + case RE_NORMAL_CHAR: + char_to_repeat = p->u.normal_char; + break; + case RE_RANGE: + char_to_repeat = p->u.range.last_char; + break; + case RE_CHAR_CLASS: + /* Note BSD allows extending of classes in string2. For example: + tr '[:upper:]0-9' '[:lower:]' + That's not portable however, contradicts POSIX and is dependent + on your collating sequence. */ + error (EXIT_FAILURE, 0, + _("when translating with string1 longer than string2,\n" + "the latter string must not end with a character class")); + + case RE_REPEATED_CHAR: + char_to_repeat = p->u.repeated_char.the_repeated_char; + break; + + case RE_EQUIV_CLASS: + /* This shouldn't happen, because validate exits with an error + if it finds an equiv class in string2 when translating. */ + affirm (false); + + default: + unreachable (); + } + + append_repeated_char (s2, char_to_repeat, s1->length - s2->length); + s2->length = s1->length; +} + +/* Return true if S is a non-empty list in which exactly one + character (but potentially, many instances of it) appears. + E.g., [X*] or xxxxxxxx. */ + +static bool +homogeneous_spec_list (struct Spec_list *s) +{ + int b, c; + + s->state = BEGIN_STATE; + + if ((b = get_next (s, nullptr)) == -1) + return false; + + while ((c = get_next (s, nullptr)) != -1) + if (c != b) + return false; + + return true; +} + +/* Die with an error message if S1 and S2 describe strings that + are not valid with the given command line switches. + A side effect of this function is that if a valid [c*] or + [c*0] construct appears in string2, it is converted to [c*n] + with a value for n that makes s2->length == s1->length. By + the same token, if the --truncate-set1 option is not + given, S2 may be extended. */ + +static void +validate (struct Spec_list *s1, struct Spec_list *s2) +{ + get_s1_spec_stats (s1); + if (s1->n_indefinite_repeats > 0) + error (EXIT_FAILURE, 0, + _("the [c*] repeat construct may not appear in string1")); + + if (s2) + { + get_s2_spec_stats (s2, s1->length); + + if (s2->n_indefinite_repeats > 1) + error (EXIT_FAILURE, 0, + _("only one [c*] repeat construct may appear in string2")); + + if (translating) + { + if (s2->has_equiv_class) + error (EXIT_FAILURE, 0, + _("[=c=] expressions may not appear in string2" + " when translating")); + + if (s2->has_restricted_char_class) + error (EXIT_FAILURE, 0, + _("when translating, the only character classes" + " that may appear in\n" + "string2 are 'upper' and 'lower'")); + + validate_case_classes (s1, s2); + + if (s1->length > s2->length) + { + if (!truncate_set1) + { + /* string2 must be non-empty unless --truncate-set1 is + given or string1 is empty. */ + + if (s2->length == 0) + error (EXIT_FAILURE, 0, + _("when not truncating set1," + " string2 must be non-empty")); + string2_extend (s1, s2); + } + } + + if (complement && s1->has_char_class + && ! (s2->length == s1->length && homogeneous_spec_list (s2))) + error (EXIT_FAILURE, 0, + _("when translating with complemented character classes,\n" + "string2 must map all characters in the domain to one")); + } + else + /* Not translating. */ + { + if (s2->n_indefinite_repeats > 0) + error (EXIT_FAILURE, 0, + _("the [c*] construct may appear in string2" + " only when translating")); + } + } +} + +/* Read buffers of SIZE bytes via the function READER (if READER is + null, read from stdin) until EOF. When non-null, READER is either + read_and_delete or read_and_xlate. After each buffer is read, it is + processed and written to stdout. The buffers are processed so that + multiple consecutive occurrences of the same character in the input + stream are replaced by a single occurrence of that character if the + character is in the squeeze set. */ + +static void +squeeze_filter (char *buf, size_t size, size_t (*reader) (char *, size_t)) +{ + /* A value distinct from any character that may have been stored in a + buffer as the result of a block-read in the function squeeze_filter. */ + const int NOT_A_CHAR = INT_MAX; + + int char_to_squeeze = NOT_A_CHAR; + size_t i = 0; + size_t nr = 0; + + while (true) + { + if (i >= nr) + { + nr = reader (buf, size); + if (nr == 0) + break; + i = 0; + } + + size_t begin = i; + + if (char_to_squeeze == NOT_A_CHAR) + { + size_t out_len; + /* Here, by being a little tricky, we can get a significant + performance increase in most cases when the input is + reasonably large. Since tr will modify the input only + if two consecutive (and identical) input characters are + in the squeeze set, we can step by two through the data + when searching for a character in the squeeze set. This + means there may be a little more work in a few cases and + perhaps twice as much work in the worst cases where most + of the input is removed by squeezing repeats. But most + uses of this functionality seem to remove less than 20-30% + of the input. */ + for (; i < nr && !in_squeeze_set[to_uchar (buf[i])]; i += 2) + continue; + + /* There is a special case when i == nr and we've just + skipped a character (the last one in buf) that is in + the squeeze set. */ + if (i == nr && in_squeeze_set[to_uchar (buf[i - 1])]) + --i; + + if (i >= nr) + out_len = nr - begin; + else + { + char_to_squeeze = buf[i]; + /* We're about to output buf[begin..i]. */ + out_len = i - begin + 1; + + /* But since we stepped by 2 in the loop above, + out_len may be one too large. */ + if (i > 0 && buf[i - 1] == char_to_squeeze) + --out_len; + + /* Advance i to the index of first character to be + considered when looking for a char different from + char_to_squeeze. */ + ++i; + } + if (out_len > 0 + && fwrite (&buf[begin], 1, out_len, stdout) != out_len) + write_error (); + } + + if (char_to_squeeze != NOT_A_CHAR) + { + /* Advance i to index of first char != char_to_squeeze + (or to nr if all the rest of the characters in this + buffer are the same as char_to_squeeze). */ + for (; i < nr && buf[i] == char_to_squeeze; i++) + continue; + if (i < nr) + char_to_squeeze = NOT_A_CHAR; + /* If (i >= nr) we've squeezed the last character in this buffer. + So now we have to read a new buffer and continue comparing + characters against char_to_squeeze. */ + } + } +} + +static size_t +plain_read (char *buf, size_t size) +{ + size_t nr = safe_read (STDIN_FILENO, buf, size); + if (nr == SAFE_READ_ERROR) + error (EXIT_FAILURE, errno, _("read error")); + return nr; +} + +/* Read buffers of SIZE bytes from stdin until one is found that + contains at least one character not in the delete set. Store + in the array BUF, all characters from that buffer that are not + in the delete set, and return the number of characters saved + or 0 upon EOF. */ + +static size_t +read_and_delete (char *buf, size_t size) +{ + size_t n_saved; + + /* This enclosing do-while loop is to make sure that + we don't return zero (indicating EOF) when we've + just deleted all the characters in a buffer. */ + do + { + size_t nr = plain_read (buf, size); + + if (nr == 0) + return 0; + + /* This first loop may be a waste of code, but gives much + better performance when no characters are deleted in + the beginning of a buffer. It just avoids the copying + of buf[i] into buf[n_saved] when it would be a NOP. */ + + size_t i; + for (i = 0; i < nr && !in_delete_set[to_uchar (buf[i])]; i++) + continue; + n_saved = i; + + for (++i; i < nr; i++) + if (!in_delete_set[to_uchar (buf[i])]) + buf[n_saved++] = buf[i]; + } + while (n_saved == 0); + + return n_saved; +} + +/* Read at most SIZE bytes from stdin into the array BUF. Then + perform the in-place and one-to-one mapping specified by the global + array 'xlate'. Return the number of characters read, or 0 upon EOF. */ + +static size_t +read_and_xlate (char *buf, size_t size) +{ + size_t bytes_read = plain_read (buf, size); + + for (size_t i = 0; i < bytes_read; i++) + buf[i] = xlate[to_uchar (buf[i])]; + + return bytes_read; +} + +/* Initialize a boolean membership set, IN_SET, with the character + values obtained by traversing the linked list of constructs S + using the function 'get_next'. IN_SET is expected to have been + initialized to all zeros by the caller. If COMPLEMENT_THIS_SET + is true the resulting set is complemented. */ + +static void +set_initialize (struct Spec_list *s, bool complement_this_set, bool *in_set) +{ + int c; + + s->state = BEGIN_STATE; + while ((c = get_next (s, nullptr)) != -1) + in_set[c] = true; + if (complement_this_set) + for (size_t i = 0; i < N_CHARS; i++) + in_set[i] = (!in_set[i]); +} + +int +main (int argc, char **argv) +{ + int c; + int non_option_args; + int min_operands; + int max_operands; + struct Spec_list buf1, buf2; + struct Spec_list *s1 = &buf1; + struct Spec_list *s2 = &buf2; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "+AcCdst", long_options, nullptr)) != -1) + { + switch (c) + { + case 'A': + /* Undocumented option, for compatibility with AIX. */ + setlocale (LC_COLLATE, "C"); + setlocale (LC_CTYPE, "C"); + break; + + case 'c': + case 'C': + complement = true; + break; + + case 'd': + delete = true; + break; + + case 's': + squeeze_repeats = true; + break; + + case 't': + truncate_set1 = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + break; + } + } + + non_option_args = argc - optind; + translating = (non_option_args == 2 && !delete); + min_operands = 1 + (delete == squeeze_repeats); + max_operands = 1 + (delete <= squeeze_repeats); + + if (non_option_args < min_operands) + { + if (non_option_args == 0) + error (0, 0, _("missing operand")); + else + { + error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); + fprintf (stderr, "%s\n", + _(squeeze_repeats + ? N_("Two strings must be given when " + "both deleting and squeezing repeats.") + : N_("Two strings must be given when translating."))); + } + usage (EXIT_FAILURE); + } + + if (max_operands < non_option_args) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + max_operands])); + if (non_option_args == 2) + fprintf (stderr, "%s\n", + _("Only one string may be given when " + "deleting without squeezing repeats.")); + usage (EXIT_FAILURE); + } + + spec_init (s1); + if (!parse_str (argv[optind], s1)) + main_exit (EXIT_FAILURE); + + if (non_option_args == 2) + { + spec_init (s2); + if (!parse_str (argv[optind + 1], s2)) + main_exit (EXIT_FAILURE); + } + else + s2 = nullptr; + + validate (s1, s2); + + /* Use binary I/O, since 'tr' is sometimes used to transliterate + non-printable characters, or characters which are stripped away + by text-mode reads (like CR and ^Z). */ + xset_binary_mode (STDIN_FILENO, O_BINARY); + xset_binary_mode (STDOUT_FILENO, O_BINARY); + fadvise (stdin, FADVISE_SEQUENTIAL); + + if (squeeze_repeats && non_option_args == 1) + { + set_initialize (s1, complement, in_squeeze_set); + squeeze_filter (io_buf, sizeof io_buf, plain_read); + } + else if (delete && non_option_args == 1) + { + set_initialize (s1, complement, in_delete_set); + + while (true) + { + size_t nr = read_and_delete (io_buf, sizeof io_buf); + if (nr == 0) + break; + if (fwrite (io_buf, 1, nr, stdout) != nr) + write_error (); + } + } + else if (squeeze_repeats && delete && non_option_args == 2) + { + set_initialize (s1, complement, in_delete_set); + set_initialize (s2, false, in_squeeze_set); + squeeze_filter (io_buf, sizeof io_buf, read_and_delete); + } + else if (translating) + { + if (complement) + { + bool *in_s1 = in_delete_set; + + set_initialize (s1, false, in_s1); + s2->state = BEGIN_STATE; + for (int i = 0; i < N_CHARS; i++) + xlate[i] = i; + for (int i = 0; i < N_CHARS; i++) + { + if (!in_s1[i]) + { + int ch = get_next (s2, nullptr); + affirm (ch != -1 || truncate_set1); + if (ch == -1) + { + /* This will happen when tr is invoked like e.g. + tr -cs A-Za-z0-9 '\012'. */ + break; + } + xlate[i] = ch; + } + } + } + else + { + int c1, c2; + enum Upper_Lower_class class_s1; + enum Upper_Lower_class class_s2; + + for (int i = 0; i < N_CHARS; i++) + xlate[i] = i; + s1->state = BEGIN_STATE; + s2->state = BEGIN_STATE; + while (true) + { + c1 = get_next (s1, &class_s1); + c2 = get_next (s2, &class_s2); + + if (class_s1 == UL_LOWER && class_s2 == UL_UPPER) + { + for (int i = 0; i < N_CHARS; i++) + if (islower (i)) + xlate[i] = toupper (i); + } + else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER) + { + for (int i = 0; i < N_CHARS; i++) + if (isupper (i)) + xlate[i] = tolower (i); + } + else + { + /* The following should have been checked by validate... */ + if (c1 == -1 || c2 == -1) + break; + xlate[c1] = c2; + } + + /* When case-converting, skip the elements as an optimization. */ + if (class_s2 != UL_NONE) + { + skip_construct (s1); + skip_construct (s2); + } + } + affirm (c1 == -1 || truncate_set1); + } + if (squeeze_repeats) + { + set_initialize (s2, false, in_squeeze_set); + squeeze_filter (io_buf, sizeof io_buf, read_and_xlate); + } + else + { + while (true) + { + size_t bytes_read = read_and_xlate (io_buf, sizeof io_buf); + if (bytes_read == 0) + break; + if (fwrite (io_buf, 1, bytes_read, stdout) != bytes_read) + write_error (); + } + } + } + + if (close (STDIN_FILENO) != 0) + error (EXIT_FAILURE, errno, _("standard input")); + + main_exit (EXIT_SUCCESS); +} diff --git a/src/true.c b/src/true.c new file mode 100644 index 0000000..f65a922 --- /dev/null +++ b/src/true.c @@ -0,0 +1,80 @@ +/* Exit with a status code indicating success. + Copyright (C) 1999-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include +#include +#include +#include "system.h" + +/* Act like "true" by default; false.c overrides this. */ +#ifndef EXIT_STATUS +# define EXIT_STATUS EXIT_SUCCESS +#endif + +#if EXIT_STATUS == EXIT_SUCCESS +# define PROGRAM_NAME "true" +#else +# define PROGRAM_NAME "false" +#endif + +#define AUTHORS proper_name ("Jim Meyering") + +void +usage (int status) +{ + printf (_("\ +Usage: %s [ignored command line arguments]\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + printf ("%s\n\n", + _(EXIT_STATUS == EXIT_SUCCESS + ? N_("Exit with a status code indicating success.") + : N_("Exit with a status code indicating failure."))); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + exit (status); +} + +int +main (int argc, char **argv) +{ + /* Recognize --help or --version only if it's the only command-line + argument. */ + if (argc == 2) + { + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* Note true(1) will return EXIT_FAILURE in the + edge case where writes fail with GNU specific options. */ + atexit (close_stdout); + + if (STREQ (argv[1], "--help")) + usage (EXIT_STATUS); + + if (STREQ (argv[1], "--version")) + version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS, + (char *) nullptr); + } + + return EXIT_STATUS; +} diff --git a/src/truncate.c b/src/truncate.c new file mode 100644 index 0000000..2fe5109 --- /dev/null +++ b/src/truncate.c @@ -0,0 +1,384 @@ +/* truncate -- truncate or extend the length of files. + Copyright (C) 2008-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Pádraig Brady + + This is backwards compatible with the FreeBSD utility, but is more + flexible wrt the size specifications and the use of long options, + to better fit the "GNU" environment. */ + +#include /* sets _FILE_OFFSET_BITS=64 etc. */ +#include +#include +#include +#include + +#include "system.h" +#include "quote.h" +#include "stat-size.h" +#include "xdectoint.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "truncate" + +#define AUTHORS proper_name_lite ("Padraig Brady", "P\303\241draig Brady") + +/* (-c) If true, don't create if not already there */ +static bool no_create; + +/* (-o) If true, --size refers to blocks not bytes */ +static bool block_mode; + +/* (-r) Reference file to use size from */ +static char const *ref_file; + +static struct option const longopts[] = +{ + {"no-create", no_argument, nullptr, 'c'}, + {"io-blocks", no_argument, nullptr, 'o'}, + {"reference", required_argument, nullptr, 'r'}, + {"size", required_argument, nullptr, 's'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +typedef enum +{ rm_abs = 0, rm_rel, rm_min, rm_max, rm_rdn, rm_rup } rel_mode_t; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s OPTION... FILE...\n"), program_name); + fputs (_("\ +Shrink or extend the size of each FILE to the specified size\n\ +\n\ +A FILE argument that does not exist is created.\n\ +\n\ +If a FILE is larger than the specified size, the extra data is lost.\n\ +If a FILE is shorter, it is extended and the sparse extended part (hole)\n\ +reads as zero bytes.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -c, --no-create do not create any files\n\ +"), stdout); + fputs (_("\ + -o, --io-blocks treat SIZE as number of IO blocks instead of bytes\n\ +"), stdout); + fputs (_("\ + -r, --reference=RFILE base size on RFILE\n\ + -s, --size=SIZE set or adjust the file size by SIZE bytes\n"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_size_note (); + fputs (_("\n\ +SIZE may also be prefixed by one of the following modifying characters:\n\ +'+' extend by, '-' reduce by, '<' at most, '>' at least,\n\ +'/' round down to multiple of, '%' round up to multiple of.\n"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* return true on success, false on error. */ +static bool +do_ftruncate (int fd, char const *fname, off_t ssize, off_t rsize, + rel_mode_t rel_mode) +{ + struct stat sb; + off_t nsize; + + if ((block_mode || (rel_mode && rsize < 0)) && fstat (fd, &sb) != 0) + { + error (0, errno, _("cannot fstat %s"), quoteaf (fname)); + return false; + } + if (block_mode) + { + ptrdiff_t blksize = ST_BLKSIZE (sb); + intmax_t ssize0 = ssize; + if (ckd_mul (&ssize, ssize, blksize)) + { + error (0, 0, + _("overflow in %" PRIdMAX + " * %" PRIdPTR " byte blocks for file %s"), + ssize0, blksize, quoteaf (fname)); + return false; + } + } + if (rel_mode) + { + off_t fsize; + + if (0 <= rsize) + fsize = rsize; + else + { + if (usable_st_size (&sb)) + { + fsize = sb.st_size; + if (fsize < 0) + { + /* Sanity check. Overflow is the only reason I can think + this would ever go negative. */ + error (0, 0, _("%s has unusable, apparently negative size"), + quoteaf (fname)); + return false; + } + } + else + { + fsize = lseek (fd, 0, SEEK_END); + if (fsize < 0) + { + error (0, errno, _("cannot get the size of %s"), + quoteaf (fname)); + return false; + } + } + } + + if (rel_mode == rm_min) + nsize = MAX (fsize, ssize); + else if (rel_mode == rm_max) + nsize = MIN (fsize, ssize); + else if (rel_mode == rm_rdn) + /* 0..ssize-1 -> 0 */ + nsize = fsize - fsize % ssize; + else + { + if (rel_mode == rm_rup) + { + /* 1..ssize -> ssize */ + off_t r = fsize % ssize; + ssize = r == 0 ? 0 : ssize - r; + } + if (ckd_add (&nsize, fsize, ssize)) + { + error (0, 0, _("overflow extending size of file %s"), + quoteaf (fname)); + return false; + } + } + } + else + nsize = ssize; + if (nsize < 0) + nsize = 0; + + if (ftruncate (fd, nsize) != 0) + { + intmax_t s = nsize; + error (0, errno, _("failed to truncate %s at %"PRIdMAX" bytes"), + quoteaf (fname), s); + return false; + } + + return true; +} + +int +main (int argc, char **argv) +{ + bool got_size = false; + off_t size IF_LINT ( = 0); + off_t rsize = -1; + rel_mode_t rel_mode = rm_abs; + int c; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, "cor:s:", longopts, nullptr)) != -1) + { + switch (c) + { + case 'c': + no_create = true; + break; + + case 'o': + block_mode = true; + break; + + case 'r': + ref_file = optarg; + break; + + case 's': + /* skip any whitespace */ + while (isspace (to_uchar (*optarg))) + optarg++; + switch (*optarg) + { + case '<': + rel_mode = rm_max; + optarg++; + break; + case '>': + rel_mode = rm_min; + optarg++; + break; + case '/': + rel_mode = rm_rdn; + optarg++; + break; + case '%': + rel_mode = rm_rup; + optarg++; + break; + } + /* skip any whitespace */ + while (isspace (to_uchar (*optarg))) + optarg++; + if (*optarg == '+' || *optarg == '-') + { + if (rel_mode) + { + error (0, 0, _("multiple relative modifiers specified")); + /* Note other combinations are flagged as invalid numbers */ + usage (EXIT_FAILURE); + } + rel_mode = rm_rel; + } + /* Support dd BLOCK size suffixes + lowercase g,t,m for bsd compat. + Note we don't support dd's b=512, c=1, w=2 or 21x512MiB formats. */ + size = xdectoimax (optarg, OFF_T_MIN, OFF_T_MAX, "EgGkKmMPQRtTYZ0", + _("Invalid number"), 0); + /* Rounding to multiple of 0 is nonsensical */ + if ((rel_mode == rm_rup || rel_mode == rm_rdn) && size == 0) + error (EXIT_FAILURE, 0, _("division by zero")); + got_size = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + argv += optind; + argc -= optind; + + /* must specify either size or reference file */ + if (!ref_file && !got_size) + { + error (0, 0, _("you must specify either %s or %s"), + quote_n (0, "--size"), quote_n (1, "--reference")); + usage (EXIT_FAILURE); + } + /* must specify a relative size with a reference file */ + if (ref_file && got_size && !rel_mode) + { + error (0, 0, _("you must specify a relative %s with %s"), + quote_n (0, "--size"), quote_n (1, "--reference")); + usage (EXIT_FAILURE); + } + /* block_mode without size is not valid */ + if (block_mode && !got_size) + { + error (0, 0, _("%s was specified but %s was not"), + quote_n (0, "--io-blocks"), quote_n (1, "--size")); + usage (EXIT_FAILURE); + } + /* must specify at least 1 file */ + if (argc < 1) + { + error (0, 0, _("missing file operand")); + usage (EXIT_FAILURE); + } + + if (ref_file) + { + struct stat sb; + off_t file_size = -1; + if (stat (ref_file, &sb) != 0) + error (EXIT_FAILURE, errno, _("cannot stat %s"), quoteaf (ref_file)); + if (usable_st_size (&sb)) + file_size = sb.st_size; + else + { + int ref_fd = open (ref_file, O_RDONLY); + if (0 <= ref_fd) + { + off_t file_end = lseek (ref_fd, 0, SEEK_END); + int saved_errno = errno; + close (ref_fd); /* ignore failure */ + if (0 <= file_end) + file_size = file_end; + else + { + /* restore, in case close clobbered it. */ + errno = saved_errno; + } + } + } + if (file_size < 0) + error (EXIT_FAILURE, errno, _("cannot get the size of %s"), + quoteaf (ref_file)); + if (!got_size) + size = file_size; + else + rsize = file_size; + } + + int oflags = O_WRONLY | (no_create ? 0 : O_CREAT) | O_NONBLOCK; + bool errors = false; + + for (char const *fname; (fname = *argv); argv++) + { + int fd = open (fname, oflags, MODE_RW_UGO); + if (fd < 0) + { + /* 'truncate -s0 -c no-such-file' shouldn't gen error + 'truncate -s0 no-such-dir/file' should gen ENOENT error + 'truncate -s0 no-such-dir/' should gen EISDIR error + 'truncate -s0 .' should gen EISDIR error */ + if (!(no_create && errno == ENOENT)) + { + error (0, errno, _("cannot open %s for writing"), + quoteaf (fname)); + errors = true; + } + } + else + { + errors |= !do_ftruncate (fd, fname, size, rsize, rel_mode); + if (close (fd) != 0) + { + error (0, errno, _("failed to close %s"), quoteaf (fname)); + errors = true; + } + } + } + + return errors ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/src/tsort.c b/src/tsort.c new file mode 100644 index 0000000..58bb487 --- /dev/null +++ b/src/tsort.c @@ -0,0 +1,552 @@ +/* tsort - topological sort. + Copyright (C) 1998-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Mark Kettenis . */ + +/* The topological sort is done according to Algorithm T (Topological + sort) in Donald E. Knuth, The Art of Computer Programming, Volume + 1/Fundamental Algorithms, page 262. */ + +#include + +#include + +#include "system.h" +#include "assure.h" +#include "long-options.h" +#include "fadvise.h" +#include "readtokens.h" +#include "stdio--.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tsort" + +#define AUTHORS proper_name ("Mark Kettenis") + +/* Token delimiters when reading from a file. */ +#define DELIM " \t\n" + +/* Members of the list of successors. */ +struct successor +{ + struct item *suc; + struct successor *next; +}; + +/* Each string is held in memory as the head of a list of successors. */ +struct item +{ + char const *str; + struct item *left, *right; + signed char balance; /* -1, 0, or +1 */ + bool printed; + size_t count; + struct item *qlink; + struct successor *top; +}; + +/* The head of the sorted list. */ +static struct item *head = nullptr; + +/* The tail of the list of 'zeros', strings that have no predecessors. */ +static struct item *zeros = nullptr; + +/* Used for loop detection. */ +static struct item *loop = nullptr; + +/* The number of strings to sort. */ +static size_t n_strings = 0; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION] [FILE]\n\ +Write totally ordered list consistent with the partial ordering in FILE.\n\ +"), program_name); + + emit_stdin_note (); + + fputs (_("\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + + exit (status); +} + +/* Create a new item/node for STR. */ +static struct item * +new_item (char const *str) +{ + /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */ + struct item *k = xzalloc (sizeof *k); + if (str) + k->str = xstrdup (str); + return k; +} + +/* Search binary tree rooted at *ROOT for STR. Allocate a new tree if + *ROOT is null. Insert a node/item for STR if not found. Return + the node/item found/created for STR. + + This is done according to Algorithm A (Balanced tree search and + insertion) in Donald E. Knuth, The Art of Computer Programming, + Volume 3/Searching and Sorting, pages 455--457. */ + +static struct item * +search_item (struct item *root, char const *str) +{ + struct item *p, *q, *r, *s, *t; + int a; + + /* Make sure the tree is not empty, since that is what the algorithm + below expects. */ + if (root->right == nullptr) + return (root->right = new_item (str)); + + /* A1. Initialize. */ + t = root; + s = p = root->right; + + while (true) + { + /* A2. Compare. */ + a = strcmp (str, p->str); + if (a == 0) + return p; + + /* A3 & A4. Move left & right. */ + if (a < 0) + q = p->left; + else + q = p->right; + + if (q == nullptr) + { + /* A5. Insert. */ + q = new_item (str); + + /* A3 & A4. (continued). */ + if (a < 0) + p->left = q; + else + p->right = q; + + /* A6. Adjust balance factors. */ + a = strcmp (str, s->str); + if (a < 0) + { + r = p = s->left; + a = -1; + } + else + { + affirm (0 < a); + r = p = s->right; + a = 1; + } + + while (p != q) + { + int cmp = strcmp (str, p->str); + if (cmp < 0) + { + p->balance = -1; + p = p->left; + } + else + { + affirm (0 < cmp); + p->balance = 1; + p = p->right; + } + } + + /* A7. Balancing act. */ + if (s->balance == 0 || s->balance == -a) + { + s->balance += a; + return q; + } + + if (r->balance == a) + { + /* A8. Single Rotation. */ + p = r; + if (a < 0) + { + s->left = r->right; + r->right = s; + } + else + { + s->right = r->left; + r->left = s; + } + s->balance = r->balance = 0; + } + else + { + /* A9. Double rotation. */ + if (a < 0) + { + p = r->right; + r->right = p->left; + p->left = r; + s->left = p->right; + p->right = s; + } + else + { + p = r->left; + r->left = p->right; + p->right = r; + s->right = p->left; + p->left = s; + } + + s->balance = 0; + r->balance = 0; + if (p->balance == a) + s->balance = -a; + else if (p->balance == -a) + r->balance = a; + p->balance = 0; + } + + /* A10. Finishing touch. */ + if (s == t->right) + t->right = p; + else + t->left = p; + + return q; + } + + /* A3 & A4. (continued). */ + if (q->balance) + { + t = p; + s = q; + } + + p = q; + } + + /* NOTREACHED */ +} + +/* Record the fact that J precedes K. */ + +static void +record_relation (struct item *j, struct item *k) +{ + struct successor *p; + + if (!STREQ (j->str, k->str)) + { + k->count++; + p = xmalloc (sizeof *p); + p->suc = k; + p->next = j->top; + j->top = p; + } +} + +static bool +count_items (MAYBE_UNUSED struct item *unused) +{ + n_strings++; + return false; +} + +static bool +scan_zeros (struct item *k) +{ + /* Ignore strings that have already been printed. */ + if (k->count == 0 && !k->printed) + { + if (head == nullptr) + head = k; + else + zeros->qlink = k; + + zeros = k; + } + + return false; +} + +/* Try to detect the loop. If we have detected that K is part of a + loop, print the loop on standard error, remove a relation to break + the loop, and return true. + + The loop detection strategy is as follows: Realize that what we're + dealing with is essentially a directed graph. If we find an item + that is part of a graph that contains a cycle we traverse the graph + in backwards direction. In general there is no unique way to do + this, but that is no problem. If we encounter an item that we have + encountered before, we know that we've found a cycle. All we have + to do now is retrace our steps, printing out the items until we + encounter that item again. (This is not necessarily the item that + we started from originally.) Since the order in which the items + are stored in the tree is not related to the specified partial + ordering, we may need to walk the tree several times before the + loop has completely been constructed. If the loop was found, the + global variable LOOP will be null. */ + +static bool +detect_loop (struct item *k) +{ + if (k->count > 0) + { + /* K does not have to be part of a cycle. It is however part of + a graph that contains a cycle. */ + + if (loop == nullptr) + /* Start traversing the graph at K. */ + loop = k; + else + { + struct successor **p = &k->top; + + while (*p) + { + if ((*p)->suc == loop) + { + if (k->qlink) + { + /* We have found a loop. Retrace our steps. */ + while (loop) + { + struct item *tmp = loop->qlink; + + error (0, 0, "%s", (loop->str)); + + /* Until we encounter K again. */ + if (loop == k) + { + /* Remove relation. */ + struct successor *s = *p; + s->suc->count--; + *p = s->next; + IF_LINT (free (s)); + break; + } + + /* Tidy things up since we might have to + detect another loop. */ + loop->qlink = nullptr; + loop = tmp; + } + + while (loop) + { + struct item *tmp = loop->qlink; + + loop->qlink = nullptr; + loop = tmp; + } + + /* Since we have found the loop, stop walking + the tree. */ + return true; + } + else + { + k->qlink = loop; + loop = k; + break; + } + } + + p = &(*p)->next; + } + } + } + + return false; +} + +/* Recurse (sub)tree rooted at ROOT, calling ACTION for each node. + Stop when ACTION returns true. */ + +static bool +recurse_tree (struct item *root, bool (*action) (struct item *)) +{ + if (root->left == nullptr && root->right == nullptr) + return (*action) (root); + else + { + if (root->left != nullptr) + if (recurse_tree (root->left, action)) + return true; + if ((*action) (root)) + return true; + if (root->right != nullptr) + if (recurse_tree (root->right, action)) + return true; + } + + return false; +} + +/* Walk the tree specified by the head ROOT, calling ACTION for + each node. */ + +static void +walk_tree (struct item *root, bool (*action) (struct item *)) +{ + if (root->right) + recurse_tree (root->right, action); +} + +/* Do a topological sort on FILE. Exit with appropriate exit status. */ + +static _Noreturn void +tsort (char const *file) +{ + bool ok = true; + struct item *j = nullptr; + struct item *k = nullptr; + token_buffer tokenbuffer; + bool is_stdin = STREQ (file, "-"); + + /* Initialize the head of the tree holding the strings we're sorting. */ + struct item *root = new_item (nullptr); + + if (!is_stdin && ! freopen (file, "r", stdin)) + error (EXIT_FAILURE, errno, "%s", quotef (file)); + + fadvise (stdin, FADVISE_SEQUENTIAL); + + init_tokenbuffer (&tokenbuffer); + + while (true) + { + /* T2. Next Relation. */ + size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer); + if (len == (size_t) -1) + { + if (ferror (stdin)) + error (EXIT_FAILURE, errno, _("%s: read error"), quotef (file)); + break; + } + + affirm (len != 0); + + k = search_item (root, tokenbuffer.buffer); + if (j) + { + /* T3. Record the relation. */ + record_relation (j, k); + k = nullptr; + } + + j = k; + } + + if (k != nullptr) + error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"), + quotef (file)); + + /* T1. Initialize (N <- n). */ + walk_tree (root, count_items); + + while (n_strings > 0) + { + /* T4. Scan for zeros. */ + walk_tree (root, scan_zeros); + + while (head) + { + struct successor *p = head->top; + + /* T5. Output front of queue. */ + puts (head->str); + head->printed = true; + n_strings--; + + /* T6. Erase relations. */ + while (p) + { + p->suc->count--; + if (p->suc->count == 0) + { + zeros->qlink = p->suc; + zeros = p->suc; + } + + p = p->next; + } + + /* T7. Remove from queue. */ + head = head->qlink; + } + + /* T8. End of process. */ + if (n_strings > 0) + { + /* The input contains a loop. */ + error (0, 0, _("%s: input contains a loop:"), quotef (file)); + ok = false; + + /* Print the loop and remove a relation to break it. */ + do + walk_tree (root, detect_loop); + while (loop); + } + } + + if (fclose (stdin) != 0) + error (EXIT_FAILURE, errno, "%s", + is_stdin ? _("standard input") : quotef (file)); + + exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (1 < argc - optind) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + tsort (optind == argc ? "-" : argv[optind]); +} diff --git a/src/tty.c b/src/tty.c new file mode 100644 index 0000000..6415797 --- /dev/null +++ b/src/tty.c @@ -0,0 +1,132 @@ +/* tty -- print the name of the terminal connected to standard input + Copyright (C) 1990-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Displays "not a tty" if stdin is not a terminal. + Displays nothing if -s option is given. + Exit status 0 if stdin is a tty, 1 if not a tty, 2 if usage error, + 3 if write error. + + Written by David MacKenzie . */ + +#include +#include +#include +#include + +#include "system.h" +#include "quote.h" + +/* Exit statuses. */ +enum + { + TTY_STDIN_NOTTY = 1, + TTY_FAILURE = 2, + TTY_WRITE_ERROR = 3 + }; + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "tty" + +#define AUTHORS proper_name ("David MacKenzie") + +/* If true, return an exit status but produce no output. */ +static bool silent; + +static struct option const longopts[] = +{ + {"silent", no_argument, nullptr, 's'}, + {"quiet", no_argument, nullptr, 's'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]...\n"), program_name); + fputs (_("\ +Print the file name of the terminal connected to standard input.\n\ +\n\ + -s, --silent, --quiet print nothing, only return an exit status\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int optc; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + initialize_exit_failure (TTY_WRITE_ERROR); + atexit (close_stdout); + + silent = false; + + while ((optc = getopt_long (argc, argv, "s", longopts, nullptr)) != -1) + { + switch (optc) + { + case 's': + silent = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (TTY_FAILURE); + } + } + + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (TTY_FAILURE); + } + + errno = ENOENT; + + if (silent) + return isatty (STDIN_FILENO) ? EXIT_SUCCESS : TTY_STDIN_NOTTY; + + int status = EXIT_SUCCESS; + char const *tty = ttyname (STDIN_FILENO); + + if (! tty) + { + tty = _("not a tty"); + status = TTY_STDIN_NOTTY; + } + + puts (tty); + return status; +} diff --git a/src/uname-arch.c b/src/uname-arch.c new file mode 100644 index 0000000..eb42942 --- /dev/null +++ b/src/uname-arch.c @@ -0,0 +1,2 @@ +#include "uname.h" +int uname_mode = UNAME_ARCH; diff --git a/src/uname-uname.c b/src/uname-uname.c new file mode 100644 index 0000000..450245d --- /dev/null +++ b/src/uname-uname.c @@ -0,0 +1,2 @@ +#include "uname.h" +int uname_mode = UNAME_UNAME; diff --git a/src/uname.c b/src/uname.c new file mode 100644 index 0000000..883b9a4 --- /dev/null +++ b/src/uname.c @@ -0,0 +1,379 @@ +/* uname -- print system information + + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by David MacKenzie */ + +#include +#include +#include +#include +#include + +#if HAVE_SYSINFO && HAVE_SYS_SYSTEMINFO_H +# include +#endif + +#if HAVE_SYS_SYSCTL_H && ! defined __GLIBC__ && ! defined __APPLE__ +# if HAVE_SYS_PARAM_H +# include /* needed for OpenBSD 3.0 */ +# endif +# include +# ifdef HW_MODEL +# ifdef HW_MACHINE_ARCH +/* E.g., FreeBSD 4.5, NetBSD 1.5.2 */ +# define UNAME_HARDWARE_PLATFORM HW_MODEL +# define UNAME_PROCESSOR HW_MACHINE_ARCH +# else +/* E.g., OpenBSD 3.0 */ +# define UNAME_PROCESSOR HW_MODEL +# endif +# endif +#endif + +#include "system.h" +#include "quote.h" +#include "uname.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME (uname_mode == UNAME_UNAME ? "uname" : "arch") + +#define AUTHORS proper_name ("David MacKenzie") +#define ARCH_AUTHORS "David MacKenzie", "Karel Zak" + +/* Values that are bitwise or'd into 'toprint'. */ +/* Kernel name. */ +#define PRINT_KERNEL_NAME 1 + +/* Node name on a communications network. */ +#define PRINT_NODENAME 2 + +/* Kernel release. */ +#define PRINT_KERNEL_RELEASE 4 + +/* Kernel version. */ +#define PRINT_KERNEL_VERSION 8 + +/* Machine hardware name. */ +#define PRINT_MACHINE 16 + +/* Processor type. */ +#define PRINT_PROCESSOR 32 + +/* Hardware platform. */ +#define PRINT_HARDWARE_PLATFORM 64 + +/* Operating system. */ +#define PRINT_OPERATING_SYSTEM 128 + +static struct option const uname_long_options[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"kernel-name", no_argument, nullptr, 's'}, + {"sysname", no_argument, nullptr, 's'}, /* Obsolescent. */ + {"nodename", no_argument, nullptr, 'n'}, + {"kernel-release", no_argument, nullptr, 'r'}, + {"release", no_argument, nullptr, 'r'}, /* Obsolescent. */ + {"kernel-version", no_argument, nullptr, 'v'}, + {"machine", no_argument, nullptr, 'm'}, + {"processor", no_argument, nullptr, 'p'}, + {"hardware-platform", no_argument, nullptr, 'i'}, + {"operating-system", no_argument, nullptr, 'o'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +static struct option const arch_long_options[] = +{ + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]...\n"), program_name); + + if (uname_mode == UNAME_UNAME) + { + fputs (_("\ +Print certain system information. With no OPTION, same as -s.\n\ +\n\ + -a, --all print all information, in the following order,\n\ + except omit -p and -i if unknown:\n\ + -s, --kernel-name print the kernel name\n\ + -n, --nodename print the network node hostname\n\ + -r, --kernel-release print the kernel release\n\ +"), stdout); + fputs (_("\ + -v, --kernel-version print the kernel version\n\ + -m, --machine print the machine hardware name\n\ + -p, --processor print the processor type (non-portable)\n\ + -i, --hardware-platform print the hardware platform (non-portable)\n\ + -o, --operating-system print the operating system\n\ +"), stdout); + } + else + { + fputs (_("\ +Print machine architecture.\n\ +\n\ +"), stdout); + } + + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Print ELEMENT, preceded by a space if something has already been + printed. */ + +static void +print_element (char const *element) +{ + static bool printed; + if (printed) + putchar (' '); + printed = true; + fputs (element, stdout); +} + +/* Print ELEMENT, preceded by a space if something has already been + printed. But if the environment variable ENVVAR is set, print its + value instead of ELEMENT. */ + +static void +print_element_env (char const *element, MAYBE_UNUSED char const *envvar) +{ +#ifdef __APPLE__ + if (envvar) + { + char const *val = getenv (envvar); + if (val) + element = val; + } +#endif + print_element (element); +} + + +/* Set all the option flags according to the switches specified. + Return the mask indicating which elements to print. */ + +static int +decode_switches (int argc, char **argv) +{ + int c; + unsigned int toprint = 0; + + if (uname_mode == UNAME_ARCH) + { + while ((c = getopt_long (argc, argv, "", + arch_long_options, nullptr)) + != -1) + { + switch (c) + { + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, ARCH_AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + toprint = PRINT_MACHINE; + } + else + { + while ((c = getopt_long (argc, argv, "asnrvmpio", + uname_long_options, nullptr)) + != -1) + { + switch (c) + { + case 'a': + toprint = UINT_MAX; + break; + + case 's': + toprint |= PRINT_KERNEL_NAME; + break; + + case 'n': + toprint |= PRINT_NODENAME; + break; + + case 'r': + toprint |= PRINT_KERNEL_RELEASE; + break; + + case 'v': + toprint |= PRINT_KERNEL_VERSION; + break; + + case 'm': + toprint |= PRINT_MACHINE; + break; + + case 'p': + toprint |= PRINT_PROCESSOR; + break; + + case 'i': + toprint |= PRINT_HARDWARE_PLATFORM; + break; + + case 'o': + toprint |= PRINT_OPERATING_SYSTEM; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + } + + if (argc != optind) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + return toprint; +} + +int +main (int argc, char **argv) +{ + static char const unknown[] = "unknown"; + + /* Mask indicating which elements to print. */ + unsigned int toprint = 0; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + toprint = decode_switches (argc, argv); + + if (toprint == 0) + toprint = PRINT_KERNEL_NAME; + + if (toprint + & (PRINT_KERNEL_NAME | PRINT_NODENAME | PRINT_KERNEL_RELEASE + | PRINT_KERNEL_VERSION | PRINT_MACHINE)) + { + struct utsname name; + + if (uname (&name) == -1) + error (EXIT_FAILURE, errno, _("cannot get system name")); + + if (toprint & PRINT_KERNEL_NAME) + print_element_env (name.sysname, "UNAME_SYSNAME"); + if (toprint & PRINT_NODENAME) + print_element_env (name.nodename, "UNAME_NODENAME"); + if (toprint & PRINT_KERNEL_RELEASE) + print_element_env (name.release, "UNAME_RELEASE"); + if (toprint & PRINT_KERNEL_VERSION) + print_element_env (name.version, "UNAME_VERSION"); + if (toprint & PRINT_MACHINE) + print_element_env (name.machine, "UNAME_MACHINE"); + } + + if (toprint & PRINT_PROCESSOR) + { + char const *element = unknown; +#ifdef __APPLE__ +# if defined __arm__ || defined __arm64__ + element = "arm"; +# elif defined __i386__ || defined __x86_64__ + element = "i386"; +# elif defined __ppc__ || defined __ppc64__ + element = "powerpc"; +# endif +#endif +#if HAVE_SYSINFO && defined SI_ARCHITECTURE + if (element == unknown) + { + static char processor[257]; + if (0 <= sysinfo (SI_ARCHITECTURE, processor, sizeof processor)) + element = processor; + } +#endif +#ifdef UNAME_PROCESSOR + if (element == unknown) + { + static char processor[257]; + size_t s = sizeof processor; + static int mib[] = { CTL_HW, UNAME_PROCESSOR }; + if (sysctl (mib, 2, processor, &s, 0, 0) >= 0) + element = processor; + } +#endif + if (! (toprint == UINT_MAX && element == unknown)) + print_element (element); + } + + if (toprint & PRINT_HARDWARE_PLATFORM) + { + char const *element = unknown; +#if HAVE_SYSINFO && defined SI_PLATFORM + { + static char hardware_platform[257]; + if (0 <= sysinfo (SI_PLATFORM, + hardware_platform, sizeof hardware_platform)) + element = hardware_platform; + } +#endif +#ifdef UNAME_HARDWARE_PLATFORM + if (element == unknown) + { + static char hardware_platform[257]; + size_t s = sizeof hardware_platform; + static int mib[] = { CTL_HW, UNAME_HARDWARE_PLATFORM }; + if (sysctl (mib, 2, hardware_platform, &s, 0, 0) >= 0) + element = hardware_platform; + } +#endif + if (! (toprint == UINT_MAX && element == unknown)) + print_element (element); + } + + if (toprint & PRINT_OPERATING_SYSTEM) + print_element (HOST_OPERATING_SYSTEM); + + putchar ('\n'); + + return EXIT_SUCCESS; +} diff --git a/src/uname.h b/src/uname.h new file mode 100644 index 0000000..f4da9b0 --- /dev/null +++ b/src/uname.h @@ -0,0 +1,7 @@ +/* This is for the 'uname' program. */ +#define UNAME_UNAME 1 + +/* This is for the 'arch' program. */ +#define UNAME_ARCH 2 + +extern int uname_mode; diff --git a/src/unexpand.c b/src/unexpand.c new file mode 100644 index 0000000..5a2283f --- /dev/null +++ b/src/unexpand.c @@ -0,0 +1,323 @@ +/* unexpand - convert blanks to tabs + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* By default, convert only maximal strings of initial blanks and tabs + into tabs. + Preserves backspace characters in the output; they decrement the + column count for tab calculations. + The default action is equivalent to -8. + + Options: + --tabs=tab1[,tab2[,...]] + -t tab1[,tab2[,...]] + -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1 + columns apart instead of the default 8. Otherwise, + set the tabs at columns tab1, tab2, etc. (numbered from + 0); preserve any blanks beyond the tab stops given. + --all + -a Use tabs wherever they would replace 2 or more blanks, + not just at the beginnings of lines. + + David MacKenzie */ + +#include + +#include +#include +#include +#include "system.h" +#include "expand-common.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "unexpand" + +#define AUTHORS proper_name ("David MacKenzie") + + + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + CONVERT_FIRST_ONLY_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + {"tabs", required_argument, nullptr, 't'}, + {"all", no_argument, nullptr, 'a'}, + {"first-only", no_argument, nullptr, CONVERT_FIRST_ONLY_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + fputs (_("\ +Convert blanks in each FILE to tabs, writing to standard output.\n\ +"), stdout); + + emit_stdin_note (); + emit_mandatory_arg_note (); + + fputs (_("\ + -a, --all convert all blanks, instead of just initial blanks\n\ + --first-only convert only leading sequences of blanks (overrides -a)\n\ + -t, --tabs=N have tabs N characters apart instead of 8 (enables -a)\n\ +"), stdout); + emit_tab_list_info (); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Change blanks to tabs, writing to stdout. + Read each file in 'file_list', in order. */ + +static void +unexpand (void) +{ + /* Input stream. */ + FILE *fp = next_file (nullptr); + + /* The array of pending blanks. In non-POSIX locales, blanks can + include characters other than spaces, so the blanks must be + stored, not merely counted. */ + char *pending_blank; + + if (!fp) + return; + + /* The worst case is a non-blank character, then one blank, then a + tab stop, then MAX_COLUMN_WIDTH - 1 blanks, then a non-blank; so + allocate MAX_COLUMN_WIDTH bytes to store the blanks. */ + pending_blank = xmalloc (max_column_width); + + while (true) + { + /* Input character, or EOF. */ + int c; + + /* If true, perform translations. */ + bool convert = true; + + + /* The following variables have valid values only when CONVERT + is true: */ + + /* Column of next input character. */ + uintmax_t column = 0; + + /* Column the next input tab stop is on. */ + uintmax_t next_tab_column = 0; + + /* Index in TAB_LIST of next tab stop to examine. */ + size_t tab_index = 0; + + /* If true, the first pending blank came just before a tab stop. */ + bool one_blank_before_tab_stop = false; + + /* If true, the previous input character was a blank. This is + initially true, since initial strings of blanks are treated + as if the line was preceded by a blank. */ + bool prev_blank = true; + + /* Number of pending columns of blanks. */ + size_t pending = 0; + + + /* Convert a line of text. */ + + do + { + while ((c = getc (fp)) < 0 && (fp = next_file (fp))) + continue; + + if (convert) + { + bool blank = !! isblank (c); + + if (blank) + { + bool last_tab; + + next_tab_column = get_next_tab_column (column, &tab_index, + &last_tab); + + if (last_tab) + convert = false; + + if (convert) + { + if (next_tab_column < column) + error (EXIT_FAILURE, 0, _("input line is too long")); + + if (c == '\t') + { + column = next_tab_column; + + if (pending) + pending_blank[0] = '\t'; + } + else + { + column++; + + if (! (prev_blank && column == next_tab_column)) + { + /* It is not yet known whether the pending blanks + will be replaced by tabs. */ + if (column == next_tab_column) + one_blank_before_tab_stop = true; + pending_blank[pending++] = c; + prev_blank = true; + continue; + } + + /* Replace the pending blanks by a tab or two. */ + pending_blank[0] = c = '\t'; + } + + /* Discard pending blanks, unless it was a single + blank just before the previous tab stop. */ + pending = one_blank_before_tab_stop; + } + } + else if (c == '\b') + { + /* Go back one column, and force recalculation of the + next tab stop. */ + column -= !!column; + next_tab_column = column; + tab_index -= !!tab_index; + } + else + { + column++; + if (!column) + error (EXIT_FAILURE, 0, _("input line is too long")); + } + + if (pending) + { + if (pending > 1 && one_blank_before_tab_stop) + pending_blank[0] = '\t'; + if (fwrite (pending_blank, 1, pending, stdout) != pending) + write_error (); + pending = 0; + one_blank_before_tab_stop = false; + } + + prev_blank = blank; + convert &= convert_entire_line || blank; + } + + if (c < 0) + { + free (pending_blank); + return; + } + + if (putchar (c) < 0) + write_error (); + } + while (c != '\n'); + } +} + +int +main (int argc, char **argv) +{ + bool have_tabval = false; + uintmax_t tabval IF_LINT ( = 0); + int c; + + /* If true, cancel the effect of any -a (explicit or implicit in -t), + so that only leading blanks will be considered. */ + bool convert_first_only = false; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((c = getopt_long (argc, argv, ",0123456789at:", longopts, nullptr)) + != -1) + { + switch (c) + { + case '?': + usage (EXIT_FAILURE); + case 'a': + convert_entire_line = true; + break; + case 't': + convert_entire_line = true; + parse_tab_stops (optarg); + break; + case CONVERT_FIRST_ONLY_OPTION: + convert_first_only = true; + break; + case ',': + if (have_tabval) + add_tab_stop (tabval); + have_tabval = false; + break; + case_GETOPT_HELP_CHAR; + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + default: + if (!have_tabval) + { + tabval = 0; + have_tabval = true; + } + if (!DECIMAL_DIGIT_ACCUMULATE (tabval, c - '0', uintmax_t)) + error (EXIT_FAILURE, 0, _("tab stop value is too large")); + break; + } + } + + if (convert_first_only) + convert_entire_line = false; + + if (have_tabval) + add_tab_stop (tabval); + + finalize_tab_stops (); + + set_file_list ((optind < argc) ? &argv[optind] : nullptr); + + unexpand (); + + cleanup_file_list_stdin (); + + return exit_status; +} diff --git a/src/uniq.c b/src/uniq.c new file mode 100644 index 0000000..fab04de --- /dev/null +++ b/src/uniq.c @@ -0,0 +1,665 @@ +/* uniq -- remove duplicate lines from a sorted file + Copyright (C) 1986-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Richard M. Stallman and David MacKenzie. */ + +#include + +#include +#include + +#include "system.h" +#include "argmatch.h" +#include "linebuffer.h" +#include "fadvise.h" +#include "posixver.h" +#include "stdio--.h" +#include "xstrtol.h" +#include "memcasecmp.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "uniq" + +#define AUTHORS \ + proper_name ("Richard M. Stallman"), \ + proper_name ("David MacKenzie") + +#define SWAP_LINES(A, B) \ + do \ + { \ + struct linebuffer *_tmp; \ + _tmp = (A); \ + (A) = (B); \ + (B) = _tmp; \ + } \ + while (0) + +/* Number of fields to skip on each line when doing comparisons. */ +static size_t skip_fields; + +/* Number of chars to skip after skipping any fields. */ +static size_t skip_chars; + +/* Number of chars to compare. */ +static size_t check_chars; + +enum countmode +{ + count_occurrences, /* -c Print count before output lines. */ + count_none /* Default. Do not print counts. */ +}; + +/* Whether and how to precede the output lines with a count of the number of + times they occurred in the input. */ +static enum countmode countmode; + +/* Which lines to output: unique lines, the first of a group of + repeated lines, and the second and subsequent of a group of + repeated lines. */ +static bool output_unique; +static bool output_first_repeated; +static bool output_later_repeated; + +/* If true, ignore case when comparing. */ +static bool ignore_case; + +enum delimit_method +{ + /* No delimiters output. --all-repeated[=none] */ + DM_NONE, + + /* Delimiter precedes all groups. --all-repeated=prepend */ + DM_PREPEND, + + /* Delimit all groups. --all-repeated=separate */ + DM_SEPARATE +}; + +static char const *const delimit_method_string[] = +{ + "none", "prepend", "separate", nullptr +}; + +static enum delimit_method const delimit_method_map[] = +{ + DM_NONE, DM_PREPEND, DM_SEPARATE +}; + +/* Select whether/how to delimit groups of duplicate lines. */ +static enum delimit_method delimit_groups; + +enum grouping_method +{ + /* No grouping, when "--group" isn't used */ + GM_NONE, + + /* Delimiter precedes all groups. --group=prepend */ + GM_PREPEND, + + /* Delimiter follows all groups. --group=append */ + GM_APPEND, + + /* Delimiter between groups. --group[=separate] */ + GM_SEPARATE, + + /* Delimiter before and after each group. --group=both */ + GM_BOTH +}; + +static char const *const grouping_method_string[] = +{ + "prepend", "append", "separate", "both", nullptr +}; + +static enum grouping_method const grouping_method_map[] = +{ + GM_PREPEND, GM_APPEND, GM_SEPARATE, GM_BOTH +}; + +static enum grouping_method grouping = GM_NONE; + +enum +{ + GROUP_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + {"count", no_argument, nullptr, 'c'}, + {"repeated", no_argument, nullptr, 'd'}, + {"all-repeated", optional_argument, nullptr, 'D'}, + {"group", optional_argument, nullptr, GROUP_OPTION}, + {"ignore-case", no_argument, nullptr, 'i'}, + {"unique", no_argument, nullptr, 'u'}, + {"skip-fields", required_argument, nullptr, 'f'}, + {"skip-chars", required_argument, nullptr, 's'}, + {"check-chars", required_argument, nullptr, 'w'}, + {"zero-terminated", no_argument, nullptr, 'z'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [INPUT [OUTPUT]]\n\ +"), + program_name); + fputs (_("\ +Filter adjacent matching lines from INPUT (or standard input),\n\ +writing to OUTPUT (or standard output).\n\ +\n\ +With no options, matching lines are merged to the first occurrence.\n\ +"), stdout); + + emit_mandatory_arg_note (); + + fputs (_("\ + -c, --count prefix lines by the number of occurrences\n\ + -d, --repeated only print duplicate lines, one for each group\n\ +"), stdout); + fputs (_("\ + -D print all duplicate lines\n\ + --all-repeated[=METHOD] like -D, but allow separating groups\n\ + with an empty line;\n\ + METHOD={none(default),prepend,separate}\n\ +"), stdout); + fputs (_("\ + -f, --skip-fields=N avoid comparing the first N fields\n\ +"), stdout); + fputs (_("\ + --group[=METHOD] show all items, separating groups with an empty line;\n\ + METHOD={separate(default),prepend,append,both}\n\ +"), stdout); + fputs (_("\ + -i, --ignore-case ignore differences in case when comparing\n\ + -s, --skip-chars=N avoid comparing the first N characters\n\ + -u, --unique only print unique lines\n\ +"), stdout); + fputs (_("\ + -z, --zero-terminated line delimiter is NUL, not newline\n\ +"), stdout); + fputs (_("\ + -w, --check-chars=N compare no more than N characters in lines\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +A field is a run of blanks (usually spaces and/or TABs), then non-blank\n\ +characters. Fields are skipped before chars.\n\ +"), stdout); + fputs (_("\ +\n\ +Note: 'uniq' does not detect repeated lines unless they are adjacent.\n\ +You may want to sort the input first, or use 'sort -u' without 'uniq'.\n\ +"), stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static bool +strict_posix2 (void) +{ + int posix_ver = posix2_version (); + return 200112 <= posix_ver && posix_ver < 200809; +} + +/* Convert OPT to size_t, reporting an error using MSGID if OPT is + invalid. Silently convert too-large values to SIZE_MAX. */ + +static size_t +size_opt (char const *opt, char const *msgid) +{ + uintmax_t size; + + switch (xstrtoumax (opt, nullptr, 10, &size, "")) + { + case LONGINT_OK: + case LONGINT_OVERFLOW: + break; + + default: + error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid)); + } + + return MIN (size, SIZE_MAX); +} + +/* Given a linebuffer LINE, + return a pointer to the beginning of the line's field to be compared. */ + +ATTRIBUTE_PURE +static char * +find_field (struct linebuffer const *line) +{ + size_t count; + char const *lp = line->buffer; + size_t size = line->length - 1; + size_t i = 0; + + for (count = 0; count < skip_fields && i < size; count++) + { + while (i < size && field_sep (lp[i])) + i++; + while (i < size && !field_sep (lp[i])) + i++; + } + + i += MIN (skip_chars, size - i); + + return line->buffer + i; +} + +/* Return false if two strings OLD and NEW match, true if not. + OLD and NEW point not to the beginnings of the lines + but rather to the beginnings of the fields to compare. + OLDLEN and NEWLEN are their lengths. */ + +static bool +different (char *old, char *new, size_t oldlen, size_t newlen) +{ + if (check_chars < oldlen) + oldlen = check_chars; + if (check_chars < newlen) + newlen = check_chars; + + if (ignore_case) + return oldlen != newlen || memcasecmp (old, new, oldlen); + else + return oldlen != newlen || memcmp (old, new, oldlen); +} + +/* Output the line in linebuffer LINE to standard output + provided that the switches say it should be output. + MATCH is true if the line matches the previous line. + If requested, print the number of times it occurred, as well; + LINECOUNT + 1 is the number of times that the line occurred. */ + +static void +writeline (struct linebuffer const *line, + bool match, uintmax_t linecount) +{ + if (! (linecount == 0 ? output_unique + : !match ? output_first_repeated + : output_later_repeated)) + return; + + if (countmode == count_occurrences) + printf ("%7" PRIuMAX " ", linecount + 1); + + if (fwrite (line->buffer, sizeof (char), line->length, stdout) + != line->length) + write_error (); +} + +/* Process input file INFILE with output to OUTFILE. + If either is "-", use the standard I/O stream for it instead. */ + +static void +check_file (char const *infile, char const *outfile, char delimiter) +{ + struct linebuffer lb1, lb2; + struct linebuffer *thisline, *prevline; + + if (! (STREQ (infile, "-") || freopen (infile, "r", stdin))) + error (EXIT_FAILURE, errno, "%s", quotef (infile)); + if (! (STREQ (outfile, "-") || freopen (outfile, "w", stdout))) + error (EXIT_FAILURE, errno, "%s", quotef (outfile)); + + fadvise (stdin, FADVISE_SEQUENTIAL); + + thisline = &lb1; + prevline = &lb2; + + initbuffer (thisline); + initbuffer (prevline); + + /* The duplication in the following 'if' and 'else' blocks is an + optimization to distinguish between when we can print input + lines immediately (1. & 2.) or not. + + 1. --group => all input lines are printed. + checking for unique/duplicated lines is used only for printing + group separators. + + 2. The default case in which none of these options has been specified: + --count, --repeated, --all-repeated, --unique + In the default case, this optimization lets uniq output each different + line right away, without waiting to see if the next one is different. + + 3. All other cases. + */ + if (output_unique && output_first_repeated && countmode == count_none) + { + char *prevfield = nullptr; + size_t prevlen; + bool first_group_printed = false; + + while (!feof (stdin)) + { + char *thisfield; + size_t thislen; + bool new_group; + + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) + break; + + thisfield = find_field (thisline); + thislen = thisline->length - 1 - (thisfield - thisline->buffer); + + new_group = (!prevfield + || different (thisfield, prevfield, thislen, prevlen)); + + if (new_group && grouping != GM_NONE + && (grouping == GM_PREPEND || grouping == GM_BOTH + || (first_group_printed && (grouping == GM_APPEND + || grouping == GM_SEPARATE)))) + putchar (delimiter); + + if (new_group || grouping != GM_NONE) + { + if (fwrite (thisline->buffer, sizeof (char), thisline->length, + stdout) != thisline->length) + write_error (); + + SWAP_LINES (prevline, thisline); + prevfield = thisfield; + prevlen = thislen; + first_group_printed = true; + } + } + if ((grouping == GM_BOTH || grouping == GM_APPEND) && first_group_printed) + putchar (delimiter); + } + else + { + char *prevfield; + size_t prevlen; + uintmax_t match_count = 0; + bool first_delimiter = true; + + if (readlinebuffer_delim (prevline, stdin, delimiter) == 0) + goto closefiles; + prevfield = find_field (prevline); + prevlen = prevline->length - 1 - (prevfield - prevline->buffer); + + while (!feof (stdin)) + { + bool match; + char *thisfield; + size_t thislen; + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) + { + if (ferror (stdin)) + goto closefiles; + break; + } + thisfield = find_field (thisline); + thislen = thisline->length - 1 - (thisfield - thisline->buffer); + match = !different (thisfield, prevfield, thislen, prevlen); + match_count += match; + + if (match_count == UINTMAX_MAX) + { + if (count_occurrences) + error (EXIT_FAILURE, 0, _("too many repeated lines")); + match_count--; + } + + if (delimit_groups != DM_NONE) + { + if (!match) + { + if (match_count) /* a previous match */ + first_delimiter = false; /* Only used when DM_SEPARATE */ + } + else if (match_count == 1) + { + if ((delimit_groups == DM_PREPEND) + || (delimit_groups == DM_SEPARATE + && !first_delimiter)) + putchar (delimiter); + } + } + + if (!match || output_later_repeated) + { + writeline (prevline, match, match_count); + SWAP_LINES (prevline, thisline); + prevfield = thisfield; + prevlen = thislen; + if (!match) + match_count = 0; + } + } + + writeline (prevline, false, match_count); + } + + closefiles: + if (ferror (stdin) || fclose (stdin) != 0) + error (EXIT_FAILURE, errno, _("error reading %s"), quoteaf (infile)); + + /* stdout is handled via the atexit-invoked close_stdout function. */ + + free (lb1.buffer); + free (lb2.buffer); +} + +enum Skip_field_option_type + { + SFO_NONE, + SFO_OBSOLETE, + SFO_NEW + }; + +int +main (int argc, char **argv) +{ + int optc = 0; + bool posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr); + enum Skip_field_option_type skip_field_option_type = SFO_NONE; + unsigned int nfiles = 0; + char const *file[2]; + char delimiter = '\n'; /* change with --zero-terminated, -z */ + bool output_option_used = false; /* if true, one of -u/-d/-D/-c was used */ + + file[0] = file[1] = "-"; + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + skip_chars = 0; + skip_fields = 0; + check_chars = SIZE_MAX; + output_unique = output_first_repeated = true; + output_later_repeated = false; + countmode = count_none; + delimit_groups = DM_NONE; + + while (true) + { + /* Parse an operand with leading "+" as a file after "--" was + seen; or if pedantic and a file was seen; or if not + obsolete. */ + + if (optc == -1 + || (posixly_correct && nfiles != 0) + || ((optc = getopt_long (argc, argv, + "-0123456789Dcdf:is:uw:z", + longopts, nullptr)) + == -1)) + { + if (argc <= optind) + break; + if (nfiles == 2) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + file[nfiles++] = argv[optind++]; + } + else switch (optc) + { + case 1: + { + uintmax_t size; + if (optarg[0] == '+' + && ! strict_posix2 () + && xstrtoumax (optarg, nullptr, 10, &size, "") == LONGINT_OK + && size <= SIZE_MAX) + skip_chars = size; + else if (nfiles == 2) + { + error (0, 0, _("extra operand %s"), quote (optarg)); + usage (EXIT_FAILURE); + } + else + file[nfiles++] = optarg; + } + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + if (skip_field_option_type == SFO_NEW) + skip_fields = 0; + + if (!DECIMAL_DIGIT_ACCUMULATE (skip_fields, optc - '0', size_t)) + skip_fields = SIZE_MAX; + + skip_field_option_type = SFO_OBSOLETE; + } + break; + + case 'c': + countmode = count_occurrences; + output_option_used = true; + break; + + case 'd': + output_unique = false; + output_option_used = true; + break; + + case 'D': + output_unique = false; + output_later_repeated = true; + if (optarg == nullptr) + delimit_groups = DM_NONE; + else + delimit_groups = XARGMATCH ("--all-repeated", optarg, + delimit_method_string, + delimit_method_map); + output_option_used = true; + break; + + case GROUP_OPTION: + if (optarg == nullptr) + grouping = GM_SEPARATE; + else + grouping = XARGMATCH ("--group", optarg, + grouping_method_string, + grouping_method_map); + break; + + case 'f': + skip_field_option_type = SFO_NEW; + skip_fields = size_opt (optarg, + N_("invalid number of fields to skip")); + break; + + case 'i': + ignore_case = true; + break; + + case 's': + skip_chars = size_opt (optarg, + N_("invalid number of bytes to skip")); + break; + + case 'u': + output_first_repeated = false; + output_option_used = true; + break; + + case 'w': + check_chars = size_opt (optarg, + N_("invalid number of bytes to compare")); + break; + + case 'z': + delimiter = '\0'; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + /* Note we could allow --group with -D at least, and that would + avoid the need to specify a grouping method to --all-repeated. + It was thought best to avoid deprecating those parameters though + and keep --group separate to other options. */ + if (grouping != GM_NONE && output_option_used) + { + error (0, 0, _("--group is mutually exclusive with -c/-d/-D/-u")); + usage (EXIT_FAILURE); + } + + if (grouping != GM_NONE && countmode != count_none) + { + error (0, 0, + _("grouping and printing repeat counts is meaningless")); + usage (EXIT_FAILURE); + } + + if (countmode == count_occurrences && output_later_repeated) + { + error (0, 0, + _("printing all duplicated lines and repeat counts is meaningless")); + usage (EXIT_FAILURE); + } + + check_file (file[0], file[1], delimiter); + + return EXIT_SUCCESS; +} diff --git a/src/unlink.c b/src/unlink.c new file mode 100644 index 0000000..3bf7a42 --- /dev/null +++ b/src/unlink.c @@ -0,0 +1,86 @@ +/* unlink utility for GNU. + Copyright (C) 2001-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Michael Stone */ + +/* Implementation overview: + + Simply call the system 'unlink' function */ + +#include +#include +#include + +#include "system.h" +#include "long-options.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "unlink" + +#define AUTHORS proper_name ("Michael Stone") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s FILE\n\ + or: %s OPTION\n"), program_name, program_name); + fputs (_("Call the unlink function to remove the specified FILE.\n\n"), + stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (argc < optind + 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + if (optind + 1 < argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + if (unlink (argv[optind]) != 0) + error (EXIT_FAILURE, errno, _("cannot unlink %s"), quoteaf (argv[optind])); + + return EXIT_SUCCESS; +} diff --git a/src/uptime.c b/src/uptime.c new file mode 100644 index 0000000..ec7f2f7 --- /dev/null +++ b/src/uptime.c @@ -0,0 +1,206 @@ +/* GNU's uptime. + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Created by hacking who.c by Kaveh Ghazi ghazi@caip.rutgers.edu. */ + +#include + +#include +#include +#include + +#include "system.h" + +#include "long-options.h" +#include "quote.h" +#include "readutmp.h" +#include "fprintftime.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "uptime" + +#define AUTHORS \ + proper_name ("Joseph Arceneaux"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Kaveh Ghazi") + +static int +print_uptime (idx_t n, struct gl_utmp const *utmp_buf) +{ + int status = EXIT_SUCCESS; + time_t boot_time = 0; + + /* Loop through all the utmp entries we just read and count up the valid + ones, also in the process possibly gleaning boottime. */ + idx_t entries = 0; + for (idx_t i = 0; i < n; i++) + { + struct gl_utmp const *this = &utmp_buf[i]; + entries += IS_USER_PROCESS (this); + if (UT_TYPE_BOOT_TIME (this)) + boot_time = this->ut_ts.tv_sec; + } + /* The gnulib module 'readutmp' is supposed to provide a BOOT_TIME entry + on all platforms. */ + if (boot_time == 0) + { + error (0, errno, _("couldn't get boot time")); + status = EXIT_FAILURE; + } + + time_t time_now = time (nullptr); + struct tm *tmn = time_now == (time_t) -1 ? nullptr : localtime (&time_now); + /* procps' version of uptime also prints the seconds field, but + previous versions of coreutils don't. */ + if (tmn) + /* TRANSLATORS: This prints the current clock time. */ + fprintftime (stdout, _(" %H:%M:%S "), tmn, 0, 0); + else + { + printf (_(" ??:???? ")); + status = EXIT_FAILURE; + } + + intmax_t uptime; + if (time_now == (time_t) -1 || boot_time == 0 + || ckd_sub (&uptime, time_now, boot_time) || uptime < 0) + { + printf (_("up ???? days ??:??, ")); + status = EXIT_FAILURE; + } + else + { + intmax_t updays = uptime / 86400; + int uphours = uptime % 86400 / 3600; + int upmins = uptime % 86400 % 3600 / 60; + if (0 < updays) + printf (ngettext ("up %"PRIdMAX" day %2d:%02d, ", + "up %"PRIdMAX" days %2d:%02d, ", + select_plural (updays)), + updays, uphours, upmins); + else + printf (_("up %2d:%02d, "), uphours, upmins); + } + + printf (ngettext ("%td user", "%td users", select_plural (entries)), + entries); + + double avg[3]; + int loads = getloadavg (avg, 3); + + if (loads == -1) + putchar ('\n'); + else + { + if (loads > 0) + printf (_(", load average: %.2f"), avg[0]); + if (loads > 1) + printf (", %.2f", avg[1]); + if (loads > 2) + printf (", %.2f", avg[2]); + if (loads > 0) + putchar ('\n'); + } + + return status; +} + +/* Display the system uptime and the number of users on the system, + according to utmp file FILENAME. Use read_utmp OPTIONS to read the + utmp file. */ + +static _Noreturn void +uptime (char const *filename, int options) +{ + idx_t n_users; + struct gl_utmp *utmp_buf; + int read_utmp_status = (read_utmp (filename, &n_users, &utmp_buf, options) < 0 + ? EXIT_FAILURE : EXIT_SUCCESS); + if (read_utmp_status != EXIT_SUCCESS) + { + error (0, errno, "%s", quotef (filename)); + n_users = 0; + utmp_buf = nullptr; + } + + int print_uptime_status = print_uptime (n_users, utmp_buf); + exit (MAX (read_utmp_status, print_uptime_status)); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]\n"), program_name); + printf (_("\ +Print the current time, the length of time the system has been up,\n\ +the number of users on the system, and the average number of jobs\n\ +in the run queue over the last 1, 5 and 15 minutes.")); +#ifdef __linux__ + /* It would be better to introduce a configure test for this, + but such a test is hard to write. For the moment then, we + have a hack which depends on the preprocessor used at compile + time to tell us what the running kernel is. Ugh. */ + printf (_(" \ +Processes in\n\ +an uninterruptible sleep state also contribute to the load average.\n")); +#else + printf (_("\n")); +#endif + printf (_("\ +If FILE is not specified, use %s. %s as FILE is common.\n\ +\n"), + UTMP_FILE, WTMP_FILE); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + switch (argc - optind) + { + case 0: /* uptime */ + uptime (UTMP_FILE, READ_UTMP_CHECK_PIDS); + break; + + case 1: /* uptime */ + uptime (argv[optind], 0); + break; + + default: /* lose */ + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } +} diff --git a/src/users.c b/src/users.c new file mode 100644 index 0000000..f89fe65 --- /dev/null +++ b/src/users.c @@ -0,0 +1,148 @@ +/* GNU's users. + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by jla; revised by djm */ + +#include +#include + +#include +#include "system.h" + +#include "long-options.h" +#include "quote.h" +#include "readutmp.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "users" + +#define AUTHORS \ + proper_name ("Joseph Arceneaux"), \ + proper_name ("David MacKenzie") + +static int +userid_compare (const void *v_a, const void *v_b) +{ + char **a = (char **) v_a; + char **b = (char **) v_b; + return strcmp (*a, *b); +} + +static void +list_entries_users (idx_t n, struct gl_utmp const *this) +{ + char **u = xinmalloc (n, sizeof *u); + idx_t i; + idx_t n_entries = 0; + + while (n--) + { + if (IS_USER_PROCESS (this)) + { + char *trimmed_name; + + trimmed_name = extract_trimmed_name (this); + + u[n_entries] = trimmed_name; + ++n_entries; + } + this++; + } + + qsort (u, n_entries, sizeof (u[0]), userid_compare); + + for (i = 0; i < n_entries; i++) + { + char c = (i < n_entries - 1 ? ' ' : '\n'); + fputs (u[i], stdout); + putchar (c); + } + + for (i = 0; i < n_entries; i++) + free (u[i]); + free (u); +} + +/* Display a list of users on the system, according to utmp file FILENAME. + Use read_utmp OPTIONS to read FILENAME. */ + +static void +users (char const *filename, int options) +{ + idx_t n_users; + struct gl_utmp *utmp_buf; + options |= READ_UTMP_USER_PROCESS; + if (read_utmp (filename, &n_users, &utmp_buf, options) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (filename)); + + list_entries_users (n_users, utmp_buf); + + free (utmp_buf); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [FILE]\n"), program_name); + printf (_("\ +Output who is currently logged in according to FILE.\n\ +If FILE is not specified, use %s. %s as FILE is common.\n\ +\n\ +"), + UTMP_FILE, WTMP_FILE); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + switch (argc - optind) + { + case 0: /* users */ + users (UTMP_FILE, READ_UTMP_CHECK_PIDS); + break; + + case 1: /* users */ + users (argv[optind], 0); + break; + + default: /* lose */ + error (0, 0, _("extra operand %s"), quote (argv[optind + 1])); + usage (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/src/wc.c b/src/wc.c new file mode 100644 index 0000000..29114c8 --- /dev/null +++ b/src/wc.c @@ -0,0 +1,1033 @@ +/* wc - print the number of lines, words, and bytes in files + Copyright (C) 1985-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Paul Rubin, phr@ocf.berkeley.edu + and David MacKenzie, djm@gnu.ai.mit.edu. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "system.h" +#include "assure.h" +#include "argmatch.h" +#include "argv-iter.h" +#include "fadvise.h" +#include "mbchar.h" +#include "physmem.h" +#include "readtokens0.h" +#include "safe-read.h" +#include "stat-size.h" +#include "xbinary-io.h" + +#if !defined iswspace && !HAVE_ISWSPACE +# define iswspace(wc) \ + ((wc) == to_uchar (wc) && isspace (to_uchar (wc))) +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "wc" + +#define AUTHORS \ + proper_name ("Paul Rubin"), \ + proper_name ("David MacKenzie") + +/* Size of atomic reads. */ +#define BUFFER_SIZE (16 * 1024) + +#ifdef USE_AVX2_WC_LINECOUNT +/* From wc_avx2.c */ +extern bool +wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out, + uintmax_t *bytes_out); +#endif + +static bool debug; + +/* Cumulative number of lines, words, chars and bytes in all files so far. + max_line_length is the maximum over all files processed so far. */ +static uintmax_t total_lines; +static uintmax_t total_words; +static uintmax_t total_chars; +static uintmax_t total_bytes; +static uintmax_t total_lines_overflow; +static uintmax_t total_words_overflow; +static uintmax_t total_chars_overflow; +static uintmax_t total_bytes_overflow; +static uintmax_t max_line_length; + +/* Which counts to print. */ +static bool print_lines, print_words, print_chars, print_bytes; +static bool print_linelength; + +/* The print width of each count. */ +static int number_width; + +/* True if we have ever read the standard input. */ +static bool have_read_stdin; + +/* Used to determine if file size can be determined without reading. */ +static size_t page_size; + +/* Enable to _not_ treat non breaking space as a word separator. */ +static bool posixly_correct; + +/* The result of calling fstat or stat on a file descriptor or file. */ +struct fstatus +{ + /* If positive, fstat or stat has not been called yet. Otherwise, + this is the value returned from fstat or stat. */ + int failed; + + /* If FAILED is zero, this is the file's status. */ + struct stat st; +}; + +/* For long options that have no equivalent short option, use a + non-character as a pseudo short option, starting with CHAR_MAX + 1. */ +enum +{ + DEBUG_PROGRAM_OPTION = CHAR_MAX + 1, + FILES0_FROM_OPTION, + TOTAL_OPTION, +}; + +static struct option const longopts[] = +{ + {"bytes", no_argument, nullptr, 'c'}, + {"chars", no_argument, nullptr, 'm'}, + {"lines", no_argument, nullptr, 'l'}, + {"words", no_argument, nullptr, 'w'}, + {"debug", no_argument, nullptr, DEBUG_PROGRAM_OPTION}, + {"files0-from", required_argument, nullptr, FILES0_FROM_OPTION}, + {"max-line-length", no_argument, nullptr, 'L'}, + {"total", required_argument, nullptr, TOTAL_OPTION}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +enum total_type + { + total_auto, /* 0: default or --total=auto */ + total_always, /* 1: --total=always */ + total_only, /* 2: --total=only */ + total_never /* 3: --total=never */ + }; +static char const *const total_args[] = +{ + "auto", "always", "only", "never", nullptr +}; +static enum total_type const total_types[] = +{ + total_auto, total_always, total_only, total_never +}; +ARGMATCH_VERIFY (total_args, total_types); +static enum total_type total_mode = total_auto; + +#ifdef USE_AVX2_WC_LINECOUNT +static bool +avx2_supported (void) +{ + bool avx_enabled = 0 < __builtin_cpu_supports ("avx2"); + + if (debug) + error (0, 0, (avx_enabled + ? _("using avx2 hardware support") + : _("avx2 support not detected"))); + + return avx_enabled; +} +#endif + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ + or: %s [OPTION]... --files0-from=F\n\ +"), + program_name, program_name); + fputs (_("\ +Print newline, word, and byte counts for each FILE, and a total line if\n\ +more than one FILE is specified. A word is a non-zero-length sequence of\n\ +printable characters delimited by white space.\n\ +"), stdout); + + emit_stdin_note (); + + fputs (_("\ +\n\ +The options below may be used to select which counts are printed, always in\n\ +the following order: newline, word, character, byte, maximum line length.\n\ + -c, --bytes print the byte counts\n\ + -m, --chars print the character counts\n\ + -l, --lines print the newline counts\n\ +"), stdout); + fputs (_("\ + --files0-from=F read input from the files specified by\n\ + NUL-terminated names in file F;\n\ + If F is - then read names from standard input\n\ + -L, --max-line-length print the maximum display width\n\ + -w, --words print the word counts\n\ +"), stdout); + fputs (_("\ + --total=WHEN when to print a line with total counts;\n\ + WHEN can be: auto, always, only, never\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +/* Return non zero if a non breaking space. */ +ATTRIBUTE_PURE +static int +iswnbspace (wint_t wc) +{ + return ! posixly_correct + && (wc == 0x00A0 || wc == 0x2007 + || wc == 0x202F || wc == 0x2060); +} + +static int +isnbspace (int c) +{ + return iswnbspace (btowc (c)); +} + +/* FILE is the name of the file (or null for standard input) + associated with the specified counters. */ +static void +write_counts (uintmax_t lines, + uintmax_t words, + uintmax_t chars, + uintmax_t bytes, + uintmax_t linelength, + char const *file) +{ + static char const format_sp_int[] = " %*s"; + char const *format_int = format_sp_int + 1; + char buf[INT_BUFSIZE_BOUND (uintmax_t)]; + + if (print_lines) + { + printf (format_int, number_width, umaxtostr (lines, buf)); + format_int = format_sp_int; + } + if (print_words) + { + printf (format_int, number_width, umaxtostr (words, buf)); + format_int = format_sp_int; + } + if (print_chars) + { + printf (format_int, number_width, umaxtostr (chars, buf)); + format_int = format_sp_int; + } + if (print_bytes) + { + printf (format_int, number_width, umaxtostr (bytes, buf)); + format_int = format_sp_int; + } + if (print_linelength) + { + printf (format_int, number_width, umaxtostr (linelength, buf)); + } + if (file) + printf (" %s", strchr (file, '\n') ? quotef (file) : file); + putchar ('\n'); +} + +static bool +wc_lines (char const *file, int fd, uintmax_t *lines_out, uintmax_t *bytes_out) +{ + size_t bytes_read; + uintmax_t lines, bytes; + char buf[BUFFER_SIZE + 1]; + bool long_lines = false; + + if (!lines_out || !bytes_out) + { + return false; + } + + lines = bytes = 0; + + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + return false; + } + + bytes += bytes_read; + + char *p = buf; + char *end = buf + bytes_read; + uintmax_t plines = lines; + + if (! long_lines) + { + /* Avoid function call overhead for shorter lines. */ + while (p != end) + lines += *p++ == '\n'; + } + else + { + /* rawmemchr is more efficient with longer lines. */ + *end = '\n'; + while ((p = rawmemchr (p, '\n')) < end) + { + ++p; + ++lines; + } + } + + /* If the average line length in the block is >= 15, then use + memchr for the next block, where system specific optimizations + may outweigh function call overhead. + FIXME: This line length was determined in 2015, on both + x86_64 and ppc64, but it's worth re-evaluating in future with + newer compilers, CPUs, or memchr() implementations etc. */ + if (lines - plines <= bytes_read / 15) + long_lines = true; + else + long_lines = false; + } + + *bytes_out = bytes; + *lines_out = lines; + + return true; +} + +/* Count words. FILE_X is the name of the file (or null for standard + input) that is open on descriptor FD. *FSTATUS is its status. + CURRENT_POS is the current file offset if known, negative if unknown. + Return true if successful. */ +static bool +wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) +{ + bool ok = true; + char buf[BUFFER_SIZE + 1]; + size_t bytes_read; + uintmax_t lines, words, chars, bytes, linelength; + bool count_bytes, count_chars, count_complicated; + char const *file = file_x ? file_x : _("standard input"); + + lines = words = chars = bytes = linelength = 0; + + /* If in the current locale, chars are equivalent to bytes, we prefer + counting bytes, because that's easier. */ +#if MB_LEN_MAX > 1 + if (MB_CUR_MAX > 1) + { + count_bytes = print_bytes; + count_chars = print_chars; + } + else +#endif + { + count_bytes = print_bytes || print_chars; + count_chars = false; + } + count_complicated = print_words || print_linelength; + + /* Advise the kernel of our access pattern only if we will read(). */ + if (!count_bytes || count_chars || print_lines || count_complicated) + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + + /* When counting only bytes, save some line- and word-counting + overhead. If FD is a 'regular' Unix file, using lseek is enough + to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE + bytes at a time until EOF. Note that the 'size' (number of bytes) + that wc reports is smaller than stats.st_size when the file is not + positioned at its beginning. That's why the lseek calls below are + necessary. For example the command + '(dd ibs=99k skip=1 count=0; ./wc -c) < /etc/group' + should make wc report '0' bytes. */ + + if (count_bytes && !count_chars && !print_lines && !count_complicated) + { + bool skip_read = false; + + if (0 < fstatus->failed) + fstatus->failed = fstat (fd, &fstatus->st); + + /* For sized files, seek to one st_blksize before EOF rather than to EOF. + This works better for files in proc-like file systems where + the size is only approximate. */ + if (! fstatus->failed && usable_st_size (&fstatus->st) + && 0 <= fstatus->st.st_size) + { + off_t end_pos = fstatus->st.st_size; + if (current_pos < 0) + current_pos = lseek (fd, 0, SEEK_CUR); + + if (end_pos % page_size) + { + /* We only need special handling of /proc and /sys files etc. + when they're a multiple of PAGE_SIZE. In the common case + for files with st_size not a multiple of PAGE_SIZE, + it's more efficient and accurate to use st_size. + + Be careful here. The current position may actually be + beyond the end of the file. As in the example above. */ + + bytes = end_pos < current_pos ? 0 : end_pos - current_pos; + if (bytes && 0 <= lseek (fd, bytes, SEEK_CUR)) + skip_read = true; + else + bytes = 0; + } + else + { + off_t hi_pos = end_pos - end_pos % (ST_BLKSIZE (fstatus->st) + 1); + if (0 <= current_pos && current_pos < hi_pos + && 0 <= lseek (fd, hi_pos, SEEK_CUR)) + bytes = hi_pos - current_pos; + } + } + + if (! skip_read) + { + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + bytes += bytes_read; + } + } + } + else if (!count_chars && !count_complicated) + { +#ifdef USE_AVX2_WC_LINECOUNT + static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *); + if (!wc_lines_p) + wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines; +#else + bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *) + = wc_lines; +#endif + + /* Use a separate loop when counting only lines or lines and bytes -- + but not chars or words. */ + ok = wc_lines_p (file, fd, &lines, &bytes); + } +#if MB_LEN_MAX > 1 +# define SUPPORT_OLD_MBRTOWC 1 + else if (MB_CUR_MAX > 1) + { + bool in_word = false; + uintmax_t linepos = 0; + mbstate_t state = { 0, }; + bool in_shift = false; +# if SUPPORT_OLD_MBRTOWC + /* Back-up the state before each multibyte character conversion and + move the last incomplete character of the buffer to the front + of the buffer. This is needed because we don't know whether + the 'mbrtowc' function updates the state when it returns -2, -- + this is the ISO C 99 and glibc-2.2 behavior - or not - amended + ANSI C, glibc-2.1 and Solaris 5.7 behavior. We don't have an + autoconf test for this, yet. */ + size_t prev = 0; /* number of bytes carried over from previous round */ +# else + const size_t prev = 0; +# endif + + while ((bytes_read = safe_read (fd, buf + prev, BUFFER_SIZE - prev)) > 0) + { + char const *p; +# if SUPPORT_OLD_MBRTOWC + mbstate_t backup_state; +# endif + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + p = buf; + bytes_read += prev; + do + { + wchar_t wide_char; + size_t n; + bool wide = true; + + if (!in_shift && is_basic (*p)) + { + /* Handle most ASCII characters quickly, without calling + mbrtowc(). */ + n = 1; + wide_char = *p; + wide = false; + } + else + { + in_shift = true; +# if SUPPORT_OLD_MBRTOWC + backup_state = state; +# endif + n = mbrtowc (&wide_char, p, bytes_read, &state); + if (n == (size_t) -2) + { +# if SUPPORT_OLD_MBRTOWC + state = backup_state; +# endif + break; + } + if (n == (size_t) -1) + { + /* Remember that we read a byte, but don't complain + about the error. Because of the decoding error, + this is a considered to be byte but not a + character (that is, chars is not incremented). */ + p++; + bytes_read--; + continue; + } + if (mbsinit (&state)) + in_shift = false; + if (n == 0) + { + wide_char = 0; + n = 1; + } + } + + switch (wide_char) + { + case '\n': + lines++; + FALLTHROUGH; + case '\r': + case '\f': + if (linepos > linelength) + linelength = linepos; + linepos = 0; + goto mb_word_separator; + case '\t': + linepos += 8 - (linepos % 8); + goto mb_word_separator; + case ' ': + linepos++; + FALLTHROUGH; + case '\v': + mb_word_separator: + words += in_word; + in_word = false; + break; + default: + if (wide && iswprint (wide_char)) + { + /* wcwidth can be expensive on OSX for example, + so avoid if not needed. */ + if (print_linelength) + { + int width = wcwidth (wide_char); + if (width > 0) + linepos += width; + } + if (iswspace (wide_char) || iswnbspace (wide_char)) + goto mb_word_separator; + in_word = true; + } + else if (!wide && isprint (to_uchar (*p))) + { + linepos++; + if (isspace (to_uchar (*p))) + goto mb_word_separator; + in_word = true; + } + break; + } + + p += n; + bytes_read -= n; + chars++; + } + while (bytes_read > 0); + +# if SUPPORT_OLD_MBRTOWC + if (bytes_read > 0) + { + if (bytes_read == BUFFER_SIZE) + { + /* Encountered a very long redundant shift sequence. */ + p++; + bytes_read--; + } + memmove (buf, p, bytes_read); + } + prev = bytes_read; +# endif + } + if (linepos > linelength) + linelength = linepos; + words += in_word; + } +#endif + else + { + bool in_word = false; + uintmax_t linepos = 0; + + while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) + { + char const *p = buf; + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + ok = false; + break; + } + + bytes += bytes_read; + do + { + switch (*p++) + { + case '\n': + lines++; + FALLTHROUGH; + case '\r': + case '\f': + if (linepos > linelength) + linelength = linepos; + linepos = 0; + goto word_separator; + case '\t': + linepos += 8 - (linepos % 8); + goto word_separator; + case ' ': + linepos++; + FALLTHROUGH; + case '\v': + word_separator: + words += in_word; + in_word = false; + break; + default: + if (isprint (to_uchar (p[-1]))) + { + linepos++; + if (isspace (to_uchar (p[-1])) + || isnbspace (to_uchar (p[-1]))) + goto word_separator; + in_word = true; + } + break; + } + } + while (--bytes_read); + } + if (linepos > linelength) + linelength = linepos; + words += in_word; + } + + if (count_chars < print_chars) + chars = bytes; + + if (total_mode != total_only) + write_counts (lines, words, chars, bytes, linelength, file_x); + + if (ckd_add (&total_lines, total_lines, lines)) + total_lines_overflow = true; + if (ckd_add (&total_words, total_words, words)) + total_words_overflow = true; + if (ckd_add (&total_chars, total_chars, chars)) + total_chars_overflow = true; + if (ckd_add (&total_bytes, total_bytes, bytes)) + total_bytes_overflow = true; + + if (linelength > max_line_length) + max_line_length = linelength; + + return ok; +} + +static bool +wc_file (char const *file, struct fstatus *fstatus) +{ + if (! file || STREQ (file, "-")) + { + have_read_stdin = true; + xset_binary_mode (STDIN_FILENO, O_BINARY); + return wc (STDIN_FILENO, file, fstatus, -1); + } + else + { + int fd = open (file, O_RDONLY | O_BINARY); + if (fd == -1) + { + error (0, errno, "%s", quotef (file)); + return false; + } + else + { + bool ok = wc (fd, file, fstatus, 0); + if (close (fd) != 0) + { + error (0, errno, "%s", quotef (file)); + return false; + } + return ok; + } + } +} + +/* Return the file status for the NFILES files addressed by FILE. + Optimize the case where only one number is printed, for just one + file; in that case we can use a print width of 1, so we don't need + to stat the file. Handle the case of (nfiles == 0) in the same way; + that happens when we don't know how long the list of file names will be. */ + +static struct fstatus * +get_input_fstatus (size_t nfiles, char *const *file) +{ + struct fstatus *fstatus = xnmalloc (nfiles ? nfiles : 1, sizeof *fstatus); + + if (nfiles == 0 + || (nfiles == 1 + && ((print_lines + print_words + print_chars + + print_bytes + print_linelength) + == 1))) + fstatus[0].failed = 1; + else + { + for (size_t i = 0; i < nfiles; i++) + fstatus[i].failed = (! file[i] || STREQ (file[i], "-") + ? fstat (STDIN_FILENO, &fstatus[i].st) + : stat (file[i], &fstatus[i].st)); + } + + return fstatus; +} + +/* Return a print width suitable for the NFILES files whose status is + recorded in FSTATUS. Optimize the same special case that + get_input_fstatus optimizes. */ + +ATTRIBUTE_PURE +static int +compute_number_width (size_t nfiles, struct fstatus const *fstatus) +{ + int width = 1; + + if (0 < nfiles && fstatus[0].failed <= 0) + { + int minimum_width = 1; + uintmax_t regular_total = 0; + + for (size_t i = 0; i < nfiles; i++) + if (! fstatus[i].failed) + { + if (S_ISREG (fstatus[i].st.st_mode)) + regular_total += fstatus[i].st.st_size; + else + minimum_width = 7; + } + + for (; 10 <= regular_total; regular_total /= 10) + width++; + if (width < minimum_width) + width = minimum_width; + } + + return width; +} + + +int +main (int argc, char **argv) +{ + bool ok; + int optc; + size_t nfiles; + char **files; + char *files_from = nullptr; + struct fstatus *fstatus; + struct Tokens tok; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + page_size = getpagesize (); + /* Line buffer stdout to ensure lines are written atomically and immediately + so that processes running in parallel do not intersperse their output. */ + setvbuf (stdout, nullptr, _IOLBF, 0); + + posixly_correct = (getenv ("POSIXLY_CORRECT") != nullptr); + + print_lines = print_words = print_chars = print_bytes = false; + print_linelength = false; + total_lines = total_words = total_chars = total_bytes = max_line_length = 0; + + while ((optc = getopt_long (argc, argv, "clLmw", longopts, nullptr)) != -1) + switch (optc) + { + case 'c': + print_bytes = true; + break; + + case 'm': + print_chars = true; + break; + + case 'l': + print_lines = true; + break; + + case 'w': + print_words = true; + break; + + case 'L': + print_linelength = true; + break; + + case DEBUG_PROGRAM_OPTION: + debug = true; + break; + + case FILES0_FROM_OPTION: + files_from = optarg; + break; + + case TOTAL_OPTION: + total_mode = XARGMATCH ("--total", optarg, total_args, total_types); + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + + if (! (print_lines || print_words || print_chars || print_bytes + || print_linelength)) + print_lines = print_words = print_bytes = true; + + bool read_tokens = false; + struct argv_iterator *ai; + if (files_from) + { + FILE *stream; + + /* When using --files0-from=F, you may not specify any files + on the command-line. */ + if (optind < argc) + { + error (0, 0, _("extra operand %s"), quoteaf (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (EXIT_FAILURE); + } + + if (STREQ (files_from, "-")) + stream = stdin; + else + { + stream = fopen (files_from, "r"); + if (stream == nullptr) + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (files_from)); + } + + /* Read the file list into RAM if we can detect its size and that + size is reasonable. Otherwise, we'll read a name at a time. */ + struct stat st; + if (fstat (fileno (stream), &st) == 0 + && S_ISREG (st.st_mode) + && st.st_size <= MIN (10 * 1024 * 1024, physmem_available () / 2)) + { + read_tokens = true; + readtokens0_init (&tok); + if (! readtokens0 (stream, &tok) || fclose (stream) != 0) + error (EXIT_FAILURE, 0, _("cannot read file names from %s"), + quoteaf (files_from)); + files = tok.tok; + nfiles = tok.n_tok; + ai = argv_iter_init_argv (files); + } + else + { + files = nullptr; + nfiles = 0; + ai = argv_iter_init_stream (stream); + } + } + else + { + static char *stdin_only[] = { nullptr }; + files = (optind < argc ? argv + optind : stdin_only); + nfiles = (optind < argc ? argc - optind : 1); + ai = argv_iter_init_argv (files); + } + + if (!ai) + xalloc_die (); + + fstatus = get_input_fstatus (nfiles, files); + if (total_mode == total_only) + number_width = 1; /* No extra padding, since no alignment requirement. */ + else + number_width = compute_number_width (nfiles, fstatus); + + ok = true; + for (int i = 0; /* */; i++) + { + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotef (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + affirm (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | wc --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (file_name)); + skip_file = true; + } + + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == nullptr) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + unsigned long int file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%lu: %s", quotef (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + ok = false; + else + ok &= wc_file (file_name, &fstatus[nfiles ? i : 0]); + + if (! nfiles) + fstatus[0].failed = 1; + } + argv_iter_done: + + /* No arguments on the command line is fine. That means read from stdin. + However, no arguments on the --files0-from input stream is an error + means don't read anything. */ + if (ok && !files_from && argv_iter_n_args (ai) == 0) + ok &= wc_file (nullptr, &fstatus[0]); + + if (read_tokens) + readtokens0_free (&tok); + + if (total_mode != total_never + && (total_mode != total_auto || 1 < argv_iter_n_args (ai))) + { + if (total_lines_overflow) + { + total_lines = UINTMAX_MAX; + error (0, EOVERFLOW, _("total lines")); + ok = false; + } + if (total_words_overflow) + { + total_words = UINTMAX_MAX; + error (0, EOVERFLOW, _("total words")); + ok = false; + } + if (total_chars_overflow) + { + total_chars = UINTMAX_MAX; + error (0, EOVERFLOW, _("total characters")); + ok = false; + } + if (total_bytes_overflow) + { + total_bytes = UINTMAX_MAX; + error (0, EOVERFLOW, _("total bytes")); + ok = false; + } + + write_counts (total_lines, total_words, total_chars, total_bytes, + max_line_length, + total_mode != total_only ? _("total") : nullptr); + } + + argv_iter_free (ai); + + free (fstatus); + + if (have_read_stdin && close (STDIN_FILENO) != 0) + error (EXIT_FAILURE, errno, "-"); + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/wc_avx2.c b/src/wc_avx2.c new file mode 100644 index 0000000..eff7972 --- /dev/null +++ b/src/wc_avx2.c @@ -0,0 +1,121 @@ +/* wc_avx - Count the number of newlines with avx2 instructions. + Copyright (C) 2021-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "system.h" +#include "safe-read.h" + +#include + +/* This must be below 16 KB (16384) or else the accumulators can + theoretically overflow, producing wrong result. This is 2*32 bytes below, + so there is no single bytes in the optimal case. */ +#define BUFSIZE (16320) + +extern bool +wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out, + uintmax_t *bytes_out); + +extern bool +wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out, + uintmax_t *bytes_out) +{ + __m256i accumulator; + __m256i accumulator2; + __m256i zeroes; + __m256i endlines; + __m256i avx_buf[BUFSIZE / sizeof (__m256i)]; + __m256i *datap; + uintmax_t lines = 0; + uintmax_t bytes = 0; + size_t bytes_read = 0; + + + if (!lines_out || !bytes_out) + return false; + + /* Using two parallel accumulators gave a good performance increase. + Adding a third gave no additional benefit, at least on an + Intel Xeon E3-1231v3. Maybe on a newer CPU with additional vector + execution engines it would be a win. */ + accumulator = _mm256_setzero_si256 (); + accumulator2 = _mm256_setzero_si256 (); + zeroes = _mm256_setzero_si256 (); + endlines = _mm256_set1_epi8 ('\n'); + + while ((bytes_read = safe_read (fd, avx_buf, sizeof (avx_buf))) > 0) + { + __m256i to_match; + __m256i to_match2; + __m256i matches; + __m256i matches2; + + if (bytes_read == SAFE_READ_ERROR) + { + error (0, errno, "%s", quotef (file)); + return false; + } + + bytes += bytes_read; + + datap = avx_buf; + char *end = ((char *)avx_buf) + bytes_read; + + while (bytes_read >= 64) + { + to_match = _mm256_load_si256 (datap); + to_match2 = _mm256_load_si256 (datap + 1); + + matches = _mm256_cmpeq_epi8 (to_match, endlines); + matches2 = _mm256_cmpeq_epi8 (to_match2, endlines); + /* Compare will set each 8 bit integer in the register to 0xFF + on match. When we subtract it the 8 bit accumulators + will underflow, so this is equal to adding 1. */ + accumulator = _mm256_sub_epi8 (accumulator, matches); + accumulator2 = _mm256_sub_epi8 (accumulator2, matches2); + + datap += 2; + bytes_read -= 64; + } + + /* Horizontally add all 8 bit integers in the register, + and then reset it */ + accumulator = _mm256_sad_epu8 (accumulator, zeroes); + lines += _mm256_extract_epi16 (accumulator, 0) + + _mm256_extract_epi16 (accumulator, 4) + + _mm256_extract_epi16 (accumulator, 8) + + _mm256_extract_epi16 (accumulator, 12); + accumulator = _mm256_setzero_si256 (); + + accumulator2 = _mm256_sad_epu8 (accumulator2, zeroes); + lines += _mm256_extract_epi16 (accumulator2, 0) + + _mm256_extract_epi16 (accumulator2, 4) + + _mm256_extract_epi16 (accumulator2, 8) + + _mm256_extract_epi16 (accumulator2, 12); + accumulator2 = _mm256_setzero_si256 (); + + /* Finish up any left over bytes */ + char *p = (char *)datap; + while (p != end) + lines += *p++ == '\n'; + } + + *lines_out = lines; + *bytes_out = bytes; + + return true; +} diff --git a/src/who.c b/src/who.c new file mode 100644 index 0000000..c235f94 --- /dev/null +++ b/src/who.c @@ -0,0 +1,834 @@ +/* GNU's who. + Copyright (C) 1992-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by jla; revised by djm; revised again by mstone */ + +/* Output format: + name [state] line time [activity] [pid] [comment] [exit] + state: -T + name, line, time: not -q + idle: -u +*/ + +#include +#include +#include +#include + +#include +#include "system.h" + +#include "c-ctype.h" +#include "canon-host.h" +#include "readutmp.h" +#include "hard-locale.h" +#include "quote.h" + +#ifdef TTY_GROUP_NAME +# include +#endif + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "who" + +#define AUTHORS \ + proper_name ("Joseph Arceneaux"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Michael Stone") + +#ifdef RUN_LVL +# define UT_TYPE_RUN_LVL(U) ((U)->ut_type == RUN_LVL) +#else +# define UT_TYPE_RUN_LVL(U) false +#endif + +#ifdef INIT_PROCESS +# define UT_TYPE_INIT_PROCESS(U) ((U)->ut_type == INIT_PROCESS) +#else +# define UT_TYPE_INIT_PROCESS(U) false +#endif + +#ifdef LOGIN_PROCESS +# define UT_TYPE_LOGIN_PROCESS(U) ((U)->ut_type == LOGIN_PROCESS) +#else +# define UT_TYPE_LOGIN_PROCESS(U) false +#endif + +#ifdef DEAD_PROCESS +# define UT_TYPE_DEAD_PROCESS(U) ((U)->ut_type == DEAD_PROCESS) +#else +# define UT_TYPE_DEAD_PROCESS(U) false +#endif + +#ifdef NEW_TIME +# define UT_TYPE_NEW_TIME(U) ((U)->ut_type == NEW_TIME) +#else +# define UT_TYPE_NEW_TIME(U) false +#endif + +#define IDLESTR_LEN 6 + +#if HAVE_STRUCT_XTMP_UT_PID +# define PIDSTR_DECL_AND_INIT(Var, Utmp_ent) \ + char Var[INT_STRLEN_BOUND (Utmp_ent->ut_pid) + 1]; \ + sprintf (Var, "%ld", (long int) (Utmp_ent->ut_pid)) +#else +# define PIDSTR_DECL_AND_INIT(Var, Utmp_ent) \ + char const *Var = "" +#endif + +#if HAVE_STRUCT_XTMP_UT_ID +# define UT_ID(U) ((U)->ut_id) +#else +# define UT_ID(U) "??" +#endif + +/* If true, attempt to canonicalize hostnames via a DNS lookup. */ +static bool do_lookup; + +/* If true, display only a list of usernames and count of + the users logged on. + Ignored for 'who am i'. */ +static bool short_list; + +/* If true, display only name, line, and time fields. */ +static bool short_output; + +/* If true, display the hours:minutes since each user has touched + the keyboard, or "." if within the last minute, or "old" if + not within the last day. */ +static bool include_idle; + +/* If true, display a line at the top describing each field. */ +static bool include_heading; + +/* If true, display a '+' for each user if mesg y, a '-' if mesg n, + or a '?' if their tty cannot be statted. */ +static bool include_mesg; + +/* If true, display process termination & exit status. */ +static bool include_exit; + +/* If true, display the last boot time. */ +static bool need_boottime; + +/* If true, display dead processes. */ +static bool need_deadprocs; + +/* If true, display processes waiting for user login. */ +static bool need_login; + +/* If true, display processes started by init. */ +static bool need_initspawn; + +/* If true, display the last clock change. */ +static bool need_clockchange; + +/* If true, display the current runlevel. */ +static bool need_runlevel; + +/* If true, display user processes. */ +static bool need_users; + +/* If true, display info only for the controlling tty. */ +static bool my_line_only; + +/* The strftime format to use for login times, and its expected + output width. */ +static char const *time_format; +static int time_format_width; + +/* for long options with no corresponding short option, use enum */ +enum +{ + LOOKUP_OPTION = CHAR_MAX + 1 +}; + +static struct option const longopts[] = +{ + {"all", no_argument, nullptr, 'a'}, + {"boot", no_argument, nullptr, 'b'}, + {"count", no_argument, nullptr, 'q'}, + {"dead", no_argument, nullptr, 'd'}, + {"heading", no_argument, nullptr, 'H'}, + {"login", no_argument, nullptr, 'l'}, + {"lookup", no_argument, nullptr, LOOKUP_OPTION}, + {"message", no_argument, nullptr, 'T'}, + {"mesg", no_argument, nullptr, 'T'}, + {"process", no_argument, nullptr, 'p'}, + {"runlevel", no_argument, nullptr, 'r'}, + {"short", no_argument, nullptr, 's'}, + {"time", no_argument, nullptr, 't'}, + {"users", no_argument, nullptr, 'u'}, + {"writable", no_argument, nullptr, 'T'}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {nullptr, 0, nullptr, 0} +}; + +/* Return a string representing the time between WHEN and now. + BOOTTIME is the time of last reboot. + FIXME: locale? */ +static char const * +idle_string (time_t when, time_t boottime) +{ + static time_t now = TYPE_MINIMUM (time_t); + + if (now == TYPE_MINIMUM (time_t)) + time (&now); + + int seconds_idle; + if (boottime < when && when <= now + && ! ckd_sub (&seconds_idle, now, when) + && seconds_idle < 24 * 60 * 60) + { + if (seconds_idle < 60) + return " . "; + else + { + static char idle_hhmm[IDLESTR_LEN]; + sprintf (idle_hhmm, "%02d:%02d", + seconds_idle / (60 * 60), + (seconds_idle % (60 * 60)) / 60); + return idle_hhmm; + } + } + + return _(" old "); +} + +/* Return a time string. */ +static char const * +time_string (struct gl_utmp const *utmp_ent) +{ + static char buf[INT_STRLEN_BOUND (intmax_t) + sizeof "-%m-%d %H:%M"]; + struct tm *tmp = localtime (&utmp_ent->ut_ts.tv_sec); + + if (tmp) + { + strftime (buf, sizeof buf, time_format, tmp); + return buf; + } + else + return timetostr (utmp_ent->ut_ts.tv_sec, buf); +} + +/* Print formatted output line. Uses mostly arbitrary field sizes, probably + will need tweaking if any of the localization stuff is done, or for 64 bit + pids, etc. */ +static void +print_line (char const *user, const char state, + char const *line, + char const *time_str, char const *idle, char const *pid, + char const *comment, char const *exitstr) +{ + static char mesg[3] = { ' ', 'x', '\0' }; + char *buf; + char x_idle[1 + IDLESTR_LEN + 1]; + char x_pid[1 + INT_STRLEN_BOUND (pid_t) + 1]; + char *x_exitstr; + int err; + + mesg[1] = state; + + if (include_idle && !short_output && strlen (idle) < sizeof x_idle - 1) + sprintf (x_idle, " %-6s", idle); + else + *x_idle = '\0'; + + if (!short_output && strlen (pid) < sizeof x_pid - 1) + sprintf (x_pid, " %10s", pid); + else + *x_pid = '\0'; + + x_exitstr = xmalloc (include_exit ? 1 + MAX (12, strlen (exitstr)) + 1 : 1); + if (include_exit) + sprintf (x_exitstr, " %-12s", exitstr); + else + *x_exitstr = '\0'; + + err = asprintf (&buf, + "%-8s" + "%s" + " %-12s" + " %-*s" + "%s" + "%s" + " %-8s" + "%s" + , + user ? user : " .", + include_mesg ? mesg : "", + line, + time_format_width, + time_str, + x_idle, + x_pid, + /* FIXME: it's not really clear whether the following + field should be in the short_output. A strict reading + of SUSv2 would suggest not, but I haven't seen any + implementations that actually work that way... */ + comment, + x_exitstr + ); + if (err == -1) + xalloc_die (); + + { + /* Remove any trailing spaces. */ + char *p = buf + strlen (buf); + while (*--p == ' ') + /* empty */; + *(p + 1) = '\0'; + } + + puts (buf); + free (buf); + free (x_exitstr); +} + +/* Return true if a terminal device given as PSTAT allows other users + to send messages to; false otherwise */ +static bool +is_tty_writable (struct stat const *pstat) +{ +#ifdef TTY_GROUP_NAME + /* Ensure the group of the TTY device matches TTY_GROUP_NAME, more info at + https://bugzilla.redhat.com/454261 */ + struct group *ttygr = getgrnam (TTY_GROUP_NAME); + if (!ttygr || (pstat->st_gid != ttygr->gr_gid)) + return false; +#endif + + return pstat->st_mode & S_IWGRP; +} + +/* Send properly parsed USER_PROCESS info to print_line. The most + recent boot time is BOOTTIME. */ +static void +print_user (struct gl_utmp const *utmp_ent, time_t boottime) +{ + struct stat stats; + time_t last_change; + char mesg; + char idlestr[IDLESTR_LEN + 1]; + PIDSTR_DECL_AND_INIT (pidstr, utmp_ent); + static char *hoststr; +#if HAVE_STRUCT_XTMP_UT_HOST + static idx_t hostlen; +#endif + + /* If ut_line contains a space, the device name starts after the space. */ + char *line = utmp_ent->ut_line; + char *space = strchr (line, ' '); + line = space ? space + 1 : line; + + int dirfd; + if (IS_ABSOLUTE_FILE_NAME (line)) + dirfd = AT_FDCWD; + else + { + static int dev_dirfd; + if (!dev_dirfd) + { + dev_dirfd = open ("/dev", O_PATHSEARCH | O_DIRECTORY); + if (dev_dirfd < 0) + dev_dirfd = AT_FDCWD - 1; + } + dirfd = dev_dirfd; + } + + if (AT_FDCWD <= dirfd && fstatat (dirfd, line, &stats, 0) == 0) + { + mesg = is_tty_writable (&stats) ? '+' : '-'; + last_change = stats.st_atime; + } + else + { + mesg = '?'; + last_change = 0; + } + + if (last_change) + sprintf (idlestr, "%.*s", IDLESTR_LEN, idle_string (last_change, boottime)); + else + sprintf (idlestr, " ?"); + +#if HAVE_STRUCT_XTMP_UT_HOST + if (utmp_ent->ut_host[0]) + { + char *host = nullptr; + char *display = nullptr; + char *ut_host = utmp_ent->ut_host; + + /* Look for an X display. */ + display = strchr (ut_host, ':'); + if (display) + *display++ = '\0'; + + if (*ut_host && do_lookup) + { + /* See if we can canonicalize it. */ + host = canon_host (ut_host); + } + + if (! host) + host = ut_host; + + if (display) + { + idx_t needed = strlen (host) + strlen (display) + 4; + if (hostlen < needed) + { + free (hoststr); + hoststr = xpalloc (nullptr, &hostlen, needed - hostlen, -1, 1); + } + char *p = hoststr; + *p++ = '('; + p = stpcpy (p, host); + *p++ = ':'; + strcpy (stpcpy (p, display), ")"); + } + else + { + idx_t needed = strlen (host) + 3; + if (hostlen < needed) + { + free (hoststr); + hoststr = xpalloc (nullptr, &hostlen, needed - hostlen, -1, 1); + } + char *p = hoststr; + *p++ = '('; + strcpy (stpcpy (p, host), ")"); + } + + if (host != ut_host) + free (host); + } + else + { + if (hostlen < 1) + hoststr = xpalloc (hoststr, &hostlen, 1, -1, 1); + *hoststr = '\0'; + } +#endif + + print_line (utmp_ent->ut_user, mesg, + utmp_ent->ut_line, + time_string (utmp_ent), idlestr, pidstr, + hoststr ? hoststr : "", ""); +} + +static void +print_boottime (struct gl_utmp const *utmp_ent) +{ + print_line ("", ' ', _("system boot"), + time_string (utmp_ent), "", "", "", ""); +} + +static char * +make_id_equals_comment (struct gl_utmp const *utmp_ent) +{ + char const *id = UT_ID (utmp_ent); + idx_t idlen = strlen (id); + char const *prefix = _("id="); + idx_t prefixlen = strlen (prefix); + char *comment = xmalloc (prefixlen + idlen + 1); + char *p = mempcpy (comment, prefix, prefixlen); + p = mempcpy (p, id, idlen); + *p = '\0'; + return comment; +} + +static void +print_deadprocs (struct gl_utmp const *utmp_ent) +{ + static char *exitstr; + char *comment = make_id_equals_comment (utmp_ent); + PIDSTR_DECL_AND_INIT (pidstr, utmp_ent); + + if (!exitstr) + exitstr = xmalloc (strlen (_("term=")) + + INT_STRLEN_BOUND (utmp_ent->ut_exit.e_termination) + 1 + + strlen (_("exit=")) + + INT_STRLEN_BOUND (utmp_ent->ut_exit.e_exit) + + 1); + sprintf (exitstr, "%s%d %s%d", _("term="), utmp_ent->ut_exit.e_termination, + _("exit="), utmp_ent->ut_exit.e_exit); + + /* FIXME: add idle time? */ + + print_line ("", ' ', utmp_ent->ut_line, + time_string (utmp_ent), "", pidstr, comment, exitstr); + free (comment); +} + +static void +print_login (struct gl_utmp const *utmp_ent) +{ + char *comment = make_id_equals_comment (utmp_ent); + PIDSTR_DECL_AND_INIT (pidstr, utmp_ent); + + /* FIXME: add idle time? */ + + print_line (_("LOGIN"), ' ', utmp_ent->ut_line, + time_string (utmp_ent), "", pidstr, comment, ""); + free (comment); +} + +static void +print_initspawn (struct gl_utmp const *utmp_ent) +{ + char *comment = make_id_equals_comment (utmp_ent); + PIDSTR_DECL_AND_INIT (pidstr, utmp_ent); + + print_line ("", ' ', utmp_ent->ut_line, + time_string (utmp_ent), "", pidstr, comment, ""); + free (comment); +} + +static void +print_clockchange (struct gl_utmp const *utmp_ent) +{ + /* FIXME: handle NEW_TIME & OLD_TIME both */ + print_line ("", ' ', _("clock change"), + time_string (utmp_ent), "", "", "", ""); +} + +static void +print_runlevel (struct gl_utmp const *utmp_ent) +{ + static char *runlevline, *comment; + unsigned char last = utmp_ent->ut_pid / 256; + unsigned char curr = utmp_ent->ut_pid % 256; + + if (!runlevline) + runlevline = xmalloc (strlen (_("run-level")) + 3); + sprintf (runlevline, "%s %c", _("run-level"), curr); + + if (!comment) + comment = xmalloc (strlen (_("last=")) + 2); + sprintf (comment, "%s%c", _("last="), (last == 'N') ? 'S' : last); + + print_line ("", ' ', runlevline, time_string (utmp_ent), + "", "", c_isprint (last) ? comment : "", ""); + + return; +} + +/* Print the username of each valid entry and the number of valid entries + in UTMP_BUF, which should have N elements. */ +static void +list_entries_who (idx_t n, struct gl_utmp const *utmp_buf) +{ + idx_t entries = 0; + char const *separator = ""; + + while (n--) + { + if (IS_USER_PROCESS (utmp_buf)) + { + char *trimmed_name; + + trimmed_name = extract_trimmed_name (utmp_buf); + + printf ("%s%s", separator, trimmed_name); + free (trimmed_name); + separator = " "; + entries++; + } + utmp_buf++; + } + printf (_("\n# users=%td\n"), entries); +} + +static void +print_heading (void) +{ + print_line (_("NAME"), ' ', _("LINE"), _("TIME"), _("IDLE"), + _("PID"), _("COMMENT"), _("EXIT")); +} + +/* Display UTMP_BUF, which should have N entries. */ +static void +scan_entries (idx_t n, struct gl_utmp const *utmp_buf) +{ + char *ttyname_b IF_LINT ( = nullptr); + time_t boottime = TYPE_MINIMUM (time_t); + + if (include_heading) + print_heading (); + + if (my_line_only) + { + ttyname_b = ttyname (STDIN_FILENO); + if (!ttyname_b) + return; + if (STRNCMP_LIT (ttyname_b, "/dev/") == 0) + ttyname_b += sizeof "/dev/" - 1; /* Discard /dev/ prefix. */ + } + + while (n--) + { + if (!my_line_only + || STREQ (ttyname_b, utmp_buf->ut_line)) + { + if (need_users && IS_USER_PROCESS (utmp_buf)) + print_user (utmp_buf, boottime); + else if (need_runlevel && UT_TYPE_RUN_LVL (utmp_buf)) + print_runlevel (utmp_buf); + else if (need_boottime && UT_TYPE_BOOT_TIME (utmp_buf)) + print_boottime (utmp_buf); + /* I've never seen one of these, so I don't know what it should + look like :^) + FIXME: handle OLD_TIME also, perhaps show the delta? */ + else if (need_clockchange && UT_TYPE_NEW_TIME (utmp_buf)) + print_clockchange (utmp_buf); + else if (need_initspawn && UT_TYPE_INIT_PROCESS (utmp_buf)) + print_initspawn (utmp_buf); + else if (need_login && UT_TYPE_LOGIN_PROCESS (utmp_buf)) + print_login (utmp_buf); + else if (need_deadprocs && UT_TYPE_DEAD_PROCESS (utmp_buf)) + print_deadprocs (utmp_buf); + } + + if (UT_TYPE_BOOT_TIME (utmp_buf)) + boottime = utmp_buf->ut_ts.tv_sec; + + utmp_buf++; + } +} + +/* Display a list of who is on the system, according to utmp file FILENAME. + Use read_utmp OPTIONS to read the file. */ +static void +who (char const *filename, int options) +{ + idx_t n_users; + struct gl_utmp *utmp_buf; + if (short_list) + options |= READ_UTMP_USER_PROCESS; + if (read_utmp (filename, &n_users, &utmp_buf, options) != 0) + error (EXIT_FAILURE, errno, "%s", quotef (filename)); + + if (short_list) + list_entries_who (n_users, utmp_buf); + else + scan_entries (n_users, utmp_buf); + + free (utmp_buf); +} + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]... [ FILE | ARG1 ARG2 ]\n"), program_name); + fputs (_("\ +Print information about users who are currently logged in.\n\ +"), stdout); + fputs (_("\ +\n\ + -a, --all same as -b -d --login -p -r -t -T -u\n\ + -b, --boot time of last system boot\n\ + -d, --dead print dead processes\n\ + -H, --heading print line of column headings\n\ +"), stdout); + fputs (_("\ + -l, --login print system login processes\n\ +"), stdout); + fputs (_("\ + --lookup attempt to canonicalize hostnames via DNS\n\ + -m only hostname and user associated with stdin\n\ + -p, --process print active processes spawned by init\n\ +"), stdout); + fputs (_("\ + -q, --count all login names and number of users logged on\n\ + -r, --runlevel print current runlevel\n\ + -s, --short print only name, line, and time (default)\n\ + -t, --time print last system clock change\n\ +"), stdout); + fputs (_("\ + -T, -w, --mesg add user's message status as +, - or ?\n\ + -u, --users list users logged in\n\ + --message same as -T\n\ + --writable same as -T\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + printf (_("\ +\n\ +If FILE is not specified, use %s. %s as FILE is common.\n\ +If ARG1 ARG2 given, -m presumed: 'am i' or 'mom likes' are usual.\n\ +"), UTMP_FILE, WTMP_FILE); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + int optc; + bool assumptions = true; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + while ((optc = getopt_long (argc, argv, "abdlmpqrstuwHT", longopts, nullptr)) + != -1) + { + switch (optc) + { + case 'a': + need_boottime = true; + need_deadprocs = true; + need_login = true; + need_initspawn = true; + need_runlevel = true; + need_clockchange = true; + need_users = true; + include_mesg = true; + include_idle = true; + include_exit = true; + assumptions = false; + break; + + case 'b': + need_boottime = true; + assumptions = false; + break; + + case 'd': + need_deadprocs = true; + include_idle = true; + include_exit = true; + assumptions = false; + break; + + case 'H': + include_heading = true; + break; + + case 'l': + need_login = true; + include_idle = true; + assumptions = false; + break; + + case 'm': + my_line_only = true; + break; + + case 'p': + need_initspawn = true; + assumptions = false; + break; + + case 'q': + short_list = true; + break; + + case 'r': + need_runlevel = true; + include_idle = true; + assumptions = false; + break; + + case 's': + short_output = true; + break; + + case 't': + need_clockchange = true; + assumptions = false; + break; + + case 'T': + case 'w': + include_mesg = true; + break; + + case 'u': + need_users = true; + include_idle = true; + assumptions = false; + break; + + case LOOKUP_OPTION: + do_lookup = true; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + usage (EXIT_FAILURE); + } + } + + if (assumptions) + { + need_users = true; + short_output = true; + } + + if (include_exit) + { + short_output = false; + } + + if (hard_locale (LC_TIME)) + { + time_format = "%Y-%m-%d %H:%M"; + time_format_width = 4 + 1 + 2 + 1 + 2 + 1 + 2 + 1 + 2; + } + else + { + time_format = "%b %e %H:%M"; + time_format_width = 3 + 1 + 2 + 1 + 2 + 1 + 2; + } + + switch (argc - optind) + { + case 2: /* who */ + my_line_only = true; + FALLTHROUGH; + case -1: + case 0: /* who */ + who (UTMP_FILE, READ_UTMP_CHECK_PIDS); + break; + + case 1: /* who */ + who (argv[optind], 0); + break; + + default: /* lose */ + error (0, 0, _("extra operand %s"), quote (argv[optind + 2])); + usage (EXIT_FAILURE); + } + + return EXIT_SUCCESS; +} diff --git a/src/whoami.c b/src/whoami.c new file mode 100644 index 0000000..e7a578a --- /dev/null +++ b/src/whoami.c @@ -0,0 +1,88 @@ +/* whoami -- print effective userid + + Copyright (C) 1989-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Equivalent to 'id -un'. */ +/* Written by Richard Mlynarik. */ + +#include +#include +#include +#include + +#include "system.h" +#include "long-options.h" +#include "quote.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "whoami" + +#define AUTHORS proper_name ("Richard Mlynarik") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("Usage: %s [OPTION]...\n"), program_name); + fputs (_("\ +Print the user name associated with the current effective user ID.\n\ +Same as id -un.\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + struct passwd *pw; + uid_t uid; + uid_t NO_UID = -1; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + if (optind != argc) + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + usage (EXIT_FAILURE); + } + + errno = 0; + uid = geteuid (); + pw = uid == NO_UID && errno ? nullptr : getpwuid (uid); + if (!pw) + error (EXIT_FAILURE, errno, _("cannot find name for user ID %lu"), + (unsigned long int) uid); + puts (pw->pw_name); + return EXIT_SUCCESS; +} diff --git a/src/yes.c b/src/yes.c new file mode 100644 index 0000000..207b407 --- /dev/null +++ b/src/yes.c @@ -0,0 +1,129 @@ +/* yes - output a string repeatedly until killed + Copyright (C) 1991-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* David MacKenzie */ + +#include +#include +#include + +#include "system.h" + +#include "full-write.h" +#include "long-options.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "yes" + +#define AUTHORS proper_name ("David MacKenzie") + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s [STRING]...\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + + fputs (_("\ +Repeatedly output a line with all specified STRING(s), or 'y'.\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +int +main (int argc, char **argv) +{ + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + parse_gnu_standard_options_only (argc, argv, PROGRAM_NAME, PACKAGE_NAME, + Version, true, usage, AUTHORS, + (char const *) nullptr); + + char **operands = argv + optind; + char **operand_lim = argv + argc; + if (optind == argc) + *operand_lim++ = bad_cast ("y"); + + /* Buffer data locally once, rather than having the + large overhead of stdio buffering each item. */ + size_t bufalloc = 0; + bool reuse_operand_strings = true; + char **operandp = operands; + do + { + size_t operand_len = strlen (*operandp); + bufalloc += operand_len + 1; + if (operandp + 1 < operand_lim + && *operandp + operand_len + 1 != operandp[1]) + reuse_operand_strings = false; + } + while (++operandp < operand_lim); + + /* Improve performance by using a buffer size greater than BUFSIZ / 2. */ + if (bufalloc <= BUFSIZ / 2) + { + bufalloc = BUFSIZ; + reuse_operand_strings = false; + } + + /* Fill the buffer with one copy of the output. If possible, reuse + the operands strings; this wins when the buffer would be large. */ + char *buf = reuse_operand_strings ? *operands : xmalloc (bufalloc); + size_t bufused = 0; + operandp = operands; + do + { + size_t operand_len = strlen (*operandp); + if (! reuse_operand_strings) + memcpy (buf + bufused, *operandp, operand_len); + bufused += operand_len; + buf[bufused++] = ' '; + } + while (++operandp < operand_lim); + buf[bufused - 1] = '\n'; + + /* If a larger buffer was allocated, fill it by repeating the buffer + contents. */ + size_t copysize = bufused; + for (size_t copies = bufalloc / copysize; --copies; ) + { + memcpy (buf + bufused, buf, copysize); + bufused += copysize; + } + + /* Repeatedly output the buffer until there is a write error; then fail. */ + while (full_write (STDOUT_FILENO, buf, bufused) == bufused) + continue; + error (0, errno, _("standard output")); + main_exit (EXIT_FAILURE); +} -- cgit v1.2.3