diff options
Diffstat (limited to 'src/printf.c')
-rw-r--r-- | src/printf.c | 731 |
1 files changed, 731 insertions, 0 deletions
diff --git a/src/printf.c b/src/printf.c new file mode 100644 index 0000000..68c3883 --- /dev/null +++ b/src/printf.c @@ -0,0 +1,731 @@ +/* printf - format and print data + Copyright (C) 1990-2022 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Usage: printf format [argument...] + + A front end to the printf function that lets it be used from the shell. + + Backslash escapes: + + \" = double quote + \\ = backslash + \a = alert (bell) + \b = backspace + \c = produce no further output + \e = escape + \f = form feed + \n = new line + \r = carriage return + \t = horizontal tab + \v = vertical tab + \ooo = octal number (ooo is 1 to 3 digits) + \xhh = hexadecimal number (hhh is 1 to 2 digits) + \uhhhh = 16-bit Unicode character (hhhh is 4 digits) + \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits) + + Additional directive: + + %b = print an argument string, interpreting backslash escapes, + except that octal escapes are of the form \0 or \0ooo. + + %q = print an argument string in a format that can be + reused as shell input. Escaped characters used the proposed + POSIX $'' syntax supported by most shells. + + The 'format' argument is re-used as many times as necessary + to convert all of the given arguments. + + David MacKenzie <djm@gnu.ai.mit.edu> */ + +#include <config.h> +#include <stdio.h> +#include <sys/types.h> +#include <wchar.h> + +#include "system.h" +#include "cl-strtod.h" +#include "die.h" +#include "error.h" +#include "quote.h" +#include "unicodeio.h" +#include "xprintf.h" + +/* The official name of this program (e.g., no 'g' prefix). */ +#define PROGRAM_NAME "printf" + +#define AUTHORS proper_name ("David MacKenzie") + +#define isodigit(c) ((c) >= '0' && (c) <= '7') +#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : \ + (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 : (c) - '0') +#define octtobin(c) ((c) - '0') + +/* The value to return to the calling program. */ +static int exit_status; + +/* True if the POSIXLY_CORRECT environment variable is set. */ +static bool posixly_correct; + +/* This message appears in N_() here rather than just in _() below because + the sole use would have been in a #define. */ +static char const *const cfcc_msg = + N_("warning: %s: character(s) following character constant have been ignored"); + +void +usage (int status) +{ + if (status != EXIT_SUCCESS) + emit_try_help (); + else + { + printf (_("\ +Usage: %s FORMAT [ARGUMENT]...\n\ + or: %s OPTION\n\ +"), + program_name, program_name); + fputs (_("\ +Print ARGUMENT(s) according to FORMAT, or execute according to OPTION:\n\ +\n\ +"), stdout); + fputs (HELP_OPTION_DESCRIPTION, stdout); + fputs (VERSION_OPTION_DESCRIPTION, stdout); + fputs (_("\ +\n\ +FORMAT controls the output as in C printf. Interpreted sequences are:\n\ +\n\ + \\\" double quote\n\ +"), stdout); + fputs (_("\ + \\\\ backslash\n\ + \\a alert (BEL)\n\ + \\b backspace\n\ + \\c produce no further output\n\ + \\e escape\n\ + \\f form feed\n\ + \\n new line\n\ + \\r carriage return\n\ + \\t horizontal tab\n\ + \\v vertical tab\n\ +"), stdout); + fputs (_("\ + \\NNN byte with octal value NNN (1 to 3 digits)\n\ + \\xHH byte with hexadecimal value HH (1 to 2 digits)\n\ + \\uHHHH Unicode (ISO/IEC 10646) character with hex value HHHH (4 digits)\n\ + \\UHHHHHHHH Unicode character with hex value HHHHHHHH (8 digits)\n\ +"), stdout); + fputs (_("\ + %% a single %\n\ + %b ARGUMENT as a string with '\\' escapes interpreted,\n\ + except that octal escapes are of the form \\0 or \\0NNN\n\ + %q ARGUMENT is printed in a format that can be reused as shell input,\n\ + escaping non-printable characters with the proposed POSIX $'' syntax.\ +\n\n\ +and all C format specifications ending with one of diouxXfeEgGcs, with\n\ +ARGUMENTs converted to proper type first. Variable widths are handled.\n\ +"), stdout); + printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME); + emit_ancillary_info (PROGRAM_NAME); + } + exit (status); +} + +static void +verify_numeric (char const *s, char const *end) +{ + if (errno) + { + error (0, errno, "%s", quote (s)); + exit_status = EXIT_FAILURE; + } + else if (*end) + { + if (s == end) + error (0, 0, _("%s: expected a numeric value"), quote (s)); + else + error (0, 0, _("%s: value not completely converted"), quote (s)); + exit_status = EXIT_FAILURE; + } +} + +#define STRTOX(TYPE, FUNC_NAME, LIB_FUNC_EXPR) \ +static TYPE \ +FUNC_NAME (char const *s) \ +{ \ + char *end; \ + TYPE val; \ + \ + if ((*s == '\"' || *s == '\'') && *(s + 1)) \ + { \ + unsigned char ch = *++s; \ + val = ch; \ + \ + if (MB_CUR_MAX > 1 && *(s + 1)) \ + { \ + mbstate_t mbstate = { 0, }; \ + wchar_t wc; \ + size_t slen = strlen (s); \ + ssize_t bytes; \ + bytes = mbrtowc (&wc, s, slen, &mbstate); \ + if (0 < bytes) \ + { \ + val = wc; \ + s += bytes - 1; \ + } \ + } \ + \ + /* If POSIXLY_CORRECT is not set, then give a warning that there \ + are characters following the character constant and that GNU \ + printf is ignoring those characters. If POSIXLY_CORRECT *is* \ + set, then don't give the warning. */ \ + if (*++s != 0 && !posixly_correct) \ + error (0, 0, _(cfcc_msg), s); \ + } \ + else \ + { \ + errno = 0; \ + val = (LIB_FUNC_EXPR); \ + verify_numeric (s, end); \ + } \ + return val; \ +} \ + +STRTOX (intmax_t, vstrtoimax, strtoimax (s, &end, 0)) +STRTOX (uintmax_t, vstrtoumax, strtoumax (s, &end, 0)) +STRTOX (long double, vstrtold, cl_strtold (s, &end)) + +/* Output a single-character \ escape. */ + +static void +print_esc_char (char c) +{ + switch (c) + { + case 'a': /* Alert. */ + putchar ('\a'); + break; + case 'b': /* Backspace. */ + putchar ('\b'); + break; + case 'c': /* Cancel the rest of the output. */ + exit (EXIT_SUCCESS); + break; + case 'e': /* Escape. */ + putchar ('\x1B'); + break; + case 'f': /* Form feed. */ + putchar ('\f'); + break; + case 'n': /* New line. */ + putchar ('\n'); + break; + case 'r': /* Carriage return. */ + putchar ('\r'); + break; + case 't': /* Horizontal tab. */ + putchar ('\t'); + break; + case 'v': /* Vertical tab. */ + putchar ('\v'); + break; + default: + putchar (c); + break; + } +} + +/* Print a \ escape sequence starting at ESCSTART. + Return the number of characters in the escape sequence + besides the backslash. + If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o + is an octal digit; otherwise they are of the form \ooo. */ + +static int +print_esc (char const *escstart, bool octal_0) +{ + char const *p = escstart + 1; + int esc_value = 0; /* Value of \nnn escape. */ + int esc_length; /* Length of \nnn escape. */ + + if (*p == 'x') + { + /* A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. */ + for (esc_length = 0, ++p; + esc_length < 2 && isxdigit (to_uchar (*p)); + ++esc_length, ++p) + esc_value = esc_value * 16 + hextobin (*p); + if (esc_length == 0) + die (EXIT_FAILURE, 0, _("missing hexadecimal number in escape")); + putchar (esc_value); + } + else if (isodigit (*p)) + { + /* Parse \0ooo (if octal_0 && *p == '0') or \ooo (otherwise). + Allow \ooo if octal_0 && *p != '0'; this is an undocumented + extension to POSIX that is compatible with Bash 2.05b. */ + for (esc_length = 0, p += octal_0 && *p == '0'; + esc_length < 3 && isodigit (*p); + ++esc_length, ++p) + esc_value = esc_value * 8 + octtobin (*p); + putchar (esc_value); + } + else if (*p && strchr ("\"\\abcefnrtv", *p)) + print_esc_char (*p++); + else if (*p == 'u' || *p == 'U') + { + char esc_char = *p; + unsigned int uni_value; + + uni_value = 0; + for (esc_length = (esc_char == 'u' ? 4 : 8), ++p; + esc_length > 0; + --esc_length, ++p) + { + if (! isxdigit (to_uchar (*p))) + die (EXIT_FAILURE, 0, _("missing hexadecimal number in escape")); + uni_value = uni_value * 16 + hextobin (*p); + } + + /* A universal character name shall not specify a character short + identifier in the range 00000000 through 00000020, 0000007F through + 0000009F, or 0000D800 through 0000DFFF inclusive. A universal + character name shall not designate a character in the required + character set. */ + if ((uni_value <= 0x9f + && uni_value != 0x24 && uni_value != 0x40 && uni_value != 0x60) + || (uni_value >= 0xd800 && uni_value <= 0xdfff)) + die (EXIT_FAILURE, 0, _("invalid universal character name \\%c%0*x"), + esc_char, (esc_char == 'u' ? 4 : 8), uni_value); + + print_unicode_char (stdout, uni_value, 0); + } + else + { + putchar ('\\'); + if (*p) + { + putchar (*p); + p++; + } + } + return p - escstart - 1; +} + +/* Print string STR, evaluating \ escapes. */ + +static void +print_esc_string (char const *str) +{ + for (; *str; str++) + if (*str == '\\') + str += print_esc (str, true); + else + putchar (*str); +} + +/* Evaluate a printf conversion specification. START is the start of + the directive, LENGTH is its length, and CONVERSION specifies the + type of conversion. LENGTH does not include any length modifier or + the conversion specifier itself. FIELD_WIDTH and PRECISION are the + field width and precision for '*' values, if HAVE_FIELD_WIDTH and + HAVE_PRECISION are true, respectively. ARGUMENT is the argument to + be formatted. */ + +static void +print_direc (char const *start, size_t length, char conversion, + bool have_field_width, int field_width, + bool have_precision, int precision, + char const *argument) +{ + char *p; /* Null-terminated copy of % directive. */ + + /* Create a null-terminated copy of the % directive, with an + intmax_t-wide length modifier substituted for any existing + integer length modifier. */ + { + char *q; + char const *length_modifier; + size_t length_modifier_len; + + switch (conversion) + { + case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': + length_modifier = PRIdMAX; + length_modifier_len = sizeof PRIdMAX - 2; + break; + + case 'a': case 'e': case 'f': case 'g': + case 'A': case 'E': case 'F': case 'G': + length_modifier = "L"; + length_modifier_len = 1; + break; + + default: + length_modifier = start; /* Any valid pointer will do. */ + length_modifier_len = 0; + break; + } + + p = xmalloc (length + length_modifier_len + 2); + q = mempcpy (p, start, length); + q = mempcpy (q, length_modifier, length_modifier_len); + *q++ = conversion; + *q = '\0'; + } + + switch (conversion) + { + case 'd': + case 'i': + { + intmax_t arg = vstrtoimax (argument); + if (!have_field_width) + { + if (!have_precision) + xprintf (p, arg); + else + xprintf (p, precision, arg); + } + else + { + if (!have_precision) + xprintf (p, field_width, arg); + else + xprintf (p, field_width, precision, arg); + } + } + break; + + case 'o': + case 'u': + case 'x': + case 'X': + { + uintmax_t arg = vstrtoumax (argument); + if (!have_field_width) + { + if (!have_precision) + xprintf (p, arg); + else + xprintf (p, precision, arg); + } + else + { + if (!have_precision) + xprintf (p, field_width, arg); + else + xprintf (p, field_width, precision, arg); + } + } + break; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + { + long double arg = vstrtold (argument); + if (!have_field_width) + { + if (!have_precision) + xprintf (p, arg); + else + xprintf (p, precision, arg); + } + else + { + if (!have_precision) + xprintf (p, field_width, arg); + else + xprintf (p, field_width, precision, arg); + } + } + break; + + case 'c': + if (!have_field_width) + xprintf (p, *argument); + else + xprintf (p, field_width, *argument); + break; + + case 's': + if (!have_field_width) + { + if (!have_precision) + xprintf (p, argument); + else + xprintf (p, precision, argument); + } + else + { + if (!have_precision) + xprintf (p, field_width, argument); + else + xprintf (p, field_width, precision, argument); + } + break; + } + + free (p); +} + +/* Print the text in FORMAT, using ARGV (with ARGC elements) for + arguments to any '%' directives. + Return the number of elements of ARGV used. */ + +static int +print_formatted (char const *format, int argc, char **argv) +{ + int save_argc = argc; /* Preserve original value. */ + char const *f; /* Pointer into 'format'. */ + char const *direc_start; /* Start of % directive. */ + size_t direc_length; /* Length of % directive. */ + bool have_field_width; /* True if FIELD_WIDTH is valid. */ + int field_width = 0; /* Arg to first '*'. */ + bool have_precision; /* True if PRECISION is valid. */ + int precision = 0; /* Arg to second '*'. */ + char ok[UCHAR_MAX + 1]; /* ok['x'] is true if %x is allowed. */ + + for (f = format; *f; ++f) + { + switch (*f) + { + case '%': + direc_start = f++; + direc_length = 1; + have_field_width = have_precision = false; + if (*f == '%') + { + putchar ('%'); + break; + } + if (*f == 'b') + { + /* FIXME: Field width and precision are not supported + for %b, even though POSIX requires it. */ + if (argc > 0) + { + print_esc_string (*argv); + ++argv; + --argc; + } + break; + } + + if (*f == 'q') + { + if (argc > 0) + { + fputs (quotearg_style (shell_escape_quoting_style, *argv), + stdout); + ++argv; + --argc; + } + break; + } + + memset (ok, 0, sizeof ok); + ok['a'] = ok['A'] = ok['c'] = ok['d'] = ok['e'] = ok['E'] = + ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['i'] = ok['o'] = + ok['s'] = ok['u'] = ok['x'] = ok['X'] = 1; + + for (;; f++, direc_length++) + switch (*f) + { +#if (__GLIBC__ == 2 && 2 <= __GLIBC_MINOR__) || 3 <= __GLIBC__ + case 'I': +#endif + case '\'': + ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = + ok['o'] = ok['s'] = ok['x'] = ok['X'] = 0; + break; + case '-': case '+': case ' ': + break; + case '#': + ok['c'] = ok['d'] = ok['i'] = ok['s'] = ok['u'] = 0; + break; + case '0': + ok['c'] = ok['s'] = 0; + break; + default: + goto no_more_flag_characters; + } + no_more_flag_characters: + + if (*f == '*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t width = vstrtoimax (*argv); + if (INT_MIN <= width && width <= INT_MAX) + field_width = width; + else + die (EXIT_FAILURE, 0, _("invalid field width: %s"), + quote (*argv)); + ++argv; + --argc; + } + else + field_width = 0; + have_field_width = true; + } + else + while (ISDIGIT (*f)) + { + ++f; + ++direc_length; + } + if (*f == '.') + { + ++f; + ++direc_length; + ok['c'] = 0; + if (*f == '*') + { + ++f; + ++direc_length; + if (argc > 0) + { + intmax_t prec = vstrtoimax (*argv); + if (prec < 0) + { + /* A negative precision is taken as if the + precision were omitted, so -1 is safe + here even if prec < INT_MIN. */ + precision = -1; + } + else if (INT_MAX < prec) + die (EXIT_FAILURE, 0, _("invalid precision: %s"), + quote (*argv)); + else + precision = prec; + ++argv; + --argc; + } + else + precision = 0; + have_precision = true; + } + else + while (ISDIGIT (*f)) + { + ++f; + ++direc_length; + } + } + + while (*f == 'l' || *f == 'L' || *f == 'h' + || *f == 'j' || *f == 't' || *f == 'z') + ++f; + + { + unsigned char conversion = *f; + if (! ok[conversion]) + die (EXIT_FAILURE, 0, + _("%.*s: invalid conversion specification"), + (int) (f + 1 - direc_start), direc_start); + } + + print_direc (direc_start, direc_length, *f, + have_field_width, field_width, + have_precision, precision, + (argc <= 0 ? "" : (argc--, *argv++))); + break; + + case '\\': + f += print_esc (f, false); + break; + + default: + putchar (*f); + } + } + + return save_argc - argc; +} + +int +main (int argc, char **argv) +{ + char *format; + int args_used; + + initialize_main (&argc, &argv); + set_program_name (argv[0]); + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + atexit (close_stdout); + + exit_status = EXIT_SUCCESS; + + posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); + + /* We directly parse options, rather than use parse_long_options, in + order to avoid accepting abbreviations. */ + if (argc == 2) + { + if (STREQ (argv[1], "--help")) + usage (EXIT_SUCCESS); + + if (STREQ (argv[1], "--version")) + { + version_etc (stdout, PROGRAM_NAME, PACKAGE_NAME, Version, AUTHORS, + (char *) NULL); + return EXIT_SUCCESS; + } + } + + /* The above handles --help and --version. + Since there is no other invocation of getopt, handle '--' here. */ + if (1 < argc && STREQ (argv[1], "--")) + { + --argc; + ++argv; + } + + if (argc <= 1) + { + error (0, 0, _("missing operand")); + usage (EXIT_FAILURE); + } + + format = argv[1]; + argc -= 2; + argv += 2; + + do + { + args_used = print_formatted (format, argc, argv); + argc -= args_used; + argv += args_used; + } + while (args_used > 0 && argc > 0); + + if (argc > 0) + error (0, 0, + _("warning: ignoring excess arguments, starting with %s"), + quote (argv[0])); + + return exit_status; +} |