diff options
Diffstat (limited to '')
-rw-r--r-- | lib/printf-parse.c | 623 |
1 files changed, 623 insertions, 0 deletions
diff --git a/lib/printf-parse.c b/lib/printf-parse.c new file mode 100644 index 0000000..3040749 --- /dev/null +++ b/lib/printf-parse.c @@ -0,0 +1,623 @@ +/* Formatted output to strings. + Copyright (C) 1999-2000, 2002-2003, 2006-2023 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* This file can be parametrized with the following macros: + CHAR_T The element type of the format string. + CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters + in the format string are ASCII. + DIRECTIVE Structure denoting a format directive. + Depends on CHAR_T. + DIRECTIVES Structure denoting the set of format directives of a + format string. Depends on CHAR_T. + PRINTF_PARSE Function that parses a format string. + Depends on CHAR_T. + STATIC Set to 'static' to declare the function static. + ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */ + +#ifndef PRINTF_PARSE +# include <config.h> +#endif + +/* Specification. */ +#ifndef PRINTF_PARSE +# include "printf-parse.h" +#endif + +/* Default parameters. */ +#ifndef PRINTF_PARSE +# define PRINTF_PARSE printf_parse +# define CHAR_T char +# define DIRECTIVE char_directive +# define DIRECTIVES char_directives +#endif + +/* Get size_t, NULL. */ +#include <stddef.h> + +/* Get intmax_t. */ +#include <stdint.h> + +/* malloc(), realloc(), free(). */ +#include <stdlib.h> + +/* memcpy(). */ +#include <string.h> + +/* errno. */ +#include <errno.h> + +/* Checked size_t computations. */ +#include "xsize.h" + +#if CHAR_T_ONLY_ASCII +/* c_isascii(). */ +# include "c-ctype.h" +#endif + +#ifdef STATIC +STATIC +#endif +int +PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a) +{ + const CHAR_T *cp = format; /* pointer into format */ + size_t arg_posn = 0; /* number of regular arguments consumed */ + size_t d_allocated; /* allocated elements of d->dir */ + size_t a_allocated; /* allocated elements of a->arg */ + size_t max_width_length = 0; + size_t max_precision_length = 0; + + d->count = 0; + d_allocated = N_DIRECT_ALLOC_DIRECTIVES; + d->dir = d->direct_alloc_dir; + + a->count = 0; + a_allocated = N_DIRECT_ALLOC_ARGUMENTS; + a->arg = a->direct_alloc_arg; + +#define REGISTER_ARG(_index_,_type_) \ + { \ + size_t n = (_index_); \ + if (n >= a_allocated) \ + { \ + size_t memory_size; \ + argument *memory; \ + \ + a_allocated = xtimes (a_allocated, 2); \ + if (a_allocated <= n) \ + a_allocated = xsum (n, 1); \ + memory_size = xtimes (a_allocated, sizeof (argument)); \ + if (size_overflow_p (memory_size)) \ + /* Overflow, would lead to out of memory. */ \ + goto out_of_memory; \ + memory = (argument *) (a->arg != a->direct_alloc_arg \ + ? realloc (a->arg, memory_size) \ + : malloc (memory_size)); \ + if (memory == NULL) \ + /* Out of memory. */ \ + goto out_of_memory; \ + if (a->arg == a->direct_alloc_arg) \ + memcpy (memory, a->arg, a->count * sizeof (argument)); \ + a->arg = memory; \ + } \ + while (a->count <= n) \ + a->arg[a->count++].type = TYPE_NONE; \ + if (a->arg[n].type == TYPE_NONE) \ + a->arg[n].type = (_type_); \ + else if (a->arg[n].type != (_type_)) \ + /* Ambiguous type for positional argument. */ \ + goto error; \ + } + + while (*cp != '\0') + { + CHAR_T c = *cp++; + if (c == '%') + { + size_t arg_index = ARG_NONE; + DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */ + + /* Initialize the next directive. */ + dp->dir_start = cp - 1; + dp->flags = 0; + dp->width_start = NULL; + dp->width_end = NULL; + dp->width_arg_index = ARG_NONE; + dp->precision_start = NULL; + dp->precision_end = NULL; + dp->precision_arg_index = ARG_NONE; + dp->arg_index = ARG_NONE; + + /* Test for positional argument. */ + if (*cp >= '0' && *cp <= '9') + { + const CHAR_T *np; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + ; + if (*np == '$') + { + size_t n = 0; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + n = xsum (xtimes (n, 10), *np - '0'); + if (n == 0) + /* Positional argument 0. */ + goto error; + if (size_overflow_p (n)) + /* n too large, would lead to out of memory later. */ + goto error; + arg_index = n - 1; + cp = np + 1; + } + } + + /* Read the flags. */ + for (;;) + { + if (*cp == '\'') + { + dp->flags |= FLAG_GROUP; + cp++; + } + else if (*cp == '-') + { + dp->flags |= FLAG_LEFT; + cp++; + } + else if (*cp == '+') + { + dp->flags |= FLAG_SHOWSIGN; + cp++; + } + else if (*cp == ' ') + { + dp->flags |= FLAG_SPACE; + cp++; + } + else if (*cp == '#') + { + dp->flags |= FLAG_ALT; + cp++; + } + else if (*cp == '0') + { + dp->flags |= FLAG_ZERO; + cp++; + } +#if __GLIBC__ >= 2 && !defined __UCLIBC__ + else if (*cp == 'I') + { + dp->flags |= FLAG_LOCALIZED; + cp++; + } +#endif + else + break; + } + + /* Parse the field width. */ + if (*cp == '*') + { + dp->width_start = cp; + cp++; + dp->width_end = cp; + if (max_width_length < 1) + max_width_length = 1; + + /* Test for positional argument. */ + if (*cp >= '0' && *cp <= '9') + { + const CHAR_T *np; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + ; + if (*np == '$') + { + size_t n = 0; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + n = xsum (xtimes (n, 10), *np - '0'); + if (n == 0) + /* Positional argument 0. */ + goto error; + if (size_overflow_p (n)) + /* n too large, would lead to out of memory later. */ + goto error; + dp->width_arg_index = n - 1; + cp = np + 1; + } + } + if (dp->width_arg_index == ARG_NONE) + { + dp->width_arg_index = arg_posn++; + if (dp->width_arg_index == ARG_NONE) + /* arg_posn wrapped around. */ + goto error; + } + REGISTER_ARG (dp->width_arg_index, TYPE_INT); + } + else if (*cp >= '0' && *cp <= '9') + { + size_t width_length; + + dp->width_start = cp; + for (; *cp >= '0' && *cp <= '9'; cp++) + ; + dp->width_end = cp; + width_length = dp->width_end - dp->width_start; + if (max_width_length < width_length) + max_width_length = width_length; + } + + /* Parse the precision. */ + if (*cp == '.') + { + cp++; + if (*cp == '*') + { + dp->precision_start = cp - 1; + cp++; + dp->precision_end = cp; + if (max_precision_length < 2) + max_precision_length = 2; + + /* Test for positional argument. */ + if (*cp >= '0' && *cp <= '9') + { + const CHAR_T *np; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + ; + if (*np == '$') + { + size_t n = 0; + + for (np = cp; *np >= '0' && *np <= '9'; np++) + n = xsum (xtimes (n, 10), *np - '0'); + if (n == 0) + /* Positional argument 0. */ + goto error; + if (size_overflow_p (n)) + /* n too large, would lead to out of memory + later. */ + goto error; + dp->precision_arg_index = n - 1; + cp = np + 1; + } + } + if (dp->precision_arg_index == ARG_NONE) + { + dp->precision_arg_index = arg_posn++; + if (dp->precision_arg_index == ARG_NONE) + /* arg_posn wrapped around. */ + goto error; + } + REGISTER_ARG (dp->precision_arg_index, TYPE_INT); + } + else + { + size_t precision_length; + + dp->precision_start = cp - 1; + for (; *cp >= '0' && *cp <= '9'; cp++) + ; + dp->precision_end = cp; + precision_length = dp->precision_end - dp->precision_start; + if (max_precision_length < precision_length) + max_precision_length = precision_length; + } + } + + { + arg_type type; + + /* Parse argument type/size specifiers. */ + { + int flags = 0; + + for (;;) + { + if (*cp == 'h') + { + flags |= (1 << (flags & 1)); + cp++; + } + else if (*cp == 'L') + { + flags |= 4; + cp++; + } + else if (*cp == 'l') + { + flags += 8; + cp++; + } + else if (*cp == 'j') + { + if (sizeof (intmax_t) > sizeof (long)) + { + /* intmax_t = long long */ + flags += 16; + } + else if (sizeof (intmax_t) > sizeof (int)) + { + /* intmax_t = long */ + flags += 8; + } + cp++; + } + else if (*cp == 'z' || *cp == 'Z') + { + /* 'z' is standardized in ISO C 99, but glibc uses 'Z' + because the warning facility in gcc-2.95.2 understands + only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */ + if (sizeof (size_t) > sizeof (long)) + { + /* size_t = long long */ + flags += 16; + } + else if (sizeof (size_t) > sizeof (int)) + { + /* size_t = long */ + flags += 8; + } + cp++; + } + else if (*cp == 't') + { + if (sizeof (ptrdiff_t) > sizeof (long)) + { + /* ptrdiff_t = long long */ + flags += 16; + } + else if (sizeof (ptrdiff_t) > sizeof (int)) + { + /* ptrdiff_t = long */ + flags += 8; + } + cp++; + } +#if defined __APPLE__ && defined __MACH__ + /* On Mac OS X 10.3, PRIdMAX is defined as "qd". + We cannot change it to "lld" because PRIdMAX must also + be understood by the system's printf routines. */ + else if (*cp == 'q') + { + if (64 / 8 > sizeof (long)) + { + /* int64_t = long long */ + flags += 16; + } + else + { + /* int64_t = long */ + flags += 8; + } + cp++; + } +#endif +#if defined _WIN32 && ! defined __CYGWIN__ + /* On native Windows, PRIdMAX is defined as "I64d". + We cannot change it to "lld" because PRIdMAX must also + be understood by the system's printf routines. */ + else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4') + { + if (64 / 8 > sizeof (long)) + { + /* __int64 = long long */ + flags += 16; + } + else + { + /* __int64 = long */ + flags += 8; + } + cp += 3; + } +#endif + else + break; + } + + /* Read the conversion character. */ + c = *cp++; + switch (c) + { + case 'd': case 'i': + /* If 'long long' is larger than 'long': */ + if (flags >= 16 || (flags & 4)) + type = TYPE_LONGLONGINT; + else + /* If 'long long' is the same as 'long', we parse "lld" into + TYPE_LONGINT. */ + if (flags >= 8) + type = TYPE_LONGINT; + else if (flags & 2) + type = TYPE_SCHAR; + else if (flags & 1) + type = TYPE_SHORT; + else + type = TYPE_INT; + break; + case 'o': case 'u': case 'x': case 'X': + /* If 'unsigned long long' is larger than 'unsigned long': */ + if (flags >= 16 || (flags & 4)) + type = TYPE_ULONGLONGINT; + else + /* If 'unsigned long long' is the same as 'unsigned long', we + parse "llu" into TYPE_ULONGINT. */ + if (flags >= 8) + type = TYPE_ULONGINT; + else if (flags & 2) + type = TYPE_UCHAR; + else if (flags & 1) + type = TYPE_USHORT; + else + type = TYPE_UINT; + break; + case 'f': case 'F': case 'e': case 'E': case 'g': case 'G': + case 'a': case 'A': + if (flags >= 16 || (flags & 4)) + type = TYPE_LONGDOUBLE; + else + type = TYPE_DOUBLE; + break; + case 'c': + if (flags >= 8) +#if HAVE_WINT_T + type = TYPE_WIDE_CHAR; +#else + goto error; +#endif + else + type = TYPE_CHAR; + break; +#if HAVE_WINT_T + case 'C': + type = TYPE_WIDE_CHAR; + c = 'c'; + break; +#endif + case 's': + if (flags >= 8) +#if HAVE_WCHAR_T + type = TYPE_WIDE_STRING; +#else + goto error; +#endif + else + type = TYPE_STRING; + break; +#if HAVE_WCHAR_T + case 'S': + type = TYPE_WIDE_STRING; + c = 's'; + break; +#endif + case 'p': + type = TYPE_POINTER; + break; + case 'n': + /* If 'long long' is larger than 'long': */ + if (flags >= 16 || (flags & 4)) + type = TYPE_COUNT_LONGLONGINT_POINTER; + else + /* If 'long long' is the same as 'long', we parse "lln" into + TYPE_COUNT_LONGINT_POINTER. */ + if (flags >= 8) + type = TYPE_COUNT_LONGINT_POINTER; + else if (flags & 2) + type = TYPE_COUNT_SCHAR_POINTER; + else if (flags & 1) + type = TYPE_COUNT_SHORT_POINTER; + else + type = TYPE_COUNT_INT_POINTER; + break; +#if ENABLE_UNISTDIO + /* The unistdio extensions. */ + case 'U': + if (flags >= 16) + type = TYPE_U32_STRING; + else if (flags >= 8) + type = TYPE_U16_STRING; + else + type = TYPE_U8_STRING; + break; +#endif + case '%': + type = TYPE_NONE; + break; + default: + /* Unknown conversion character. */ + goto error; + } + } + + if (type != TYPE_NONE) + { + dp->arg_index = arg_index; + if (dp->arg_index == ARG_NONE) + { + dp->arg_index = arg_posn++; + if (dp->arg_index == ARG_NONE) + /* arg_posn wrapped around. */ + goto error; + } + REGISTER_ARG (dp->arg_index, type); + } + dp->conversion = c; + dp->dir_end = cp; + } + + d->count++; + if (d->count >= d_allocated) + { + size_t memory_size; + DIRECTIVE *memory; + + d_allocated = xtimes (d_allocated, 2); + memory_size = xtimes (d_allocated, sizeof (DIRECTIVE)); + if (size_overflow_p (memory_size)) + /* Overflow, would lead to out of memory. */ + goto out_of_memory; + memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir + ? realloc (d->dir, memory_size) + : malloc (memory_size)); + if (memory == NULL) + /* Out of memory. */ + goto out_of_memory; + if (d->dir == d->direct_alloc_dir) + memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE)); + d->dir = memory; + } + } +#if CHAR_T_ONLY_ASCII + else if (!c_isascii (c)) + { + /* Non-ASCII character. Not supported. */ + goto error; + } +#endif + } + d->dir[d->count].dir_start = cp; + + d->max_width_length = max_width_length; + d->max_precision_length = max_precision_length; + return 0; + +error: + if (a->arg != a->direct_alloc_arg) + free (a->arg); + if (d->dir != d->direct_alloc_dir) + free (d->dir); + errno = EINVAL; + return -1; + +out_of_memory: + if (a->arg != a->direct_alloc_arg) + free (a->arg); + if (d->dir != d->direct_alloc_dir) + free (d->dir); + errno = ENOMEM; + return -1; +} + +#undef PRINTF_PARSE +#undef DIRECTIVES +#undef DIRECTIVE +#undef CHAR_T_ONLY_ASCII +#undef CHAR_T |