diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 16:58:41 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 16:58:41 +0000 |
commit | e1908ae95dd4c9d19ee4dfabfc8bf8a7f85943fe (patch) | |
tree | f5cc731bedcac0fb7fe14d952e4581e749f8bb87 /src/set-fields.c | |
parent | Initial commit. (diff) | |
download | coreutils-e1908ae95dd4c9d19ee4dfabfc8bf8a7f85943fe.tar.xz coreutils-e1908ae95dd4c9d19ee4dfabfc8bf8a7f85943fe.zip |
Adding upstream version 9.4.upstream/9.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/set-fields.c')
-rw-r--r-- | src/set-fields.c | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/src/set-fields.c b/src/set-fields.c new file mode 100644 index 0000000..b299280 --- /dev/null +++ b/src/set-fields.c @@ -0,0 +1,308 @@ +/* set-fields.c -- common functions for parsing field list + Copyright (C) 2015-2023 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* Extracted from cut.c by Assaf Gordon */ + +#include <config.h> + +#include "system.h" +#include "quote.h" +#include "set-fields.h" + +/* Array of `struct field_range_pair' holding all the finite ranges. */ +struct field_range_pair *frp; + +/* Number of finite ranges specified by the user. */ +size_t n_frp; + +/* Number of `struct field_range_pair's allocated. */ +static size_t n_frp_allocated; + +#define FATAL_ERROR(Message) \ + do \ + { \ + error (0, 0, (Message)); \ + usage (EXIT_FAILURE); \ + } \ + while (0) + +/* Append LOW, HIGH to the list RP of range pairs, allocating additional + space if necessary. Update global variable N_FRP. When allocating, + update global variable N_FRP_ALLOCATED. */ +static void +add_range_pair (uintmax_t lo, uintmax_t hi) +{ + if (n_frp == n_frp_allocated) + frp = X2NREALLOC (frp, &n_frp_allocated); + frp[n_frp].lo = lo; + frp[n_frp].hi = hi; + ++n_frp; +} + + +/* Comparison function for qsort to order the list of + struct field_range_pairs. */ +static int +compare_ranges (const void *a, const void *b) +{ + struct field_range_pair const *ap = a, *bp = b; + return (ap->lo > bp->lo) - (ap->lo < bp->lo); +} + +/* Reallocate Range Pair entries, with corresponding + entries outside the range of each specified entry. */ + +static void +complement_rp (void) +{ + struct field_range_pair *c = frp; + size_t n = n_frp; + + frp = nullptr; + n_frp = 0; + n_frp_allocated = 0; + + if (c[0].lo > 1) + add_range_pair (1, c[0].lo - 1); + + for (size_t i = 1; i < n; ++i) + { + if (c[i - 1].hi + 1 == c[i].lo) + continue; + + add_range_pair (c[i - 1].hi + 1, c[i].lo - 1); + } + + if (c[n - 1].hi < UINTMAX_MAX) + add_range_pair (c[n - 1].hi + 1, UINTMAX_MAX); + + free (c); +} + +/* Given the list of field or byte range specifications FIELDSTR, + allocate and initialize the FRP array. FIELDSTR should + be composed of one or more numbers or ranges of numbers, separated + by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; + 'n-' means 'n' through end of line. + n=0 and n>=UINTMAX_MAX values will trigger an error. + + if SETFLD_ALLOW_DASH option is used, a single '-' means all fields + (otherwise a single dash triggers an error). + + if SETFLD_COMPLEMENT option is used, the specified field list + is complemented (e.g. '1-3' will result in fields '4-'). + + if SETFLD_ERRMSG_USE_POS option is used, error messages + will say 'position' (or 'byte/character positions') + instead of fields (used with cut -b/-c). + + The function terminates on failure. + + Upon return, the FRP array is initialized to contain + a non-overlapping, increasing list of field ranges. + + N_FRP holds the number of field ranges in the FRP array. + + The first field is stored as 1 (zero is not used). + An open-ended range (i.e., until the last field of the input line) + is indicated with hi = UINTMAX_MAX. + + A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last + field range pair. + + Examples: + given '1-2,4', frp = [ { .lo = 1, .hi = 2 }, + { .lo = 4, .hi = 4 }, + { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ]; + + given '3-', frp = [ { .lo = 3, .hi = UINTMAX_MAX }, + { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ]; +*/ +void +set_fields (char const *fieldstr, unsigned int options) +{ + uintmax_t initial = 1; /* Value of first number in a range. */ + uintmax_t value = 0; /* If nonzero, a number being accumulated. */ + bool lhs_specified = false; + bool rhs_specified = false; + bool dash_found = false; /* True if a '-' is found in this field. */ + + bool in_digits = false; + + /* Collect and store in RP the range end points. */ + + /* Special case: '--field=-' means all fields, emulate '--field=1-' . */ + if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-")) + { + value = 1; + lhs_specified = true; + dash_found = true; + fieldstr++; + } + + while (true) + { + if (*fieldstr == '-') + { + in_digits = false; + /* Starting a range. */ + if (dash_found) + FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS) + ? _("invalid byte or character range") + : _("invalid field range")); + + dash_found = true; + fieldstr++; + + if (lhs_specified && !value) + FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS) + ? _("byte/character positions are numbered from 1") + : _("fields are numbered from 1")); + + initial = (lhs_specified ? value : 1); + value = 0; + } + else if (*fieldstr == ',' + || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') + { + in_digits = false; + /* Ending the string, or this field/byte sublist. */ + if (dash_found) + { + dash_found = false; + + if (!lhs_specified && !rhs_specified) + { + /* if a lone dash is allowed, emulate '1-' for all fields */ + if (options & SETFLD_ALLOW_DASH) + initial = 1; + else + FATAL_ERROR (_("invalid range with no endpoint: -")); + } + + /* A range. Possibilities: -n, m-n, n-. + In any case, 'initial' contains the start of the range. */ + if (!rhs_specified) + { + /* 'n-'. From 'initial' to end of line. */ + add_range_pair (initial, UINTMAX_MAX); + } + else + { + /* 'm-n' or '-n' (1-n). */ + if (value < initial) + FATAL_ERROR (_("invalid decreasing range")); + + add_range_pair (initial, value); + } + value = 0; + } + else + { + /* A simple field number, not a range. */ + if (value == 0) + FATAL_ERROR ((options & SETFLD_ERRMSG_USE_POS) + ? _("byte/character positions are numbered from 1") + : _("fields are numbered from 1")); + + add_range_pair (value, value); + value = 0; + } + + if (*fieldstr == '\0') + break; + + fieldstr++; + lhs_specified = false; + rhs_specified = false; + } + else if (ISDIGIT (*fieldstr)) + { + /* Record beginning of digit string, in case we have to + complain about it. */ + static char const *num_start; + if (!in_digits || !num_start) + num_start = fieldstr; + in_digits = true; + + if (dash_found) + rhs_specified = 1; + else + lhs_specified = 1; + + /* Detect overflow. */ + if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t) + || value == UINTMAX_MAX) + { + /* In case the user specified -c$(echo 2^64|bc),22, + complain only about the first number. */ + /* Determine the length of the offending number. */ + size_t len = strspn (num_start, "0123456789"); + char *bad_num = ximemdup0 (num_start, len); + error (0, 0, (options & SETFLD_ERRMSG_USE_POS) + ?_("byte/character offset %s is too large") + :_("field number %s is too large"), + quote (bad_num)); + free (bad_num); + usage (EXIT_FAILURE); + } + + fieldstr++; + } + else + { + error (0, 0, (options & SETFLD_ERRMSG_USE_POS) + ?_("invalid byte/character position %s") + :_("invalid field value %s"), + quote (fieldstr)); + usage (EXIT_FAILURE); + } + } + + if (!n_frp) + FATAL_ERROR ((options&SETFLD_ERRMSG_USE_POS) + ? _("missing list of byte/character positions") + : _("missing list of fields")); + + qsort (frp, n_frp, sizeof (frp[0]), compare_ranges); + + /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */ + for (size_t i = 0; i < n_frp; ++i) + { + for (size_t j = i + 1; j < n_frp; ++j) + { + if (frp[j].lo <= frp[i].hi) + { + frp[i].hi = MAX (frp[j].hi, frp[i].hi); + memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp); + n_frp--; + j--; + } + else + break; + } + } + + if (options & SETFLD_COMPLEMENT) + complement_rp (); + + /* After merging, reallocate RP so we release memory to the system. + Also add a sentinel at the end of RP, to avoid out of bounds access + and for performance reasons. */ + ++n_frp; + frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair)); + frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX; +} |