/* cut,lcut - extract specified fields from a line and assign them to an array
or print them to the standard output */
/*
Copyright (C) 2020 Free Software Foundation, Inc.
Bash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Bash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Bash. If not, see .
*/
/* See Makefile for compilation details. */
#include
#if defined (HAVE_UNISTD_H)
# include
#endif
#include "bashansi.h"
#include
#include
#include
#include "loadables.h"
#include "shmbutil.h"
#define CUT_ARRAY_DEFAULT "CUTFIELDS"
#define NOPOS -2 /* sentinel for unset startpos/endpos */
#define BOL 0
#define EOL INT_MAX
#define NORANGE -1 /* just a position, no range */
#define BFLAG (1 << 0)
#define CFLAG (1 << 1)
#define DFLAG (1 << 2)
#define FFLAG (1 << 3)
#define SFLAG (1 << 4)
struct cutpos
{
int startpos, endpos; /* zero-based, correction done in getlist() */
};
struct cutop
{
int flags;
int delim;
int npos;
struct cutpos *poslist;
};
static int
poscmp (a, b)
void *a, *b;
{
struct cutpos *p1, *p2;
p1 = (struct cutpos *)a;
p2 = (struct cutpos *)b;
return (p1->startpos - p2->startpos);
}
static int
getlist (arg, opp)
char *arg;
struct cutpos **opp;
{
char *ntok, *ltok, *larg;
int s, e;
intmax_t num;
struct cutpos *poslist;
int npos, nsize;
poslist = 0;
nsize = npos = 0;
s = e = 0;
larg = arg;
while (ltok = strsep (&larg, ","))
{
if (*ltok == 0)
continue;
ntok = strsep (<ok, "-");
if (*ntok == 0)
s = BOL;
else
{
if (legal_number (ntok, &num) == 0 || (int)num != num || num <= 0)
{
builtin_error ("%s: invalid list value", ntok);
*opp = poslist;
return -1;
}
s = num;
s--; /* fields are 1-based */
}
if (ltok == 0)
e = NORANGE;
else if (*ltok == 0)
e = EOL;
else
{
if (legal_number (ltok, &num) == 0 || (int)num != num || num <= 0)
{
builtin_error ("%s: invalid list value", ltok);
*opp = poslist;
return -1;
}
e = num;
e--;
if (e == s)
e = NORANGE;
}
if (npos == nsize)
{
nsize += 4;
poslist = (struct cutpos *)xrealloc (poslist, nsize * sizeof (struct cutpos));
}
poslist[npos].startpos = s;
poslist[npos].endpos = e;
npos++;
}
if (npos == 0)
{
builtin_error ("missing list of positions");
*opp = poslist;
return -1;
}
qsort (poslist, npos, sizeof(poslist[0]), poscmp);
*opp = poslist;
return npos;
}
static int
cutbytes (v, line, ops)
SHELL_VAR *v;
char *line;
struct cutop *ops;
{
arrayind_t ind;
char *buf, *bmap;
size_t llen;
int i, b, n, s, e;
llen = strlen (line);
buf = xmalloc (llen + 1);
bmap = xmalloc (llen + 1);
memset (bmap, 0, llen);
for (n = 0; n < ops->npos; n++)
{
s = ops->poslist[n].startpos; /* no translation needed yet */
e = ops->poslist[n].endpos;
if (e == NORANGE)
e = s;
else if (e == EOL || e >= llen)
e = llen - 1;
/* even if a column is specified multiple times, it will only be printed
once */
for (i = s; i <= e; i++)
bmap[i] = 1;
}
b = 0;
for (i = 0; i < llen; i++)
if (bmap[i])
buf[b++] = line[i];
buf[b] = 0;
if (v)
{
ind = 0;
bind_array_element (v, ind, buf, 0);
ind++;
}
else
printf ("%s\n", buf);
free (buf);
free (bmap);
return ind;
}
static int
cutchars (v, line, ops)
SHELL_VAR *v;
char *line;
struct cutop *ops;
{
arrayind_t ind;
char *buf, *bmap;
wchar_t *wbuf, *wb2;
size_t llen, wlen;
int i, b, n, s, e;
if (MB_CUR_MAX == 1)
return (cutbytes (v, line, ops));
if (locale_utf8locale && utf8_mbsmbchar (line) == 0)
return (cutbytes (v, line, ops));
llen = strlen (line);
wbuf = (wchar_t *)xmalloc ((llen + 1) * sizeof (wchar_t));
wlen = mbstowcs (wbuf, line, llen);
if (MB_INVALIDCH (wlen))
{
free (wbuf);
return (cutbytes (v, line, ops));
}
bmap = xmalloc (llen + 1);
memset (bmap, 0, llen);
for (n = 0; n < ops->npos; n++)
{
s = ops->poslist[n].startpos; /* no translation needed yet */
e = ops->poslist[n].endpos;
if (e == NORANGE)
e = s;
else if (e == EOL || e >= wlen)
e = wlen - 1;
/* even if a column is specified multiple times, it will only be printed
once */
for (i = s; i <= e; i++)
bmap[i] = 1;
}
wb2 = (wchar_t *)xmalloc ((wlen + 1) * sizeof (wchar_t));
b = 0;
for (i = 0; i < wlen; i++)
if (bmap[i])
wb2[b++] = wbuf[i];
wb2[b] = 0;
free (wbuf);
buf = bmap;
n = wcstombs (buf, wb2, llen);
if (v)
{
ind = 0;
bind_array_element (v, ind, buf, 0);
ind++;
}
else
printf ("%s\n", buf);
free (buf);
free (wb2);
return ind;
}
/* The basic strategy is to cut the line into fields using strsep, populate
an array of fields from 0..nf, then select those fields using the same
bitmap approach as cut{bytes,chars} and assign them to the array variable
V or print them on stdout. This function obeys SFLAG. */
static int
cutfields (v, line, ops)
SHELL_VAR *v;
char *line;
struct cutop *ops;
{
arrayind_t ind;
char *buf, *bmap, *field, **fields, delim[2];
size_t llen, fsize;
int i, b, n, s, e, nf;
ind = 0;
delim[0] = ops->delim;
delim[1] = '\0';
fields = 0;
nf = 0;
fsize = 0;
field = buf = line;
do
{
field = strsep (&buf, delim); /* destructive */
if (nf == fsize)
{
fsize += 8;
fields = xrealloc (fields, fsize * sizeof (char *));
}
fields[nf] = field;
if (field)
nf++;
}
while (field);
if (nf == 1)
{
free (fields);
if (ops->flags & SFLAG)
return ind;
if (v)
{
bind_array_element (v, ind, line, 0);
ind++;
}
else
printf ("%s\n", line);
return ind;
}
bmap = xmalloc (nf + 1);
memset (bmap, 0, nf);
for (n = 0; n < ops->npos; n++)
{
s = ops->poslist[n].startpos; /* no translation needed yet */
e = ops->poslist[n].endpos;
if (e == NORANGE)
e = s;
else if (e == EOL || e >= nf)
e = nf - 1;
/* even if a column is specified multiple times, it will only be printed
once */
for (i = s; i <= e; i++)
bmap[i] = 1;
}
for (i = 1, b = 0; b < nf; b++)
{
if (bmap[b] == 0)
continue;
if (v)
{
bind_array_element (v, ind, fields[b], 0);
ind++;
}
else
{
if (i == 0)
putchar (ops->delim);
printf ("%s", fields[b]);
}
i = 0;
}
if (v == 0)
putchar ('\n');
return nf;
}
static int
cutline (v, line, ops)
SHELL_VAR *v;
char *line;
struct cutop *ops;
{
int rval;
if (ops->flags & BFLAG)
rval = cutbytes (v, line, ops);
else if (ops->flags & CFLAG)
rval = cutchars (v, line, ops);
else
rval = cutfields (v, line, ops);
return (rval >= 0 ? EXECUTION_SUCCESS : EXECUTION_FAILURE);
}
static int
cutfile (v, list, ops)
SHELL_VAR *v;
WORD_LIST *list;
struct cutop *ops;
{
int fd, unbuffered_read;
char *line, *b;
size_t llen;
WORD_LIST *l;
ssize_t n;
line = 0;
llen = 0;
l = list;
do
{
/* for each file */
if (l == 0 || (l->word->word[0] == '-' && l->word->word[1] == '\0'))
fd = 0;
else
fd = open (l->word->word, O_RDONLY);
if (fd < 0)
{
file_error (l->word->word);
return (EXECUTION_FAILURE);
}
#ifndef __CYGWIN__
unbuffered_read = (lseek (fd, 0L, SEEK_CUR) < 0) && (errno == ESPIPE);
#else
unbuffered_read = 1;
#endif
while ((n = zgetline (fd, &line, &llen, '\n', unbuffered_read)) != -1)
{
QUIT;
if (line[n] == '\n')
line[n] = '\0'; /* cutline expects no newline terminator */
cutline (v, line, ops); /* can modify line */
}
if (fd > 0)
close (fd);
QUIT;
if (l)
l = l->next;
}
while (l);
free (line);
return EXECUTION_SUCCESS;
}
#define OPTSET(x) ((cutflags & (x)) ? 1 : 0)
static int
cut_internal (which, list)
int which; /* not used yet */
WORD_LIST *list;
{
int opt, rval, cutflags, delim, npos;
char *array_name, *cutstring, *list_arg;
SHELL_VAR *v;
struct cutop op;
struct cutpos *poslist;
v = 0;
rval = EXECUTION_SUCCESS;
cutflags = 0;
array_name = 0;
list_arg = 0;
delim = '\t';
reset_internal_getopt ();
while ((opt = internal_getopt (list, "a:b:c:d:f:sn")) != -1)
{
switch (opt)
{
case 'a':
array_name = list_optarg;
break;
case 'b':
cutflags |= BFLAG;
list_arg = list_optarg;
break;
case 'c':
cutflags |= CFLAG;
list_arg = list_optarg;
break;
case 'd':
cutflags |= DFLAG;
delim = list_optarg[0];
if (delim == 0 || list_optarg[1])
{
builtin_error ("delimiter must be a single non-null character");
return (EX_USAGE);
}
break;
case 'f':
cutflags |= FFLAG;
list_arg = list_optarg;
break;
case 'n':
break;
case 's':
cutflags |= SFLAG;
break;
CASE_HELPOPT;
default:
builtin_usage ();
return (EX_USAGE);
}
}
list = loptend;
if (array_name && (legal_identifier (array_name) == 0))
{
sh_invalidid (array_name);
return (EXECUTION_FAILURE);
}
if (list == 0 && which == 0)
{
builtin_error ("string argument required");
return (EX_USAGE);
}
/* options are mutually exclusive and one is required */
if ((OPTSET (BFLAG) + OPTSET (CFLAG) + OPTSET (FFLAG)) != 1)
{
builtin_usage ();
return (EX_USAGE);
}
if ((npos = getlist (list_arg, &poslist)) < 0)
{
free (poslist);
return (EXECUTION_FAILURE);
}
if (array_name)
{
v = find_or_make_array_variable (array_name, 1);
if (v == 0 || readonly_p (v) || noassign_p (v))
{
if (v && readonly_p (v))
err_readonly (array_name);
return (EXECUTION_FAILURE);
}
else if (array_p (v) == 0)
{
builtin_error ("%s: not an indexed array", array_name);
return (EXECUTION_FAILURE);
}
if (invisible_p (v))
VUNSETATTR (v, att_invisible);
array_flush (array_cell (v));
}
op.flags = cutflags;
op.delim = delim;
op.npos = npos;
op.poslist = poslist;
/* we implement cut as a builtin with a cutfile() function that opens each
filename in LIST as a filename (or `-' for stdin) and runs cutline on
every line in the file. */
if (which == 0)
{
cutstring = list->word->word;
if (cutstring == 0 || *cutstring == 0)
{
free (poslist);
return (EXECUTION_SUCCESS);
}
rval = cutline (v, cutstring, &op);
}
else
rval = cutfile (v, list, &op);
return (rval);
}
int
lcut_builtin (list)
WORD_LIST *list;
{
return (cut_internal (0, list));
}
int
cut_builtin (list)
WORD_LIST *list;
{
return (cut_internal (1, list));
}
char *lcut_doc[] = {
"Extract selected fields from a string.",
"",
"Select portions of LINE (as specified by LIST) and assign them to",
"elements of the indexed array ARRAY starting at index 0, or write",
"them to the standard output if -a is not specified.",
"",
"Items specified by LIST are either column positions or fields delimited",
"by a special character, and are described more completely in cut(1).",
"",
"Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
"field delimiter is specified by -d (default TAB). Column numbering",
"starts at 1.",
(char *)NULL
};
struct builtin lcut_struct = {
"lcut", /* builtin name */
lcut_builtin, /* function implementing the builtin */
BUILTIN_ENABLED, /* initial flags for builtin */
lcut_doc, /* array of long documentation strings. */
"lcut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] line", /* usage synopsis; becomes short_doc */
0 /* reserved for internal use */
};
char *cut_doc[] = {
"Extract selected fields from each line of a file.",
"",
"Select portions of each line (as specified by LIST) from each FILE",
"and write them to the standard output. cut reads from the standard",
"input if no FILE arguments are specified or if a FILE argument is a",
"single hyphen.",
"",
"Items specified by LIST are either column positions or fields delimited",
"by a special character, and are described more completely in cut(1).",
"",
"Columns correspond to bytes (-b), characters (-c), or fields (-f). The",
"field delimiter is specified by -d (default TAB). Column numbering",
"starts at 1.",
(char *)NULL
};
struct builtin cut_struct = {
"cut", /* builtin name */
cut_builtin, /* function implementing the builtin */
BUILTIN_ENABLED, /* initial flags for builtin */
cut_doc, /* array of long documentation strings. */
"cut [-a ARRAY] [-b LIST] [-c LIST] [-f LIST] [-d CHAR] [-sn] [file ...]", /* usage synopsis; becomes short_doc */
0 /* reserved for internal use */
};