summaryrefslogtreecommitdiffstats
path: root/examples/loadables/dsv.c
diff options
context:
space:
mode:
Diffstat (limited to 'examples/loadables/dsv.c')
-rw-r--r--examples/loadables/dsv.c300
1 files changed, 300 insertions, 0 deletions
diff --git a/examples/loadables/dsv.c b/examples/loadables/dsv.c
new file mode 100644
index 0000000..70e59cb
--- /dev/null
+++ b/examples/loadables/dsv.c
@@ -0,0 +1,300 @@
+/* dsv - process a line of delimiter-separated data and populate an indexed
+ array with the fields */
+
+/*
+ Copyright (C) 2022 Free Software Foundation, Inc.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* See Makefile for compilation details. */
+
+#include <config.h>
+
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#include "bashansi.h"
+#include <stdio.h>
+
+#include "loadables.h"
+
+#define DSV_ARRAY_DEFAULT "DSV"
+
+#define NQUOTE 0
+#define DQUOTE 1
+#define SQUOTE 2
+
+#define F_SHELLQUOTE 0x01
+#define F_GREEDY 0x02
+#define F_PRESERVE 0x04
+
+/* Split LINE into delimiter-separated fields, storing each field into a
+ separate element of array variable DSV, starting at index 0. The format
+ of LINE is delimiter-separated values. By default, this splits lines of
+ CSV data as described in RFC 4180. If *DSTRING is any other value than
+ ',', this uses that character as a field delimiter. Pass F_SHELLQUOTE in
+ FLAGS to understand shell-like double-quoting and backslash-escaping in
+ double quotes instead of the "" CSV behavior, and shell-like single quotes.
+ Pass F_GREEDY in FLAGS to consume multiple leading and trailing instances
+ of *DSTRING and consecutive instances of *DSTRING in LINE without creating
+ null fields. If you want to preserve the quote characters in the generated
+ fields, pass F_PRESERVE; by default, this removes them. */
+static int
+dsvsplit (dsv, line, dstring, flags)
+ SHELL_VAR *dsv;
+ char *line, *dstring;
+ int flags;
+{
+ arrayind_t ind;
+ char *field, *prev, *buf, *xbuf;
+ int delim, qstate;
+ int b, rval;
+
+ xbuf = 0;
+ ind = 0;
+ field = prev = line;
+
+ /* If we want a greedy split, consume leading instances of *DSTRING */
+ if (flags & F_GREEDY)
+ {
+ while (*prev == *dstring)
+ prev++;
+ field = prev;
+ }
+
+ do
+ {
+ if (*prev == '"')
+ {
+ if (xbuf == 0)
+ xbuf = xmalloc (strlen (prev) + 1);
+ buf = xbuf;
+ b = 0;
+ if (flags & F_PRESERVE)
+ buf[b++] = *prev;
+ qstate = DQUOTE;
+ for (field = ++prev; *field; field++)
+ {
+ if (qstate == DQUOTE && *field == '"' && field[1] == '"' && (flags & F_SHELLQUOTE) == 0)
+ buf[b++] = *field++; /* skip double quote */
+ else if (qstate == DQUOTE && (flags & F_SHELLQUOTE) && *field == '\\' && strchr (slashify_in_quotes, field[1]) != 0)
+ buf[b++] = *++field; /* backslash quoted double quote */
+ else if (qstate == DQUOTE && *field == '"')
+ {
+ qstate = NQUOTE;
+ if (flags & F_PRESERVE)
+ buf[b++] = *field;
+ }
+ else if (qstate == NQUOTE && *field == *dstring)
+ break;
+ else
+ /* This copies any text between a closing double quote and the
+ delimiter. If you want to change that, make sure to do the
+ copy only if qstate == DQUOTE. */
+ buf[b++] = *field;
+ }
+ buf[b] = '\0';
+ }
+ else if ((flags & F_SHELLQUOTE) && *prev == '\'')
+ {
+ if (xbuf == 0)
+ xbuf = xmalloc (strlen (prev) + 1);
+ buf = xbuf;
+ b = 0;
+ if (flags & F_PRESERVE)
+ buf[b++] = *prev;
+ qstate = SQUOTE;
+ for (field = ++prev; *field; field++)
+ {
+ if (qstate == SQUOTE && *field == '\'')
+ {
+ qstate = NQUOTE;
+ if (flags & F_PRESERVE)
+ buf[b++] = *field;
+ }
+ else if (qstate == NQUOTE && *field == *dstring)
+ break;
+ else
+ /* This copies any text between a closing single quote and the
+ delimiter. If you want to change that, make sure to do the
+ copy only if qstate == SQUOTE. */
+ buf[b++] = *field;
+ }
+ buf[b] = '\0';
+ }
+ else
+ {
+ buf = prev;
+ field = prev + strcspn (prev, dstring);
+ }
+
+ delim = *field;
+ *field = '\0';
+
+ if ((flags & F_GREEDY) == 0 || buf[0])
+ {
+ bind_array_element (dsv, ind, buf, 0);
+ ind++;
+ }
+
+ *field = delim;
+
+ if (delim == *dstring)
+ prev = field + 1;
+ }
+ while (delim == *dstring);
+
+ if (xbuf)
+ free (xbuf);
+
+ return (rval = ind); /* number of fields */
+}
+
+int
+dsv_builtin (list)
+ WORD_LIST *list;
+{
+ int opt, rval, flags;
+ char *array_name, *dsvstring, *delims;
+ SHELL_VAR *v;
+
+ array_name = 0;
+ rval = EXECUTION_SUCCESS;
+
+ delims = ",";
+ flags = 0;
+
+ reset_internal_getopt ();
+ while ((opt = internal_getopt (list, "a:d:Sgp")) != -1)
+ {
+ switch (opt)
+ {
+ case 'a':
+ array_name = list_optarg;
+ break;
+ case 'd':
+ delims = list_optarg;
+ break;
+ case 'S':
+ flags |= F_SHELLQUOTE;
+ break;
+ case 'g':
+ flags |= F_GREEDY;
+ break;
+ case 'p':
+ flags |= F_PRESERVE;
+ break;
+ CASE_HELPOPT;
+ default:
+ builtin_usage ();
+ return (EX_USAGE);
+ }
+ }
+ list = loptend;
+
+ if (array_name == 0)
+ array_name = DSV_ARRAY_DEFAULT;
+
+ if (legal_identifier (array_name) == 0)
+ {
+ sh_invalidid (array_name);
+ return (EXECUTION_FAILURE);
+ }
+
+ if (list == 0)
+ {
+ builtin_error ("dsv string argument required");
+ return (EX_USAGE);
+ }
+
+ v = find_or_make_array_variable (array_name, 1);
+ if (v == 0 || readonly_p (v) || noassign_p (v))
+ {
+ if (v && readonly_p (v))
+ err_readonly (array_name);
+ return (EXECUTION_FAILURE);
+ }
+ else if (array_p (v) == 0)
+ {
+ builtin_error ("%s: not an indexed array", array_name);
+ return (EXECUTION_FAILURE);
+ }
+ if (invisible_p (v))
+ VUNSETATTR (v, att_invisible);
+ array_flush (array_cell (v));
+
+ dsvstring = list->word->word;
+
+ if (dsvstring == 0 || *dsvstring == 0)
+ return (EXECUTION_SUCCESS);
+
+ opt = dsvsplit (v, dsvstring, delims, flags);
+ /* Maybe do something with OPT here, it's the number of fields */
+
+ return (rval);
+}
+
+/* Called when builtin is enabled and loaded from the shared object. If this
+ function returns 0, the load fails. */
+int
+dsv_builtin_load (name)
+ char *name;
+{
+ return (1);
+}
+
+/* Called when builtin is disabled. */
+void
+dsv_builtin_unload (name)
+ char *name;
+{
+}
+
+char *dsv_doc[] = {
+ "Read delimiter-separated fields from STRING.",
+ "",
+ "Parse STRING, a line of delimiter-separated values, into individual",
+ "fields, and store them into the indexed array ARRAYNAME starting at",
+ "index 0. The parsing understands and skips over double-quoted strings. ",
+ "If ARRAYNAME is not supplied, \"DSV\" is the default array name.",
+ "If the delimiter is a comma, the default, this parses comma-",
+ "separated values as specified in RFC 4180.",
+ "",
+ "The -d option specifies the delimiter. The delimiter is the first",
+ "character of the DELIMS argument. Specifying a DELIMS argument that",
+ "contains more than one character is not supported and will produce",
+ "unexpected results. The -S option enables shell-like quoting: double-",
+ "quoted strings can contain backslashes preceding special characters,",
+ "and the backslash will be removed; and single-quoted strings are",
+ "processed as the shell would process them. The -g option enables a",
+ "greedy split: sequences of the delimiter are skipped at the beginning",
+ "and end of STRING, and consecutive instances of the delimiter in STRING",
+ "do not generate empty fields. If the -p option is supplied, dsv leaves",
+ "quote characters as part of the generated field; otherwise they are",
+ "removed.",
+ "",
+ "The return value is 0 unless an invalid option is supplied or the ARRAYNAME",
+ "argument is invalid or readonly.",
+ (char *)NULL
+};
+
+struct builtin dsv_struct = {
+ "dsv", /* builtin name */
+ dsv_builtin, /* function implementing the builtin */
+ BUILTIN_ENABLED, /* initial flags for builtin */
+ dsv_doc, /* array of long documentation strings. */
+ "dsv [-a ARRAYNAME] [-d DELIMS] [-Sgp] string", /* usage synopsis; becomes short_doc */
+ 0 /* reserved for internal use */
+};