diff options
Diffstat (limited to 'src/util/mystrtok.c')
-rw-r--r-- | src/util/mystrtok.c | 347 |
1 files changed, 347 insertions, 0 deletions
diff --git a/src/util/mystrtok.c b/src/util/mystrtok.c new file mode 100644 index 0000000..d5f32b7 --- /dev/null +++ b/src/util/mystrtok.c @@ -0,0 +1,347 @@ +/*++ +/* NAME +/* mystrtok 3 +/* SUMMARY +/* safe tokenizer +/* SYNOPSIS +/* #include <stringops.h> +/* +/* char *mystrtok(bufp, delimiters) +/* char **bufp; +/* const char *delimiters; +/* +/* char *mystrtokq(bufp, delimiters, parens) +/* char **bufp; +/* const char *delimiters; +/* const char *parens; +/* +/* char *mystrtokdq(bufp, delimiters) +/* char **bufp; +/* const char *delimiters; +/* +/* char *mystrtok_cw(bufp, delimiters, blame) +/* char **bufp; +/* const char *delimiters; +/* const char *blame; +/* +/* char *mystrtokq_cw(bufp, delimiters, parens, blame) +/* char **bufp; +/* const char *delimiters; +/* const char *parens; +/* const char *blame; +/* +/* char *mystrtokdq_cw(bufp, delimiters, blame) +/* char **bufp; +/* const char *delimiters; +/* const char *blame; +/* DESCRIPTION +/* mystrtok() splits a buffer on the specified \fIdelimiters\fR. +/* Tokens are delimited by runs of delimiters, so this routine +/* cannot return zero-length tokens. +/* +/* mystrtokq() is like mystrtok() but will not split text +/* between balanced parentheses. \fIparens\fR specifies the +/* opening and closing parenthesis (one of each). The set of +/* \fIparens\fR must be distinct from the set of \fIdelimiters\fR. +/* +/* mystrtokdq() is like mystrtok() but will not split text +/* between double quotes. The backslash character may be used +/* to escape characters. The double quote and backslash +/* character must not appear in the set of \fIdelimiters\fR. +/* +/* The \fIbufp\fR argument specifies the start of the search; it +/* is updated with each call. The input is destroyed. +/* +/* The result value is the next token, or a null pointer when the +/* end of the buffer was reached. +/* +/* mystrtok_cw(), mystrtokq_cw(), and mystrtokdq_cw, log a +/* warning and return null when the result would look like +/* comment. The \fBblame\fR argument provides context for +/* warning messages. Specify a null pointer to disable the +/* comment check. +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/*--*/ + +/* System library. */ + +#include <sys_defs.h> +#include <string.h> + +/* Utility library. */ + +#include <msg.h> +#include <stringops.h> + +/* mystrtok_warn - warn for #comment after other text */ + +static void mystrtok_warn(const char *start, const char *bufp, const char *blame) +{ + msg_warn("%s: #comment after other text is not allowed: %s %.20s...", + blame, start, bufp); +} + +/* mystrtok - ABI compatibility wrapper */ + +#undef mystrtok + +char *mystrtok(char **src, const char *sep) +{ + return (mystrtok_cw(src, sep, (char *) 0)); +} + +/* mystrtok - safe tokenizer */ + +char *mystrtok_cw(char **src, const char *sep, const char *blame) +{ + char *start = *src; + char *end; + + /* + * Skip over leading delimiters. + */ + start += strspn(start, sep); + if (*start == 0) { + *src = start; + return (0); + } + + /* + * Separate off one token. + */ + end = start + strcspn(start, sep); + if (*end != 0) + *end++ = 0; + *src = end; + + if (blame && *start == '#') { + mystrtok_warn(start, *src, blame); + return (0); + } else { + return (start); + } +} + +/* mystrtokq - ABI compatibility wrapper */ + +#undef mystrtokq + +char *mystrtokq(char **src, const char *sep, const char *parens) +{ + return (mystrtokq_cw(src, sep, parens, (char *) 0)); +} + +/* mystrtokq_cw - safe tokenizer with quoting support */ + +char *mystrtokq_cw(char **src, const char *sep, const char *parens, + const char *blame) +{ + char *start = *src; + static char *cp; + int ch; + int level; + + /* + * Skip over leading delimiters. + */ + start += strspn(start, sep); + if (*start == 0) { + *src = start; + return (0); + } + + /* + * Parse out the next token. + */ + for (level = 0, cp = start; (ch = *(unsigned char *) cp) != 0; cp++) { + if (ch == parens[0]) { + level++; + } else if (level > 0 && ch == parens[1]) { + level--; + } else if (level == 0 && strchr(sep, ch) != 0) { + *cp++ = 0; + break; + } + } + *src = cp; + + if (blame && *start == '#') { + mystrtok_warn(start, *src, blame); + return (0); + } else { + return (start); + } +} + +/* mystrtokdq - ABI compatibility wrapper */ + +#undef mystrtokdq + +char *mystrtokdq(char **src, const char *sep) +{ + return (mystrtokdq_cw(src, sep, (char *) 0)); +} + +/* mystrtokdq_cw - safe tokenizer, double quote and backslash support */ + +char *mystrtokdq_cw(char **src, const char *sep, const char *blame) +{ + char *cp = *src; + char *start; + + /* + * Skip leading delimiters. + */ + cp += strspn(cp, sep); + + /* + * Skip to next unquoted space or comma. + */ + if (*cp == 0) { + start = 0; + } else { + int in_quotes; + + for (in_quotes = 0, start = cp; *cp; cp++) { + if (*cp == '\\') { + if (*++cp == 0) + break; + } else if (*cp == '"') { + in_quotes = !in_quotes; + } else if (!in_quotes && strchr(sep, *(unsigned char *) cp) != 0) { + *cp++ = 0; + break; + } + } + } + *src = cp; + + if (blame && start && *start == '#') { + mystrtok_warn(start, *src, blame); + return (0); + } else { + return (start); + } +} + +#ifdef TEST + + /* + * Test program. + */ +#include "msg.h" +#include "mymalloc.h" + + /* + * The following needs to be large enough to include a null terminator in + * every testcase.expected field. + */ +#define EXPECT_SIZE 5 + +struct testcase { + const char *action; + const char *input; + const char *expected[EXPECT_SIZE]; +}; +static const struct testcase testcases[] = { + {"mystrtok", ""}, + {"mystrtok", " foo ", {"foo"}}, + {"mystrtok", " foo bar ", {"foo", "bar"}}, + {"mystrtokq", ""}, + {"mystrtokq", "foo bar", {"foo", "bar"}}, + {"mystrtokq", "{ bar } ", {"{ bar }"}}, + {"mystrtokq", "foo { bar } baz", {"foo", "{ bar }", "baz"}}, + {"mystrtokq", "foo{ bar } baz", {"foo{ bar }", "baz"}}, + {"mystrtokq", "foo { bar }baz", {"foo", "{ bar }baz"}}, + {"mystrtokdq", ""}, + {"mystrtokdq", " foo ", {"foo"}}, + {"mystrtokdq", " foo bar ", {"foo", "bar"}}, + {"mystrtokdq", " foo\\ bar ", {"foo\\ bar"}}, + {"mystrtokdq", " foo \\\" bar", {"foo", "\\\"", "bar"}}, + {"mystrtokdq", " foo \" bar baz\" ", {"foo", "\" bar baz\""}}, + {"mystrtok_cw", "#after text"}, + {"mystrtok_cw", "before-text #after text", {"before-text"}}, + {"mystrtokq_cw", "#after text"}, + {"mystrtokq_cw", "{ before text } #after text", "{ before text }"}, + {"mystrtokdq_cw", "#after text"}, + {"mystrtokdq_cw", "\"before text\" #after text", {"\"before text\""}}, +}; + +int main(void) +{ + const struct testcase *tp; + char *actual; + int pass; + int fail; + int match; + int n; + +#define NUM_TESTS sizeof(testcases)/sizeof(testcases[0]) +#define STR_OR_NULL(s) ((s) ? (s) : "null") + + for (pass = fail = 0, tp = testcases; tp < testcases + NUM_TESTS; tp++) { + char *saved_input = mystrdup(tp->input); + char *cp = saved_input; + + msg_info("RUN test case %ld %s >%s<", + (long) (tp - testcases), tp->action, tp->input); +#if 0 + msg_info("action=%s", tp->action); + msg_info("input=%s", tp->input); + for (n = 0; tp->expected[n]; tp++) + msg_info("expected[%d]=%s", n, tp->expected[n]); +#endif + + for (n = 0; n < EXPECT_SIZE; n++) { + if (strcmp(tp->action, "mystrtok") == 0) { + actual = mystrtok(&cp, CHARS_SPACE); + } else if (strcmp(tp->action, "mystrtokq") == 0) { + actual = mystrtokq(&cp, CHARS_SPACE, CHARS_BRACE); + } else if (strcmp(tp->action, "mystrtokdq") == 0) { + actual = mystrtokdq(&cp, CHARS_SPACE); + } else if (strcmp(tp->action, "mystrtok_cw") == 0) { + actual = mystrtok_cw(&cp, CHARS_SPACE, "test"); + } else if (strcmp(tp->action, "mystrtokq_cw") == 0) { + actual = mystrtokq_cw(&cp, CHARS_SPACE, CHARS_BRACE, "test"); + } else if (strcmp(tp->action, "mystrtokdq_cw") == 0) { + actual = mystrtokdq_cw(&cp, CHARS_SPACE, "test"); + } else { + msg_panic("invalid command: %s", tp->action); + } + if ((match = (actual && tp->expected[n]) ? + (strcmp(actual, tp->expected[n]) == 0) : + (actual == tp->expected[n])) != 0) { + if (actual == 0) { + msg_info("PASS test %ld", (long) (tp - testcases)); + pass++; + break; + } + } else { + msg_warn("expected: >%s<, got: >%s<", + STR_OR_NULL(tp->expected[n]), STR_OR_NULL(actual)); + msg_info("FAIL test %ld", (long) (tp - testcases)); + fail++; + break; + } + } + if (n >= EXPECT_SIZE) + msg_panic("need to increase EXPECT_SIZE"); + myfree(saved_input); + } + return (fail > 0); +} + +#endif |