summary | refs | log | tree | commit | diff | stats
path: root/lib/isc/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/isc/regex.c')
-rw-r--r-- lib/isc/regex.c 436
1 files changed, 436 insertions, 0 deletions
diff --git a/lib/isc/regex.c b/lib/isc/regex.c
new file mode 100644
index 0000000..f7a3f5e
--- /dev/null
+++ b/lib/isc/regex.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <stdbool.h>
+
+#include <isc/file.h>
+#include <isc/print.h>
+#include <isc/regex.h>
+#include <isc/string.h>
+
+#if VALREGEX_REPORT_REASON /* FAIL(x): store x in the caller's local 'reason', then jump to the caller's 'error' label */
+#define FAIL(x) \
+ do { \
+ reason = (x); \
+ goto error; \
+ } while (0)
+#else /* if VALREGEX_REPORT_REASON */
+#define FAIL(x) goto error /* the reason text x is discarded in this variant */
+#endif /* if VALREGEX_REPORT_REASON */
+
+/*
+ * Validate the regular expression 'c' in the 'C' locale.
+ *
+ * The string is scanned once, one char at a time, by a small state
+ * machine ('state'). The states distinguish the top level ('none'), the
+ * inside of a '{m,n}' bound, the inside of a '[...]' bracket expression,
+ * and the inside of a '[. .]', '[= =]' or '[: :]' element nested in a
+ * bracket expression. Nothing is compiled or matched; the function only
+ * decides whether the syntax checks below pass.
+ *
+ * Returns:
+ *   'sub', the number of '(' seen at top level, when every check passes;
+ *   -1 when 'c' is NULL or empty, or when any check fails. When
+ *   VALREGEX_REPORT_REASON is non-zero the failure reason is also
+ *   written to stderr.
+ *
+ * NOTE(review): in the parse_ce state 'range_start' is assigned from '*c'
+ * without the '& 0xff' mask used in the parse_bracket state. Where plain
+ * char is signed, a byte >= 0x80 would give a negative 'range_start' and
+ * the later "out of order range" comparison could never fail. This looks
+ * unintentional; confirm before relying on it.
+ *
+ * NOTE(review): when a '[. .]' element is the end point of a range,
+ * 'range' is decremented but no ordering comparison against
+ * 'range_start' is made, unlike for ordinary members. Confirm whether
+ * that is intended.
+ */
+int
+isc_regex_validate(const char *c) {
+ enum { /* scanner states */
+ none, /* top level: outside any bound or bracket expression */
+ parse_bracket, /* between '[' and its closing ']' */
+ parse_bound, /* between '{' and '}' */
+ parse_ce, /* between '[.' and '.]' */
+ parse_ec, /* between '[=' and '=]' */
+ parse_cc /* between '[:' and ':]' */
+ } state = none;
+ /* Well known character classes, spelled with their enclosing colons. */
+ const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
+ ":graph:", ":space:", ":blank:", ":lower:",
+ ":upper:", ":cntrl:", ":print:", ":xdigit:" };
+ bool seen_comma = false; /* bound: ',' already consumed */
+ bool seen_high = false; /* bound: at least one digit followed the ',' */
+ bool seen_char = false; /* bracket: at least one member consumed */
+ bool seen_ec = false; /* '[=': at least one char consumed */
+ bool seen_ce = false; /* '[.': at least one char consumed */
+ bool have_atom = false; /* required by repetitions, '|' and the final check */
+ int group = 0; /* '(' seen and not yet closed by ')' */
+ int range = 0; /* bracket: 2 right after '-', 1 after the range end point, else 0 */
+ int sub = 0; /* '(' seen so far; back reference limit and return value */
+ bool empty_ok = false; /* set by '(', cleared by '|'; lets ')' follow with no atom */
+ bool neg = false; /* bracket: leading '^' consumed */
+ bool was_multiple = false; /* previous item was a repetition or anchor */
+ unsigned int low = 0; /* bound: value before the ',' */
+ unsigned int high = 0; /* bound: value after the ',' */
+ const char *ccname = NULL; /* points at the ':' that opens a class name */
+ int range_start = 0; /* value of the latest bracket member, for the ordering check */
+#if VALREGEX_REPORT_REASON
+ const char *reason = "";
+#endif /* if VALREGEX_REPORT_REASON */
+
+ if (c == NULL || *c == 0) {
+ FAIL("empty string");
+ }
+
+ while (c != NULL && *c != 0) { /* c was checked above and is only ever incremented */
+ switch (state) {
+ case none:
+ switch (*c) {
+ case '\\': /* make literal */
+ ++c;
+ switch (*c) {
+ case '1': /* '\1'..'\9' are back references */
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if ((*c - '0') > sub) { /* more than the number of '(' seen so far */
+ FAIL("bad back reference");
+ }
+ have_atom = true;
+ was_multiple = false;
+ break;
+ case 0:
+ FAIL("escaped end-of-string");
+ default:
+ goto literal; /* any other escaped char is an ordinary atom */
+ }
+ ++c; /* step over the back reference digit */
+ break;
+ case '[': /* bracket start */
+ ++c;
+ neg = false;
+ was_multiple = false;
+ seen_char = false;
+ state = parse_bracket;
+ break;
+ case '{': /* bound start */
+ switch (c[1]) { /* only a following digit makes this a bound */
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (!have_atom) {
+ FAIL("no atom");
+ }
+ if (was_multiple) {
+ FAIL("was multiple");
+ }
+ seen_comma = false;
+ seen_high = false;
+ low = high = 0;
+ state = parse_bound;
+ break;
+ default:
+ goto literal; /* '{' not followed by a digit is an ordinary char */
+ }
+ ++c; /* step over '{'; the digits are consumed in parse_bound */
+ have_atom = true;
+ was_multiple = true;
+ break;
+ case '}': /* '}' at top level is an ordinary char */
+ goto literal;
+ case '(': /* group start */
+ have_atom = false;
+ was_multiple = false;
+ empty_ok = true;
+ ++group;
+ ++sub;
+ ++c;
+ break;
+ case ')': /* group end */
+ if (group && !have_atom && !empty_ok) { /* ')' right after '|' inside a group */
+ FAIL("empty alternative");
+ }
+ have_atom = true;
+ was_multiple = false;
+ if (group != 0) { /* an unmatched ')' leaves the count at zero */
+ --group;
+ }
+ ++c;
+ break;
+ case '|': /* alternative separator */
+ if (!have_atom) {
+ FAIL("no atom");
+ }
+ have_atom = false;
+ empty_ok = false;
+ was_multiple = false;
+ ++c;
+ break;
+ case '^': /* anchors */
+ case '$':
+ have_atom = true;
+ was_multiple = true; /* so a repetition directly after an anchor fails */
+ ++c;
+ break;
+ case '+': /* repetition operators */
+ case '*':
+ case '?':
+ if (was_multiple) {
+ FAIL("was multiple");
+ }
+ if (!have_atom) {
+ FAIL("no atom");
+ }
+ have_atom = true;
+ was_multiple = true;
+ ++c;
+ break;
+ case '.':
+ default:
+ literal: /* shared target of every "ordinary char" path above */
+ have_atom = true;
+ was_multiple = false;
+ ++c;
+ break;
+ }
+ break;
+ case parse_bound:
+ switch (*c) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (!seen_comma) { /* still reading the first number */
+ low = low * 10 + *c - '0';
+ if (low > 255) {
+ FAIL("lower bound too big");
+ }
+ } else {
+ seen_high = true;
+ high = high * 10 + *c - '0';
+ if (high > 255) {
+ FAIL("upper bound too big");
+ }
+ }
+ ++c;
+ break;
+ case ',':
+ if (seen_comma) {
+ FAIL("multiple commas");
+ }
+ seen_comma = true;
+ ++c;
+ break;
+ default: /* anything but a digit, ',' or '}' is an error */
+ case '{':
+ FAIL("non digit/comma");
+ case '}': /* end of bound */
+ if (seen_high && low > high) { /* only checked when an upper value was given */
+ FAIL("bad parse bound");
+ }
+ seen_comma = false;
+ state = none;
+ ++c;
+ break;
+ }
+ break;
+ case parse_bracket:
+ switch (*c) {
+ case '^':
+ if (seen_char || neg) { /* not in first position: ordinary member */
+ goto inside;
+ }
+ neg = true;
+ ++c;
+ break;
+ case '-':
+ if (range == 2) { /* '-' right after '-': it is the range end point */
+ goto inside;
+ }
+ if (!seen_char) { /* leading '-' is an ordinary member */
+ goto inside;
+ }
+ if (range == 1) { /* '-' directly after a completed range */
+ FAIL("bad range");
+ }
+ range = 2;
+ ++c;
+ break;
+ case '[': /* possible '[.', '[=' or '[:' */
+ ++c;
+ switch (*c) { /* no default: any other char is handled as a member on the next pass */
+ case '.': /* collating element */
+ if (range != 0) { /* the element counts as one range member */
+ --range;
+ }
+ ++c;
+ state = parse_ce;
+ seen_ce = false;
+ break;
+ case '=': /* equivalence class */
+ if (range == 2) {
+ FAIL("equivalence class in "
+ "range");
+ }
+ ++c;
+ state = parse_ec;
+ seen_ec = false;
+ break;
+ case ':': /* character class */
+ if (range == 2) {
+ FAIL("character class in "
+ "range");
+ }
+ ccname = c; /* remember the opening ':' for the name lookup */
+ ++c;
+ state = parse_cc;
+ break;
+ }
+ seen_char = true;
+ break;
+ case ']':
+ if (!c[1] && !seen_char) { /* ']' in first position with nothing after it */
+ FAIL("unfinished brace");
+ }
+ if (!seen_char) { /* ']' in first position is an ordinary member */
+ goto inside;
+ }
+ ++c; /* closing ']': the bracket expression is one atom */
+ range = 0;
+ have_atom = true;
+ state = none;
+ break;
+ default:
+ inside: /* ordinary bracket member */
+ seen_char = true;
+ if (range == 2 && (*c & 0xff) < range_start) { /* end point below start point */
+ FAIL("out of order range");
+ }
+ if (range != 0) {
+ --range;
+ }
+ range_start = *c & 0xff; /* masked so the value is 0..255 */
+ ++c;
+ break;
+ }
+ break;
+ case parse_ce:
+ switch (*c) {
+ case '.':
+ ++c;
+ switch (*c) {
+ case ']': /* '.]' closes the element */
+ if (!seen_ce) {
+ FAIL("empty ce");
+ }
+ ++c;
+ state = parse_bracket;
+ break;
+ default: /* the '.' was part of the element; *c is handled on the next pass */
+ if (seen_ce) {
+ range_start = 256; /* above any (*c & 0xff), so a range starting here fails the order check */
+ } else {
+ range_start = '.';
+ }
+ seen_ce = true;
+ break;
+ }
+ break;
+ default:
+ if (seen_ce) {
+ range_start = 256; /* element has more than one char */
+ } else {
+ range_start = *c; /* NOTE(review): not masked with 0xff as in parse_bracket - confirm */
+ }
+ seen_ce = true;
+ ++c;
+ break;
+ }
+ break;
+ case parse_ec:
+ switch (*c) {
+ case '=':
+ ++c;
+ switch (*c) {
+ case ']': /* '=]' closes the class */
+ if (!seen_ec) {
+ FAIL("no ec");
+ }
+ ++c;
+ state = parse_bracket;
+ break;
+ default: /* the '=' was part of the class; *c is handled on the next pass */
+ seen_ec = true;
+ break;
+ }
+ break;
+ default:
+ seen_ec = true;
+ ++c;
+ break;
+ }
+ break;
+ case parse_cc:
+ switch (*c) {
+ case ':':
+ ++c;
+ switch (*c) {
+ case ']': { /* ':]' closes the name: look it up in cc[] */
+ unsigned int i;
+ bool found = false;
+ for (i = 0;
+ i < sizeof(cc) / sizeof(*cc); i++)
+ {
+ unsigned int len;
+ len = strlen(cc[i]);
+ if (len !=
+ (unsigned int)(c - ccname)) /* span from opening ':' through closing ':' */
+ {
+ continue;
+ }
+ if (strncmp(cc[i], ccname, len))
+ {
+ continue;
+ }
+ found = true; /* the loop has no early exit; remaining entries are still compared */
+ }
+ if (!found) {
+ FAIL("unknown cc");
+ }
+ ++c;
+ state = parse_bracket;
+ break;
+ }
+ default: /* ':' not followed by ']': keep scanning the name */
+ break;
+ }
+ break;
+ default:
+ ++c;
+ break;
+ }
+ break;
+ }
+ }
+ if (group != 0) { /* a '(' was never closed */
+ FAIL("group open");
+ }
+ if (state != none) { /* the string ended inside a bound or bracket expression */
+ FAIL("incomplete");
+ }
+ if (!have_atom) {
+ FAIL("no atom");
+ }
+ return (sub);
+
+error: /* target of every FAIL() above */
+#if VALREGEX_REPORT_REASON
+ fprintf(stderr, "%s\n", reason);
+#endif /* if VALREGEX_REPORT_REASON */
+ return (-1);
+}