1 files changed, 436 insertions, 0 deletions
diff --git a/lib/isc/regex.c b/lib/isc/regex.c
new file mode 100644
index 0000000..f7a3f5e
--- /dev/null
+++ b/lib/isc/regex.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <stdbool.h>
+
+#include <isc/file.h>
+#include <isc/print.h>
+#include <isc/regex.h>
+#include <isc/string.h>
+
+#if VALREGEX_REPORT_REASON /* FAIL(x): note reason x, jump to 'error' */
+#define FAIL(x)               \
+	do {                  \
+		reason = (x); \
+		goto error;   \
+	} while (0)
+#else /* if VALREGEX_REPORT_REASON */
+#define FAIL(x) goto error /* reason 'x' is discarded in this build */
+#endif /* if VALREGEX_REPORT_REASON */
+
+/*
+ * Validate regex 'c' ('C' locale): returns its '(' count, or -1 if invalid.
+ */
+int
+isc_regex_validate(const char *c) {
+	enum {
+		none,
+		parse_bracket,
+		parse_bound,
+		parse_ce,
+		parse_ec,
+		parse_cc
+	} state = none;
+	/* Well known character classes. */
+	const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
+			     ":graph:", ":space:", ":blank:", ":lower:",
+			     ":upper:", ":cntrl:", ":print:", ":xdigit:" };
+	bool seen_comma = false; /* bound: ',' seen */
+	bool seen_high = false; /* bound: upper limit digits seen */
+	bool seen_char = false; /* bracket: a member has been parsed */
+	bool seen_ec = false; /* "[=...=]" has content */
+	bool seen_ce = false; /* "[....]" has content */
+	bool have_atom = false; /* current alternative is non-empty */
+	int group = 0; /* '(' not yet closed */
+	int range = 0; /* 2: just saw '-'; 1: range just ended */
+	int sub = 0; /* '(' seen so far; the return value */
+	bool empty_ok = false; /* "()" is accepted, "(a|)" is not */
+	bool neg = false; /* bracket starts with '^' */
+	bool was_multiple = false; /* last item was a quantifier or anchor */
+	unsigned int low = 0; /* bound: lower limit, at most 255 */
+	unsigned int high = 0; /* bound: upper limit, at most 255 */
+	const char *ccname = NULL; /* leading ':' of "[:name:]" */
+	int range_start = 0; /* low end of a range; 256 = multi-char ce */
+#if VALREGEX_REPORT_REASON
+	const char *reason = "";
+#endif /* if VALREGEX_REPORT_REASON */
+
+	if (c == NULL || *c == 0) {
+		FAIL("empty string");
+	}
+
+	while (c != NULL && *c != 0) {
+		switch (state) {
+		case none:
+			switch (*c) {
+			case '\\': /* make literal */
+				++c;
+				switch (*c) {
+				case '1':
+				case '2':
+				case '3':
+				case '4':
+				case '5':
+				case '6':
+				case '7':
+				case '8':
+				case '9':
+					if ((*c - '0') > sub) {
+						FAIL("bad back reference");
+					}
+					have_atom = true;
+					was_multiple = false;
+					break;
+				case 0:
+					FAIL("escaped end-of-string");
+				default:
+					goto literal;
+				}
+				++c;
+				break;
+			case '[': /* bracket start */
+				++c;
+				neg = false;
+				was_multiple = false;
+				seen_char = false;
+				state = parse_bracket;
+				break;
+			case '{': /* bound start */
+				switch (c[1]) {
+				case '0':
+				case '1':
+				case '2':
+				case '3':
+				case '4':
+				case '5':
+				case '6':
+				case '7':
+				case '8':
+				case '9':
+					if (!have_atom) {
+						FAIL("no atom");
+					}
+					if (was_multiple) {
+						FAIL("was multiple");
+					}
+					seen_comma = false;
+					seen_high = false;
+					low = high = 0;
+					state = parse_bound;
+					break;
+				default:
+					goto literal;
+				}
+				++c;
+				have_atom = true;
+				was_multiple = true;
+				break;
+			case '}':
+				goto literal;
+			case '(': /* group start */
+				have_atom = false;
+				was_multiple = false;
+				empty_ok = true;
+				++group;
+				++sub;
+				++c;
+				break;
+			case ')': /* group end */
+				if (group && !have_atom && !empty_ok) {
+					FAIL("empty alternative");
+				}
+				have_atom = true;
+				was_multiple = false;
+				if (group != 0) {
+					--group;
+				}
+				++c;
+				break;
+			case '|': /* alternative separator */
+				if (!have_atom) {
+					FAIL("no atom");
+				}
+				have_atom = false;
+				empty_ok = false;
+				was_multiple = false;
+				++c;
+				break;
+			case '^':
+			case '$': /* anchors cannot be repeated */
+				have_atom = true;
+				was_multiple = true;
+				++c;
+				break;
+			case '+':
+			case '*':
+			case '?':
+				if (was_multiple) {
+					FAIL("was multiple");
+				}
+				if (!have_atom) {
+					FAIL("no atom");
+				}
+				have_atom = true;
+				was_multiple = true;
+				++c;
+				break;
+			case '.':
+			default:
+			literal:
+				have_atom = true;
+				was_multiple = false;
+				++c;
+				break;
+			}
+			break;
+		case parse_bound:
+			switch (*c) {
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				if (!seen_comma) {
+					low = low * 10 + *c - '0';
+					if (low > 255) {
+						FAIL("lower bound too big");
+					}
+				} else {
+					seen_high = true;
+					high = high * 10 + *c - '0';
+					if (high > 255) {
+						FAIL("upper bound too big");
+					}
+				}
+				++c;
+				break;
+			case ',':
+				if (seen_comma) {
+					FAIL("multiple commas");
+				}
+				seen_comma = true;
+				++c;
+				break;
+			default:
+			case '{':
+				FAIL("non digit/comma");
+			case '}':
+				if (seen_high && low > high) {
+					FAIL("bad parse bound");
+				}
+				seen_comma = false;
+				state = none;
+				++c;
+				break;
+			}
+			break;
+		case parse_bracket:
+			switch (*c) {
+			case '^':
+				if (seen_char || neg) {
+					goto inside;
+				}
+				neg = true;
+				++c;
+				break;
+			case '-':
+				if (range == 2) {
+					goto inside;
+				}
+				if (!seen_char) {
+					goto inside;
+				}
+				if (range == 1) {
+					FAIL("bad range");
+				}
+				range = 2;
+				++c;
+				break;
+			case '[':
+				++c;
+				switch (*c) {
+				case '.': /* collating element */
+					if (range != 0) {
+						--range;
+					}
+					++c;
+					state = parse_ce;
+					seen_ce = false;
+					break;
+				case '=': /* equivalence class */
+					if (range == 2) {
+						FAIL("equivalence class in "
+						     "range");
+					}
+					++c;
+					state = parse_ec;
+					seen_ec = false;
+					break;
+				case ':': /* character class */
+					if (range == 2) {
+						FAIL("character class in "
+						     "range");
+					}
+					ccname = c;
+					++c;
+					state = parse_cc;
+					break;
+				}
+				seen_char = true;
+				break;
+			case ']':
+				if (!c[1] && !seen_char) {
+					FAIL("unfinished brace");
+				}
+				if (!seen_char) { /* first ']' is literal */
+					goto inside;
+				}
+				++c;
+				range = 0;
+				have_atom = true;
+				state = none;
+				break;
+			default:
+			inside:
+				seen_char = true;
+				if (range == 2 && (*c & 0xff) < range_start) {
+					FAIL("out of order range");
+				}
+				if (range != 0) {
+					--range;
+				}
+				range_start = *c & 0xff;
+				++c;
+				break;
+			}
+			break;
+		case parse_ce:
+			switch (*c) {
+			case '.':
+				++c;
+				switch (*c) {
+				case ']':
+					if (!seen_ce) {
+						FAIL("empty ce");
+					}
+					++c;
+					state = parse_bracket;
+					break;
+				default:
+					if (seen_ce) {
+						range_start = 256;
+					} else {
+						range_start = '.';
+					}
+					seen_ce = true;
+					break;
+				}
+				break;
+			default:
+				if (seen_ce) {
+					range_start = 256;
+				} else { /* mask: plain char may be signed */
+					range_start = *c & 0xff;
+				}
+				seen_ce = true;
+				++c;
+				break;
+			}
+			break;
+		case parse_ec:
+			switch (*c) {
+			case '=':
+				++c;
+				switch (*c) {
+				case ']':
+					if (!seen_ec) {
+						FAIL("no ec");
+					}
+					++c;
+					state = parse_bracket;
+					break;
+				default:
+					seen_ec = true;
+					break;
+				}
+				break;
+			default:
+				seen_ec = true;
+				++c;
+				break;
+			}
+			break;
+		case parse_cc:
+			switch (*c) {
+			case ':':
+				++c;
+				switch (*c) {
+				case ']': {
+					unsigned int i;
+					bool found = false;
+					for (i = 0;
+					     i < sizeof(cc) / sizeof(*cc); i++)
+					{
+						unsigned int len;
+						len = strlen(cc[i]);
+						if (len !=
+						    (unsigned int)(c - ccname))
+						{
+							continue;
+						}
+						if (strncmp(cc[i], ccname, len))
+						{
+							continue;
+						}
+						found = true;
+					}
+					if (!found) {
+						FAIL("unknown cc");
+					}
+					++c;
+					state = parse_bracket;
+					break;
+				}
+				default:
+					break;
+				}
+				break;
+			default:
+				++c;
+				break;
+			}
+			break;
+		}
+	}
+	if (group != 0) {
+		FAIL("group open");
+	}
+	if (state != none) {
+		FAIL("incomplete");
+	}
+	if (!have_atom) {
+		FAIL("no atom");
+	}
+	return (sub);
+
+error:
+#if VALREGEX_REPORT_REASON
+	fprintf(stderr, "%s\n", reason);
+#endif /* if VALREGEX_REPORT_REASON */
+	return (-1);
+}