1 files changed, 368 insertions, 0 deletions
diff --git a/lib/isc/regex.c b/lib/isc/regex.c
new file mode 100644
index 0000000..63261bb
--- /dev/null
+++ b/lib/isc/regex.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <config.h>
+
+#include <stdbool.h>
+
+#include <isc/file.h>
+#include <isc/print.h>
+#include <isc/regex.h>
+#include <isc/string.h>
+
+#if VALREGEX_REPORT_REASON
+#define FAIL(x) do { reason = (x); goto error; } while(0)
+#else
+#define FAIL(x) goto error
+#endif
+
+/*
+ * Validate the regular expression 'C' locale.
+ */
+int
+isc_regex_validate(const char *c) {
+	enum {
+		none, parse_bracket, parse_bound,
+		parse_ce, parse_ec, parse_cc
+	} state = none;
+	/* Well known character classes. */
+	const char *cc[] = {
+		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
+		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
+		":print:", ":xdigit:"
+	};
+	bool seen_comma = false;
+	bool seen_high = false;
+	bool seen_char = false;
+	bool seen_ec = false;
+	bool seen_ce = false;
+	bool have_atom = false;
+	int group = 0;
+	int range = 0;
+	int sub = 0;
+	bool empty_ok = false;
+	bool neg = false;
+	bool was_multiple = false;
+	unsigned int low = 0;
+	unsigned int high = 0;
+	const char *ccname = NULL;
+	int range_start = 0;
+#if VALREGEX_REPORT_REASON
+	const char *reason = "";
+#endif
+
+	if (c == NULL || *c == 0)
+		FAIL("empty string");
+
+	while (c != NULL && *c != 0) {
+		switch (state) {
+		case none:
+			switch (*c) {
+			case '\\':	/* make literal */
+				++c;
+				switch (*c) {
+				case '1': case '2': case '3':
+				case '4': case '5': case '6':
+				case '7': case '8': case '9':
+					if ((*c - '0') > sub)
+						FAIL("bad back reference");
+					have_atom = true;
+					was_multiple = false;
+					break;
+				case 0:
+					FAIL("escaped end-of-string");
+				default:
+					goto literal;
+				}
+				++c;
+				break;
+			case '[':	/* bracket start */
+				++c;
+				neg = false;
+				was_multiple = false;
+				seen_char = false;
+				state = parse_bracket;
+				break;
+			case '{': 	/* bound start */
+				switch (c[1]) {
+				case '0': case '1': case '2': case '3':
+				case '4': case '5': case '6': case '7':
+				case '8': case '9':
+					if (!have_atom)
+						FAIL("no atom");
+					if (was_multiple)
+						FAIL("was multiple");
+					seen_comma = false;
+					seen_high = false;
+					low = high = 0;
+					state = parse_bound;
+					break;
+				default:
+					goto literal;
+				}
+				++c;
+				have_atom = true;
+				was_multiple = true;
+				break;
+			case '}':
+				goto literal;
+			case '(':	/* group start */
+				have_atom = false;
+				was_multiple = false;
+				empty_ok = true;
+				++group;
+				++sub;
+				++c;
+				break;
+			case ')':	/* group end */
+				if (group && !have_atom && !empty_ok)
+					FAIL("empty alternative");
+				have_atom = true;
+				was_multiple = false;
+				if (group != 0)
+					--group;
+				++c;
+				break;
+			case '|':	/* alternative seperator */
+				if (!have_atom)
+					FAIL("no atom");
+				have_atom = false;
+				empty_ok = false;
+				was_multiple = false;
+				++c;
+				break;
+			case '^':
+			case '$':
+				have_atom = true;
+				was_multiple = true;
+				++c;
+				break;
+			case '+':
+			case '*':
+			case '?':
+				if (was_multiple)
+					FAIL("was multiple");
+				if (!have_atom)
+					FAIL("no atom");
+				have_atom = true;
+				was_multiple = true;
+				++c;
+				break;
+			case '.':
+			default:
+			literal:
+				have_atom = true;
+				was_multiple = false;
+				++c;
+				break;
+			}
+			break;
+		case parse_bound:
+			switch (*c) {
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+				if (!seen_comma) {
+					low = low * 10 + *c - '0';
+					if (low > 255)
+						FAIL("lower bound too big");
+				} else {
+					seen_high = true;
+					high = high * 10 + *c - '0';
+					if (high > 255)
+						FAIL("upper bound too big");
+				}
+				++c;
+				break;
+			case ',':
+				if (seen_comma)
+					FAIL("multiple commas");
+				seen_comma = true;
+				++c;
+				break;
+			default:
+			case '{':
+				FAIL("non digit/comma");
+			case '}':
+				if (seen_high && low > high)
+					FAIL("bad parse bound");
+				seen_comma = false;
+				state = none;
+				++c;
+				break;
+			}
+			break;
+		case parse_bracket:
+			switch (*c) {
+			case '^':
+				if (seen_char || neg) goto inside;
+				neg = true;
+				++c;
+				break;
+			case '-':
+				if (range == 2) goto inside;
+				if (!seen_char) goto inside;
+				if (range == 1)
+					FAIL("bad range");
+				range = 2;
+				++c;
+				break;
+			case '[':
+				++c;
+				switch (*c) {
+				case '.':	/* collating element */
+					if (range != 0) --range;
+					++c;
+					state = parse_ce;
+					seen_ce = false;
+					break;
+				case '=':	/* equivalence class */
+					if (range == 2)
+					    FAIL("equivalence class in range");
+					++c;
+					state = parse_ec;
+					seen_ec = false;
+					break;
+				case ':':	/* character class */
+					if (range == 2)
+					      FAIL("character class in range");
+					ccname = c;
+					++c;
+					state = parse_cc;
+					break;
+				}
+				seen_char = true;
+				break;
+			case ']':
+				if (!c[1] && !seen_char)
+					FAIL("unfinished brace");
+				if (!seen_char)
+					goto inside;
+				++c;
+				range = 0;
+				have_atom = true;
+				state = none;
+				break;
+			default:
+			inside:
+				seen_char = true;
+				if (range == 2 && (*c & 0xff) < range_start)
+					FAIL("out of order range");
+				if (range != 0)
+					--range;
+				range_start = *c & 0xff;
+				++c;
+				break;
+			};
+			break;
+		case parse_ce:
+			switch (*c) {
+			case '.':
+				++c;
+				switch (*c) {
+				case ']':
+					if (!seen_ce)
+						 FAIL("empty ce");
+					++c;
+					state = parse_bracket;
+					break;
+				default:
+					if (seen_ce)
+						range_start = 256;
+					else
+						range_start = '.';
+					seen_ce = true;
+					break;
+				}
+				break;
+			default:
+				if (seen_ce)
+					range_start = 256;
+				else
+					range_start = *c;
+				seen_ce = true;
+				++c;
+				break;
+			}
+			break;
+		case parse_ec:
+			switch (*c) {
+			case '=':
+				++c;
+				switch (*c) {
+				case ']':
+					if (!seen_ec)
+						FAIL("no ec");
+					++c;
+					state = parse_bracket;
+					break;
+				default:
+					seen_ec = true;
+					break;
+				}
+				break;
+			default:
+				seen_ec = true;
+				++c;
+				break;
+			}
+			break;
+		case parse_cc:
+			switch (*c) {
+			case ':':
+				++c;
+				switch (*c) {
+				case ']': {
+					unsigned int i;
+					bool found = false;
+					for (i = 0;
+					     i < sizeof(cc)/sizeof(*cc);
+					     i++)
+					{
+						unsigned int len;
+						len = strlen(cc[i]);
+						if (len !=
+						    (unsigned int)(c - ccname))
+							continue;
+						if (strncmp(cc[i], ccname, len))
+							continue;
+						found = true;
+					}
+					if (!found)
+						FAIL("unknown cc");
+					++c;
+					state = parse_bracket;
+					break;
+					}
+				default:
+					break;
+				}
+				break;
+			default:
+				++c;
+				break;
+			}
+			break;
+		}
+	}
+	if (group != 0)
+		FAIL("group open");
+	if (state != none)
+		FAIL("incomplete");
+	if (!have_atom)
+		FAIL("no atom");
+	return (sub);
+
+ error:
+#if VALREGEX_REPORT_REASON
+	fprintf(stderr, "%s\n", reason);
+#endif
+	return (-1);
+}