1
0
Fork 0
bind9/lib/isc/regex.c
Daniel Baumann f66ff7eae6
Adding upstream version 1:9.20.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-21 13:32:37 +02:00

435 lines
7.5 KiB
C

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <stdbool.h>
#include <isc/file.h>
#include <isc/regex.h>
#include <isc/string.h>
#if VALREGEX_REPORT_REASON
/*
 * Remember why validation failed and jump to the common error exit,
 * where the reason is printed to stderr.
 */
#define FAIL(x)               \
	do {                  \
		reason = (x); \
		goto error;   \
	} while (0)
#else /* if VALREGEX_REPORT_REASON */
/* Reasons are not reported in this build: just jump to the error exit. */
#define FAIL(x) goto error
#endif /* if VALREGEX_REPORT_REASON */

/*
 * Validate a regular expression as interpreted in the 'C' locale.
 *
 * The expression is scanned once, left to right, by a small state machine
 * that tracks whether the scanner is at the top level, inside a bracket
 * expression, inside a '{m,n}' bound, or inside one of the bracket
 * sub-forms '[.ce.]', '[=ec=]' and '[:cc:]'.
 *
 * Returns the number of '(' groups seen when the expression is accepted,
 * or -1 when 'c' is NULL, empty, or rejected.  When the file is built with
 * VALREGEX_REPORT_REASON set, the reason for a rejection is written to
 * stderr.
 */
int
isc_regex_validate(const char *c) {
	enum {
		none,	       /* top level of the expression */
		parse_bracket, /* inside '[' ... ']' */
		parse_bound,   /* inside '{' ... '}' */
		parse_ce,      /* inside '[.' ... '.]' */
		parse_ec,      /* inside '[=' ... '=]' */
		parse_cc       /* inside '[:' ... ':]' */
	} state = none;
	/* Well known character classes. */
	const char *cc[] = { ":alnum:", ":digit:", ":punct:", ":alpha:",
			     ":graph:", ":space:", ":blank:", ":lower:",
			     ":upper:", ":cntrl:", ":print:", ":xdigit:" };
	bool seen_comma = false; /* bound: ',' already consumed */
	bool seen_high = false;	 /* bound: an upper limit digit was seen */
	bool seen_char = false;	 /* bracket: at least one member was seen */
	bool seen_ec = false;	 /* equivalence class has content */
	bool seen_ce = false;	 /* collating element has content */
	bool have_atom = false;	 /* something repeatable/usable precedes */
	int group = 0;		 /* currently open '(' groups */
	int range = 0;		 /* 2: just saw '-', 1: range end consumed */
	int sub = 0;		 /* total '(' seen so far */
	bool empty_ok = false;	 /* ')' may directly follow the '(' */
	bool neg = false;	 /* bracket started with '^' */
	bool was_multiple = false; /* previous item cannot be repeated */
	unsigned int low = 0;
	unsigned int high = 0;
	const char *ccname = NULL; /* start of ":name:" inside '[: :]' */
	int range_start = 0; /* last range endpoint, 256 = never a valid start */
#if VALREGEX_REPORT_REASON
	const char *reason = "";
#endif /* if VALREGEX_REPORT_REASON */

	if (c == NULL || *c == 0) {
		FAIL("empty string");
	}

	while (*c != 0) {
		switch (state) {
		case none:
			switch (*c) {
			case '\\': /* make literal */
				++c;
				switch (*c) {
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					/*
					 * A back reference may only name a
					 * group that has already been opened.
					 */
					if ((*c - '0') > sub) {
						FAIL("bad back reference");
					}
					have_atom = true;
					was_multiple = false;
					break;
				case 0:
					FAIL("escaped end-of-string");
				default:
					goto literal;
				}
				++c;
				break;
			case '[': /* bracket start */
				++c;
				neg = false;
				was_multiple = false;
				seen_char = false;
				state = parse_bracket;
				break;
			case '{': /* bound start */
				switch (c[1]) {
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9':
					if (!have_atom) {
						FAIL("no atom");
					}
					if (was_multiple) {
						FAIL("was multiple");
					}
					seen_comma = false;
					seen_high = false;
					low = high = 0;
					state = parse_bound;
					break;
				default:
					/* '{' not followed by a digit. */
					goto literal;
				}
				++c;
				have_atom = true;
				was_multiple = true;
				break;
			case '}':
				goto literal;
			case '(': /* group start */
				have_atom = false;
				was_multiple = false;
				empty_ok = true;
				++group;
				++sub;
				++c;
				break;
			case ')': /* group end */
				if (group && !have_atom && !empty_ok) {
					FAIL("empty alternative");
				}
				have_atom = true;
				was_multiple = false;
				/* An unmatched ')' is not counted. */
				if (group != 0) {
					--group;
				}
				++c;
				break;
			case '|': /* alternative separator */
				if (!have_atom) {
					FAIL("no atom");
				}
				have_atom = false;
				empty_ok = false;
				was_multiple = false;
				++c;
				break;
			case '^':
			case '$':
				/*
				 * Anchors count as atoms, but setting
				 * was_multiple makes a directly following
				 * repetition operator fail.
				 */
				have_atom = true;
				was_multiple = true;
				++c;
				break;
			case '+':
			case '*':
			case '?':
				if (was_multiple) {
					FAIL("was multiple");
				}
				if (!have_atom) {
					FAIL("no atom");
				}
				have_atom = true;
				was_multiple = true;
				++c;
				break;
			case '.':
			default:
			literal:
				have_atom = true;
				was_multiple = false;
				++c;
				break;
			}
			break;
		case parse_bound:
			switch (*c) {
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				/* Neither limit may exceed 255. */
				if (!seen_comma) {
					low = low * 10 + *c - '0';
					if (low > 255) {
						FAIL("lower bound too big");
					}
				} else {
					seen_high = true;
					high = high * 10 + *c - '0';
					if (high > 255) {
						FAIL("upper bound too big");
					}
				}
				++c;
				break;
			case ',':
				if (seen_comma) {
					FAIL("multiple commas");
				}
				seen_comma = true;
				++c;
				break;
			default:
			case '{':
				FAIL("non digit/comma");
			case '}':
				if (seen_high && low > high) {
					FAIL("bad parse bound");
				}
				seen_comma = false;
				state = none;
				++c;
				break;
			}
			break;
		case parse_bracket:
			switch (*c) {
			case '^':
				/* Only a leading '^' negates the set. */
				if (seen_char || neg) {
					goto inside;
				}
				neg = true;
				++c;
				break;
			case '-':
				/* '-' as a range end or first member is literal. */
				if (range == 2) {
					goto inside;
				}
				if (!seen_char) {
					goto inside;
				}
				/* "a-b-c": the end of a range cannot start one. */
				if (range == 1) {
					FAIL("bad range");
				}
				range = 2;
				++c;
				break;
			case '[':
				++c;
				switch (*c) {
				case '.': /* collating element */
					if (range != 0) {
						--range;
					}
					++c;
					state = parse_ce;
					seen_ce = false;
					break;
				case '=': /* equivalence class */
					if (range == 2) {
						FAIL("equivalence class in "
						     "range");
					}
					++c;
					state = parse_ec;
					seen_ec = false;
					break;
				case ':': /* character class */
					if (range == 2) {
						FAIL("character class in "
						     "range");
					}
					ccname = c;
					++c;
					state = parse_cc;
					break;
				default:
					/*
					 * A plain '[' is an ordinary member.
					 * Give it the same range bookkeeping
					 * as any other character so that a
					 * later "-x" is not compared against
					 * a stale range_start; *c is left for
					 * the next iteration.
					 */
					if (range == 2 && '[' < range_start) {
						FAIL("out of order range");
					}
					if (range != 0) {
						--range;
					}
					range_start = '[';
					break;
				}
				seen_char = true;
				break;
			case ']':
				if (!c[1] && !seen_char) {
					FAIL("unfinished brace");
				}
				/* A leading ']' is a literal member. */
				if (!seen_char) {
					goto inside;
				}
				++c;
				range = 0;
				have_atom = true;
				state = none;
				break;
			default:
			inside:
				seen_char = true;
				if (range == 2 && (*c & 0xff) < range_start) {
					FAIL("out of order range");
				}
				if (range != 0) {
					--range;
				}
				range_start = *c & 0xff;
				++c;
				break;
			}
			break;
		case parse_ce:
			switch (*c) {
			case '.':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ce) {
						FAIL("empty ce");
					}
					++c;
					state = parse_bracket;
					break;
				default:
					/*
					 * The '.' was part of the element.  A
					 * multi-character element gets 256 so
					 * that any range it starts is rejected.
					 */
					if (seen_ce) {
						range_start = 256;
					} else {
						range_start = '.';
					}
					seen_ce = true;
					break;
				}
				break;
			default:
				if (seen_ce) {
					range_start = 256;
				} else {
					/*
					 * Mask to an unsigned byte value, as
					 * is done for ordinary bracket
					 * members, so the result does not
					 * depend on the signedness of char.
					 */
					range_start = *c & 0xff;
				}
				seen_ce = true;
				++c;
				break;
			}
			break;
		case parse_ec:
			switch (*c) {
			case '=':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ec) {
						FAIL("no ec");
					}
					++c;
					state = parse_bracket;
					break;
				default:
					/* The '=' was part of the class. */
					seen_ec = true;
					break;
				}
				break;
			default:
				seen_ec = true;
				++c;
				break;
			}
			break;
		case parse_cc:
			switch (*c) {
			case ':':
				++c;
				switch (*c) {
				case ']': {
					/*
					 * ccname points at the opening ':' and
					 * c at the closing ']', so the span is
					 * the ":name:" form stored in cc[].
					 */
					size_t namelen = (size_t)(c - ccname);
					size_t i;
					bool found = false;

					for (i = 0;
					     i < sizeof(cc) / sizeof(*cc); i++)
					{
						if (strlen(cc[i]) != namelen) {
							continue;
						}
						if (strncmp(cc[i], ccname,
							    namelen) != 0)
						{
							continue;
						}
						found = true;
						break;
					}
					if (!found) {
						FAIL("unknown cc");
					}
					++c;
					state = parse_bracket;
					break;
				}
				default:
					break;
				}
				break;
			default:
				++c;
				break;
			}
			break;
		}
	}

	if (group != 0) {
		FAIL("group open");
	}
	if (state != none) {
		FAIL("incomplete");
	}
	if (!have_atom) {
		FAIL("no atom");
	}
	return sub;

error:
#if VALREGEX_REPORT_REASON
	fprintf(stderr, "%s\n", reason);
#endif /* if VALREGEX_REPORT_REASON */
	return -1;
}