diff options
Diffstat (limited to 'src/util/dict_regexp.c')
-rw-r--r-- | src/util/dict_regexp.c | 873 |
1 files changed, 873 insertions, 0 deletions
diff --git a/src/util/dict_regexp.c b/src/util/dict_regexp.c new file mode 100644 index 0000000..81cc6dd --- /dev/null +++ b/src/util/dict_regexp.c @@ -0,0 +1,873 @@ +/*++ +/* NAME +/* dict_regexp 3 +/* SUMMARY +/* dictionary manager interface to REGEXP regular expression library +/* SYNOPSIS +/* #include <dict_regexp.h> +/* +/* DICT *dict_regexp_open(name, dummy, dict_flags) +/* const char *name; +/* int dummy; +/* int dict_flags; +/* DESCRIPTION +/* dict_regexp_open() opens the named file and compiles the contained +/* regular expressions. The result object can be used to match strings +/* against the table. +/* SEE ALSO +/* dict(3) generic dictionary manager +/* regexp_table(5) format of Postfix regular expression tables +/* AUTHOR(S) +/* LaMont Jones +/* lamont@hp.com +/* +/* Based on PCRE dictionary contributed by Andrew McNamara +/* andrewm@connect.com.au +/* connect.com.au Pty. Ltd. +/* Level 3, 213 Miller St +/* North Sydney, NSW, Australia +/* +/* Heavily rewritten by Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/*--*/ + +/* System library. */ + +#include "sys_defs.h" + +#ifdef HAS_POSIX_REGEXP + +#include <sys/stat.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <ctype.h> +#include <regex.h> +#ifdef STRCASECMP_IN_STRINGS_H +#include <strings.h> +#endif + +/* Utility library. */ + +#include "mymalloc.h" +#include "msg.h" +#include "safe.h" +#include "vstream.h" +#include "vstring.h" +#include "stringops.h" +#include "readlline.h" +#include "dict.h" +#include "dict_regexp.h" +#include "mac_parse.h" +#include "warn_stat.h" +#include "mvect.h" + + /* + * Support for IF/ENDIF based on an idea by Bert Driehuis. + */ +#define DICT_REGEXP_OP_MATCH 1 /* Match this regexp */ +#define DICT_REGEXP_OP_IF 2 /* Increase if/endif nesting on match */ +#define DICT_REGEXP_OP_ENDIF 3 /* Decrease if/endif nesting on match */ + + /* + * Regular expression before compiling. + */ +typedef struct { + char *regexp; /* regular expression */ + int options; /* regcomp() options */ + int match; /* positive or negative match */ +} DICT_REGEXP_PATTERN; + + /* + * Compiled generic rule, and subclasses that derive from it. + */ +typedef struct DICT_REGEXP_RULE { + int op; /* DICT_REGEXP_OP_MATCH/IF/ENDIF */ + int lineno; /* source file line number */ + struct DICT_REGEXP_RULE *next; /* next rule in dict */ +} DICT_REGEXP_RULE; + +typedef struct { + DICT_REGEXP_RULE rule; /* generic part */ + regex_t *first_exp; /* compiled primary pattern */ + int first_match; /* positive or negative match */ + regex_t *second_exp; /* compiled secondary pattern */ + int second_match; /* positive or negative match */ + char *replacement; /* replacement text */ + size_t max_sub; /* largest $number in replacement */ +} DICT_REGEXP_MATCH_RULE; + +typedef struct { + DICT_REGEXP_RULE rule; /* generic members */ + regex_t *expr; /* the condition */ + int match; /* positive or negative match */ + struct DICT_REGEXP_RULE *endif_rule;/* matching endif rule */ +} DICT_REGEXP_IF_RULE; + + /* + * Regexp map. + */ +typedef struct { + DICT dict; /* generic members */ + regmatch_t *pmatch; /* matched substring info */ + DICT_REGEXP_RULE *head; /* first rule */ + VSTRING *expansion_buf; /* lookup result */ +} DICT_REGEXP; + + /* + * Macros to make dense code more readable. + */ +#define NULL_SUBSTITUTIONS (0) +#define NULL_MATCH_RESULT ((regmatch_t *) 0) + + /* + * Context for $number expansion callback. + */ +typedef struct { + DICT_REGEXP *dict_regexp; /* the dictionary handle */ + DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */ + const char *lookup_string; /* matched text */ +} DICT_REGEXP_EXPAND_CONTEXT; + + /* + * Context for $number pre-scan callback. + */ +typedef struct { + const char *mapname; /* name of regexp map */ + int lineno; /* where in file */ + size_t max_sub; /* largest $number seen */ + char *literal; /* constant result, $$ -> $ */ +} DICT_REGEXP_PRESCAN_CONTEXT; + + /* + * Compatibility. + */ +#ifndef MAC_PARSE_OK +#define MAC_PARSE_OK 0 +#endif + +/* dict_regexp_expand - replace $number with substring from matched text */ + +static int dict_regexp_expand(int type, VSTRING *buf, void *ptr) +{ + DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr; + DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule; + DICT_REGEXP *dict_regexp = ctxt->dict_regexp; + regmatch_t *pmatch; + size_t n; + + /* + * Replace $number by the corresponding substring from the matched text. + * We pre-scanned the replacement text at compile time, so any out of + * range $number means that something impossible has happened. + */ + if (type == MAC_PARSE_VARNAME) { + n = atoi(vstring_str(buf)); + if (n < 1 || n > match_rule->max_sub) + msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"", + dict_regexp->dict.name, match_rule->rule.lineno, + vstring_str(buf)); + pmatch = dict_regexp->pmatch + n; + if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo) + return (MAC_PARSE_UNDEF); /* empty or not matched */ + vstring_strncat(dict_regexp->expansion_buf, + ctxt->lookup_string + pmatch->rm_so, + pmatch->rm_eo - pmatch->rm_so); + return (MAC_PARSE_OK); + } + + /* + * Straight text - duplicate with no substitution. + */ + else { + vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf)); + return (MAC_PARSE_OK); + } +} + +/* dict_regexp_regerror - report regexp compile/execute error */ + +static void dict_regexp_regerror(const char *mapname, int lineno, int error, + const regex_t *expr) +{ + char errbuf[256]; + + (void) regerror(error, expr, errbuf, sizeof(errbuf)); + msg_warn("regexp map %s, line %d: %s", mapname, lineno, errbuf); +} + + /* + * Inlined to reduce function call overhead in the time-critical loop. + */ +#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \ + ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \ + ((err) == REG_NOMATCH ? !(match) : \ + (err) == 0 ? (match) : \ + (dict_regexp_regerror((map), (line), (err), (expr)), 0))) + +/* dict_regexp_lookup - match string and perform optional substitution */ + +static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string) +{ + DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; + DICT_REGEXP_RULE *rule; + DICT_REGEXP_IF_RULE *if_rule; + DICT_REGEXP_MATCH_RULE *match_rule; + DICT_REGEXP_EXPAND_CONTEXT expand_context; + int error; + + dict->error = 0; + + if (msg_verbose) + msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string); + + /* + * Optionally fold the key. + */ + if (dict->flags & DICT_FLAG_FOLD_MUL) { + if (dict->fold_buf == 0) + dict->fold_buf = vstring_alloc(10); + vstring_strcpy(dict->fold_buf, lookup_string); + lookup_string = lowercase(vstring_str(dict->fold_buf)); + } + for (rule = dict_regexp->head; rule; rule = rule->next) { + + switch (rule->op) { + + /* + * Search for the first matching primary expression. Limit the + * overhead for substring substitution to the bare minimum. + */ + case DICT_REGEXP_OP_MATCH: + match_rule = (DICT_REGEXP_MATCH_RULE *) rule; + if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, + match_rule->first_exp, + match_rule->first_match, + lookup_string, + match_rule->max_sub > 0 ? + match_rule->max_sub + 1 : 0, + dict_regexp->pmatch)) + continue; + if (match_rule->second_exp + && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, + match_rule->second_exp, + match_rule->second_match, + lookup_string, + NULL_SUBSTITUTIONS, + NULL_MATCH_RESULT)) + continue; + + /* + * Skip $number substitutions when the replacement text contains + * no $number strings, as learned during the compile time + * pre-scan. The pre-scan already replaced $$ by $. + */ + if (match_rule->max_sub == 0) + return (match_rule->replacement); + + /* + * Perform $number substitutions on the replacement text. We + * pre-scanned the replacement text at compile time. Any macro + * expansion errors at this point mean something impossible has + * happened. + */ + if (!dict_regexp->expansion_buf) + dict_regexp->expansion_buf = vstring_alloc(10); + VSTRING_RESET(dict_regexp->expansion_buf); + expand_context.lookup_string = lookup_string; + expand_context.match_rule = match_rule; + expand_context.dict_regexp = dict_regexp; + + if (mac_parse(match_rule->replacement, dict_regexp_expand, + (void *) &expand_context) & MAC_PARSE_ERROR) + msg_panic("regexp map %s, line %d: bad replacement syntax", + dict->name, rule->lineno); + VSTRING_TERMINATE(dict_regexp->expansion_buf); + return (vstring_str(dict_regexp->expansion_buf)); + + /* + * Conditional. + */ + case DICT_REGEXP_OP_IF: + if_rule = (DICT_REGEXP_IF_RULE *) rule; + if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, + if_rule->expr, if_rule->match, lookup_string, + NULL_SUBSTITUTIONS, NULL_MATCH_RESULT)) + continue; + /* An IF without matching ENDIF has no "endif" rule. */ + if ((rule = if_rule->endif_rule) == 0) + return (0); + /* FALLTHROUGH */ + + /* + * ENDIF after IF. + */ + case DICT_REGEXP_OP_ENDIF: + continue; + + default: + msg_panic("dict_regexp_lookup: impossible operation %d", rule->op); + } + } + return (0); +} + +/* dict_regexp_close - close regexp dictionary */ + +static void dict_regexp_close(DICT *dict) +{ + DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; + DICT_REGEXP_RULE *rule; + DICT_REGEXP_RULE *next; + DICT_REGEXP_MATCH_RULE *match_rule; + DICT_REGEXP_IF_RULE *if_rule; + + for (rule = dict_regexp->head; rule; rule = next) { + next = rule->next; + switch (rule->op) { + case DICT_REGEXP_OP_MATCH: + match_rule = (DICT_REGEXP_MATCH_RULE *) rule; + if (match_rule->first_exp) { + regfree(match_rule->first_exp); + myfree((void *) match_rule->first_exp); + } + if (match_rule->second_exp) { + regfree(match_rule->second_exp); + myfree((void *) match_rule->second_exp); + } + if (match_rule->replacement) + myfree((void *) match_rule->replacement); + break; + case DICT_REGEXP_OP_IF: + if_rule = (DICT_REGEXP_IF_RULE *) rule; + if (if_rule->expr) { + regfree(if_rule->expr); + myfree((void *) if_rule->expr); + } + break; + case DICT_REGEXP_OP_ENDIF: + break; + default: + msg_panic("dict_regexp_close: unknown operation %d", rule->op); + } + myfree((void *) rule); + } + if (dict_regexp->pmatch) + myfree((void *) dict_regexp->pmatch); + if (dict_regexp->expansion_buf) + vstring_free(dict_regexp->expansion_buf); + if (dict->fold_buf) + vstring_free(dict->fold_buf); + dict_free(dict); +} + +/* dict_regexp_get_pat - extract one pattern with options from rule */ + +static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp, + DICT_REGEXP_PATTERN *pat) +{ + char *p = *bufp; + char re_delim; + + /* + * Process negation operators. + */ + pat->match = 1; + for (;;) { + if (*p == '!') + pat->match = !pat->match; + else if (!ISSPACE(*p)) + break; + p++; + } + if (*p == 0) { + msg_warn("regexp map %s, line %d: no regexp: skipping this rule", + mapname, lineno); + return (0); + } + + /* + * Search for the closing delimiter, handling backslash escape. + */ + re_delim = *p++; + pat->regexp = p; + while (*p) { + if (*p == '\\') { + if (p[1]) + p++; + else + break; + } else if (*p == re_delim) { + break; + } + ++p; + } + if (!*p) { + msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": " + "skipping this rule", mapname, lineno, re_delim); + return (0); + } + *p++ = 0; /* null terminate */ + + /* + * Search for options. + */ + pat->options = REG_EXTENDED | REG_ICASE; + while (*p && !ISSPACE(*p) && *p != '!') { + switch (*p) { + case 'i': + pat->options ^= REG_ICASE; + break; + case 'm': + pat->options ^= REG_NEWLINE; + break; + case 'x': + pat->options ^= REG_EXTENDED; + break; + default: + msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": " + "skipping this rule", mapname, lineno, *p); + return (0); + } + ++p; + } + *bufp = p; + return (1); +} + +/* dict_regexp_get_pats - get the primary and second patterns and flags */ + +static int dict_regexp_get_pats(const char *mapname, int lineno, char **p, + DICT_REGEXP_PATTERN *first_pat, + DICT_REGEXP_PATTERN *second_pat) +{ + + /* + * Get the primary and optional secondary patterns and their flags. + */ + if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0) + return (0); + if (**p == '!') { +#if 0 + static int bitrot_warned = 0; + + if (bitrot_warned == 0) { + msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away," + " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead", + mapname, lineno); + bitrot_warned = 1; + } +#endif + if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0) + return (0); + } else { + second_pat->regexp = 0; + } + return (1); +} + +/* dict_regexp_prescan - find largest $number in replacement text */ + +static int dict_regexp_prescan(int type, VSTRING *buf, void *context) +{ + DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context; + size_t n; + + /* + * Keep a copy of literal text (with $$ already replaced by $) if and + * only if the replacement text contains no $number expression. This way + * we can avoid having to scan the replacement text at lookup time. + */ + if (type == MAC_PARSE_VARNAME) { + if (ctxt->literal) { + myfree(ctxt->literal); + ctxt->literal = 0; + } + if (!alldig(vstring_str(buf))) { + msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"", + ctxt->mapname, ctxt->lineno, vstring_str(buf)); + return (MAC_PARSE_ERROR); + } + n = atoi(vstring_str(buf)); + if (n < 1) { + msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"", + ctxt->mapname, ctxt->lineno, vstring_str(buf)); + return (MAC_PARSE_ERROR); + } + if (n > ctxt->max_sub) + ctxt->max_sub = n; + } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { + if (ctxt->literal) + msg_panic("regexp map %s, line %d: multiple literals but no $number", + ctxt->mapname, ctxt->lineno); + ctxt->literal = mystrdup(vstring_str(buf)); + } + return (MAC_PARSE_OK); +} + +/* dict_regexp_compile_pat - compile one pattern */ + +static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno, + DICT_REGEXP_PATTERN *pat) +{ + int error; + regex_t *expr; + + expr = (regex_t *) mymalloc(sizeof(*expr)); + error = regcomp(expr, pat->regexp, pat->options); + if (error != 0) { + dict_regexp_regerror(mapname, lineno, error, expr); + myfree((void *) expr); + return (0); + } + return (expr); +} + +/* dict_regexp_rule_alloc - fill in a generic rule structure */ + +static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int lineno, size_t size) +{ + DICT_REGEXP_RULE *rule; + + rule = (DICT_REGEXP_RULE *) mymalloc(size); + rule->op = op; + rule->lineno = lineno; + rule->next = 0; + + return (rule); +} + +/* dict_regexp_parseline - parse one rule */ + +static DICT_REGEXP_RULE *dict_regexp_parseline(DICT *dict, const char *mapname, + int lineno, char *line, + int nesting) +{ + char *p; + + p = line; + + /* + * An ordinary rule takes one or two patterns and replacement text. + */ + if (!ISALNUM(*p)) { + DICT_REGEXP_PATTERN first_pat; + DICT_REGEXP_PATTERN second_pat; + DICT_REGEXP_PRESCAN_CONTEXT prescan_context; + regex_t *first_exp = 0; + regex_t *second_exp; + DICT_REGEXP_MATCH_RULE *match_rule; + + /* + * Get the primary and the optional secondary patterns. + */ + if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat)) + return (0); + + /* + * Get the replacement text. + */ + while (*p && ISSPACE(*p)) + ++p; + if (!*p) { + msg_warn("regexp map %s, line %d: no replacement text: " + "using empty string", mapname, lineno); + } + + /* + * Find the highest-numbered $number in the replacement text. We can + * speed up pattern matching 1) by passing hints to the regexp + * compiler, setting the REG_NOSUB flag when the replacement text + * contains no $number string; 2) by passing hints to the regexp + * execution code, limiting the amount of text that is made available + * for substitution. + */ + prescan_context.mapname = mapname; + prescan_context.lineno = lineno; + prescan_context.max_sub = 0; + prescan_context.literal = 0; + + /* + * The optimizer will eliminate code duplication and/or dead code. + */ +#define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ + if (first_exp) { \ + regfree(first_exp); \ + myfree((void *) first_exp); \ + } \ + if (prescan_context.literal) \ + myfree(prescan_context.literal); \ + return (rval); \ + } while (0) + + if (dict->flags & DICT_FLAG_SRC_RHS_IS_FILE) { + VSTRING *base64_buf; + char *err; + + if ((base64_buf = dict_file_to_b64(dict, p)) == 0) { + err = dict_file_get_error(dict); + msg_warn("regexp map %s, line %d: %s: skipping this rule", + mapname, lineno, err); + myfree(err); + CREATE_MATCHOP_ERROR_RETURN(0); + } + p = vstring_str(base64_buf); + } + if (mac_parse(p, dict_regexp_prescan, (void *) &prescan_context) + & MAC_PARSE_ERROR) { + msg_warn("regexp map %s, line %d: bad replacement syntax: " + "skipping this rule", mapname, lineno); + CREATE_MATCHOP_ERROR_RETURN(0); + } + + /* + * Compile the primary and the optional secondary pattern. Speed up + * execution when no matched text needs to be substituted into the + * result string, or when the highest numbered substring is less than + * the total number of () subpatterns. + */ + if (prescan_context.max_sub == 0) + first_pat.options |= REG_NOSUB; + if (prescan_context.max_sub > 0 && first_pat.match == 0) { + msg_warn("regexp map %s, line %d: $number found in negative match " + "replacement text: skipping this rule", mapname, lineno); + CREATE_MATCHOP_ERROR_RETURN(0); + } + if (prescan_context.max_sub > 0 && (dict->flags & DICT_FLAG_NO_REGSUB)) { + msg_warn("regexp map %s, line %d: " + "regular expression substitution is not allowed: " + "skipping this rule", mapname, lineno); + CREATE_MATCHOP_ERROR_RETURN(0); + } + if ((first_exp = dict_regexp_compile_pat(mapname, lineno, + &first_pat)) == 0) + CREATE_MATCHOP_ERROR_RETURN(0); + if (prescan_context.max_sub > first_exp->re_nsub) { + msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": " + "skipping this rule", mapname, lineno, + (int) prescan_context.max_sub); + CREATE_MATCHOP_ERROR_RETURN(0); + } + if (second_pat.regexp != 0) { + second_pat.options |= REG_NOSUB; + if ((second_exp = dict_regexp_compile_pat(mapname, lineno, + &second_pat)) == 0) + CREATE_MATCHOP_ERROR_RETURN(0); + } else { + second_exp = 0; + } + match_rule = (DICT_REGEXP_MATCH_RULE *) + dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, lineno, + sizeof(DICT_REGEXP_MATCH_RULE)); + match_rule->first_exp = first_exp; + match_rule->first_match = first_pat.match; + match_rule->max_sub = prescan_context.max_sub; + match_rule->second_exp = second_exp; + match_rule->second_match = second_pat.match; + if (prescan_context.literal) + match_rule->replacement = prescan_context.literal; + else + match_rule->replacement = mystrdup(p); + return ((DICT_REGEXP_RULE *) match_rule); + } + + /* + * The IF operator takes one pattern but no replacement text. + */ + else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { + DICT_REGEXP_PATTERN pattern; + regex_t *expr; + DICT_REGEXP_IF_RULE *if_rule; + + p += 2; + while (*p && ISSPACE(*p)) + p++; + if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern)) + return (0); + while (*p && ISSPACE(*p)) + ++p; + if (*p) { + msg_warn("regexp map %s, line %d: ignoring extra text after" + " IF statement: \"%s\"", mapname, lineno, p); + msg_warn("regexp map %s, line %d: do not prepend whitespace" + " to statements between IF and ENDIF", mapname, lineno); + } + if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0) + return (0); + if_rule = (DICT_REGEXP_IF_RULE *) + dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, lineno, + sizeof(DICT_REGEXP_IF_RULE)); + if_rule->expr = expr; + if_rule->match = pattern.match; + if_rule->endif_rule = 0; + return ((DICT_REGEXP_RULE *) if_rule); + } + + /* + * The ENDIF operator takes no patterns and no replacement text. + */ + else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { + DICT_REGEXP_RULE *rule; + + p += 5; + if (nesting == 0) { + msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF", + mapname, lineno); + return (0); + } + while (*p && ISSPACE(*p)) + ++p; + if (*p) + msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF", + mapname, lineno); + rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, lineno, + sizeof(DICT_REGEXP_RULE)); + return (rule); + } + + /* + * Unrecognized input. + */ + else { + msg_warn("regexp map %s, line %d: ignoring unrecognized request", + mapname, lineno); + return (0); + } +} + +/* dict_regexp_open - load and compile a file containing regular expressions */ + +DICT *dict_regexp_open(const char *mapname, int open_flags, int dict_flags) +{ + const char myname[] = "dict_regexp_open"; + DICT_REGEXP *dict_regexp; + VSTREAM *map_fp = 0; + struct stat st; + VSTRING *line_buffer = 0; + DICT_REGEXP_RULE *rule; + DICT_REGEXP_RULE *last_rule = 0; + int lineno; + int last_line = 0; + size_t max_sub = 0; + int nesting = 0; + char *p; + DICT_REGEXP_RULE **rule_stack = 0; + MVECT mvect; + + /* + * Let the optimizer worry about eliminating redundant code. + */ +#define DICT_REGEXP_OPEN_RETURN(d) do { \ + DICT *__d = (d); \ + if (line_buffer != 0) \ + vstring_free(line_buffer); \ + if (map_fp != 0) \ + vstream_fclose(map_fp); \ + return (__d); \ + } while (0) + + /* + * Sanity checks. + */ + if (open_flags != O_RDONLY) + DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, + mapname, open_flags, dict_flags, + "%s:%s map requires O_RDONLY access mode", + DICT_TYPE_REGEXP, mapname)); + + /* + * Open the configuration file. + */ + if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) + DICT_REGEXP_OPEN_RETURN(dict_surrogate(DICT_TYPE_REGEXP, mapname, + open_flags, dict_flags, + "open %s: %m", mapname)); + if (fstat(vstream_fileno(map_fp), &st) < 0) + msg_fatal("fstat %s: %m", mapname); + + line_buffer = vstring_alloc(100); + + dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname, + sizeof(*dict_regexp)); + dict_regexp->dict.lookup = dict_regexp_lookup; + dict_regexp->dict.close = dict_regexp_close; + dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN; + if (dict_flags & DICT_FLAG_FOLD_MUL) + dict_regexp->dict.fold_buf = vstring_alloc(10); + dict_regexp->head = 0; + dict_regexp->pmatch = 0; + dict_regexp->expansion_buf = 0; + dict_regexp->dict.owner.uid = st.st_uid; + dict_regexp->dict.owner.status = (st.st_uid != 0); + + /* + * Parse the regexp table. + */ + while (readllines(line_buffer, map_fp, &last_line, &lineno)) { + p = vstring_str(line_buffer); + trimblanks(p, 0)[0] = 0; + if (*p == 0) + continue; + rule = dict_regexp_parseline(&dict_regexp->dict, mapname, lineno, + p, nesting); + if (rule == 0) + continue; + if (rule->op == DICT_REGEXP_OP_MATCH) { + if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub) + max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub; + } else if (rule->op == DICT_REGEXP_OP_IF) { + if (rule_stack == 0) + rule_stack = (DICT_REGEXP_RULE **) mvect_alloc(&mvect, + sizeof(*rule_stack), nesting + 1, + (MVECT_FN) 0, (MVECT_FN) 0); + else + rule_stack = + (DICT_REGEXP_RULE **) mvect_realloc(&mvect, nesting + 1); + rule_stack[nesting] = rule; + nesting++; + } else if (rule->op == DICT_REGEXP_OP_ENDIF) { + DICT_REGEXP_IF_RULE *if_rule; + + if (nesting-- <= 0) + /* Already handled in dict_regexp_parseline(). */ + msg_panic("%s: ENDIF without IF", myname); + if (rule_stack[nesting]->op != DICT_REGEXP_OP_IF) + msg_panic("%s: unexpected rule stack element type %d", + myname, rule_stack[nesting]->op); + if_rule = (DICT_REGEXP_IF_RULE *) rule_stack[nesting]; + if_rule->endif_rule = rule; + } + if (last_rule == 0) + dict_regexp->head = rule; + else + last_rule->next = rule; + last_rule = rule; + } + + while (nesting-- > 0) + msg_warn("regexp map %s, line %d: IF has no matching ENDIF", + mapname, rule_stack[nesting]->lineno); + + if (rule_stack) + (void) mvect_free(&mvect); + + /* + * Allocate space for only as many matched substrings as used in the + * replacement text. + */ + if (max_sub > 0) + dict_regexp->pmatch = + (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1)); + + dict_file_purge_buffers(&dict_regexp->dict); + DICT_REGEXP_OPEN_RETURN(DICT_DEBUG (&dict_regexp->dict)); +} + +#endif |