/*++ /* NAME /* ip_match 3 /* SUMMARY /* IP address pattern matching /* SYNOPSIS /* #include /* /* char *ip_match_parse(byte_codes, pattern) /* VSTRING *byte_codes; /* char *pattern; /* /* char *ip_match_save(byte_codes) /* const VSTRING *byte_codes; /* /* int ip_match_execute(byte_codes, addr_bytes) /* cost char *byte_codes; /* const char *addr_bytes; /* /* char *ip_match_dump(printable, byte_codes) /* VSTRING *printable; /* const char *byte_codes; /* DESCRIPTION /* This module supports IP address pattern matching. See below /* for a description of the supported address pattern syntax. /* /* This implementation aims to minimize the cost of encoding /* the pattern in internal form, while still providing good /* matching performance in the typical case. The first byte /* of an encoded pattern specifies the expected address family /* (for example, AF_INET); other details of the encoding are /* private and are subject to change. /* /* ip_match_parse() converts the user-specified pattern to /* internal form. The result value is a null pointer in case /* of success, or a pointer into the byte_codes buffer with a /* detailed problem description. /* /* ip_match_save() saves the result from ip_match_parse() for /* longer-term usage. The result should be passed to myfree(). /* /* ip_match_execute() matches a binary network in addr_bytes /* against a byte-code array in byte_codes. It is an error to /* use different address families for the byte_codes and addr_bytes /* arguments (the first byte-code value contains the expected /* address family). The result is non-zero in case of success. /* /* ip_match_dump() produces an ASCII dump of a byte-code array. /* The dump is supposed to be identical to the input pattern /* modulo upper/lower case or leading nulls with IPv6). This /* function is primarily a debugging aid. /* /* Arguments /* .IP addr_bytes /* Binary network address in network-byte order. /* .IP byte_codes /* Byte-code array produced by ip_match_parse(). /* .IP pattern /* Human-readable address pattern. /* .IP printable /* storage for ASCII dump of a byte-code array. /* IPV4 PATTERN SYNTAX /* .ad /* .fi /* An IPv4 address pattern has four fields separated by ".". /* Each field is either a decimal number, or a sequence inside /* "[]" that contains one or more ";"-separated decimal /* numbers or number..number ranges. /* /* Examples of patterns are 1.2.3.4 (matches itself, as one /* would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4, /* 1.2.3.6, 1.2.3.7, 1.2.3.8). /* /* Thus, any pattern field can be a sequence inside "[]", but /* a "[]" sequence cannot span multiple address fields, and /* a pattern field cannot contain both a number and a "[]" /* sequence at the same time. /* /* This means that the pattern 1.2.[3.4] is not valid (the /* sequence [3.4] cannot span two address fields) and the /* pattern 1.2.3.3[6..9] is also not valid (the last field /* cannot be both number 3 and sequence [6..9] at the same /* time). /* /* The syntax for IPv4 patterns is as follows: /* /* .in +5 /* v4pattern = v4field "." v4field "." v4field "." v4field /* .br /* v4field = v4octet | "[" v4sequence "]" /* .br /* v4octet = any decimal number in the range 0 through 255 /* .br /* v4sequence = v4seq_member | v4sequence ";" v4seq_member /* .br /* v4seq_member = v4octet | v4octet ".." v4octet /* .in /* LICENSE /* .ad /* .fi /* The Secure Mailer license must be distributed with this /* software. /* AUTHOR(S) /* Wietse Venema /* IBM T.J. Watson Research /* P.O. Box 704 /* Yorktown Heights, NY 10598, USA /*--*/ /* System library. */ #include #include #include #include /* Utility library. */ #include #include #include #include /* * Token values. The in-band values are also used as byte-code values. */ #define IP_MATCH_CODE_OPEN '[' /* in-band */ #define IP_MATCH_CODE_CLOSE ']' /* in-band */ #define IP_MATCH_CODE_OVAL 'N' /* in-band */ #define IP_MATCH_CODE_RANGE 'R' /* in-band */ #define IP_MATCH_CODE_EOF '\0' /* in-band */ #define IP_MATCH_CODE_ERR 256 /* out-of-band */ /* * SLMs. */ #define STR vstring_str #define LEN VSTRING_LEN /* ip_match_save - make longer-term copy of byte code */ char *ip_match_save(const VSTRING *byte_codes) { char *dst; dst = mymalloc(LEN(byte_codes)); return (memcpy(dst, STR(byte_codes), LEN(byte_codes))); } /* ip_match_dump - byte-code pretty printer */ char *ip_match_dump(VSTRING *printable, const char *byte_codes) { const char *myname = "ip_match_dump"; const unsigned char *bp; int octet_count = 0; int ch; /* * Sanity check. Use different dumping loops for AF_INET and AF_INET6. */ if (*byte_codes != AF_INET) msg_panic("%s: malformed byte-code header", myname); /* * Pretty-print and sanity-check the byte codes. Note: the loops in this * code have no auto-increment at the end of the iteration. Instead, each * byte-code handler bumps the byte-code pointer appropriately. */ VSTRING_RESET(printable); bp = (const unsigned char *) byte_codes + 1; for (;;) { /* * Simple numeric field. */ if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { vstring_sprintf_append(printable, "%d", *bp); bp += 1; } /* * Wild-card numeric field. */ else if (ch == IP_MATCH_CODE_OPEN) { vstring_sprintf_append(printable, "["); for (;;) { /* Numeric range. */ if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]); bp += 2; } /* Number. */ else if (ch == IP_MATCH_CODE_OVAL) { vstring_sprintf_append(printable, "%d", *bp); bp += 1; } /* End-of-wildcard. */ else if (ch == IP_MATCH_CODE_CLOSE) { break; } /* Corruption. */ else { msg_panic("%s: unexpected byte code (decimal %d) " "after \"%s\"", myname, ch, STR(printable)); } /* Output the wild-card field separator and repeat the loop. */ if (*bp != IP_MATCH_CODE_CLOSE) vstring_sprintf_append(printable, ";"); } vstring_sprintf_append(printable, "]"); } /* * Corruption. */ else { msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", myname, ch, STR(printable)); } /* * Require four octets, not one more, not one less. */ if (++octet_count == 4) { if (*bp != 0) msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", myname, ch, STR(printable)); return (STR(printable)); } if (*bp == 0) msg_panic("%s: truncated byte code after \"%s\"", myname, STR(printable)); /* * Output the address field separator and repeat the loop. */ vstring_sprintf_append(printable, "."); } } /* ip_match_print_code_prefix - printable byte-code prefix */ static char *ip_match_print_code_prefix(const char *byte_codes, size_t len) { static VSTRING *printable = 0; const char *fmt; const char *bp; /* * This is primarily for emergency debugging so we don't care about * non-reentrancy. */ if (printable == 0) printable = vstring_alloc(100); else VSTRING_RESET(printable); /* * Use decimal for IPv4 and hexadecimal otherwise, so that address octet * values are easy to recognize. */ fmt = (*byte_codes == AF_INET ? "%d " : "%02x "); for (bp = byte_codes; bp < byte_codes + len; bp++) vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp); return (STR(printable)); } /* ip_match_execute - byte-code matching engine */ int ip_match_execute(const char *byte_codes, const char *addr_bytes) { const char *myname = "ip_match_execute"; const unsigned char *bp; const unsigned char *ap; int octet_count = 0; int ch; int matched; /* * Sanity check. Use different execute loops for AF_INET and AF_INET6. */ if (*byte_codes != AF_INET) msg_panic("%s: malformed byte-code header (decimal %d)", myname, *(const unsigned char *) byte_codes); /* * Match the address bytes against the byte codes. Avoid problems with * (char -> int) sign extension on architectures with signed characters. */ bp = (const unsigned char *) byte_codes + 1; ap = (const unsigned char *) addr_bytes; for (octet_count = 0; octet_count < 4; octet_count++, ap++) { /* * Simple numeric field. */ if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { if (*ap == *bp) bp += 1; else return (0); } /* * Wild-card numeric field. */ else if (ch == IP_MATCH_CODE_OPEN) { matched = 0; for (;;) { /* Numeric range. */ if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { if (!matched) matched = (*ap >= bp[0] && *ap <= bp[1]); bp += 2; } /* Number. */ else if (ch == IP_MATCH_CODE_OVAL) { if (!matched) matched = (*ap == *bp); bp += 1; } /* End-of-wildcard. */ else if (ch == IP_MATCH_CODE_CLOSE) { break; } /* Corruption. */ else { size_t len = (const char *) bp - byte_codes - 1; msg_panic("%s: unexpected byte code (decimal %d) " "after \"%s\"", myname, ch, ip_match_print_code_prefix(byte_codes, len)); } } if (matched == 0) return (0); } /* * Corruption. */ else { size_t len = (const char *) bp - byte_codes - 1; msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", myname, ch, ip_match_print_code_prefix(byte_codes, len)); } } return (1); } /* ip_match_next_token - carve out the next token from input pattern */ static int ip_match_next_token(char **pstart, char **psaved_start, int *poval) { unsigned char *cp; int oval; /* octet value */ int type; /* token value */ /* * Return a literal, error, or EOF token. Update the read pointer to the * start of the next token or leave it at the string terminator. */ #define IP_MATCH_RETURN_TOK(next, type) \ do { *pstart = (char *) (next); return (type); } while (0) /* * Return a token that contains an IPv4 address octet value. */ #define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \ *poval = (oval); IP_MATCH_RETURN_TOK((next), type); \ } while (0) /* * Light-weight tokenizer. Each result is an IPv4 address octet value, a * literal character value, error, or EOF. */ *psaved_start = *pstart; cp = (unsigned char *) *pstart; if (ISDIGIT(*cp)) { oval = *cp - '0'; type = IP_MATCH_CODE_OVAL; for (cp += 1; ISDIGIT(*cp); cp++) { oval *= 10; oval += *cp - '0'; if (oval > 255) type = IP_MATCH_CODE_ERR; } IP_MATCH_RETURN_TOK_VAL(cp, type, oval); } else { IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp); } } /* ipmatch_print_parse_error - formatted parsing error, with context */ static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply, char *start, char *here, char *next, const char *fmt,...) { va_list ap; int start_width; int here_width; /* * Format the error type. */ va_start(ap, fmt); vstring_vsprintf(reply, fmt, ap); va_end(ap); /* * Format the error context. The syntax is complex enough that it is * worth the effort to precisely indicate what input is in error. * * XXX Workaround for %.*s to avoid output when a zero width is specified. */ if (start != 0) { start_width = here - start; here_width = next - here; vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"", start_width, start_width == 0 ? "" : start, here_width, here_width == 0 ? "" : here, next); } } /* ip_match_parse - parse an entire wild-card address pattern */ char *ip_match_parse(VSTRING *byte_codes, char *pattern) { int octet_count; char *saved_cp; char *cp; int token_type; int look_ahead; int oval; int saved_oval; /* * Simplify this if we change to {} for wildcard notation. */ #define FIND_TERMINATOR(start, cp) do { \ int _level = 0; \ for (cp = (start) ; *cp; cp++) { \ if (*cp == '[') _level++; \ if (*cp != ']') continue; \ if (--_level == 0) break; \ } \ } while (0) /* * Strip [] around the entire pattern. */ if (*pattern == '[') { FIND_TERMINATOR(pattern, cp); if (cp[0] == 0) { vstring_sprintf(byte_codes, "missing \"]\" character"); return (STR(byte_codes)); } if (cp[1] == 0) { *cp = 0; pattern += 1; } } /* * Sanity check. In this case we can't show any error context. */ if (*pattern == 0) { vstring_sprintf(byte_codes, "empty address pattern"); return (STR(byte_codes)); } /* * Simple parser with on-the-fly encoding. For now, IPv4 support only. * Use different parser loops for IPv4 and IPv6. */ VSTRING_RESET(byte_codes); VSTRING_ADDCH(byte_codes, AF_INET); octet_count = 0; cp = pattern; /* * Require four address fields separated by ".", each field containing a * numeric octet value or a sequence inside []. The loop head has no test * and does not step the loop variable. The tokenizer advances the loop * variable, and the loop termination logic is inside the loop. */ for (;;) { switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) { /* * Numeric address field. */ case IP_MATCH_CODE_OVAL: VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); VSTRING_ADDCH(byte_codes, oval); break; /* * Wild-card address field. */ case IP_MATCH_CODE_OPEN: VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN); /* Require ";"-separated numbers or numeric ranges. */ for (;;) { token_type = ip_match_next_token(&cp, &saved_cp, &oval); if (token_type == IP_MATCH_CODE_OVAL) { saved_oval = oval; look_ahead = ip_match_next_token(&cp, &saved_cp, &oval); /* Numeric range. */ if (look_ahead == '.') { /* Brute-force parsing. */ if (ip_match_next_token(&cp, &saved_cp, &oval) == '.' && ip_match_next_token(&cp, &saved_cp, &oval) == IP_MATCH_CODE_OVAL && saved_oval <= oval) { VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE); VSTRING_ADDCH(byte_codes, saved_oval); VSTRING_ADDCH(byte_codes, oval); look_ahead = ip_match_next_token(&cp, &saved_cp, &oval); } else { ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, "numeric range error"); return (STR(byte_codes)); } } /* Single number. */ else { VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); VSTRING_ADDCH(byte_codes, saved_oval); } /* Require ";" or end-of-wildcard. */ token_type = look_ahead; if (token_type == ';') { continue; } else if (token_type == IP_MATCH_CODE_CLOSE) { break; } else { ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, "need \";\" or \"%c\"", IP_MATCH_CODE_CLOSE); return (STR(byte_codes)); } } else { ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, "need decimal number 0..255"); return (STR(byte_codes)); } } VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE); break; /* * Invalid field. */ default: ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, "need decimal number 0..255 or \"%c\"", IP_MATCH_CODE_OPEN); return (STR(byte_codes)); } octet_count += 1; /* * Require four address fields. Not one more, not one less. */ if (octet_count == 4) { if (*cp != 0) { (void) ip_match_next_token(&cp, &saved_cp, &oval); ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, "garbage after pattern"); return (STR(byte_codes)); } VSTRING_ADDCH(byte_codes, 0); return (0); } /* * Require "." before the next address field. */ if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') { ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, "need \".\""); return (STR(byte_codes)); } } } #ifdef TEST /* * Dummy main program for regression tests. */ #include #include #include #include #include #include #include #include int main(int argc, char **argv) { VSTRING *byte_codes = vstring_alloc(100); VSTRING *line_buf = vstring_alloc(100); char *bufp; char *err; char *user_pattern; char *user_address; int echo_input = !isatty(0); /* * Iterate over the input stream. The input format is a pattern, followed * by optional addresses to match against. */ while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) { bufp = STR(line_buf); if (echo_input) { vstream_printf("> %s\n", bufp); vstream_fflush(VSTREAM_OUT); } if (*bufp == '#') continue; if ((user_pattern = mystrtok(&bufp, " \t")) == 0) continue; /* * Parse and dump the pattern. */ if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) { vstream_printf("Error: %s\n", err); } else { vstream_printf("Code: %s\n", ip_match_dump(line_buf, STR(byte_codes))); } vstream_fflush(VSTREAM_OUT); /* * Match the optional patterns. */ while ((user_address = mystrtok(&bufp, " \t")) != 0) { struct in_addr netw_addr; switch (inet_pton(AF_INET, user_address, &netw_addr)) { case 1: vstream_printf("Match %s: %s\n", user_address, ip_match_execute(STR(byte_codes), (char *) &netw_addr.s_addr) ? "yes" : "no"); break; case 0: vstream_printf("bad address syntax: %s\n", user_address); break; case -1: vstream_printf("%s: %m\n", user_address); break; } vstream_fflush(VSTREAM_OUT); } } vstring_free(line_buf); vstring_free(byte_codes); exit(0); } #endif