diff options
Diffstat (limited to 'src/util/ip_match.c')
-rw-r--r-- | src/util/ip_match.c | 676 |
1 files changed, 676 insertions, 0 deletions
diff --git a/src/util/ip_match.c b/src/util/ip_match.c new file mode 100644 index 0000000..aeea799 --- /dev/null +++ b/src/util/ip_match.c @@ -0,0 +1,676 @@ +/*++ +/* NAME +/* ip_match 3 +/* SUMMARY +/* IP address pattern matching +/* SYNOPSIS +/* #include <ip_match.h> +/* +/* char *ip_match_parse(byte_codes, pattern) +/* VSTRING *byte_codes; +/* char *pattern; +/* +/* char *ip_match_save(byte_codes) +/* const VSTRING *byte_codes; +/* +/* int ip_match_execute(byte_codes, addr_bytes) +/* cost char *byte_codes; +/* const char *addr_bytes; +/* +/* char *ip_match_dump(printable, byte_codes) +/* VSTRING *printable; +/* const char *byte_codes; +/* DESCRIPTION +/* This module supports IP address pattern matching. See below +/* for a description of the supported address pattern syntax. +/* +/* This implementation aims to minimize the cost of encoding +/* the pattern in internal form, while still providing good +/* matching performance in the typical case. The first byte +/* of an encoded pattern specifies the expected address family +/* (for example, AF_INET); other details of the encoding are +/* private and are subject to change. +/* +/* ip_match_parse() converts the user-specified pattern to +/* internal form. The result value is a null pointer in case +/* of success, or a pointer into the byte_codes buffer with a +/* detailed problem description. +/* +/* ip_match_save() saves the result from ip_match_parse() for +/* longer-term usage. The result should be passed to myfree(). +/* +/* ip_match_execute() matches a binary network in addr_bytes +/* against a byte-code array in byte_codes. It is an error to +/* use different address families for the byte_codes and addr_bytes +/* arguments (the first byte-code value contains the expected +/* address family). The result is non-zero in case of success. +/* +/* ip_match_dump() produces an ASCII dump of a byte-code array. +/* The dump is supposed to be identical to the input pattern +/* modulo upper/lower case or leading nulls with IPv6). This +/* function is primarily a debugging aid. +/* +/* Arguments +/* .IP addr_bytes +/* Binary network address in network-byte order. +/* .IP byte_codes +/* Byte-code array produced by ip_match_parse(). +/* .IP pattern +/* Human-readable address pattern. +/* .IP printable +/* storage for ASCII dump of a byte-code array. +/* IPV4 PATTERN SYNTAX +/* .ad +/* .fi +/* An IPv4 address pattern has four fields separated by ".". +/* Each field is either a decimal number, or a sequence inside +/* "[]" that contains one or more ";"-separated decimal +/* numbers or number..number ranges. +/* +/* Examples of patterns are 1.2.3.4 (matches itself, as one +/* would expect) and 1.2.3.[2,4,6..8] (matches 1.2.3.2, 1.2.3.4, +/* 1.2.3.6, 1.2.3.7, 1.2.3.8). +/* +/* Thus, any pattern field can be a sequence inside "[]", but +/* a "[]" sequence cannot span multiple address fields, and +/* a pattern field cannot contain both a number and a "[]" +/* sequence at the same time. +/* +/* This means that the pattern 1.2.[3.4] is not valid (the +/* sequence [3.4] cannot span two address fields) and the +/* pattern 1.2.3.3[6..9] is also not valid (the last field +/* cannot be both number 3 and sequence [6..9] at the same +/* time). +/* +/* The syntax for IPv4 patterns is as follows: +/* +/* .in +5 +/* v4pattern = v4field "." v4field "." v4field "." v4field +/* .br +/* v4field = v4octet | "[" v4sequence "]" +/* .br +/* v4octet = any decimal number in the range 0 through 255 +/* .br +/* v4sequence = v4seq_member | v4sequence ";" v4seq_member +/* .br +/* v4seq_member = v4octet | v4octet ".." v4octet +/* .in +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this +/* software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/*--*/ + +/* System library. */ + +#include <sys_defs.h> +#include <sys/socket.h> +#include <ctype.h> +#include <string.h> + +/* Utility library. */ + +#include <msg.h> +#include <mymalloc.h> +#include <vstring.h> +#include <ip_match.h> + + /* + * Token values. The in-band values are also used as byte-code values. + */ +#define IP_MATCH_CODE_OPEN '[' /* in-band */ +#define IP_MATCH_CODE_CLOSE ']' /* in-band */ +#define IP_MATCH_CODE_OVAL 'N' /* in-band */ +#define IP_MATCH_CODE_RANGE 'R' /* in-band */ +#define IP_MATCH_CODE_EOF '\0' /* in-band */ +#define IP_MATCH_CODE_ERR 256 /* out-of-band */ + + /* + * SLMs. + */ +#define STR vstring_str +#define LEN VSTRING_LEN + +/* ip_match_save - make longer-term copy of byte code */ + +char *ip_match_save(const VSTRING *byte_codes) +{ + char *dst; + + dst = mymalloc(LEN(byte_codes)); + return (memcpy(dst, STR(byte_codes), LEN(byte_codes))); +} + +/* ip_match_dump - byte-code pretty printer */ + +char *ip_match_dump(VSTRING *printable, const char *byte_codes) +{ + const char *myname = "ip_match_dump"; + const unsigned char *bp; + int octet_count = 0; + int ch; + + /* + * Sanity check. Use different dumping loops for AF_INET and AF_INET6. + */ + if (*byte_codes != AF_INET) + msg_panic("%s: malformed byte-code header", myname); + + /* + * Pretty-print and sanity-check the byte codes. Note: the loops in this + * code have no auto-increment at the end of the iteration. Instead, each + * byte-code handler bumps the byte-code pointer appropriately. + */ + VSTRING_RESET(printable); + bp = (const unsigned char *) byte_codes + 1; + for (;;) { + + /* + * Simple numeric field. + */ + if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { + vstring_sprintf_append(printable, "%d", *bp); + bp += 1; + } + + /* + * Wild-card numeric field. + */ + else if (ch == IP_MATCH_CODE_OPEN) { + vstring_sprintf_append(printable, "["); + for (;;) { + /* Numeric range. */ + if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { + vstring_sprintf_append(printable, "%d..%d", bp[0], bp[1]); + bp += 2; + } + /* Number. */ + else if (ch == IP_MATCH_CODE_OVAL) { + vstring_sprintf_append(printable, "%d", *bp); + bp += 1; + } + /* End-of-wildcard. */ + else if (ch == IP_MATCH_CODE_CLOSE) { + break; + } + /* Corruption. */ + else { + msg_panic("%s: unexpected byte code (decimal %d) " + "after \"%s\"", myname, ch, STR(printable)); + } + /* Output the wild-card field separator and repeat the loop. */ + if (*bp != IP_MATCH_CODE_CLOSE) + vstring_sprintf_append(printable, ";"); + } + vstring_sprintf_append(printable, "]"); + } + + /* + * Corruption. + */ + else { + msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", + myname, ch, STR(printable)); + } + + /* + * Require four octets, not one more, not one less. + */ + if (++octet_count == 4) { + if (*bp != 0) + msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", + myname, ch, STR(printable)); + return (STR(printable)); + } + if (*bp == 0) + msg_panic("%s: truncated byte code after \"%s\"", + myname, STR(printable)); + + /* + * Output the address field separator and repeat the loop. + */ + vstring_sprintf_append(printable, "."); + } +} + +/* ip_match_print_code_prefix - printable byte-code prefix */ + +static char *ip_match_print_code_prefix(const char *byte_codes, size_t len) +{ + static VSTRING *printable = 0; + const char *fmt; + const char *bp; + + /* + * This is primarily for emergency debugging so we don't care about + * non-reentrancy. + */ + if (printable == 0) + printable = vstring_alloc(100); + else + VSTRING_RESET(printable); + + /* + * Use decimal for IPv4 and hexadecimal otherwise, so that address octet + * values are easy to recognize. + */ + fmt = (*byte_codes == AF_INET ? "%d " : "%02x "); + for (bp = byte_codes; bp < byte_codes + len; bp++) + vstring_sprintf_append(printable, fmt, *(const unsigned char *) bp); + + return (STR(printable)); +} + +/* ip_match_execute - byte-code matching engine */ + +int ip_match_execute(const char *byte_codes, const char *addr_bytes) +{ + const char *myname = "ip_match_execute"; + const unsigned char *bp; + const unsigned char *ap; + int octet_count = 0; + int ch; + int matched; + + /* + * Sanity check. Use different execute loops for AF_INET and AF_INET6. + */ + if (*byte_codes != AF_INET) + msg_panic("%s: malformed byte-code header (decimal %d)", + myname, *(const unsigned char *) byte_codes); + + /* + * Match the address bytes against the byte codes. Avoid problems with + * (char -> int) sign extension on architectures with signed characters. + */ + bp = (const unsigned char *) byte_codes + 1; + ap = (const unsigned char *) addr_bytes; + + for (octet_count = 0; octet_count < 4; octet_count++, ap++) { + + /* + * Simple numeric field. + */ + if ((ch = *bp++) == IP_MATCH_CODE_OVAL) { + if (*ap == *bp) + bp += 1; + else + return (0); + } + + /* + * Wild-card numeric field. + */ + else if (ch == IP_MATCH_CODE_OPEN) { + matched = 0; + for (;;) { + /* Numeric range. */ + if ((ch = *bp++) == IP_MATCH_CODE_RANGE) { + if (!matched) + matched = (*ap >= bp[0] && *ap <= bp[1]); + bp += 2; + } + /* Number. */ + else if (ch == IP_MATCH_CODE_OVAL) { + if (!matched) + matched = (*ap == *bp); + bp += 1; + } + /* End-of-wildcard. */ + else if (ch == IP_MATCH_CODE_CLOSE) { + break; + } + /* Corruption. */ + else { + size_t len = (const char *) bp - byte_codes - 1; + + msg_panic("%s: unexpected byte code (decimal %d) " + "after \"%s\"", myname, ch, + ip_match_print_code_prefix(byte_codes, len)); + } + } + if (matched == 0) + return (0); + } + + /* + * Corruption. + */ + else { + size_t len = (const char *) bp - byte_codes - 1; + + msg_panic("%s: unexpected byte code (decimal %d) after \"%s\"", + myname, ch, ip_match_print_code_prefix(byte_codes, len)); + } + } + return (1); +} + +/* ip_match_next_token - carve out the next token from input pattern */ + +static int ip_match_next_token(char **pstart, char **psaved_start, int *poval) +{ + unsigned char *cp; + int oval; /* octet value */ + int type; /* token value */ + + /* + * Return a literal, error, or EOF token. Update the read pointer to the + * start of the next token or leave it at the string terminator. + */ +#define IP_MATCH_RETURN_TOK(next, type) \ + do { *pstart = (char *) (next); return (type); } while (0) + + /* + * Return a token that contains an IPv4 address octet value. + */ +#define IP_MATCH_RETURN_TOK_VAL(next, type, oval) do { \ + *poval = (oval); IP_MATCH_RETURN_TOK((next), type); \ + } while (0) + + /* + * Light-weight tokenizer. Each result is an IPv4 address octet value, a + * literal character value, error, or EOF. + */ + *psaved_start = *pstart; + cp = (unsigned char *) *pstart; + if (ISDIGIT(*cp)) { + oval = *cp - '0'; + type = IP_MATCH_CODE_OVAL; + for (cp += 1; ISDIGIT(*cp); cp++) { + oval *= 10; + oval += *cp - '0'; + if (oval > 255) + type = IP_MATCH_CODE_ERR; + } + IP_MATCH_RETURN_TOK_VAL(cp, type, oval); + } else { + IP_MATCH_RETURN_TOK(*cp ? cp + 1 : cp, *cp); + } +} + +/* ipmatch_print_parse_error - formatted parsing error, with context */ + +static void PRINTFLIKE(5, 6) ipmatch_print_parse_error(VSTRING *reply, + char *start, + char *here, + char *next, + const char *fmt,...) +{ + va_list ap; + int start_width; + int here_width; + + /* + * Format the error type. + */ + va_start(ap, fmt); + vstring_vsprintf(reply, fmt, ap); + va_end(ap); + + /* + * Format the error context. The syntax is complex enough that it is + * worth the effort to precisely indicate what input is in error. + * + * XXX Workaround for %.*s to avoid output when a zero width is specified. + */ + if (start != 0) { + start_width = here - start; + here_width = next - here; + vstring_sprintf_append(reply, " at \"%.*s>%.*s<%s\"", + start_width, start_width == 0 ? "" : start, + here_width, here_width == 0 ? "" : here, next); + } +} + +/* ip_match_parse - parse an entire wild-card address pattern */ + +char *ip_match_parse(VSTRING *byte_codes, char *pattern) +{ + int octet_count; + char *saved_cp; + char *cp; + int token_type; + int look_ahead; + int oval; + int saved_oval; + + /* + * Simplify this if we change to {} for wildcard notation. + */ +#define FIND_TERMINATOR(start, cp) do { \ + int _level = 0; \ + for (cp = (start) ; *cp; cp++) { \ + if (*cp == '[') _level++; \ + if (*cp != ']') continue; \ + if (--_level == 0) break; \ + } \ + } while (0) + + /* + * Strip [] around the entire pattern. + */ + if (*pattern == '[') { + FIND_TERMINATOR(pattern, cp); + if (cp[0] == 0) { + vstring_sprintf(byte_codes, "missing \"]\" character"); + return (STR(byte_codes)); + } + if (cp[1] == 0) { + *cp = 0; + pattern += 1; + } + } + + /* + * Sanity check. In this case we can't show any error context. + */ + if (*pattern == 0) { + vstring_sprintf(byte_codes, "empty address pattern"); + return (STR(byte_codes)); + } + + /* + * Simple parser with on-the-fly encoding. For now, IPv4 support only. + * Use different parser loops for IPv4 and IPv6. + */ + VSTRING_RESET(byte_codes); + VSTRING_ADDCH(byte_codes, AF_INET); + octet_count = 0; + cp = pattern; + + /* + * Require four address fields separated by ".", each field containing a + * numeric octet value or a sequence inside []. The loop head has no test + * and does not step the loop variable. The tokenizer advances the loop + * variable, and the loop termination logic is inside the loop. + */ + for (;;) { + switch (token_type = ip_match_next_token(&cp, &saved_cp, &oval)) { + + /* + * Numeric address field. + */ + case IP_MATCH_CODE_OVAL: + VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); + VSTRING_ADDCH(byte_codes, oval); + break; + + /* + * Wild-card address field. + */ + case IP_MATCH_CODE_OPEN: + VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OPEN); + /* Require ";"-separated numbers or numeric ranges. */ + for (;;) { + token_type = ip_match_next_token(&cp, &saved_cp, &oval); + if (token_type == IP_MATCH_CODE_OVAL) { + saved_oval = oval; + look_ahead = ip_match_next_token(&cp, &saved_cp, &oval); + /* Numeric range. */ + if (look_ahead == '.') { + /* Brute-force parsing. */ + if (ip_match_next_token(&cp, &saved_cp, &oval) == '.' + && ip_match_next_token(&cp, &saved_cp, &oval) + == IP_MATCH_CODE_OVAL + && saved_oval <= oval) { + VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_RANGE); + VSTRING_ADDCH(byte_codes, saved_oval); + VSTRING_ADDCH(byte_codes, oval); + look_ahead = + ip_match_next_token(&cp, &saved_cp, &oval); + } else { + ipmatch_print_parse_error(byte_codes, pattern, + saved_cp, cp, + "numeric range error"); + return (STR(byte_codes)); + } + } + /* Single number. */ + else { + VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_OVAL); + VSTRING_ADDCH(byte_codes, saved_oval); + } + /* Require ";" or end-of-wildcard. */ + token_type = look_ahead; + if (token_type == ';') { + continue; + } else if (token_type == IP_MATCH_CODE_CLOSE) { + break; + } else { + ipmatch_print_parse_error(byte_codes, pattern, + saved_cp, cp, + "need \";\" or \"%c\"", + IP_MATCH_CODE_CLOSE); + return (STR(byte_codes)); + } + } else { + ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, + "need decimal number 0..255"); + return (STR(byte_codes)); + } + } + VSTRING_ADDCH(byte_codes, IP_MATCH_CODE_CLOSE); + break; + + /* + * Invalid field. + */ + default: + ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, + "need decimal number 0..255 or \"%c\"", + IP_MATCH_CODE_OPEN); + return (STR(byte_codes)); + } + octet_count += 1; + + /* + * Require four address fields. Not one more, not one less. + */ + if (octet_count == 4) { + if (*cp != 0) { + (void) ip_match_next_token(&cp, &saved_cp, &oval); + ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, + "garbage after pattern"); + return (STR(byte_codes)); + } + VSTRING_ADDCH(byte_codes, 0); + return (0); + } + + /* + * Require "." before the next address field. + */ + if (ip_match_next_token(&cp, &saved_cp, &oval) != '.') { + ipmatch_print_parse_error(byte_codes, pattern, saved_cp, cp, + "need \".\""); + return (STR(byte_codes)); + } + } +} + +#ifdef TEST + + /* + * Dummy main program for regression tests. + */ +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <stdlib.h> +#include <unistd.h> +#include <vstream.h> +#include <vstring_vstream.h> +#include <stringops.h> + +int main(int argc, char **argv) +{ + VSTRING *byte_codes = vstring_alloc(100); + VSTRING *line_buf = vstring_alloc(100); + char *bufp; + char *err; + char *user_pattern; + char *user_address; + int echo_input = !isatty(0); + + /* + * Iterate over the input stream. The input format is a pattern, followed + * by optional addresses to match against. + */ + while (vstring_fgets_nonl(line_buf, VSTREAM_IN)) { + bufp = STR(line_buf); + if (echo_input) { + vstream_printf("> %s\n", bufp); + vstream_fflush(VSTREAM_OUT); + } + if (*bufp == '#') + continue; + if ((user_pattern = mystrtok(&bufp, " \t")) == 0) + continue; + + /* + * Parse and dump the pattern. + */ + if ((err = ip_match_parse(byte_codes, user_pattern)) != 0) { + vstream_printf("Error: %s\n", err); + } else { + vstream_printf("Code: %s\n", + ip_match_dump(line_buf, STR(byte_codes))); + } + vstream_fflush(VSTREAM_OUT); + + /* + * Match the optional patterns. + */ + while ((user_address = mystrtok(&bufp, " \t")) != 0) { + struct in_addr netw_addr; + + switch (inet_pton(AF_INET, user_address, &netw_addr)) { + case 1: + vstream_printf("Match %s: %s\n", user_address, + ip_match_execute(STR(byte_codes), + (char *) &netw_addr.s_addr) ? + "yes" : "no"); + break; + case 0: + vstream_printf("bad address syntax: %s\n", user_address); + break; + case -1: + vstream_printf("%s: %m\n", user_address); + break; + } + vstream_fflush(VSTREAM_OUT); + } + } + vstring_free(line_buf); + vstring_free(byte_codes); + exit(0); +} + +#endif |