diff options
Diffstat (limited to '')
-rw-r--r-- | support/logresolve.c | 329 |
1 files changed, 329 insertions, 0 deletions
diff --git a/support/logresolve.c b/support/logresolve.c new file mode 100644 index 0000000..1cab753 --- /dev/null +++ b/support/logresolve.c @@ -0,0 +1,329 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * logresolve 2.0 + * + * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/ + * UUNET Canada, April 16, 1995 + * + * Rewritten by David Robinson. (drtr ast.cam.ac.uk) + * Rewritten again, and ported to APR by Colm MacCarthaigh + * + * Usage: logresolve [-s filename] [-c] < access_log > new_log + * + * Arguments: + * -s filename name of a file to record statistics + * -c check the DNS for a matching A record for the host. + * + * Notes: (For historical interest) + * + * To generate meaningful statistics from an HTTPD log file, it's good + * to have the domain name of each machine that accessed your site, but + * doing this on the fly can slow HTTPD down. + * + * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname + * resolution off. Before running your stats program, just run your log + * file through this program (logresolve) and all of your IP numbers will + * be resolved into hostnames (where possible). + * + * logresolve takes an HTTPD access log (in the COMMON log file format, + * or any other format that has the IP number/domain name as the first + * field for that matter), and outputs the same file with all of the + * domain names looked up. Where no domain name can be found, the IP + * number is left in. + * + * To minimize impact on your nameserver, logresolve has its very own + * internal hash-table cache. This means that each IP number will only + * be looked up the first time it is found in the log file. + * + * The -c option causes logresolve to apply the same check as httpd + * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP + * address, it looks up the IP addresses for the hostname and checks + * that one of these matches the original address. + */ + +#include "apr.h" +#include "apr_lib.h" +#include "apr_hash.h" +#include "apr_getopt.h" +#include "apr_strings.h" +#include "apr_file_io.h" +#include "apr_network_io.h" + +#if APR_HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#define READ_BUF_SIZE 128*1024 +#define WRITE_BUF_SIZE 128*1024 +#define LINE_BUF_SIZE 128*1024 + +static apr_file_t *errfile; +static const char *shortname = "logresolve"; +static apr_hash_t *cache; + +/* Statistics */ +static int cachehits = 0; +static int cachesize = 0; +static int entries = 0; +static int resolves = 0; +static int withname = 0; +static int doublefailed = 0; +static int noreverse = 0; + +/* + * prints various statistics to output + */ +#define NL APR_EOL_STR +static void print_statistics (apr_file_t *output) +{ + apr_file_printf(output, "logresolve Statistics:" NL); + apr_file_printf(output, "Entries: %d" NL, entries); + apr_file_printf(output, " With name : %d" NL, withname); + apr_file_printf(output, " Resolves : %d" NL, resolves); + + if (noreverse) { + apr_file_printf(output, " - No reverse : %d" NL, + noreverse); + } + + if (doublefailed) { + apr_file_printf(output, " - Double lookup failed : %d" NL, + doublefailed); + } + + apr_file_printf(output, "Cache hits : %d" NL, cachehits); + apr_file_printf(output, "Cache size : %d" NL, cachesize); +} + +/* + * usage info + */ +static void usage(void) +{ + apr_file_printf(errfile, + "%s -- Resolve IP-addresses to hostnames in Apache log files." NL + "Usage: %s [-s STATFILE] [-c]" NL + NL + "Options:" NL + " -s Record statistics to STATFILE when finished." NL + NL + " -c Perform double lookups when resolving IP addresses." NL, + shortname, shortname); + exit(1); +} +#undef NL + +int main(int argc, const char * const argv[]) +{ + apr_file_t * outfile; + apr_file_t * infile; + apr_getopt_t * o; + apr_pool_t * pool; + apr_pool_t *pline; + apr_status_t status; + const char * arg; + char * stats = NULL; + char * inbuffer; + char * outbuffer; + char * line; + int doublelookups = 0; + + if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) { + return 1; + } + atexit(apr_terminate); + + if (argc) { + shortname = apr_filepath_name_get(argv[0]); + } + + if (apr_pool_create(&pool, NULL) != APR_SUCCESS) { + return 1; + } + apr_file_open_stderr(&errfile, pool); + apr_getopt_init(&o, pool, argc, argv); + + while (1) { + char opt; + status = apr_getopt(o, "s:c", &opt, &arg); + if (status == APR_EOF) { + break; + } + else if (status != APR_SUCCESS) { + usage(); + } + else { + switch (opt) { + case 'c': + if (doublelookups) { + usage(); + } + doublelookups = 1; + break; + case 's': + if (stats) { + usage(); + } + stats = apr_pstrdup(pool, arg); + break; + } /* switch */ + } /* else */ + } /* while */ + + apr_file_open_stdout(&outfile, pool); + apr_file_open_stdin(&infile, pool); + + /* Allocate two new 10k file buffers */ + if ( (outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL + || (inbuffer = apr_palloc(pool, READ_BUF_SIZE)) == NULL + || (line = apr_palloc(pool, LINE_BUF_SIZE)) == NULL) { + return 1; + } + + /* Set the buffers */ + apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE); + apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE); + + cache = apr_hash_make(pool); + if (apr_pool_create(&pline, pool) != APR_SUCCESS) { + return 1; + } + + while (apr_file_gets(line, LINE_BUF_SIZE, infile) == APR_SUCCESS) { + char *hostname; + char *space; + apr_sockaddr_t *ip; + apr_sockaddr_t *ipdouble; + char dummy[] = " " APR_EOL_STR; + + if (line[0] == '\0') { + continue; + } + + /* Count our log entries */ + entries++; + + /* Check if this could even be an IP address */ + if (!apr_isxdigit(line[0]) && line[0] != ':') { + withname++; + apr_file_puts(line, outfile); + continue; + } + + /* Terminate the line at the next space */ + if ((space = strchr(line, ' ')) != NULL) { + *space = '\0'; + } + else { + space = dummy; + } + + /* See if we have it in our cache */ + hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING); + if (hostname) { + apr_file_printf(outfile, "%s %s", hostname, space + 1); + cachehits++; + continue; + } + + /* Parse the IP address */ + status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline); + if (status != APR_SUCCESS) { + /* Not an IP address */ + withname++; + *space = ' '; + apr_file_puts(line, outfile); + continue; + } + + /* This does not make much sense, but historically "resolves" means + * "parsed as an IP address". It does not mean we actually resolved + * the IP address into a hostname. + */ + resolves++; + + /* From here on our we cache each result, even if it was not + * successful + */ + cachesize++; + + /* Try and perform a reverse lookup */ + status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS; + if (status || hostname == NULL) { + /* Could not perform a reverse lookup */ + *space = ' '; + apr_file_puts(line, outfile); + noreverse++; + + /* Add to cache */ + *space = '\0'; + apr_hash_set(cache, line, APR_HASH_KEY_STRING, + apr_pstrdup(apr_hash_pool_get(cache), line)); + continue; + } + + /* Perform a double lookup */ + if (doublelookups) { + /* Do a forward lookup on our hostname, and see if that matches our + * original IP address. + */ + status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0, + 0, pline); + if (status != APR_SUCCESS || + memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) { + /* Double-lookup failed */ + *space = ' '; + apr_file_puts(line, outfile); + doublefailed++; + + /* Add to cache */ + *space = '\0'; + apr_hash_set(cache, line, APR_HASH_KEY_STRING, + apr_pstrdup(apr_hash_pool_get(cache), line)); + continue; + } + } + + /* Output the resolved name */ + apr_file_printf(outfile, "%s %s", hostname, space + 1); + + /* Store it in the cache */ + apr_hash_set(cache, line, APR_HASH_KEY_STRING, + apr_pstrdup(apr_hash_pool_get(cache), hostname)); + + apr_pool_clear(pline); + } + + /* Flush any remaining output */ + apr_file_flush(outfile); + + if (stats) { + apr_file_t *statsfile; + if (apr_file_open(&statsfile, stats, + APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE, + APR_OS_DEFAULT, pool) != APR_SUCCESS) { + apr_file_printf(errfile, "%s: Could not open %s for writing.", + shortname, stats); + return 1; + } + print_statistics(statsfile); + apr_file_close(statsfile); + } + + return 0; +} |