summaryrefslogtreecommitdiffstats
path: root/support/logresolve.c
diff options
context:
space:
mode:
Diffstat (limited to 'support/logresolve.c')
-rw-r--r--support/logresolve.c329
1 files changed, 329 insertions, 0 deletions
diff --git a/support/logresolve.c b/support/logresolve.c
new file mode 100644
index 0000000..1cab753
--- /dev/null
+++ b/support/logresolve.c
@@ -0,0 +1,329 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * logresolve 2.0
+ *
+ * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
+ * UUNET Canada, April 16, 1995
+ *
+ * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
+ * Rewritten again, and ported to APR by Colm MacCarthaigh
+ *
+ * Usage: logresolve [-s filename] [-c] < access_log > new_log
+ *
+ * Arguments:
+ * -s filename name of a file to record statistics
+ * -c check the DNS for a matching A record for the host.
+ *
+ * Notes: (For historical interest)
+ *
+ * To generate meaningful statistics from an HTTPD log file, it's good
+ * to have the domain name of each machine that accessed your site, but
+ * doing this on the fly can slow HTTPD down.
+ *
+ * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
+ * resolution off. Before running your stats program, just run your log
+ * file through this program (logresolve) and all of your IP numbers will
+ * be resolved into hostnames (where possible).
+ *
+ * logresolve takes an HTTPD access log (in the COMMON log file format,
+ * or any other format that has the IP number/domain name as the first
+ * field for that matter), and outputs the same file with all of the
+ * domain names looked up. Where no domain name can be found, the IP
+ * number is left in.
+ *
+ * To minimize impact on your nameserver, logresolve has its very own
+ * internal hash-table cache. This means that each IP number will only
+ * be looked up the first time it is found in the log file.
+ *
+ * The -c option causes logresolve to apply the same check as httpd
+ * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
+ * address, it looks up the IP addresses for the hostname and checks
+ * that one of these matches the original address.
+ */
+
+#include "apr.h"
+#include "apr_lib.h"
+#include "apr_hash.h"
+#include "apr_getopt.h"
+#include "apr_strings.h"
+#include "apr_file_io.h"
+#include "apr_network_io.h"
+
+#if APR_HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+/*
+ * Sizes, in bytes, of the buffers main() attaches to stdin and stdout and
+ * of the buffer a single log line is read into. The expressions are
+ * parenthesized so they stay intact wherever the macros are expanded.
+ */
+#define READ_BUF_SIZE  (128 * 1024)
+#define WRITE_BUF_SIZE (128 * 1024)
+#define LINE_BUF_SIZE  (128 * 1024)
+
+/* File that usage and error messages are written to; main() opens it on stderr */
+static apr_file_t *errfile;
+/* Program name used in messages; main() replaces it with the name part of argv[0] */
+static const char *shortname = "logresolve";
+/* Lookup results, keyed by the address text found at the start of a log line */
+static apr_hash_t *cache;
+
+/* Statistics */
+static int cachehits = 0;    /* lines answered from the cache */
+static int cachesize = 0;    /* addresses that went through a lookup and were cached */
+static int entries = 0;      /* non-empty lines read */
+static int resolves = 0;     /* lines whose first field parsed as an IP address */
+static int withname = 0;     /* lines whose first field was not an IP address */
+static int doublefailed = 0; /* reverse lookups rejected by the -c forward check */
+static int noreverse = 0;    /* addresses for which the reverse lookup failed */
+
+/*
+ * Write the counters gathered during the run to `output`.
+ *
+ * The "No reverse" and "Double lookup failed" lines are only emitted when
+ * the corresponding counter is non-zero.
+ */
+#define NL APR_EOL_STR
+static void print_statistics (apr_file_t *output)
+{
+    /* Header and the counters that are always reported */
+    apr_file_printf(output,
+                    "logresolve Statistics:" NL
+                    "Entries: %d" NL
+                    " With name : %d" NL
+                    " Resolves : %d" NL,
+                    entries, withname, resolves);
+
+    /* Failure breakdown, shown only when something actually failed */
+    if (noreverse != 0) {
+        apr_file_printf(output, " - No reverse : %d" NL, noreverse);
+    }
+    if (doublefailed != 0) {
+        apr_file_printf(output, " - Double lookup failed : %d" NL,
+                        doublefailed);
+    }
+
+    /* Cache figures */
+    apr_file_printf(output,
+                    "Cache hits : %d" NL
+                    "Cache size : %d" NL,
+                    cachehits, cachesize);
+}
+
+/*
+ * Print the command-line synopsis to errfile and terminate the program
+ * with exit status 1. Never returns.
+ */
+static void usage(void)
+{
+    /* Title line and synopsis, both carrying the program name */
+    apr_file_printf(errfile,
+                    "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
+                    "Usage: %s [-s STATFILE] [-c]" NL
+                    NL,
+                    shortname, shortname);
+
+    /* Option summary */
+    apr_file_printf(errfile,
+                    "Options:" NL
+                    " -s Record statistics to STATFILE when finished." NL
+                    NL
+                    " -c Perform double lookups when resolving IP addresses." NL);
+
+    exit(1);
+}
+#undef NL
+
+/*
+ * Record the outcome of a lookup in the cache.
+ *
+ * Both the key and the value are copied into the cache's own pool.
+ * apr_hash_set() stores the key pointer it is handed rather than a copy,
+ * and the caller's line buffer is overwritten by every subsequent read, so
+ * the key must not point into that buffer.
+ */
+static void cache_store(const char *address, const char *result)
+{
+    apr_pool_t *cache_pool = apr_hash_pool_get(cache);
+
+    apr_hash_set(cache, apr_pstrdup(cache_pool, address),
+                 APR_HASH_KEY_STRING, apr_pstrdup(cache_pool, result));
+}
+
+/*
+ * Copy a log from stdin to stdout, replacing a leading IP address on each
+ * line with the hostname found by a reverse lookup.
+ *
+ * Options:
+ *   -s FILE  write the run statistics to FILE when the input is exhausted
+ *   -c       additionally require that a forward lookup of the hostname
+ *            yields the original address
+ *
+ * Returns 0 on success and 1 when initialization, an allocation or opening
+ * the statistics file fails. Invalid or repeated options end the program
+ * through usage().
+ */
+int main(int argc, const char * const argv[])
+{
+    apr_file_t *outfile;
+    apr_file_t *infile;
+    apr_getopt_t *o;
+    apr_pool_t *pool;
+    apr_pool_t *pline;
+    apr_status_t status;
+    const char *arg;
+    char *stats = NULL;
+    char *inbuffer;
+    char *outbuffer;
+    char *line;
+    int doublelookups = 0;
+
+    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
+        return 1;
+    }
+    atexit(apr_terminate);
+
+    if (argc) {
+        shortname = apr_filepath_name_get(argv[0]);
+    }
+
+    if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
+        return 1;
+    }
+    apr_file_open_stderr(&errfile, pool);
+    apr_getopt_init(&o, pool, argc, argv);
+
+    /* Parse the command line; each option may be given at most once */
+    while (1) {
+        char opt;
+        status = apr_getopt(o, "s:c", &opt, &arg);
+        if (status == APR_EOF) {
+            break;
+        }
+        else if (status != APR_SUCCESS) {
+            usage();
+        }
+        else {
+            switch (opt) {
+            case 'c':
+                if (doublelookups) {
+                    usage();
+                }
+                doublelookups = 1;
+                break;
+            case 's':
+                if (stats) {
+                    usage();
+                }
+                stats = apr_pstrdup(pool, arg);
+                break;
+            } /* switch */
+        } /* else */
+    } /* while */
+
+    apr_file_open_stdout(&outfile, pool);
+    apr_file_open_stdin(&infile, pool);
+
+    /* Allocate the two file buffers and the line buffer */
+    if ( (outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL
+        || (inbuffer = apr_palloc(pool, READ_BUF_SIZE)) == NULL
+        || (line = apr_palloc(pool, LINE_BUF_SIZE)) == NULL) {
+        return 1;
+    }
+
+    /* Set the buffers */
+    apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE);
+    apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE);
+
+    cache = apr_hash_make(pool);
+
+    /* Scratch pool for the allocations made while resolving one address */
+    if (apr_pool_create(&pline, pool) != APR_SUCCESS) {
+        return 1;
+    }
+
+    while (apr_file_gets(line, LINE_BUF_SIZE, infile) == APR_SUCCESS) {
+        char *hostname;
+        char *space;
+        apr_sockaddr_t *ip;
+        apr_sockaddr_t *ipdouble;
+        /* Stand-in for "the rest of the line" when a line has no space */
+        char dummy[] = " " APR_EOL_STR;
+
+        if (line[0] == '\0') {
+            continue;
+        }
+
+        /* Count our log entries */
+        entries++;
+
+        /* Check if this could even be an IP address */
+        if (!apr_isxdigit(line[0]) && line[0] != ':') {
+            withname++;
+            apr_file_puts(line, outfile);
+            continue;
+        }
+
+        /* Terminate the line at the next space */
+        if ((space = strchr(line, ' ')) != NULL) {
+            *space = '\0';
+        }
+        else {
+            space = dummy;
+        }
+
+        /* See if we have it in our cache */
+        hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
+        if (hostname) {
+            apr_file_printf(outfile, "%s %s", hostname, space + 1);
+            cachehits++;
+            continue;
+        }
+
+        /* Release whatever the previous lookup left behind. Doing this here
+         * rather than at the end of the loop also covers the iterations
+         * that bail out early with "continue".
+         */
+        apr_pool_clear(pline);
+
+        /* Parse the IP address */
+        status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline);
+        if (status != APR_SUCCESS) {
+            /* Not an IP address */
+            withname++;
+            *space = ' ';
+            apr_file_puts(line, outfile);
+            continue;
+        }
+
+        /* This does not make much sense, but historically "resolves" means
+         * "parsed as an IP address". It does not mean we actually resolved
+         * the IP address into a hostname.
+         */
+        resolves++;
+
+        /* From here on we cache each result, even if it was not
+         * successful
+         */
+        cachesize++;
+
+        /* Try and perform a reverse lookup */
+        status = apr_getnameinfo(&hostname, ip, 0);
+        if (status != APR_SUCCESS || hostname == NULL) {
+            /* Could not perform a reverse lookup: cache the address as its
+             * own result while the line is still terminated after it, then
+             * emit the line unchanged.
+             */
+            cache_store(line, line);
+            noreverse++;
+
+            *space = ' ';
+            apr_file_puts(line, outfile);
+            continue;
+        }
+
+        /* Perform a double lookup */
+        if (doublelookups) {
+            /* Do a forward lookup on our hostname, and see if that matches our
+             * original IP address. Only the first address returned by the
+             * forward lookup is compared.
+             */
+            status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
+                                           0, pline);
+            if (status != APR_SUCCESS ||
+                memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
+                /* Double-lookup failed: cache and emit the bare address */
+                cache_store(line, line);
+                doublefailed++;
+
+                *space = ' ';
+                apr_file_puts(line, outfile);
+                continue;
+            }
+        }
+
+        /* Output the resolved name */
+        apr_file_printf(outfile, "%s %s", hostname, space + 1);
+
+        /* Store it in the cache; hostname lives in pline, so it is copied */
+        cache_store(line, hostname);
+    }
+
+    /* Flush any remaining output */
+    apr_file_flush(outfile);
+
+    if (stats) {
+        apr_file_t *statsfile;
+        if (apr_file_open(&statsfile, stats,
+                          APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
+                          APR_OS_DEFAULT, pool) != APR_SUCCESS) {
+            apr_file_printf(errfile,
+                            "%s: Could not open %s for writing." APR_EOL_STR,
+                            shortname, stats);
+            return 1;
+        }
+        print_statistics(statsfile);
+        apr_file_close(statsfile);
+    }
+
+    return 0;
+}