diff options
Diffstat (limited to 'support/logresolve.pl.in')
-rw-r--r-- | support/logresolve.pl.in | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/support/logresolve.pl.in b/support/logresolve.pl.in new file mode 100644 index 0000000..2cd212d --- /dev/null +++ b/support/logresolve.pl.in @@ -0,0 +1,225 @@ +#!@perlbin@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# logresolve.pl +# +# v 1.2 by robh imdb.com +# +# usage: logresolve.pl <infile >outfile +# +# input = Apache/NCSA/.. logfile with IP numbers at start of lines +# output = same logfile with IP addresses resolved to hostnames where +# name lookups succeeded. +# +# this differs from the C based 'logresolve' in that this script +# spawns a number ($CHILDREN) of subprocesses to resolve addresses +# concurrently and sets a short timeout ($TIMEOUT) for each lookup in +# order to keep things moving quickly. +# +# the parent process handles caching of IP->hostnames using a Perl hash +# it also avoids sending the same IP to multiple child processes to be +# resolved multiple times concurrently. +# +# Depending on the settings of $CHILDREN and $TIMEOUT you should see +# significant reductions in the overall time taken to resolve your +# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000 +# logfile lines processed per hour compared to ~45,000 per hour +# with 'logresolve'. +# +# I haven't yet seen any noticeable reduction in the percentage of IPs +# that fail to get resolved. Your mileage will no doubt vary. 5s is long +# enough to wait IMO. +# +# Known to work with FreeBSD 2.2 +# Known to have problems with Solaris +# +# 980417 - use 'sockaddr_un' for bind/connect to make the script work +# with linux. Fix from Luuk de Boer <luuk_de_boer pi.net> + +require 5.004; + +$|=1; + +use FileHandle; +use Socket; + +use strict; +no strict 'refs'; + +use vars qw($PROTOCOL); +$PROTOCOL = 0; + +my $CHILDREN = 40; +my $TIMEOUT = 5; + +my $filename; +my %hash = (); +my $parent = $$; + +my @children = (); +for (my $child = 1; $child <=$CHILDREN; $child++) { + my $f = fork(); + if (!$f) { + $filename = "./.socket.$parent.$child"; + if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";} + &child($child); + exit(0); + } + push(@children, $f); +} + +&parent; +&cleanup; + +## remove all temporary files before shutting down +sub cleanup { + # die kiddies, die + kill(15, @children); + for (my $child = 1; $child <=$CHILDREN; $child++) { + if (-e "./.socket.$parent.$child") { + unlink("./.socket.$parent.$child") + || warn ".socket.$parent.$child $!"; + } + } +} + +sub parent { + # Trap some possible signals to trigger temp file cleanup + $SIG{'KILL'} = $SIG{'INT'} = $SIG{'PIPE'} = \&cleanup; + + my %CHILDSOCK; + my $filename; + + ## fork child processes. Each child will create a socket connection + ## to this parent and use an unique temp filename to do so. + for (my $child = 1; $child <=$CHILDREN; $child++) { + $CHILDSOCK{$child}= FileHandle->new; + + if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) { + warn "parent socket to child failed $!"; + } + $filename = "./.socket.$parent.$child"; + my $response; + do { + $response = connect($CHILDSOCK{$child}, sockaddr_un($filename)); + if ($response != 1) { + sleep(1); + } + } while ($response != 1); + $CHILDSOCK{$child}->autoflush; + } + ## All child processes should now be ready or at worst warming up + + my (@buffer, $child, $ip, $rest, $hostname, $response); + ## read the logfile lines from STDIN + while(<STDIN>) { + @buffer = (); # empty the logfile line buffer array. + $child = 1; # children are numbered 1..N, start with #1 + + # while we have a child to talk to and data to give it.. + do { + push(@buffer, $_); # buffer the line + ($ip, $rest) = split(/ /, $_, 2); # separate IP form rest + + unless ($hash{$ip}) { # resolve if unseen IP + $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child + $hash{$ip} = $ip; # don't look it up again. + $child++; + } + } while (($child < ($CHILDREN-1)) and ($_ = <STDIN>)); + + ## now poll each child for a response + while (--$child > 0) { + $response = $CHILDSOCK{$child}->getline; + chomp($response); + # child sends us back both the IP and HOSTNAME, no need for us + # to remember what child received any given IP, and no worries + # what order we talk to the children + ($ip, $hostname) = split(/\|/, $response, 2); + $hash{$ip} = $hostname; + } + + # resolve all the logfiles lines held in the log buffer array.. + for (my $line = 0; $line <=$#buffer; $line++) { + # get next buffered line + ($ip, $rest) = split(/ /, $buffer[$line], 2); + # separate IP from rest and replace with cached hostname + printf STDOUT ("%s %s", $hash{$ip}, $rest); + } + } +} + +######################################## + +sub child { + # arg = numeric ID - how the parent refers to me + my $me = shift; + + # add trap for alarm signals. + $SIG{'ALRM'} = sub { die "alarmed"; }; + + # create a socket to communicate with parent + socket(INBOUND, AF_UNIX, SOCK_STREAM, $PROTOCOL) + || die "Error with Socket: !$\n"; + $filename = "./.socket.$parent.$me"; + bind(INBOUND, sockaddr_un($filename)) + || die "Error Binding $filename: $!\n"; + listen(INBOUND, 5) || die "Error Listening: $!\n"; + + my ($ip, $send_back); + my $talk = FileHandle->new; + + # accept a connection from the parent process. We only ever have + # have one connection where we exchange 1 line of info with the + # parent.. 1 line in (IP address), 1 line out (IP + hostname). + accept($talk, INBOUND) || die "Error Accepting: $!\n"; + # disable I/O buffering just in case + $talk->autoflush; + # while the parent keeps sending data, we keep responding.. + while(($ip = $talk->getline)) { + chomp($ip); + # resolve the IP if time permits and send back what we found.. + $send_back = sprintf("%s|%s", $ip, &nslookup($ip)); + $talk->print($send_back."\n"); + } +} + +# perform a time restricted hostname lookup. +sub nslookup { + # get the IP as an arg + my $ip = shift; + my $hostname = undef; + + # do the hostname lookup inside an eval. The eval will use the + # already configured SIGnal handler and drop out of the {} block + # regardless of whether the alarm occurred or not. + eval { + alarm($TIMEOUT); + $hostname = gethostbyaddr(gethostbyname($ip), AF_INET); + alarm(0); + }; + if ($@ =~ /alarm/) { + # useful for debugging perhaps.. + # print "alarming, isn't it? ($ip)"; + } + + # return the hostname or the IP address itself if there is no hostname + $hostname ne "" ? $hostname : $ip; +} + + |