Blame support/logresolve.pl.in

Packit 90a5c9
#!@perlbin@
Packit 90a5c9
#
Packit 90a5c9
# Licensed to the Apache Software Foundation (ASF) under one or more
Packit 90a5c9
# contributor license agreements.  See the NOTICE file distributed with
Packit 90a5c9
# this work for additional information regarding copyright ownership.
Packit 90a5c9
# The ASF licenses this file to You under the Apache License, Version 2.0
Packit 90a5c9
# (the "License"); you may not use this file except in compliance with
Packit 90a5c9
# the License.  You may obtain a copy of the License at
Packit 90a5c9
#
Packit 90a5c9
#     http://www.apache.org/licenses/LICENSE-2.0
Packit 90a5c9
#
Packit 90a5c9
# Unless required by applicable law or agreed to in writing, software
Packit 90a5c9
# distributed under the License is distributed on an "AS IS" BASIS,
Packit 90a5c9
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Packit 90a5c9
# See the License for the specific language governing permissions and
Packit 90a5c9
# limitations under the License.
Packit 90a5c9
#
Packit 90a5c9
#
Packit 90a5c9
# logresolve.pl
Packit 90a5c9
#
Packit 90a5c9
# v 1.2 by robh imdb.com
Packit 90a5c9
# 
Packit 90a5c9
# usage: logresolve.pl <infile >outfile
Packit 90a5c9
#
Packit 90a5c9
# input = Apache/NCSA/.. logfile with IP numbers at start of lines
Packit 90a5c9
# output = same logfile with IP addresses resolved to hostnames where
Packit 90a5c9
#  name lookups succeeded.
Packit 90a5c9
#
Packit 90a5c9
# this differs from the C based 'logresolve' in that this script
Packit 90a5c9
# spawns a number ($CHILDREN) of subprocesses to resolve addresses
Packit 90a5c9
# concurrently and sets a short timeout ($TIMEOUT) for each lookup in
Packit 90a5c9
# order to keep things moving quickly.
Packit 90a5c9
#
Packit 90a5c9
# the parent process handles caching of IP->hostnames using a Perl hash
Packit 90a5c9
# it also avoids sending the same IP to multiple child processes to be
Packit 90a5c9
# resolved multiple times concurrently.
Packit 90a5c9
#
Packit 90a5c9
# Depending on the settings of $CHILDREN and $TIMEOUT you should see
Packit 90a5c9
# significant reductions in the overall time taken to resolve your
Packit 90a5c9
# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
Packit 90a5c9
# logfile lines processed per hour compared to ~45,000 per hour
Packit 90a5c9
# with 'logresolve'.
Packit 90a5c9
#
Packit 90a5c9
# I haven't yet seen any noticeable reduction in the percentage of IPs
Packit 90a5c9
# that fail to get resolved. Your mileage will no doubt vary. 5s is long
Packit 90a5c9
# enough to wait IMO.
Packit 90a5c9
#
Packit 90a5c9
# Known to work with FreeBSD 2.2
Packit 90a5c9
# Known to have problems with Solaris
Packit 90a5c9
#
Packit 90a5c9
# 980417 - use 'sockaddr_un' for bind/connect to make the script work
Packit 90a5c9
#  with linux. Fix from Luuk de Boer <luuk_de_boer pi.net>
Packit 90a5c9
Packit 90a5c9
require 5.004;
Packit 90a5c9
Packit 90a5c9
$|=1;
Packit 90a5c9
Packit 90a5c9
use FileHandle;
Packit 90a5c9
use Socket;
Packit 90a5c9
Packit 90a5c9
use strict;
Packit 90a5c9
no strict 'refs';
Packit 90a5c9
Packit 90a5c9
# Protocol argument handed to every socket() call below; 0 asks for the
# default protocol of the requested socket type.
use vars qw($PROTOCOL);
$PROTOCOL = 0;

my $CHILDREN = 40;	# number of resolver subprocesses to fork
my $TIMEOUT  = 5;	# seconds allowed for a single lookup (see nslookup)

my $filename;		# path of the UNIX socket currently being set up
my %hash = ();		# cache used by the parent: IP => hostname (or IP)
my $parent = $$;	# parent PID, embedded in every socket filename

# Fork the resolver children. Each one binds its own UNIX socket
# (./.socket.<parent pid>.<child number>) and serves lookups until killed.
my @children = ();
for (my $child = 1; $child <= $CHILDREN; $child++) {
	my $f = fork();

	if (!defined($f)) {
		# fork() failed, so we are still the parent. Treating undef as
		# "I am the child" would make the parent itself run child() and
		# block forever in accept(). Stop the children started so far
		# and give up instead.
		my $err = $!;
		&cleanup();
		die "fork failed for child $child: $err\n";
	}

	if ($f == 0) {
		# child process: remove any stale socket left by an earlier run
		# before child() binds a fresh one.
		$filename = "./.socket.$parent.$child";
		if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
		&child($child);
		exit(0);
	}

	# parent process: remember the PID so cleanup() can terminate it
	push(@children, $f);
}

&parent();
&cleanup();
Packit 90a5c9
Packit 90a5c9
## Shut the worker pool down: terminate every forked child and delete the
## per-child UNIX socket files that were created in the current directory.
sub cleanup {
	# SIGTERM (15) to every child PID recorded at fork time
	kill(15, @children);

	foreach my $n (1 .. $CHILDREN) {
		my $path = "./.socket.$parent.$n";

		# nothing to remove if this child never created its socket
		next unless -e $path;

		unlink($path)
			or warn ".socket.$parent.$n $!";
	}
}
Packit 90a5c9
	
Packit 90a5c9
# Parent side of the resolver pool.
#
# Connects to the UNIX socket of every child, then reads logfile lines from
# STDIN in batches: each not-yet-seen IP in a batch is handed to its own
# child, the replies are collected into the %hash cache, and the buffered
# lines are written to STDOUT with the leading IP replaced by the cached
# hostname (or left as the IP when no hostname was found).
#
# Takes no arguments; returns when STDIN is exhausted.
sub parent {
	# Trap the signals that would otherwise terminate us without removing
	# the temp sockets. SIGKILL can never be caught, so TERM is trapped
	# instead. The handler exits once cleanup is done: merely returning
	# would resume the main loop with every child already killed.
	$SIG{'INT'} = $SIG{'TERM'} = $SIG{'PIPE'} = sub { &cleanup(); exit(1); };

	my %CHILDSOCK;
	my $filename;
	my $MAX_TRIES = 60;	# connect attempts (1s apart) before giving up

	 ## connect to the child processes. Each child listens on its own
	 ## socket, named with a unique temp filename.
	for (my $child = 1; $child <= $CHILDREN; $child++) {
		$CHILDSOCK{$child} = FileHandle->new;

		if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
			# without a socket the connect loop below could never succeed
			my $err = $!;
			&cleanup();
			die "parent socket to child failed $err\n";
		}

		$filename = "./.socket.$parent.$child";

		# The child may not have reached bind()/listen() yet, so retry
		# once a second - but not forever, in case the child died.
		my $tries = 0;
		until (connect($CHILDSOCK{$child}, sockaddr_un($filename))) {
			if (++$tries >= $MAX_TRIES) {
				my $err = $!;
				&cleanup();
				die "parent could not connect to $filename: $err\n";
			}
			sleep(1);
		}
		$CHILDSOCK{$child}->autoflush;
	}
	## All child processes should now be ready or at worst warming up

	my (@buffer, $child, $ip, $rest, $hostname, $response);
	 ## read the logfile lines from STDIN
	while (<STDIN>) {
		@buffer = ();	# empty the logfile line buffer array.
		$child = 1;		# children are numbered 1..N, start with #1

		# while we have a child to talk to and data to give it..
		do {
			push(@buffer, $_);					# buffer the line
			($ip, $rest) = split(/ /, $_, 2);	# separate IP from rest

			# Only well-formed lines ("IP rest") are resolved. A line
			# without a space would leave the newline inside $ip, which
			# sends two lines to the child and breaks the strict
			# one-request/one-reply exchange.
			if (defined($rest) and $ip ne '' and !$hash{$ip}) {
				$CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
				$hash{$ip} = $ip;				# don't look it up again.
				$child++;
			}
			# - "<= $CHILDREN" lets every child take part in a batch.
			# - defined() keeps a final line of "0" from being read and
			#   then silently dropped as a false value.
		} while (($child <= $CHILDREN) and defined($_ = <STDIN>));

		 ## now poll each child that was given work for its response
		while (--$child > 0) {
			$response = $CHILDSOCK{$child}->getline;
			# no reply (child gone): keep the IP already in the cache
			next unless defined($response);
			chomp($response);
			 # child sends us back both the IP and HOSTNAME, no need for us
			 # to remember what child received any given IP, and no worries
			 # what order we talk to the children
			($ip, $hostname) = split(/\|/, $response, 2);
			$hash{$ip} = $hostname
				if defined($hostname) and $hostname ne '';
		}

		 # resolve all the logfiles lines held in the log buffer array..
		foreach my $line (@buffer) {
			($ip, $rest) = split(/ /, $line, 2);

			# lines that never had an "IP rest" shape pass through as-is
			if (!defined($rest)) {
				print STDOUT $line;
				next;
			}

			 # replace the IP with the cached hostname, if there is one
			$hostname = $hash{$ip};
			$hostname = $ip
				unless defined($hostname) and $hostname ne '';
			printf STDOUT ("%s %s", $hostname, $rest);
		}
	}
}
Packit 90a5c9
Packit 90a5c9
########################################
Packit 90a5c9
Packit 90a5c9
# Resolver child.
#
# Argument: the numeric ID the parent uses to refer to this child; it
# selects the socket filename ./.socket.<parent pid>.<ID>.
#
# Binds and listens on that UNIX socket, accepts the single connection made
# by the parent, then answers every line received (an IP address) with one
# line of the form "IP|hostname". Returns when the parent stops sending.
# Dies if the socket cannot be created, bound, listened on or accepted.
sub child {
	 # arg = numeric ID - how the parent refers to me
	my $me = shift;

	 # add trap for alarm signals; nslookup() relies on this die to
	 # break out of a lookup that takes too long.
	$SIG{'ALRM'} = sub { die "alarmed"; };

	 # create a socket to communicate with parent
	 # ($! is the system error; the previous "!$\n" interpolated $\ and
	 # never reported the actual reason)
	socket(INBOUND, AF_UNIX, SOCK_STREAM, $PROTOCOL)
		|| die "Error with Socket: $!\n";
	$filename = "./.socket.$parent.$me";
	bind(INBOUND, sockaddr_un($filename))
		|| die "Error Binding $filename: $!\n";
	listen(INBOUND, 5) || die "Error Listening: $!\n";

	my ($ip, $send_back);
	my $talk = FileHandle->new;

	 # accept a connection from the parent process. We only ever have
	 # one connection, over which we exchange 1 line of info at a time
	 # with the parent.. 1 line in (IP address), 1 line out (IP + hostname).
	accept($talk, INBOUND) || die "Error Accepting: $!\n";
	 # disable I/O buffering just in case
	$talk->autoflush;
	 # while the parent keeps sending data, we keep responding..
	while(($ip = $talk->getline)) {
		chomp($ip);
		 # resolve the IP if time permits and send back what we found..
		$send_back = sprintf("%s|%s", $ip, &nslookup($ip));
		$talk->print($send_back."\n");
	}
}
Packit 90a5c9
Packit 90a5c9
# perform a time restricted hostname lookup.
#
# Argument: the address to resolve, as text.
# Returns:  the hostname found for it, or the argument itself when the
#           lookup fails, finds nothing, or exceeds $TIMEOUT seconds.
#
# Relies on the caller having installed a SIGALRM handler that die()s.
sub nslookup {
	 # get the IP as an arg
	my $ip = shift;
	my $hostname = undef;

	 # do the hostname lookup inside an eval. The eval will use the
	 # already configured SIGnal handler and drop out of the {} block
	 # regardless of whether the alarm occurred or not.
	eval {
		alarm($TIMEOUT);
		 # scalar context: gethostbyname yields the packed address
		my $packed = gethostbyname($ip);
		 # nothing to reverse-resolve if the text could not be converted
		$hostname = gethostbyaddr($packed, AF_INET) if defined($packed);
		alarm(0);
	};
	my $error = $@;

	 # If the eval was left for any reason other than the timeout, the
	 # timer is still armed; cancel it so a stray SIGALRM cannot hit the
	 # child later, outside of any eval.
	alarm(0);

	if ($error =~ /alarm/) {
		 # useful for debugging perhaps..
		# print "alarming, isn't it? ($ip)";
	}

	 # return the hostname or the IP address itself if there is no hostname
	return (defined($hostname) && $hostname ne "") ? $hostname : $ip;
}
Packit 90a5c9
Packit 90a5c9