httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Rob Hartill <r...@imdb.com>
Subject [support] logresolve.pl
Date Wed, 31 Dec 1997 12:26:55 GMT

Something I've been playing with..


#!/usr/local/bin/perl

# ====================================================================
# Copyright (c) 1995-1998 The Apache Group.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer. 
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
#
# 3. All advertising materials mentioning features or use of this
#    software must display the following acknowledgment:
#    "This product includes software developed by the Apache Group
#    for use in the Apache HTTP server project (http://www.apache.org/)."
#
# 4. The names "Apache Server" and "Apache Group" must not be used to
#    endorse or promote products derived from this software without
#    prior written permission. For written permission, please contact
#    apache@apache.org.
#
# 5. Redistributions of any form whatsoever must retain the following
#    acknowledgment:
#    "This product includes software developed by the Apache Group
#    for use in the Apache HTTP server project (http://www.apache.org/)."
#
# THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE GROUP OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
#
# This software consists of voluntary contributions made by many
# individuals on behalf of the Apache Group and was originally based
# on public domain software written at the National Center for
# Supercomputing Applications, University of Illinois, Urbana-Champaign.
# For more information on the Apache Group and the Apache HTTP server
# project, please see <http://www.apache.org/>.

# logresolve.pl
#
# v 1.0 by robh @ imdb.com
# 
# usage: logresolve.pl <infile >outfile
#
# input = Apache/NCSA/.. logfile with IP numbers at start of lines
# output = same logfile with IP addresses resolved to hostnames where
#  name lookups succeeded.
#
# this differs from the C based 'logresolve' in that this script
# spawns a number ($CHILDREN) of subprocesses to resolve addresses
# concurrently and sets a short timeout ($TIMEOUT) for each lookup in
# order to keep things moving quickly.
#
# the parent process handles caching of IP->hostnames using a Perl hash
# it also avoids sending the same IP to multiple child processes to be
# resolved multiple times concurrently.
#
# Depending on the settings of $CHILDREN and $TIMEOUT you should see
# significant reductions in the overall time taken to resolve your
# logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
# logfile lines processes per hour compared to ~45,000 per hour
# with 'logresolve'.
#
# I haven't yet seen any noticable reduction in the percentage of IPs
# that fail to get resolved. Your mileage will no doubt vary. 5s is long
# enough to wait IMO.

$|=1;

use FileHandle;
use Socket;

use strict;
no strict 'refs';

use vars qw($AF_UNIX $SOCK_STREAM $PROTOCOL);
($AF_UNIX, $SOCK_STREAM, $PROTOCOL) = (1, 1, 0);

my $CHILDREN = 40;
my $TIMEOUT  = 5;

my $filename;
my %hash = ();
my $parent = $$;

my @children = ();
for (my $child = 1; $child <=$CHILDREN; $child++) {
	my $f = fork();	
	if (!$f) {
		$filename = "./.socket.$parent.$child";
		if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
		&child($child);
		exit(0);
	}
	push(@children, $f);
}

&parent;
&cleanup;

## remove all temporary files before shutting down
sub cleanup {
	 # die kiddies, die
	kill(15, @children);
	for (my $child = 1; $child <=$CHILDREN; $child++) {
		if (-e "./.socket.$parent.$child") {
			unlink("./.socket.$parent.$child")
				|| warn ".socket.$parent.$child $!";
		}
	}
}
	
sub parent {
	# Trap some possible signals to trigger temp file cleanup
	$SIG{'KILL'} = $SIG{'INT'} = $SIG{'PIPE'} = \&cleanup;

	my %CHILDSOCK;
	my $filename;
 
	 ## fork child processes. Each child will create a socket connection
	 ## to this parent and use an unique temp filename to do so.
	for (my $child = 1; $child <=$CHILDREN; $child++) {
		$CHILDSOCK{$child}= FileHandle->new;

		if (!socket($CHILDSOCK{$child}, $AF_UNIX, $SOCK_STREAM, $PROTOCOL)) {
			warn "parent socket to child failed $!";
		}
		$filename = "./.socket.$parent.$child";
		my $response;
		do {
			$response = connect($CHILDSOCK{$child}, "$filename");
			if ($response != 1) {
				sleep(1);
			}                       
		} while ($response != 1);
		$CHILDSOCK{$child}->autoflush;
	}
	## All child processes should now be ready or at worst warming up 

	my (@buffer, $child, $ip, $rest, $hostname, $response);
	 ## read the logfile lines from STDIN
	while(<STDIN>) {
		@buffer = ();	# empty the logfile line buffer array.
		$child = 1;		# children are numbered 1..N, start with #1

		# while we have a child to talk to and data to give it..
		do {
			push(@buffer, $_);					# buffer the line
			($ip, $rest) = split(/ /, $_, 2);	# separate IP form rest

			unless ($hash{$ip}) {				# resolve if unseen IP
				$CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
				$hash{$ip} = $ip;				# don't look it up again.
				$child++;
			}
		} while (($child < ($CHILDREN-1)) and ($_ = <STDIN>));

		 ## now poll each child for a response
		while (--$child > 0) { 
			$response = $CHILDSOCK{$child}->getline;
			chomp($response);
			 # child sends us back both the IP and HOSTNAME, no need for us
			 # to remember what child received any given IP, and no worries
			 # what order we talk to the children
			($ip, $hostname) = split(/\|/, $response, 2);
			$hash{$ip} = $hostname;
		}

		 # resolve all the logfiles lines held in the log buffer array..
		for (my $line = 0; $line <=$#buffer; $line++) {
			 # get next buffered line
			($ip, $rest) = split(/ /, $buffer[$line], 2);
			 # separate IP from rest and replace with cached hostname
			printf STDOUT ("%s %s", $hash{$ip}, $rest);
		}
	}
}

########################################

sub child {
	 # arg = numeric ID - how the parent refers to me
	my $me = shift;

	 # add trap for alarm signals.
	$SIG{'ALRM'} = sub { die "alarmed"; };

	 # create a socket to communicate with parent
	socket(INBOUND, $AF_UNIX, $SOCK_STREAM, $PROTOCOL)
		|| die "Error with Socket: !$\n";
	$filename = "./.socket.$parent.$me";
	bind(INBOUND, $filename) || die "Error Binding $filename: $!\n";
	listen(INBOUND, 5) || die "Error Listening: $!\n";

	my ($ip, $send_back);
	my $talk = FileHandle->new;

	 # accept a connection from the parent process. We only ever have
	 # have one connection where we exchange 1 line of info with the
	 # parent.. 1 line in (IP address), 1 line out (IP + hostname).
	accept($talk, INBOUND) || die "Error Accepting: $!\n";
	 # disable I/O buffering just in case
	$talk->autoflush;
	 # while the parent keeps sending data, we keep responding..
	while(($ip = $talk->getline)) {
		chomp($ip);
		 # resolve the IP if time permits and send back what we found..
		$send_back = sprintf("%s|%s", $ip, &nslookup($ip));
		$talk->print($send_back."\n");
	}
}

# perform a time restricted hostname lookup.
sub nslookup {
	 # get the IP as an arg
	my $ip = shift;
	my $hostname = undef;

	 # do the hostname lookup inside an eval. The eval will use the
	 # already configured SIGnal handler and drop out of the {} block
	 # regardless of whether the alarm occured or not.
	eval {
		alarm($TIMEOUT);
		$hostname = gethostbyaddr(gethostbyname($ip), AF_INET);
		alarm(0);
	};
	if ($@ =~ /alarm/) {
		 # useful for debugging perhaps..
		# print "alarming, isn't it? ($ip)";
	}

	 # return the hostname or the IP address itself if there is no hostname
	$hostname ne "" ? $hostname : $ip;
}




Mime
View raw message