#!/usr/bin/perl -w
#
#  Copyright 1998 DevDaily Interactive.  All Rights Reserved.
#
#---------------------------------------------------------------------#
#  PROGRAM:  readlog2.pl
#
#  PURPOSE:  Read an Apache access_log file in standard ECLF format,
#            and print out the number of hits counted for each
#            client address recorded.
#
#  USAGE:    readlog2.pl access_log_file > output_file
#
#  NOTES:    Records that do not match the ECLF layout are skipped;
#            the number of skipped records is reported on STDERR so
#            the report written to STDOUT stays clean.
#---------------------------------------------------------------------#

use strict;
use warnings;

#------------------------------------------------------------------------------#
#  Settings that control the program action and output.
#------------------------------------------------------------------------------#

my $PRINT_HITS_BY_ADDRESS = 1;     # set 'true' to see num hits by IP address
my $NUM_RECS_TO_PRINT     = 50;    # num output recs to print per section

#------------------------------------------------------------------------------#
#  One ECLF record, after runs of whitespace have been condensed to a single
#  space.  The pattern captures nine fields, in this order:
#
#      client address, RFC 1413 identity, user name, local time,
#      HTTP request, status code, bytes sent to client, referer,
#      client software
#------------------------------------------------------------------------------#

my $ECLF_RECORD = qr{
    ^ (\S+)
    [ ] (\S+)
    [ ] (\S+)
    [ ] \[ (.+) \]
    [ ] " (.+) "
    [ ] (\S+)
    [ ] (\S+)
    [ ] " (.*) "
    [ ] " (.*) "
}x;

#------------------------------------------------------------------------------#
#  usage: print a one-line usage message on STDERR.
#------------------------------------------------------------------------------#

sub usage {
    print STDERR "\n\tUsage: readlog2.pl access_log_file > output_file\n";
}

#------------------------------------------------------------------------------#
#  count_hits_by_address: read an access log record-by-record until the end
#  of the file.
#
#  Takes:    an open, readable filehandle.
#  Returns:  a two-element list:
#              - a reference to a hash mapping each client address to the
#                number of records seen for it
#              - the number of records that did not match the ECLF layout
#------------------------------------------------------------------------------#

sub count_hits_by_address {
    my ($fh) = @_;

    my %numHits;
    my $numSkipped = 0;

    while (my $record = <$fh>) {
        chomp $record;

        # condense one or more whitespace characters to one single space
        $record =~ s/\s+/ /g;

        # Only the client address is needed for this report; the other eight
        # captured fields are deliberately ignored.
        my ($clientAddress) = $record =~ $ECLF_RECORD;

        # A failed match leaves $clientAddress undefined.  Counting such a
        # record would file it under an empty address, so skip it instead.
        if (!defined $clientAddress) {
            $numSkipped++;
            next;
        }

        # %numHits is a hash keyed by client address
        $numHits{$clientAddress}++;
    }

    return (\%numHits, $numSkipped);
}

#------------------------------------------------------------------------------#
#  print_hits_by_address: write the "hits per IP address" section to STDOUT.
#
#  Takes:    a reference to the address => hit-count hash, and the maximum
#            number of records to print.
#  Output:   busiest addresses first; addresses with the same hit count are
#            listed in string order so repeated runs print the same report.
#------------------------------------------------------------------------------#

sub print_hits_by_address {
    my ($numHits, $maxRecs) = @_;

    print "NUMBER OF HITS PER IP ADDRESS:\n";
    print "------------------------------\n\n";

    my @byHits = sort { $numHits->{$b} <=> $numHits->{$a} or $a cmp $b }
                 keys %{$numHits};

    my $count = 0;
    foreach my $key (@byHits) {
        last if ($count >= $maxRecs);
        print "$numHits->{$key} \t\t $key\n";
        $count++;
    }

    print "\n\n";
    return;
}

#--------------------------<<  main  >>-----------------------------#

# First, make sure the program is invoked with the correct number of
# command-line arguments.
if (@ARGV != 1) {
    usage();
    exit 1;
}

my $logFile = $ARGV[0];

# Three-argument open: the file name is always treated as a plain path to
# read, never as a mode or a command to run.
open(my $logFh, '<', $logFile)
    or die " Error opening log file $logFile: $!\n";

my ($numHits, $numSkipped) = count_hits_by_address($logFh);

close($logFh);

if ($numSkipped) {
    print STDERR "readlog2.pl: skipped $numSkipped record(s) not in ECLF format\n";
}

# Output the number of hits per IP address
if ($PRINT_HITS_BY_ADDRESS) {
    print_hits_by_address($numHits, $NUM_RECS_TO_PRINT);
}