#!/usr/bin/perl
#
# Name : Matthew Reeves
#
# This program will analyze a web log using perl
#
# Usage: <script> <logfile> [<logfile> ...]
#
# Every log file named on the command line is read line by line.  Each line
# that matches the access-log pattern below is tallied into a set of
# per-category hit counters (hostnames, domains, dates, hours, status codes,
# URLs, file types, browsers, browser families, referers, referer domains and
# operating systems).  A formatted report is then written to mdreeves.report.
#

#use diagnostics;
use Modern::Perl;
use Net::hostent;

#name of the report file that is (re)created on every run
my $REPORT_FILE = "mdreeves.report";

#pattern applied to every log line; the ten capture groups are, in order:
#host, date, hour, request method, request path, protocol, status, bytes,
#referer and user agent
my $LOG_ENTRY_RE = qr/(.+?)\s.+?\[(.+?):(\d\d):.+?\s.+?"(.+?)\s(.+?)\s(.+?)"\s(.+?)\s(.+?)\s"(.+?)"\s"(.+?)"/;

#ordered classification rules: the first pattern that matches wins
my @FILETYPE_RULES = (
    [ qr/.+?\.cgi/i,                      "CGI Program" ],
    [ qr/.+?\.css/i,                      "Style Sheet" ],
    [ qr/.+?\.htm/i,                      "Web Pages"   ],
    [ qr/.+?\.(?:gif|jpg|jpeg|png|ico)/i, "Image"       ],
);

my @BROWSER_RULES = (
    [ qr/Chrome/,  "Chrome"  ],
    [ qr/Firefox/, "Firefox" ],
    [ qr/MSIE/,    "MSIE"    ],
    [ qr/Opera/,   "Opera"   ],
    [ qr/Safari/,  "Safari"  ],
);

my @OS_RULES = (
    [ qr/Windows/,   "Windows"   ],
    [ qr/Macintosh/, "Macintosh" ],
);

#cache of reverse lookups (host => [name, domain]) so that each distinct
#host is resolved only once per run
my %resolved_for = ();

################################################################################
#classify($text, $default, @rules)
#returns the label of the first [pattern, label] rule whose pattern matches
#$text, or $default when no rule matches
sub classify {
    my ($text, $default, @rules) = @_;

    for my $rule (@rules) {
        my ($pattern, $label) = @{$rule};
        return $label if $text =~ $pattern;
    }

    return $default;
}

################################################################################
#resolve_host($host)
#runs a lookup on $host via gethost() and returns (name, domain).
#when the lookup fails, name is $host itself and domain is "OTHER".
#when it succeeds, name is the resolved name and domain is its last two
#dot-separated labels (or the whole name when it has two labels or fewer)
sub resolve_host {
    my ($host) = @_;

    if (!exists $resolved_for{$host}) {
        my $name   = $host;
        my $domain = "OTHER";

        if (my $entry = gethost($host)) {
            $name = $entry->name();
            my @labels = split /\./, $name;
            $domain = @labels > 2 ? join(".", @labels[-2, -1]) : $name;
        }

        $resolved_for{$host} = [ $name, $domain ];
    }

    return @{ $resolved_for{$host} };
}

################################################################################
#print_section($out, $title, $hits_for, $total, $numeric)
#writes one report section to the filehandle $out: a banner containing
#$title, one "hits / percentage / key" row per key of the hash reference
#$hits_for, and a footer showing $total.  keys are sorted numerically when
#$numeric is true and as strings otherwise.  percentages are relative to
#$total and are reported as 0 when $total is 0 (avoids division by zero)
sub print_section {
    my ($out, $title, $hits_for, $total, $numeric) = @_;

    my @keys = $numeric
        ? sort { $a <=> $b } keys %{$hits_for}
        : sort { $a cmp $b } keys %{$hits_for};

    print {$out} "\n";
    print {$out} "========================================================\n";
    print {$out} "$title\n";
    print {$out} "========================================================\n";
    print {$out} "\n";
    print {$out} " Hits %-age Resource\n";
    print {$out} " ---- ----- --------\n";

    for my $key (@keys) {
        my $hits    = $hits_for->{$key};
        my $percent = $total ? ($hits / $total) * 100 : 0;
        printf {$out} "%6d %5.2f %s\n", $hits, $percent, $key;
    }

    print {$out} " ----\n";
    printf {$out} "%6d entries displayed\n", $total;

    return;
}

################################################################################
#checks if file entered at cmd line, exits w/ usage stmnt if not
if (!@ARGV) {
    print STDERR "\nUsage: $0 <logfile> [<logfile> ...]\n\n";
    exit 1;
}

#declare/initialize variables
my $filecount       = 0;    #number of log files processed
my $count           = 0;    #number of log entries tallied (all files)
my $skipped         = 0;    #number of lines that did not match the pattern
my $totalbytes      = 0;    #sum of the numeric byte fields
my %hostnames       = ();
my %domains         = ();
my %dates           = ();
my %hours           = ();
my %status          = ();
my %url             = ();
my %filetype        = ();
my %browsers        = ();
my %browserfamily   = ();
my %referer         = ();
my %referdomain     = ();
my %operatingsystem = ();

#opens output file for write access or die
open(my $out, ">", $REPORT_FILE)
    or die "\nUnable to open file: $REPORT_FILE: $!\n";

#executes functionality of program once per file entered
for my $filename (@ARGV) {

    #opens file entered as cmd line argument (read-only), or die
    open(my $in, "<", $filename)
        or die "\nUnable to open file: $filename: $!\n";

    #reads in data from current file, and extracts desired data
    LINE:
    while (my $line = <$in>) {

        #lines that do not look like a log entry are skipped rather than
        #tallied with captures left over from an earlier line
        my @fields = $line =~ $LOG_ENTRY_RE;
        if (!@fields) {
            $skipped++;
            next LINE;
        }

        #request method and protocol are captured but not reported
        my ($host, $date, $hour, undef, $path, undef,
            $status_code, $bytes, $ref_url, $agent) = @fields;

        #runs reverse lookup
        my ($name, $domain) = resolve_host($host);

        #stores value as key, occurrences as value. 1 hash per report
        $hostnames{$name}++;
        $domains{$domain}++;
        $dates{$date}++;
        $hours{$hour}++;
        $status{$status_code}++;
        $url{$path}++;

        #identifies file type from the requested path
        $filetype{ classify($path, "Other Request", @FILETYPE_RULES) }++;

        #checks if browser ID was present
        $agent = "NO BROWSER ID" if $agent eq "-";
        $browsers{$agent}++;

        #identifies browser family and operating system from the agent
        $browserfamily{ classify($agent, "Unknown", @BROWSER_RULES) }++;
        $operatingsystem{ classify($agent, "Other", @OS_RULES) }++;

        #identifies presence of referer
        if ($ref_url =~ /http/) {
            $referer{$ref_url}++;
        }
        else {
            $referer{"NO REFERER"}++;
        }

        #identifies presence of referer domain
        #NOTE(review): the pattern needs a dot in front of the captured
        #label pair, so a referer such as http://example.com/ is counted
        #as NONE - confirm whether that is intended
        if ($ref_url =~ /http.+?\.(\w+\.\w+)\//) {
            $referdomain{$1}++;
        }
        else {
            $referdomain{"NONE"}++;
        }

        #track total number of bytes; non-numeric byte fields are ignored
        $totalbytes += $bytes if $bytes =~ /\A\d+\z/;

        #increments count to track entries processed
        $count++;
    }

    #closes input file
    close $in;

    #increments filecount to track number of files entered
    $filecount++;
}

################################################################################
#prints formatted summary of analyzer results for all files
print {$out} "########################################################\n";
print {$out} "Web Server Log Analyzer - SUMMARY\n";
print {$out} "########################################################\n";
print {$out} "\n";
print {$out} "Processed $count entries from the following $filecount files: ";
for my $filename (@ARGV) {
    print {$out} "\n\t- $filename";
}
print {$out} "\n";

################################################################################
#prints formatted results, one section per report:
#[ section title, counter hash, sort keys numerically? ]
my @sections = (
    [ "HOSTNAMES",         \%hostnames,       0 ],
    [ "DOMAINS",           \%domains,         0 ],
    [ "DATES",             \%dates,           0 ],
    [ "HOURS",             \%hours,           1 ],
    [ "STATUS CODES",      \%status,          1 ],
    [ "URLS",              \%url,             0 ],
    [ "FILETYPES",         \%filetype,        0 ],
    [ "BROWSERS",          \%browsers,        0 ],
    [ "BROWSER FAMILIES",  \%browserfamily,   0 ],
    [ "REFERER",           \%referer,         0 ],
    [ "REFERER DOMAIN",    \%referdomain,     0 ],
    [ "OPERATING SYSTEMS", \%operatingsystem, 0 ],
);

for my $section (@sections) {
    my ($title, $hits_for, $numeric) = @{$section};
    print_section($out, $title, $hits_for, $count, $numeric);
}

################################################################################
#prints formatted results for total bytes
print {$out} "\n########################################################\n";
print {$out} "Total bytes served: $totalbytes bytes\n";
print {$out} "########################################################\n\n";

################################################################################
#closes output file; buffered write errors surface here, so it is checked
close($out)
    or die "\nUnable to close file: $REPORT_FILE: $!\n";

#prints to console status message indicating successful completion
print "\nSuccessfully Processed $filecount files.";
print "\nAnalyzed results stored: $REPORT_FILE\n\n";

#reports lines that were ignored because they did not match the log pattern
print STDERR "Skipped $skipped unparseable line(s).\n" if $skipped;