#!/usr/bin/perl
# sitebuild-log-check
# by Revford, 2008
# pull some useful data out of the log files

use Term::ANSIColor qw(:constants);
use Net::Whois::Raw qw( whois );

# Directory that holds the daily access logs, one file per day, named
# access.YYYYMMDD.
$logfiledir   = "/home/gav/web/blue-logs/logs/";

# How many days back to report on. Taken from the first command-line
# argument; with no argument the report covers yesterday (1).
$daysago = $ARGV[0];
if (!defined $daysago || $daysago eq '') {$daysago = 1;}
# Anything that is not a whole number would silently become 0 ("today") in
# the arithmetic below, so reject it up front.
if ($daysago !~ /\A\d+\z/) {die "Usage: $0 [days-ago]   (whole number of days, default 1)\n";}

# Scalar gmtime() yields e.g. "Thu Sep  4 12:34:56 2008". Single-digit days
# are padded with an extra space, so split on runs of whitespace.
$longdate = gmtime(time() - ( 24 * 60 * 60 * $daysago)); # remove 24 hours, 60 mins, 60 sec * $daysago
($day, $month, $mday, $fullTime, $year) = split(' ', $longdate);
%months = (Jan => '01', Feb => '02', Mar => '03', Apr => '04', May => '05', Jun => '06',
           Jul => '07', Aug => '08', Sep => '09', Oct => '10', Nov => '11', Dec => '12');
$mday = sprintf "%02d", $mday;
$monthnumber = $months{$month};
$myyesterday = "$year$monthnumber$mday";   # YYYYMMDD, the log file suffix
$logdate = "$day, $mday $month $year";     # human-readable date for the report header


# Read the whole log for that day into @logfull (one element per line).
# Three-argument open with a lexical handle; the die message carries the OS
# error and resets the terminal colour so the shell is not left red.
open(my $logfh, '<', "$logfiledir/access.$myyesterday")
  or die RED, "File access.$myyesterday could not be read: $!", RESET, "\n";
@logfull = <$logfh>;
close $logfh;


# strip stuff out
# Every entry is a literal piece of text; a log line containing any of them
# is dropped from @log. They are matched as plain substrings (case-sensitive)
# rather than regexes, so a "." only matches a real dot.
my @ignored_substrings = (
    # all requests for the avatar.jpg file
    'avatar.jpg',
    # all the requests from my IP (put the address here to enable)
    #'X.X.X.X',
    # all taps from forums
    'forum.backofbeyond.de', 'thephoenixwargamersclub.com', 'www.worldeaters.net',
    # search engines, robots and crawlers
    'Ask Jeeves', 'Yahoo-MMCrawler', 'Yahoo! Slurp', 'images.search.yahoo.com',
    'search.msn.com', 'Googlebot', 'psbot', 'Twiceler', 'MJ12bot', 'TurnitinBot',
    'www.page-store.com', 'heritrix', 'Netcraft Web Server Survey', 'EnaBot',
    'Gigabot', 'VadixBot',
    # validators
    'FeedValidator', 'W3C_CSS_Validator', 'Validator.nu', 'W3C_Validator',
    # all 404s
    'error-404.html',
);

# Return 1 if the given log line contains any of the ignored substrings,
# 0 otherwise.
sub is_ignored_hit {
    my ($line) = @_;
    foreach my $needle (@ignored_substrings) {
        return 1 if index($line, $needle) >= 0;
    }
    return 0;
}

# @log is what remains of the full log after a single filtering pass.
@log = grep { !is_ignored_hit($_) } @logfull;


# Return the distinct client addresses found in the given log lines, in
# first-seen order. The address is the first whitespace-separated field of a
# line; lines with no such field are skipped.
# Addresses are compared for exact equality through a hash, so "1.2.3.4" is
# never mistaken for part of "11.2.3.45" and the work is linear in the
# number of lines.
sub unique_client_ips {
    my @lines = @_;
    my (%seen, @ips);
    foreach my $line (@lines) {
        my ($ip) = split ' ', $line;
        next unless defined $ip;
        push @ips, $ip unless $seen{$ip}++;
    }
    return @ips;
}

# unique IPs
@iplist  = unique_client_ips(@log);
$ipcount = @iplist;
$ipdupes = @log - @iplist;   # lines in @log beyond the first from each IP; 0 when there are none

# Hits based on OS
# Each count is the number of distinct IPs whose log lines contain the
# matching text.
#XP
@usersXP      = grep {/NT 5\.[12]/} @log;
@usersXPlist  = unique_client_ips(@usersXP);
$usersXPcount = @usersXPlist;
#Vista
@usersVISTA      = grep {/NT 6\.0/} @log;
@usersVISTAlist  = unique_client_ips(@usersVISTA);
$usersVISTAcount = @usersVISTAlist;
#Mac
@usersMAC      = grep {/Mac OS X/} @log;
@usersMAClist  = unique_client_ips(@usersMAC);
$usersMACcount = @usersMAClist;
# UNIX
@usersUNIX      = grep {/X11|Linux/} @log;
@usersUNIXlist  = unique_client_ips(@usersUNIX);
$usersUNIXcount = @usersUNIXlist;



# count feed viewers
# Distinct IPs that requested rss.xml.
@feedlog = grep {/rss\.xml/} @log;

@feediplist  = unique_client_ips(@feedlog);
$feedipcount = @feediplist;
$feedipdupes = @feedlog - @feediplist;


# count 404s
# Taken from the unfiltered log (@logfull): collect every line that mentions
# the 404 error page, then record how many there were.
@fzfs = ();
foreach my $entry (@logfull)
  {
  push(@fzfs, $entry) if $entry =~ /error-404.html/;
  }
$fzfcount = scalar(@fzfs);


# Return the distinct user-agent strings found in the given log lines, in
# first-seen order. The user agent is taken as the sixth double-quote
# delimited field of a line; lines that have no such field are skipped.
# Strings are compared for exact equality through a hash. A user agent is
# client-supplied text and must never be used as a regex pattern: stray
# metacharacters could abort the script, and substring matches would merge
# different agents.
sub unique_user_agents {
    my @lines = @_;
    my (%seen, @agents);
    foreach my $line (@lines) {
        my $agent = (split /"/, $line)[5];
        next unless defined $agent;
        # Drop [ ] ? + ( ) so the printed strings look the same as in
        # earlier versions of this report.
        $agent =~ tr/[]?+()//d;
        push @agents, $agent unless $seen{$agent}++;
    }
    return @agents;
}

# Browser Strings
@browserlist  = unique_user_agents(@log);
$browsercount = @browserlist;
$browserdupes = @log - @browserlist;

# Hit totals per browser family: number of filtered log lines containing the
# family's marker, right-aligned for the report.
@explorers = grep {/MSIE/} @log;
$explorercount = sprintf "% 4d", scalar(@explorers);

@firefoxes = grep {/Firefox/} @log;
$firefoxcount = sprintf "% 4d", scalar(@firefoxes);

@safaris = grep {/Safari/} @log;
$safaricount = sprintf "% 4d", scalar(@safaris);

@operas = grep {/Opera/} @log;
$operacount = sprintf "% 4d", scalar(@operas);


# robot counter
# Distinct user agents that requested robots.txt, taken from the unfiltered
# log.
@fullrobotlist = grep {/robots\.txt/} @logfull;
@robotlist  = unique_user_agents(@fullrobotlist);
$robotcount = @robotlist;
$robotdupes = @fullrobotlist - @robotlist;


# Pick the fields shown in the report out of a raw WHOIS reply.
# Takes the reply text (may be undef or empty) and returns three strings:
#   - the first line containing "country", with that word, spaces and colons
#     removed;
#   - the second-to-last line containing "descr";
#   - the last line containing "descr".
# The "descr" lines have the word itself, pairs of spaces, colons and equals
# signs removed. Any value that is missing comes back as ''.
sub whois_summary {
    my ($raw) = @_;
    my @lines = split /\r?\n/, (defined $raw ? $raw : '');

    my ($country) = grep { /country/ } @lines;
    $country = '' unless defined $country;
    $country =~ s/country//;
    $country =~ tr/ ://d;

    my @descr = grep { /descr/ } @lines;
    foreach my $text (@descr) {
        $text =~ s/descr//;
        $text =~ s/  //g;
        $text =~ tr/:=//d;
    }
    my $last = pop @descr;
    my $prev = pop @descr;

    return ($country,
            defined $prev ? $prev : '',
            defined $last ? $last : '');
}

# print the results
print "\n";

print GREEN, "Results for $logdate\n", RESET;
print "\n";

# $ipdupes may never have been set when there were no repeat visitors.
print CYAN, "$ipcount unique human IPs with " . ($ipdupes || 0) . " dupes.\n", RESET;
foreach my $ip (@iplist)
  {
  # some WHOIS based fun
  # One lookup per address. The eval keeps a single failed lookup from
  # aborting the whole report.
  my $raw;
  my $ok = eval { $raw = whois($ip); 1 };
  if (!$ok)
    {
    print "  $ip - (whois lookup failed)\n";
    next;
    }

  my ($country, $owner, $detail) = whois_summary($raw);
  print "  $ip - $country - $owner [$detail]\n";
  }
print "\n";

# Browser section: number of distinct user-agent strings, the hit totals per
# browser family, then each distinct user-agent string on its own line.
print CYAN,   "$browsercount different browsers.\n", RESET,
      YELLOW, "  Firefox hits   $firefoxcount\n", RESET,
      YELLOW, "  Explorer hits  $explorercount\n", RESET,
      YELLOW, "  Safari hits    $safaricount\n", RESET,
      YELLOW, "  Opera hits     $operacount\n", RESET;
print "  $_\n" foreach @browserlist;
print "\n";

# Operating-system section: distinct visitor counts per OS family.
print CYAN,   "Users by OS\n", RESET,
      YELLOW, "  WinXP users   $usersXPcount\n", RESET,
      YELLOW, "  Vista users   $usersVISTAcount\n", RESET,
      YELLOW, "  MacOS users   $usersMACcount\n", RESET,
      YELLOW, "  UNIX  users   $usersUNIXcount\n", RESET;


# Robot section: each distinct user agent that asked for robots.txt.
print CYAN, "$robotcount different robots.\n", RESET;
print "  $_\n" foreach @robotlist;
print "\n";

# Feed section.
print CYAN, "$feedipcount different feed hits.\n", RESET;
print "\n";

# 404 section: the colour reflects how bad the count is
# (more than 20 red, more than 10 magenta, otherwise green).
my $fzfcolour = ($fzfcount > 20) ? RED()
              : ($fzfcount > 10) ? MAGENTA()
              :                    GREEN();
print $fzfcolour, "$fzfcount 404 errors.\n", RESET;

