#!/usr/bin/perl
#
# Name : Matthew Reeves
#
# Scrapes box office data from rottentomatoes.com and emails it to the
# address given as the first command line argument. An optional second
# argument is a T-Meter rating (0-100); when present, the movies rated at
# or above it are listed in an extra section of the report.
#

#use diagnostics;
use warnings;
use strict;
use Mail::Sendmail;

#usage text printed whenever the command line arguments are rejected
my $usage = "\nUsage: $0 <email address> [T-Meter rating 0-100]\n\n";

#exactly one or two command line arguments are accepted. The count is
#tested (rather than the truth of $ARGV[2]) so that a third argument of
#"0" is still rejected. The elsif rejects T-Meter values that are not
#plain non-negative numbers or that exceed 100; a leading minus sign fails
#the pattern, which covers the "below 0" case.
if (@ARGV < 1 || @ARGV > 2)
{
	print $usage;
	exit 1;
}
elsif (defined $ARGV[1]
	&& ($ARGV[1] !~ /\A\d+(?:\.\d+)?\z/ || $ARGV[1] > 100))
{
	print "\nT-Meter rating entered must be 0-100 (inclusive)\n";
	print $usage;
	exit 1;
}

#scalars for the optional user specified T-Meter rating. 3001 is only a
#placeholder outside the accepted 0-100 range; $t_meter_check records
#whether the user actually supplied a rating (0 is a valid rating, so the
#argument is tested with defined, not for truth).
my $t_meter = 3001;
my $t_meter_check = 0;
my $msg_tmet;
my $msg_tmet_check = 0;

if (defined $ARGV[1])
{
	#variable to hold t-meter rating
	$t_meter = $ARGV[1];

	#check variable to see if T-meter value entered
	$t_meter_check = 1;
}

#declare and initialize scalars for sending email
my $mailTo = $ARGV[0];
my $mailFrom = 'mdreeves@unomaha.edu';
my $subject = "Weekend Box Office Report - EXTRA CREDIT";

#declare and initialize scalars for dumping data from rottentomatoes via lynx
my $pageToGrab = "http://www.rottentomatoes.com/movie/box_office.php";
my $command = "/usr/bin/lynx -dump -width=150 $pageToGrab";
my @pageFile = `$command`;

#stop here if lynx could not be run, exited with an error, or produced no
#output, instead of going on to build a report from nothing
if ($? != 0 || !@pageFile)
{
	die "\nError retrieving $pageToGrab with lynx (wait status $?)\n";
}

#declare arrays and scalars needed for data scraping and manipulation
my $count = 0;
my $tmp;
my @debut;
my $x;
my $title;

#declare scalars for determining and storing biggest gains/losses in rank
my $big_gain;
my $num_gain = 0;
my $big_loss;
my $num_loss = 0;

#declares message scalar, and stores url data was scraped from, and column
#labels needed to identify the different types of data scraped from the site
my $message = sprintf(
	"\n\nData scraped from: $pageToGrab\n\n\n%3s  %3s  %-35s  %7s  %7s  %7s\n",
	'##', '##', 'Movie Title', 'Weekend', 'Cume', 'T-Meter');

#when $t_meter_check is set, seed $msg_tmet with the heading and column
#labels for the listing of movies at or above the requested T-meter rating
if ($t_meter_check)
{
	my @column_labels = ('##', '##', 'Movie Title', 'Weekend', 'Cume', 'T-Meter');

	$msg_tmet = sprintf(
		"\n\n\nThe following movies achieved a T-meter rating of $t_meter or higher:\n\n\n%3s  %3s  %-35s  %7s  %7s  %7s\n",
		@column_labels
	);
}

#walks every line held in @pageFile. Lines matching the box office row
#pattern are formatted and appended to $message, optionally appended to
#$msg_tmet, and fed into the debut list and the biggest rank gain/loss
#tracking. Non-matching lines are skipped.
foreach my $line (@pageFile)
{
	next unless $line =~ /(\d+)\s*(\d+|-+)\s*(\d+%|-+|N\/A)\s*\[.+\](.*?)\s*\d+\s*(\$\d+\.*\d*(M|k|))\s*(\$\d+\.*\d*(M|k|))/;

	#copy the captures into named lexicals straight away: any later
	#regex match would overwrite $1, $2, ...
	my ($rank, $last_rank, $rating, $movie, $weekend, $cume)
		= ($1, $2, $3, $4, $5, $7);

	#titles longer than 35 characters are truncated so they fit the
	#35 character wide title column
	$movie = substr($movie, 0, 35) if length($movie) > 35;

	#formatted row for this movie, appended on to $message
	my $row = sprintf("\n%3s  %3s  %-35s  %7s  %7s  %7s",
		$rank, $last_rank, $movie, $weekend, $cume, $rating);

	$message .= $row;

	#if the optional t-meter value was entered at the command line and
	#this movie's t-meter value is the input value or higher, the row
	#is also appended onto $msg_tmet
	if ($t_meter_check)
	{
		#numeric part of a "NN%" rating; anything else the pattern
		#allows ("N/A" or a run of dashes) counts as 0
		my $rating_value = $rating =~ /\A(\d+)%\z/ ? $1 : 0;

		if ($rating_value >= $t_meter)
		{
			$msg_tmet .= $row;
			$msg_tmet_check = 1;
		}
	}

	#a previous rank made only of dashes marks a debut; testing for any
	#run of dashes keeps non-numeric text out of the rank arithmetic
	if ($last_rank =~ /\A-+\z/)
	{
		push @debut, sprintf("%s (%d)", $movie, $count + 1);
	}
	else #if not, checks for, and stores largest gainer/loser
	{
		#rank gain (positive) or loss (negative) of current title
		my $change = $last_rank - $rank;

		#a change of 0 is neither a gain nor a loss, so it is ignored;
		#this also guarantees $big_gain/$big_loss are already set
		#whenever a tie is appended to them
		if ($change > 0)
		{
			if ($change > $num_gain)
			{
				$num_gain = $change;
				$big_gain = $movie;
			}
			elsif ($change == $num_gain)
			{
				$big_gain .= ", $movie";
			}
		}
		elsif ($change < 0)
		{
			if ($change < $num_loss)
			{
				$num_loss = $change;
				$big_loss = $movie;
			}
			elsif ($change == $num_loss)
			{
				$big_loss .= ", $movie";
			}
		}
	}

	$count++;
}

#the following assignments build the strings identifying the best and worst
#debuts (first and last entries of @debut). 'None' is used when @debut is
#empty so no undefined value is concatenated.
my $bestdebut = "\n\n\nBiggest Debut: ".(@debut ? $debut[0] : 'None')."\n";
my $worstdebut = "Weakest Debut: ".(@debut ? $debut[-1] : 'None');

#append best and worst debut strings onto $message
$message .= $bestdebut.$worstdebut;

#appends onto $message string stating biggest rank gain; reported as
#'None' unless a title with a positive gain was recorded
my $gain_text = (defined $big_gain && $num_gain > 0)
	? $big_gain." ($num_gain places)"
	: 'None';

$message .= "\nBiggest Gain: ".$gain_text;

#a loss is stored as a negative number; note whether one was recorded
#before converting it so it can be displayed as a positive number
my $had_loss = defined $big_loss && $num_loss < 0;
$num_loss = abs $num_loss;

#appends onto $message string stating biggest rank loss; reported as
#'None' unless a title with a rank loss was recorded
my $loss_text = $had_loss
	? $big_loss." ($num_loss places)"
	: 'None';

$message .= "\nBiggest Loss: ".$loss_text;

#if needed appends on optional T-meter data, if none met input rating
#adjust output to inform user
if ($t_meter_check)
{
	$msg_tmet =
	"\n\n\nNo movies achieved a T-Meter rating of $t_meter or higher."
		unless $msg_tmet_check;

	$message .= $msg_tmet;
}

#the report in $message is whitespace-aligned plain text but is mailed as
#text/html, so it is HTML-escaped and wrapped in <pre> tags to keep the
#column alignment and to stop &, < or > in scraped text being read as markup.
#'&' is escaped first so the entities added afterwards are not re-escaped.
my $html_body = $message;
$html_body =~ s/&/&amp;/g;
$html_body =~ s/</&lt;/g;
$html_body =~ s/>/&gt;/g;
$html_body = "<pre>".$html_body."</pre>";

#sets up email message that will be sent
my %mail = (
	To             => $mailTo,
	From           => $mailFrom,
	Subject        => $subject,
	Message        => $html_body,
	'Content-Type' => 'text/html; charset="utf-8"'
);

#prints sent success/error message based on status of mail sent
if (sendmail(%mail))
{
	print "\nSuccessfully sent mail to $mailTo\n\n";
}
else
{
	print "\nError sending mail: $Mail::Sendmail::error \n";
}