RSS Parser Email Script 2
The following file is a Perl script that reads RSS URLs and sends an email notification when new items appear. It is an upgrade of an earlier PHP script.
Please note that some CPAN modules are required:
XML::Simple
Data::Dumper
Net::SMTP
LWP
URI
CGI
#!/usr/bin/perl
##################################
# Andres #
# afelotreyu @ gmail . com #
# Date: June 20 2011 #
##################################
#
# This Perl script reads RSS URLs and emails their contents to a given address.
# The RSS URLs must be listed in a file; see the log format below.
# ########### Log Format
# Please note the 'rss_log' file must have the following format
# URL|||title
# http://file.xml|||title
# http://file.xml
use vars qw/ $REVISION $VERSION /;
$REVISION='0.01';
$VERSION="1.0 (Build $REVISION)";
# Use CPAN modules
use strict;
use XML::Simple;
use Data::Dumper;
use CGI;
use LWP;
use URI;
use Net::SMTP;
my $cgi     = CGI->new;               # direct method call instead of indirect "new CGI"
my $browser = LWP::UserAgent->new;    # HTTP client used by DownloadXML_file
########################
# Configuration
# please modify carefully as needed
my %configval = (
    'rss_log'       => 'somelog.txt',                 # File where to find RSS list and log it
    'tmp_xml_file'  => 'sometimefile.xml',            # Scratch file each feed is downloaded into
    'subject_email' => 'RSS Parse email',
    'email_method'  => 'sendmail',                    # sendmail or smtp
    'email_to'      => 'some_user@somedomain.com',    # To - Email Address(es)
    'email_from'    => 'some_user@somedomain.com',    # From - email address
    'sendmail'      => '/usr/sbin/sendmail -t',       # Sendmail Location and params
    'smtp_server'   => 'somedomain.local',            # This is the SMTP
    'smtp_timeout'  => '30',
);
########################
# Important Variables
# These stay declared at file scope because subroutines in this file may
# read or assign them; the main flow below uses its own lexicals wherever
# it can.
my ($data, $source, $content);
my $new  = 0;    # New items = no!
my $send = 0;    # Send Email = no!
my ($tmp, $e, $line);
########################
# Let's get started
# First we read the log file: one [ url, last logged title ] pair per feed.
$source = ReadLOG_file();
# Tolerate an undefined result so an empty feed list simply means "no work".
my @feeds = @{ $source || [] };
# $content builds the email message
$content = "RSS Parse Email Listing\n\n";
# Be nice, print some output for html
print $cgi->header('text/html');
# Loop over every RSS link
for my $idx (0 .. $#feeds) {
    # $feed aliases the entry inside $source:
    #   [0] feed URL, [1] title logged on the previous run,
    #   [2] newest title seen on this run (filled in below).
    my $feed      = $feeds[$idx];
    my $last_seen = defined $feed->[1] ? $feed->[1] : '';
    $content .= "RSS->[$idx]: \n";
    # Download the feed into the scratch file, then parse that file.
    DownloadXML_file($feed->[0]);
    $data = ReadXML_file($configval{'tmp_xml_file'});
    # Only descend into the structure while it really is a hash, so a
    # document without an RSS <channel> is reported as empty instead of
    # dying under "strict refs".
    my $channel = ref($data) eq 'HASH'    ? $data->{channel} : undef;
    my $items   = ref($channel) eq 'HASH' ? $channel->{item} : undef;
    if ($items) {
        # A single <item> arrives as one hash ref, several as an array ref;
        # normalise to a list so both cases share one code path (and one
        # email layout - the single-item case used to print "title:" with
        # different spacing).
        my @entries = ref($items) eq 'ARRAY' ? @{$items} : ($items);
        my $newest_recorded = 0;
        for my $entry (@entries) {
            my $title = defined $entry->{title} ? $entry->{title} : '';
            # Stop at the item that was already reported on the last run;
            # everything before it in the feed is new.
            if ("$title" eq "$last_seen") {
                $content .= "\nLast Item: $last_seen \n";
                last;
            }
            # New Item, we need to store it!
            $new  = 1;
            $send = 1;    # Send Email
            # The first new item is the newest one: remember its title so
            # it becomes the "last seen" marker in the rewritten log.
            if (!$newest_recorded) {
                $feed->[2] = $entry->{title};
                $newest_recorded = 1;
            }
            $content .= "\nTitle: $title \n";
            $content .= "Link: $entry->{link} \n";
            $content .= "Description: $entry->{description} \n";
            $content .= "Date: $entry->{pubDate} \n";
        }
    } else {    # No items, maybe empty?
        $content .= "RSS Feed is currently Empty - \n";
    }
    # Next RSS!
    $content .= "\n_____________________________________\n\n";
}
# Do we need to email?
# The mail goes out BEFORE the log is rewritten: if sending dies, the log
# still holds the old markers and the same items are picked up again on the
# next run instead of being lost.
if ($send == 1) {
    SendMail($configval{'subject_email'}, $content);
}
# A new item was set, update log file
if ($new == 1) {
    open(my $log_fh, '>', $configval{'rss_log'})
        or die "$configval{'rss_log'} - File could not open to write $!";
    # Loop over every RSS link
    for my $feed (@feeds) {
        # Prefer the title found on this run; fall back to the logged one.
        my $marker = (defined $feed->[2] && length $feed->[2]) ? $feed->[2] : $feed->[1];
        $marker = '' unless defined $marker;
        print {$log_fh} $feed->[0] . '|||' . $marker . "\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($log_fh)
        or die "$configval{'rss_log'} - File could not be written $!";
}
# Print something for HTML access
print "<html><body><h1>Finished</h1></body></html>";
########################
# Subroutines go here
########################
# ReadXML_file(file)
# Parses the XML file at the given location with XML::Simple.
#   file    - path of the XML file to read
# Returns whatever XMLin() produces for the document (a hash ref for a
# normal RSS document). Parse errors raised by XMLin() propagate to the
# caller.
########################
sub ReadXML_file {
    my ($file) = @_;
    # KeyAttr => [] switches off XML::Simple's folding of element lists
    # into hashes keyed on name/key/id, so repeated elements stay arrays.
    # SuppressEmpty => undef represents empty elements as undef.
    my $xml = XML::Simple->new(
        KeyAttr       => [],
        SuppressEmpty => undef,
    );
    # Keep the result in a lexical and hand back a single scalar, whatever
    # context the caller uses.
    my $parsed = $xml->XMLin($file);
    return $parsed;
    ##### Sample code!
    # Use of the XML::Simple output!
    # dereference hash ref
    # access <employee> array
    #foreach $e (@{$data->{data}->{robots}->{robot}})
    #{
    #print $e->{name}, "\n";
    #print "Age/Sex: ", $e->{id}, "/", $e->{icon}, "\n";
    #print "Department: ", $e->{url_company}, "\n";
    #print "\n";
    #}
}
########################
# ReadLOG_file()
# Reads the feed list from $configval{'rss_log'}. Every line that starts
# with "http" is taken as "URL|||title" (the title part is optional); all
# other lines are ignored.
# Returns an array ref with one [ url, title ] pair per feed, in file
# order. title is undef when the line carries no "|||" separator, and the
# array is empty when the file lists no feeds.
# Dies when the log file cannot be opened.
########################
sub ReadLOG_file {
    open(my $log, '<', $configval{'rss_log'})
        or die "File $configval{'rss_log'} could not open $! \n";
    # 2 == LOCK_EX. The lock is best-effort: its result is deliberately
    # not checked, so a filesystem without locking support still works.
    flock($log, 2);
    my @feeds;
    # Read line by line instead of slurping the whole file into a list.
    while (my $row = <$log>) {
        chomp $row;
        next unless $row =~ m/^http/;
        # Limit of 2: only the first "|||" separates URL from title, so a
        # title that itself contains "|||" is kept intact.
        my ($url, $title) = split(/\|\|\|/, $row, 2);
        push @feeds, [ $url, $title ];
    }
    close $log;
    return \@feeds;
}
########################
# DownloadXML_file(url)
# Fetches the given URL with the shared $browser and stores the response
# body in $configval{'tmp_xml_file'}, replacing the previous contents.
#   url     - address of the feed to download
# Returns 1 when the HTTP request succeeded and 0 when it did not. On
# failure a warning with the HTTP status line is emitted; the response
# body is still written, so the scratch file never keeps the previous
# feed's data.
# Dies when the scratch file cannot be opened or written.
########################
sub DownloadXML_file {
    my ($address) = @_;
    # We are going to connect to the page and download the XML file.
    # (The former no-argument query_form() call only read the query and
    # discarded it, so it has been dropped.)
    my $url      = URI->new($address);
    my $response = $browser->get($url);
    my $target   = $configval{'tmp_xml_file'};
    open(my $out, '>', $target)
        or die "$target - File could not open $!";
    print {$out} $response->content;
    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "$target - File could not be written $!";
    if (!$response->is_success) {
        warn "Download of $url failed: " . $response->status_line . "\n";
        return 0;
    }
    # Return a success signal
    return 1;
}
########################
# SendMail(subject,message);
# Sends a plain-text email using the transport named in
# $configval{'email_method'}: 'smtp' (Net::SMTP) or 'sendmail' (a pipe to
# the command in $configval{'sendmail'}).
#   subject - text for the Subject header
#   message - body of the email
# Sender and recipient come from $configval{'email_from'} and
# $configval{'email_to'}. Returns nothing useful.
# Dies when the SMTP server cannot be reached or rejects the sender,
# recipient or message, when the sendmail command cannot be started or
# fails, and when email_method names an unknown transport.
########################
sub SendMail {
    my ($subject, $message) = @_;
    my $method = $configval{'email_method'};
    #### Email
    # Here is where the email is created
    if ($method =~ /^smtp$/) {
        # new() returns undef when the connection fails; stop here with a
        # clear message rather than calling methods on undef.
        my $smtp = Net::SMTP->new(
            $configval{'smtp_server'},                    # MailHost
            Timeout => $configval{'smtp_timeout'},        # TimeOut
        ) or die "Cannot connect to SMTP server $configval{'smtp_server'}: $@\n";
        # email_to may list several comma-separated addresses; each one
        # must be passed to the server as its own recipient.
        my @recipients = split(/\s*,\s*/, $configval{'email_to'});
        # SMTP rejections do not set $!, so report the server's reply.
        $smtp->mail($configval{'email_from'})
            or die "From_email call bad " . $smtp->message;    # Address Email is created from
        $smtp->to(@recipients)
            or die "To email call bad " . $smtp->message;      # Address Email is going to
        $smtp->data()
            or die "SMTP DATA command refused " . $smtp->message;
        $smtp->datasend("From: $configval{'email_from'} \n");  # Sender header
        $smtp->datasend("To: $configval{'email_to'} \n");      # Address Email is going to
        $smtp->datasend("Subject: $subject \n");               # Subject
        $smtp->datasend("\n");
        $smtp->datasend("$message");
        $smtp->dataend()
            or die "SMTP server did not accept the message " . $smtp->message;
        $smtp->quit;
    } elsif ($method =~ /^sendmail$/) {
        # Three-argument pipe open with a lexical handle; the configured
        # command string is run exactly as before.
        open(my $mailer, '|-', $configval{'sendmail'})
            or die "Cannot open $configval{'sendmail'}: $!";
        print {$mailer} "Reply-to: " . $configval{'email_from'} . "\n";
        print {$mailer} "Subject: " . $subject . "\n";
        print {$mailer} "To: " . $configval{'email_to'} . "\n";
        print {$mailer} "Content-type: text/plain\n\n";
        print {$mailer} $message . "\n";
        # NOTE(review): presumably an end-of-message marker for sendmail;
        # kept exactly as in the original - confirm it is still wanted.
        print {$mailer} '.';
        # close() on a pipe also reports the command's exit status.
        close($mailer)
            or die($! ? "Error closing pipe to $configval{'sendmail'}: $!"
                      : "$configval{'sendmail'} exited with status $?");
    } else {
        # A mistyped transport used to send nothing without any notice.
        die "Unknown email_method '$method' (expected 'smtp' or 'sendmail')\n";
    }
    #### Done Email
    return;
}
########################
1;
__END__
##################################
# Andres #
# afelotreyu @ gmail . com #
# Date: June 20 2011 #
##################################
#
# This perl Script reads RSS URLS and emails its contents to a given address.
# The RSS urls must be in a file see below.
# ########### Log Format
# Please note the 'rss_log' file must have the following format
# URL|||title
# http://file.xml|||title
# http://file.xml
use vars qw/ $REVISION $VERSION /;
$REVISION='0.01';
$VERSION="1.0 (Build $REVISION)";
# Use CPAN modules
use strict;
use XML::Simple;
use Data::Dumper;
use CGI;
use LWP;
use URI;
use Net::SMTP;
my $cgi = new CGI;
my $browser = LWP::UserAgent->new;
########################
# Configuration
# please modify carefully as needed
my %configval = (
'rss_log' => 'somelog.txt', # File where to find RSS list and log it
'tmp_xml_file' => 'sometimefile.xml',
'subject_email' => 'RSS Parse email',
'email_method' => 'sendmail', # sendmail or smtp
'email_to' => 'some_user@somedomain.com', # To - Email Address(es)
'email_from' => 'some_user@somedomain.com', # From - email address
'sendmail' => '/usr/sbin/sendmail -t', # Sendmail Location and params
'smtp_server' => 'somedomain.local', # This is the SMTP
'smtp_timeout' => '30',
);
########################
# Important Variables
my ($data,$source,$content);
my $new = 0; # New items = no!
my $send = 0; # Send Email = no!
# Temporal variables that we use over the script
my ($tmp,$a,$e,$line,$link,$last);
########################
# Lest get started
# First we read the log file
$source = ReadLOG_file();
# $content builds the email message
$content = "RSS Parse Email Listing\n\n";
# Be nice, print some output for html
print $cgi->header('text/html');
# Loop over every RSS link
for ( $a=0;$a < = $#{$source}; $a++) {
$last = 0;
$content = $content."RSS->[$a]: \n";
# Download the File of the RSS
&DownloadXML_file($source->[$a]->[0]);
# Read the file
$data = &ReadXML_file($configval{'tmp_xml_file'});
# Check if there are any items defined
if ($data->{channel}->{item}) {
# We have items, lets chech output
if (ref($data->{channel}->{item}) eq 'ARRAY') {
foreach $e ( @{$data->{channel}->{item}} ) {
# OK we are in, now we store all posts since last one
if ("$e->{title}" eq "$source->[$a]->[1]" ) {
$content = $content."\nLast Item: $source->[$a]->[1] \n";
last;
} else {
# New Item, we need to store it!
if ( $new == 0 ) { $new = 1; }
if ( $last == 0 ) { $source->[$a]->[2] = $e->{title}; $last = 1; }
$send = 1; # Send Email
$content = $content."\nTitle: $e->{title} \n";
$content = $content."Link: $e->{link} \n";
$content = $content."Description: $e->{description} \n";
$content = $content."Date: $e->{pubDate} \n";
}
}
} else {
# Only one item, is it the same as log?
if ("$data->{channel}->{item}->{title}" eq "$source->[$a]->[1]") {
# Same item as before
$content = $content."\nLast Item: $source->[$a]->[1] \n";
} else { # New Item
# New Item, we need to store it!
if ( $new == 0 ) { $new = 1; }
if ( $last == 0 ) { $source->[$a]->[2] = $data->{channel}->{item}->{title}; $last = 1; }
$send = 1; # Send Email
$content = $content."title: $data->{channel}->{item}->{title} \n";
$content = $content."Link: $data->{channel}->{item}->{link} \n";
$content = $content."Description: $data->{channel}->{item}->{description} \n";
$content = $content."Date: $data->{channel}->{item}->{pubDate} \n";
}
}
} else { # No items, maybe empty?
$content = $content."RSS Feed is currently Empty - \n";
}
# Next RSS!
$content = $content."\n_____________________________________\n\n";
};
# A new item was set, update log file
if ( $new == 1 ) {
open (NEWLOG,">",$configval{'rss_log'}) or die (print "$configval{'rss_log'} - File could not open to write $!");
# Loop over every RSS link
for ( $a=0;$a < = $#{$source}; $a++) {
if ($source->[$a]->[2]) {
print NEWLOG $source->[$a]->[0].'|||'.$source->[$a]->[2]."\n";
} else {
print NEWLOG $source->[$a]->[0].'|||'.$source->[$a]->[1]."\n";
}
}
close NEWLOG;
}
# Do we need to email?
if ($send == 1) {
&SendMail($configval{'subject_email'},$content);
}
# Print something for HTML access
print "<html><body><h1>Finished</h1><h1><body></body></h1></body></html>";
########################
# Subroutines go here
########################
# ReadXML_file
# Needs to get a file location to read its contents
# It returns a dereference hash ref
########################
sub ReadXML_file() {
my $file = $_[0];
# Read the XML file
my $xml = new XML::Simple (
KeyAttr => [],
SuppressEmpty => undef);
# read XML file
$data = $xml->XMLin($file);
return $data
##### Sample code!
# Use of the XML::Simple output!
# dereference hash ref
# access <employee> array
#foreach $e (@{$data->{data}->{robots}->{robot}})
#{
#print $e->{name}, "\n";
#print "Age/Sex: ", $e->{id}, "/", $e->{icon}, "\n";
#print "Department: ", $e->{url_company}, "\n";
#print "\n";
#}
}
########################
# ReadLOG_file
########################
sub ReadLOG_file() {
open (READLOG,"< ",$configval{'rss_log'}) or die "File $configval{'rss_log'} could not open $! \n";
flock (READLOG,2);
seek(READLOG,0,0);
$e = 0;
foreach $line (<READLOG>) {
chomp $line;
if ($line =~ m/^http/ ) {
my @pair = split(/\|\|\|/,$line);
$source->[$e]->[0] = $pair[0];
$source->[$e]->[1] = $pair[1];
$e++;
}
}
close READLOG;
return $source;
}
########################
# DownloadXML_file
# needs a URL to connect to and download the XML file.
########################
sub DownloadXML_file() {
# Download XML
# We are going to connect to the page and download the XML file
my $url = URI->new($_[0]);
$url->query_form();
my $mfilecont = $browser->get($url);
$tmp = $configval{'tmp_xml_file'};
open (TEMPORAL,">", $tmp) or die (print "$tmp - File could not open $!");
print TEMPORAL $mfilecont->content;
close TEMPORAL;
# Return a success signal '
return 1;
}
########################
# SendMail(subject,message);
# Needs Subject and Message content
# Uses SMTP configuration
########################
sub SendMail() {
my $subject = $_[0];
my $message = $_[1];
# Here is where the email is created
for ($configval{'email_method'}) {
if (/^smtp$/) {
my $smtp = Net::SMTP->new(
$configval{'smtp_server'}, # MailHost
Timeout => $configval{'smtp_timeout'}, # TimeOut
);
$smtp->mail("$configval{'email_from'}") || die ( print "From_email call bad $! \n" ); # Address Email is created from (Check if exist)
$smtp->to("$configval{'email_to'}") || die ( print "To email call bad $! \n" ); # Address Email is going to (Check if exist)
$smtp->data();
$smtp->datasend("To: $configval{'email_to'} \n"); # Address Email is going to
$smtp->datasend("Subject: $subject \n"); # Subject
$smtp->datasend("\n");
$smtp->datasend("$message");
$smtp->dataend();
$smtp->quit;
} elsif (/^sendmail$/) {
open(SENDMAIL, "| $configval{'sendmail'} ") or die "Cannot open $configval{'sendmail'}: $!";
print SENDMAIL "Reply-to: ".$configval{'email_from'}."\n";
print SENDMAIL "Subject: ".$subject."\n";
print SENDMAIL "To: ".$configval{'email_to'}."\n";
print SENDMAIL "Content-type: text/plain\n\n";
print SENDMAIL $message."\n";
print SENDMAIL '.';
close(SENDMAIL);
}
}
#### Done Email
}
########################
1;
__END__

