#!/usr/bin/perl -w # # rssmailfeed V1.02 # Copyright (C) 2007 Baerana.com # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # use strict; use warnings 'untie'; use warnings; use LWP::UserAgent; use Data::Dumper; use GDBM_File; use URI; use XML::Twig; use HTML::Scrubber; use HTTP::Cookies; sub ProcessOptions(); sub UpdateConfig($$); sub UpdateCache($$); sub Initialize(); sub Run(); sub AddEmail($); sub DelEmail($); sub AddURI($); sub DelURI($); sub ListURI(); sub FetchContent(@); sub SendEmail($); sub getcred($$); # have to subclass LWP::UserAgent, because everything looks like a nail { no strict; package SpecialUA; require LWP::UserAgent; @ISA = qw(LWP::UserAgent); sub new {my $self=LWP::UserAgent::new(@_);}; sub get_basic_credentials { shift; #that was the class ::getcred(shift, shift); }; } my %config; my %cache; my $sendoutput=1; my $runthistime=0; my $verbose=0; my $initialized=0; my $basedir; my $ua = SpecialUA->new(agent => 'rssmailfeed-perl/1.02'); my $scrubber = HTML::Scrubber->new(allow => qw[ br ], default => 0); &ProcessOptions; &UnInitialize; exit 0; sub ProcessOptions () { while($_ = shift @ARGV) { if(/^-?-?(update-?)?config/) { &UpdateConfig($ARGV[0], $ARGV[1]); shift @ARGV; shift @ARGV; } elsif (/^-?-?(?:del-?e?-?mail|remove-?e?mail)/) { &DelEmail($ARGV[0]); shift @ARGV; } elsif (/^-?-?(?:new|email|newemail|add-?e?mail)/) { &AddEmail($ARGV[0]); shift @ARGV; } elsif (/^-?-?(?:add|(new-)?ur[il])/) { &AddURI($ARGV[0]); shift @ARGV; } elsif (/^-?-?(?:del-?(ur[il])?)/) { &DelURI($ARGV[0]); shift @ARGV; } elsif (/^-?-?(?:list-?(ur[il])?)/) { &ListURI; } elsif (/^-?-?verbose/) { $verbose=1; } elsif (/^-?-?run/) { $runthistime=1; } elsif (/^-?-?(use-?)?dir/) { $basedir=$ARGV[0]; shift @ARGV; &UnInitialize and warn "Base directory specified after files were initialized. \n Please check that you are calling the options in the correct order. \n Error occured "; } elsif (/^-?-?send/) { $sendoutput=1; } elsif (/^-?-?update-?cache/) { &UpdateCache($ARGV[0], $ARGV[1]); shift @ARGV; shift @ARGV; } elsif (/^-?-?nosend/) { $sendoutput=0; } elsif (/^-?-?dump-?cache/) { &Initialize; my $d = Data::Dumper->new([\%cache], ['cache']); print $d->Dump,"\n"; } elsif (/^-?-?dump-?config/) { &Initialize; my $d = Data::Dumper->new([\%config], ['config']); print $d->Dump,"\n"; } else { warn "Error parsing option... Skipping."; }; } &Initialize; &Run if $runthistime; }; sub UnInitialize () { return undef unless $initialized; $initialized=0; untie %config; untie %cache; return 1; }; sub Initialize () { return undef if $initialized; $initialized=1; $basedir = $ENV{'HOME'}.'/.rssmailfeed' unless $basedir; unless ( -d $basedir ) { mkdir $basedir or die "Base directory doesn't exist and couldn't be made"; }; $ua->env_proxy(); $ua->cookie_jar(HTTP::Cookies->new(file => $basedir.'/cookies.txt')); tie( %config, 'GDBM_File', $basedir .'/config.gdbm', GDBM_WRCREAT|GDBM_SYNC, 0600 ); tie( %cache, 'GDBM_File', $basedir .'/cache.gdbm', GDBM_WRCREAT|GDBM_SYNC, 0600 ); $config{'mail-from'} = 'rssmailfeed' unless defined $config{'mail-from'}; $config{'mail-subject'} = 'rssmailfeed' unless defined $config{'mail-subject'}; return 1; }; sub DelEmail ($) { my $oldaddr = shift; my @maillist = (); &Initialize; @maillist = split /\0/, $config{'maillist'} if defined $config{'maillist'}; my $addrQM = quotemeta($oldaddr); my $addrRE = qr/^$addrQM$/; warn "List is " . join(", ", grep { !/$addrRE/ } @maillist) if $verbose; &UpdateConfig('maillist',join("\0", grep { !/$addrRE/ } @maillist)); }; sub AddEmail ($) { my $newaddr = shift; my @maillist = (); &Initialize; @maillist = split /\0/, $config{'maillist'} if defined $config{'maillist'}; my $addrQM = quotemeta($newaddr); my $addrRE = qr/^$addrQM$/; return undef if grep { m/$addrRE/ } @maillist; push @maillist, $newaddr; warn "List is " . join(", ", grep { /[@]/ } @maillist) if $verbose; &UpdateConfig('maillist',join("\0", grep { /[@]/ } @maillist)); }; sub AddURI ($) { my $newURI = shift; my @urilist = (); &Initialize; @urilist = split /\0/, $config{'urilist'} if defined $config{'urilist'}; $newURI =~ /([a-z]+:\/\/?)?([^@\/]+[@])?(([^@\/:]+)(:[0-9]+)?([\/].*)?)/i or do { warn "Unable to parse URI"; return undef; }; my $uriCred = $2; my $uriObj = URI->new($1 . $3); $uriCred =~ s/[@]$// if $uriCred; my $uriStr = $uriObj->canonical; my $uriQM = quotemeta($uriStr); my $uriRE = qr/^$uriQM$/; return undef if grep { m/$uriRE/ } @urilist; &UpdateConfig('cred_' . $uriStr,join("\0", split(/:/,$uriCred,2))) if $uriCred; push @urilist, $uriStr; &UpdateConfig('urilist', join("\0", grep { /./ } @urilist)); }; sub DelURI ($) { my $oldURI = shift; my @urilist = (); &Initialize; @urilist = split /\0/, $config{'urilist'} if defined $config{'urilist'}; $oldURI =~ /([a-z]+:\/\/?)?([^@\/]+[@])?(([^@\/:]+)(:[0-9]+)?([\/].*)?)/i or do { warn "Unable to parse URI"; return undef; }; my $uriObj = URI->new($1 . $3); my $uriStr = $uriObj->canonical; my $uriQM = quotemeta($uriStr); my $uriRE = qr/^$uriQM$/; return undef if grep { !/$uriRE/ } @urilist; &UpdateConfig('cred_' . $uriStr,''); &UpdateConfig('urilist', join("\0", grep { !/$uriRE/ } @urilist)); }; sub getcred ($$) { my($realm, $uri) = @_; &Initialize; my @ret = (undef, undef); my $credvar = 'cred_'.$uri; @ret = split(/\0/, $config{$credvar},2) if defined $config{$credvar}; return @ret; }; sub ListURI () { my @urilist = (); &Initialize; @urilist = split /\0/, $config{'urilist'} if defined $config{'urilist'}; print join("\n\t", 'URI List: ', @urilist),"\n"; }; sub UpdateConfig ($$) { my $var = shift; my $val = shift; &Initialize; if(length($val) > 0) { $config{$var} = $val; } else { delete $config{$var}; } } sub UpdateCache ($$) { my $var = shift; my $val = shift; &Initialize; if(length($val) > 0) { warn "Set $var in cache to string length ".length($val) if $verbose; $cache{$var} = $val; } else { warn "Deleting $var from cache" if $verbose; delete $cache{$var}; } } sub FetchContent (@) { my $message = ''; &Initialize; foreach my $uri (@_) { my %newentries = (); my $req = HTTP::Request->new( HEAD => $uri ); my $res = $ua->request($req); if( not $res->is_success ) { warn "Warning, HEAD $uri returned: " . $res->status_line; next; }; if(defined $cache{'last_modified_'.$uri} and $res->last_modified <= $cache{'last_modified_'.$uri}) { warn "URL is old. Skipping." if $verbose; next; $res->content($cache{'content_'.$uri}); } else { $req = HTTP::Request->new( GET => $uri ); $res = $ua->request($req); if( not $res->is_success ) { warn "Warning, GET $uri returned: " . $res->status_line; next; }; &UpdateCache('content_'.$uri, $res->content); &UpdateCache('last_modified_'.$uri, $res->last_modified); }; my $channel; my $tw1 = XML::Twig->new(discard_spaces => 1, TwigHandlers => { channel => sub { my($twig,$element) = @_; $channel = $element->first_child('description')->text; $twig->purge; } } )->parse($res->content); my $tw2 = XML::Twig->new(discard_spaces => 1, TwigHandlers => { item => sub { my($twig,$element) = @_; my $guid = $element->first_child('guid')->text; unless(defined $cache{'guid_' . $guid} and length $cache{'guid_' . $guid}){ my $title = $element->first_child('title')->text; my $description = $element->first_child('description')->text; my $link = $element->first_child('link')->text; my $date = $element->first_child('pubDate')->text; my $entry = <scrub($entry); $entry =~ s/
/ \n/g; $entry =~ s/<//g; $entry =~ s/&/>/g; $entry =~ s/\xe2\x80\x99/'/g; $newentries{$guid} = $entry; &UpdateCache('guid_'.$guid, $entry); } $twig->purge; } } )->parse($res->content); my @keys = keys %newentries; next if(scalar(@keys) < 1) ; $message .= join("\n" . "="x70 . "\n", $channel ,values %newentries , ''); }; return $message; }; sub SendEmail ($) { my $message = shift; return unless $sendoutput; return unless defined $message and length $message; my @maillist = (); my $mailua; $mailua = SpecialUA->new(agent => 'rssmailfeed-perl/1.02'); @maillist = split /\0/, $config{'maillist'} if defined $config{'maillist'}; for my $addr (@maillist) { warn "Sendmail mail to " . $addr if $verbose; my $req = HTTP::Request->new(POST => 'mailto:' . $addr); $req->header(From => $config{'mail-from'}); $req->header(Subject => $config{'mail-subject'}); $req->content($message); my $res = $mailua->request($req); warn "Sending email failed to ".$addr unless $res->is_success; }; return; }; sub Run () { my($req,$res); my @urilist = (); @urilist = split /\0/, $config{'urilist'} if defined $config{'urilist'}; &SendEmail(&FetchContent(@urilist)); };