#!/usr/bin/perl # # compile some RSS feeds into a single page # use CGI qw( fatalsToBrowser -debug ); use XML::RSS; use Date::Parse; use POSIX; use Data::Dumper; use Digest::MD5; use URI::Escape; use Storable; use DBI; use DBD::mysql; my $query = new CGI; print $query->redirect( 'http://www.waider.ie/cgi-bin/sqlrss.pl' ); exit; my $splitscreen = $query->param( "splitscreen" ); $splitscreen ||= 0; my $cachedir = $ENV{"BASEDIR"}; $cachedir ||= "/var/tmp/rss-cache-" . $ENV{"LOGNAME"}; # Q&D config file open( C, "/var/tmp/rss.conf" ) or die "conf: $!"; my @conf; while () { chomp; push @conf, $_; } close( C ); my ( $host, $database, $username, $password ) = @conf; my $dbh = DBI->connect( "DBI:mysql:host=$host:database=$database", $username, $password ) or die $DBI::errstr; # Generate a session key my $id = $query->cookie('rssid'); my $seenbefore; if ( !defined( $id ) or !$id ) { my $md5 = new Digest::MD5; my $remote = $ENV{REMOTE_ADDR} . $ENV{REMOTE_PORT}; $id = $md5->md5_base64( time, $$, $remote ); $id =~ tr|+/=|-_.|; # Make non-word chars URL-friendly $seenbefore= ""; } else { $seenbefore = "Welcome back! "; } my $seen; my %thisrun; eval { -d "$cachedir" or mkdir "$cachedir", 0755; -d "$cachedir/profiles" or mkdir "$cachedir/profiles", 0755; $seen = retrieve( "$cachedir/profiles/$id" ); }; if ( !defined( $seen )) { $seen = {}; } my $debug = $ENV{'DEBUG'} || 0; my @feeds = $query->param( "feeds" ); my $cmd = $query->param( "cmd" ); my $dump = $query->param( "dump" ); my %rss; $| = 1; $cmd ||= ""; my %entries; if ( $query->param( "zapem" )) { $cmd = "zap"; } if (( !@feeds and !$cmd ) or ( $cmd eq "list" ) or ( $cmd eq "zap" ) or ( $cmd eq "zapafter" )) { my $feeds = $dbh->selectall_arrayref( 'SELECT name FROM feeds' ) or die $DBI::errstr; for my $f ( @{$feeds} ) { push @feeds, $f->[0]; } } else { print STDERR "something's gotten hold of my arse\n"; } #map { $_ = $basedir . "/$_" } @feeds; if ( $query->param( "opml" )) { print $query->header( -Content_Type => "text/plain" ); print "\n"; print ""; print "my subscriptions"; for my $feed ( @feeds ) { next if $feed =~ /\..*$/; my $rss = new XML::RSS; eval { next unless $rss->parsefile( $feed ); }; if ( $@ ) { next; } my $channel = $rss->{channel}; printf( "", ( $channel->{description}||""), "en", ( $channel->{title}||""), "", ( $channel->{title}||""), "", ( $channel->{link}||"")); } print ""; } my $cookie = $query->cookie( -name => 'rssid', -value => $id, -path => '/', -expires => '+1y' ); print $query->header( -charset => 'utf-8', -cookie => $cookie ); if ( $splitscreen eq "frameset" ) { print "\n"; print "\n"; print "\n"; print ""; exit; } elsif ( $splitscreen eq "content" ) { my $article = $query->param( "article" ); print "ARTICLE BITS"; exit; } my $target = $splitscreen ? "target=\"content\" " : ""; my $form = "
"; if ( $cmd ne "list" ) { $form = ""; } my $hdate = strftime( "%B %d at %H:%M:%S", localtime( time )); my $rcsid = '$Id: rss.pl,v 1.22 2005/09/04 11:18:14 waider Exp $'; print <<"EOF"; RSS Reader

$ {seenbefore}This page generated on $hdate.

$form
EOF if ( $cmd ) { if ( $cmd eq "list" ) { if ( !@feeds ) { print "
Either no feeds, or I can't load 'em!
"; exit; } print "
Feeds List

"; } elsif ( $cmd eq "zap" or $cmd eq "zapafter" ) { my $e = $query->param( "article" ); if ( defined( $e )) { $e = uri_unescape( $e ); $seen->{$e} = "zap " . ( $seen->{$e}||"" ); #print "
Zapped $e

" if $debug; } else { for my $e ( $query->param()) { if ( $e =~ /^zap_(.*)$/ ) { $e = uri_unescape( $1 ); $seen->{$e} = "zap " . ( $seen->{$e} || "" ); print "
Zapped $e
"; } else { # debug #print "
Skipping $e
"; } } } } else { $cmd = quotemeta( $cmd ); print "
Unknown command $cmd
"; exit; } } while ( @feeds ) { my $file = shift @feeds; next if $file =~ /\..*$/; # for my own special abuse my $feeddata = $dbh->selectall_arrayref( 'SELECT feedid, name, url, ' . 'UNIX_TIMESTAMP(lastupdate), rawfeed, ' . 'etag, modified, cleanfeed ' . 'FROM feeds WHERE name=?', undef, $file ); next unless $feeddata and @{$feeddata}; my %thisfeed = ( feedid => $feeddata->[0]->[0], name => $feeddata->[0]->[1], url => $feeddata->[0]->[2], lastupdate => $feeddata->[0]->[3], rawfeed => $feeddata->[0]->[4], etag => $feeddata->[0]->[5], modified => $feeddata->[0]->[6], cleanfeed => $feeddata->[0]->[7], ); if ( !($thisfeed{cleanfeed}||'')) { print "
$file is waiting for update
\n"; next; } my ( $feedfile ) = $file =~ m@.*/([^/]+)$@; my $rss = new XML::RSS; # these mofos have already been parsed by XML::RSS so they should be # somewhat compliant, dammit. eval { next unless $rss->parse( $thisfeed{cleanfeed} ); }; if ( $@ ) { print STDERR "Error parsing $file:\n"; print STDERR "$@\n"; if ( open( PARP, ">$cachedir/" . $thisfeed{name} . ".broke" )) { print PARP $thisfeed{cleanfeed}; close( PARP ); } next; } else { unlink( $cachedir . "/" . $thisfeed{name} . ".broke" ); } my $lastupated = $thisfeed{lastupate}; my $channel = $rss->{channel}; if ( $rss->{image} ) { $channel->{image} = $rss->{image}; } next unless $channel; print STDERR "Working on [" . $channel->{title} . "]\n" if $debug; my $feed = $channel->{title}; $rss{$feed} = $rss; my $date = ""; if ( defined( $channel->{dc} ) and defined( $channel->{dc}->{date} )) { $date = $channel->{dc}->{date}; } if ( defined( $channel->{pubDate} )) { $date ||= $channel->{pubDate}; } if ( defined( $rss->{items}->[0]->{dc}) and defined( $rss->{items}->[0]->{dc}->{date})) { $date ||= $rss->{items}->[0]->{dc}->{date}; } $date ||= scalar(gmtime($lastupdated)); $date = stupid_rss_date( $date, $lastupdated ); @items = @{$rss->{items}}; if ( $cmd eq "list" ) { print "
$feed
\n"; print "
Last updated: " . scalar( gmtime( $date )) . "
\n"; print "Source URL: {link} . "\">" . $channel->{link} . "
\n" if $channel->{link}; print "Description: " . $channel->{description} . "
\n" if $channel->{description}; print "Items: " . scalar( @items ) . "
\n"; print "
\n"; print "
\n"; next; } if ( !@items ) { print STDERR "No items!\n"; next; } else { print STDERR " " . scalar( @items ) . " items\n" if $debug; } for my $item ( @items ) { my $itemdate = $lastupdated; my $trydate; if ( defined( $item->{dc} )) { if ( defined( $item->{dc}->{date})) { if ( defined( $item->{dc}->{date} )) { $trydate = stupid_rss_date( $item->{dc}->{date} ); if ( !defined( $trydate ) or !$trydate ) { print STDERR " failed to parse date" . $item->{dc}->{date} . "\n" if $debug; } else { $itemdate = $trydate; } } } $item->{dc}->{creator} = "" if !defined( $item->{dc}->{creator}); } my $index = 0; my $itemname = sprintf( "%d_%03d_%s", $itemdate, $index, $feed ); while ( 1 ) { $itemname = sprintf( "%d_%03d_%s", $itemdate, $index, $feed ); last if !defined( $entries{ $itemname }); $index++; } $entries{$itemname} = $item; print STDERR " Added $itemname to list\n" if $debug; } } my $oldday; my $oldchannel = ""; my $idx = 1; my $seenzap = 0; for my $entry ( sort { $b <=> $a } keys %entries ) { my ( $date, $index, $feed ) = split( '_', $entry ); my $item = $entries{$entry}; my $rss = $rss{$feed}; my $image = $rss->{channel}->{image}; my $channel = $rss->{channel}; my $title = $feed; # skip anything over a month old last if ( time - $date ) > 60 * 60 * 24 * 30; # skip anything we've marked as zapped. if ( defined( $seen->{$entry})) { if ( $cmd eq "zapafter" ) { my $e; if ( !$seenzap ) { my $e = $query->param( "article" ); if ( defined( $e )) { $e = uri_unescape( $e ); if ( $entry eq $e ) { $seen->{$entry} = "zap $hdate"; $seenzap = 1; } } } else { $seen->{$entry} = "zap $hdate"; } } if ( $seen->{$entry} =~ /^zap/ ) { $seen->{$entry} = "zap $hdate"; $thisrun{$entry} = $hdate; next; } } # visual break between days if ( defined( $oldday )) { if ( $oldday ne strftime( '%d', localtime( $date ))) { print <<"EOT";
EOT print strftime( "
%a, %b %d %Y
\n", localtime( $date )); print <<"EOT";
EOT $oldchannel = ""; } } else { print strftime( "
%a, %b %d %Y
\n", localtime( $date )); print <<"EOT";
EOT } $oldday = strftime( '%d', localtime( $date )); # the horror, the horror if ( $oldchannel and ( $oldchannel ne $channel )) { print <<"EOT";
EOT print "
"; if ( $image and $image->{url}) { print "{url} . "\">"; } print "{link} . "\">" if $channel->{link}; print $channel->{title}||$title; print "" if $channel->{link}; print "
"; } if ( !$oldchannel ) { print "
"; if ( $image and $image->{url}) { print "{url} . "\">"; } print "{link} . "\">" if $channel->{link}; print $channel->{title}||$title; print "" if $channel->{link}; print "
"; } $oldchannel = $channel; my $byline = ""; if ( $item->{dc}->{creator}) { $byline = " (" . $item->{dc}->{creator} . ")"; } print "
"; print strftime( '%H:%M:%S', localtime( $date )); print " ["; print ""; print ""; print "zap"; print ""; print "]"; $item->{link} ||= $channel->{link}; # more horror print ": {link} . "\" name=\"$idx\">" . ( $item->{title} || $channel->{description} ) . " "; if ( defined( $seen->{$entry})) { print "[seen on " . $seen->{$entry} . "] "; } else { $seen->{$entry} = $hdate; } $thisrun{$entry} = $hdate; print "["; print ""; print "zap after"; print ""; print "]"; print "
\n"; if ( $item->{description} ) { print "
"; # safety clown says... $item->{description} =~ s{}{}g; print $item->{description} . "$byline

"; print "
\n"; } else { #print "{link} . "\">[go to item]

\n"; } $idx++; } print "
" if $cmd eq "list"; print "
" if $cmd eq "list"; print "

Select Feeds
" if $cmd ne "list"; print "
\n"; print "
"; print "\n\n"; if ( $cmd ne "list" ) { for my $k ( keys %{$seen} ) { # delete $seen->{$k} if !defined( $thisrun{$k} ); # delete $seen->{$k} if $seen->{$k} !~ /$hdate/; } } store( $seen, "$cachedir/profiles/$id" ); sub stupid_rss_date { my $date = shift; my $lastupdated = shift; $lastupdated ||= 0; # Blogger's variation: 2003-07-16 17:44:13Z $date =~ s/[T ](\d+:\d+)(:\d+)?Z$/T$1+00:00/; # Z = Zulu time = GMT # stupid rss date format: YYYY-MM-DDTHH:SS[+-]ZZZZ # ISO 8601, people... $date =~ s/(\d)T(\d)/$1 $2/; $date =~ s/(\d\d):(\d\d)$/$1$2/; my $unixtime = str2time( $date ); # HURGH BLARGH HONK. More Blogger stupidity. if ( defined( $unixtime )) { if ( $lastupdated > $unixtime ) { $unixtime = $lastupdated; } $date = $unixtime; } else { $date = "(can't parse $date)"; } $date; }