summaryrefslogtreecommitdiffstats
path: root/scripts/urlfeed.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--scripts/urlfeed.pl262
1 files changed, 262 insertions, 0 deletions
diff --git a/scripts/urlfeed.pl b/scripts/urlfeed.pl
new file mode 100644
index 0000000..936fc14
--- /dev/null
+++ b/scripts/urlfeed.pl
@@ -0,0 +1,262 @@
+#!/usr/bin/perl -w
+#
+# this is VERY experimental code, use at your own risk
+#
+# WARNING:
+# I am still not sure of the UTF-8 handling. It may only work if you
+# are on a UTF-8 terminal, with UTF-8ized settings.
+#
+# TODO:
+# - make urlfeed_title, urlfeed_link, urlfeed_description work for
+# already-created feeds, not only the new ones
+# - some exclude-list would be useful I guess
+# - enhance urlfeed_find_url() maybe
+# - TEST IT! it's not idiot-proof at the moment
+#
+
+use strict;
+use vars qw($VERSION %IRSSI);
+use POSIX qw(strftime);
+use Irssi;
+use Irssi::Irc;
+use Encode;
+use XML::RSS;
+use Regexp::Common qw /URI/;
+
+# Script version string (package global declared through "use vars" above).
+$VERSION = '1.31';
+
+# Script metadata: author, contact, name, description, license, home page
+# and date of the last change.
+# NOTE(review): presumably read by Irssi's script loader -- confirm.
+%IRSSI = (
+ authors => 'Jakub Jankowski',
+ contact => 'shasta@toxcorp.com',
+ name => 'URLfeed',
+ description => 'Provides RSS feeds with URLs pasted on your channels.',
+ license => 'GNU GPLv2 or later',
+ url => 'http://toxcorp.com/irc/irssi/urlfeed/',
+ changed => '2019-03-02'
+);
+
+# Built-in defaults. They are registered as the initial values of the
+# urlfeed_* settings at the bottom of this file.
+#
+# Placeholder rules (they apply only to per-channel RSS files, NOT to the
+# bundle!):
+# $stripchan is replaced with the channel name, BUT with the leading
+#            #, !, & or + stripped
+# $chan is replaced with the channel name
+# $tag is replaced with the server tag
+#
+# The single quotes are intentional: the placeholders must stay literal here.
+
+# Title, link and description of a newly created feed. The bundle feed is
+# created with these three values as-is, placeholders unexpanded.
+my $rss_title = 'URLs on $chan';
+my $rss_link = 'http://toxcorp.com/irc/irssi/';
+my $rss_description = 'List of URLs recently pasted on $chan $tag channel';
+# Where the per-channel feed and the all-channels bundle feed are stored.
+my $rss_path = $ENV{HOME}.'/public_html/rss/$tag/$stripchan.rdf';
+my $rss_bundle_path = $ENV{HOME}.'/public_html/rss/all.rdf';
+# Item limits for the per-channel feed and for the bundle feed.
+my $max_items = 15;
+my $bundle_max_items = 40;
+# Default for urlfeed_debug (extra notices printed by this script).
+my $debug = 1;
+# Default for urlfeed_provide_bundle (also maintain the bundle feed).
+my $provide_bundle = 0;
+
+# Build the file name of the per-channel feed from the urlfeed_path setting.
+#
+# Parameters:
+#   $tag  - server tag
+#   $chan - channel name, including its leading #, !, & or +
+#
+# Returns the setting with $tag, $chan and $stripchan (matched
+# case-insensitively) replaced. If the result ends with "/", the channel
+# name plus ".rdf" is appended.
+#
+# The channel name comes from the network, so every substituted value is
+# made safe for use as a single path component first: "/" and NUL become
+# "_", and a value that is exactly "." or ".." is replaced by "_".
+sub urlfeed_build_path {
+    my ($tag, $chan) = @_;
+
+    my ($stripchan) = $chan =~ /^[\!\#\&\+](.+)/;
+    # No prefix, or nothing after it: fall back to the full name instead of
+    # substituting undef.
+    $stripchan = $chan unless (defined $stripchan);
+
+    my %value_for = (tag => $tag, chan => $chan, stripchan => $stripchan);
+    foreach my $value (values %value_for) {
+        $value =~ s{[/\0]}{_}g;
+        $value = '_' if ($value eq '.' || $value eq '..');
+    }
+
+    my $str = Irssi::settings_get_str('urlfeed_path');
+    # Single pass, so a placeholder-looking string inside a substituted
+    # value is never expanded a second time.
+    $str =~ s/\$(stripchan|chan|tag)/$value_for{lc($1)}/gi;
+    $str .= $value_for{chan} . ".rdf" if ($str =~ /\/$/);
+    return $str;
+}
+
+# Expand the $tag, $chan and $stripchan placeholders in a template string.
+#
+# Parameters:
+#   $str  - template text
+#   $tag  - server tag
+#   $chan - channel name, including its leading #, !, & or +
+#
+# Returns the expanded copy of $str. Placeholders are matched
+# case-insensitively. $stripchan is the channel name without its leading
+# prefix character; when there is no prefix (or nothing follows it) the
+# full channel name is used.
+sub urlfeed_replace ($$$) {
+    my ($str, $tag, $chan) = @_;
+
+    my ($stripchan) = $chan =~ /^[\!\#\&\+](.+)/;
+    $stripchan = $chan unless (defined $stripchan);
+
+    my %value_for = (tag => $tag, chan => $chan, stripchan => $stripchan);
+    # Single pass, so text inserted for one placeholder is never mistaken
+    # for another placeholder.
+    $str =~ s/\$(stripchan|chan|tag)/$value_for{lc($1)}/gi;
+    return $str;
+}
+
+# Make sure the feed file $f can be written.
+#
+# Creates any missing directories leading to $f, checks that the containing
+# directory is writable and, when $f already exists, that it can be opened
+# read-write. The file itself is never created or modified here.
+#
+# Parameters:
+#   $f - path of the feed file (absolute or relative)
+#
+# Returns 1 when $f looks writable, 0 otherwise (the reason is printed).
+sub urlfeed_touch_file ($) {
+    my ($f) = @_;
+
+    my ($basedir) = $f =~ /(.*)\/[^\/]*$/;
+    $basedir = '.' unless (defined $basedir);    # no directory part at all
+    $basedir = '/' if ($basedir eq '');          # file directly under /
+
+    # Walk the directory components top-down, creating what is missing.
+    # Relative paths are anchored at "." so the first component is kept.
+    my $path = ($basedir =~ /^\//) ? "" : ".";
+    foreach my $dir (grep { length($_) } split(/\/+/, $basedir)) {
+        $path .= "/" . $dir;
+        next if (-d $path);
+        Irssi::print("URLfeed warning: $path is not a dir, trying to mkdir");
+        # mkdir reports failure through its return value and $!, it does
+        # not die, so eval/$@ cannot be used to detect errors.
+        if (!mkdir($path)) {
+            Irssi::print("URLfeed error: couldn't mkdir($path): $!");
+            return 0;
+        }
+    }
+
+    if (! -w $basedir) {
+        Irssi::print("URLfeed error: $basedir isn't writable");
+        return 0;
+    }
+
+    # A missing file is fine (a new feed gets written later); an existing
+    # one has to be openable for update. open also signals failure through
+    # its return value, not through an exception.
+    if (-e $f) {
+        my $fh;
+        if (!open($fh, '+<', $f)) {
+            Irssi::print("URLfeed error: couldn't open $f for writing: $!");
+            return 0;
+        }
+        close($fh);
+    }
+
+    return 1;
+}
+
+# Format an epoch timestamp as local time in the form
+# YYYY-MM-DDThh:mm:ss+hh:mm.
+#
+# strftime's %z yields the offset without a colon (e.g. +0100), so it is
+# split after the sign and hours and re-joined with ":".
+sub urlfeed_format_time ($) {
+    my @parts = localtime($_[0]);
+    my $offset = strftime("%z", @parts);
+    my $hours = substr($offset, 0, 3);
+    my $minutes = substr($offset, 3);
+    my $stamp = strftime("%Y-%m-%dT%H:%M:%S", @parts);
+    return sprintf("%s%s:%s", $stamp, $hours, $minutes);
+}
+
+# Private helper: put one item on top of the RSS 1.0 feed stored in
+# $filename and save the feed.
+#
+# Parameters:
+#   $filename  - feed file; parsed when possible, otherwise a new feed is
+#                started
+#   $max_items - number of items the feed may hold after the addition
+#   $channel   - hashref (title, link, description) used only when a new
+#                feed has to be started
+#   $item      - hashref with the add_item() arguments (title, link, dc)
+#
+# Returns 1 when the item was stored, 0 when the file is not writable, the
+# link is already in the feed, or saving failed.
+sub _urlfeed_store_item {
+    my ($filename, $max_items, $channel, $item) = @_;
+
+    if (!urlfeed_touch_file($filename)) {
+        Irssi::print("URLfeed error: Couldn't touch $filename");
+        return 0;
+    }
+
+    # UTF-8 is the default encoding
+    my $rss = XML::RSS->new(version => '1.0');
+    eval { $rss->parsefile($filename); };
+    if ($@) {
+        Irssi::print("URLfeed notice: rss->parsefile($filename) failed. Creating new RSS") if (Irssi::settings_get_bool('urlfeed_debug'));
+        $rss->channel(%{$channel});
+    }
+
+    # tiny spam protection: never list the same link twice
+    foreach my $old (@{$rss->{'items'}}) {
+        next unless (defined $old->{'link'});
+        return 0 if (lc($item->{'link'}) eq lc($old->{'link'}));
+    }
+
+    # Make room for the new item. New items are inserted at the front, so
+    # the entries dropped from the end are the oldest ones.
+    my $keep = $max_items - 1;
+    $keep = 0 if ($keep < 0);
+    splice(@{$rss->{'items'}}, $keep) if (@{$rss->{'items'}} > $keep);
+
+    $rss->add_item(%{$item}, mode => 'insert');
+
+    # Report a failed write instead of letting it escape to the caller.
+    eval { $rss->save($filename); };
+    if ($@) {
+        (my $err = $@) =~ s/\%/\%\%/g;
+        Irssi::print("URLfeed error: couldn't save $filename: $err");
+        return 0;
+    }
+
+    return 1;
+}
+
+# Add a pasted URL to the per-channel feed and, when urlfeed_provide_bundle
+# is on, to the bundle feed as well.
+#
+# Parameters:
+#   $timestamp - epoch time of the message; becomes the item's dc:date
+#   $tag       - server tag (required)
+#   $chan      - channel name (required)
+#   $nickname  - who pasted the URL; "guest" when undefined
+#   $text      - full message text, used as item title; the URL itself
+#                when undefined
+#   $url       - the URL (required)
+#
+# Returns 1 on success. Returns 0 when a required argument is missing, a
+# feed file is not writable, the URL is already present in the feed being
+# updated, or a feed could not be saved. A 0 caused by the bundle feed does
+# not undo an addition already made to the per-channel feed.
+sub urlfeed_rss_add {
+    my ($timestamp, $tag, $chan, $nickname, $text, $url) = @_;
+
+    return 0 unless (defined $url && defined $tag && defined $chan);
+
+    $nickname = "guest" unless (defined $nickname);
+    $text = $url unless (defined $text);
+
+    my $title = Encode::decode_utf8($text);
+    my $date = urlfeed_format_time($timestamp);
+
+    my $stored = _urlfeed_store_item(
+        urlfeed_build_path($tag, $chan),
+        Irssi::settings_get_int('urlfeed_max_items'),
+        {
+            title => urlfeed_replace(Irssi::settings_get_str('urlfeed_title'), $tag, $chan),
+            link => urlfeed_replace(Irssi::settings_get_str('urlfeed_link'), $tag, $chan),
+            description => urlfeed_replace(Irssi::settings_get_str('urlfeed_description'), $tag, $chan)
+        },
+        {
+            title => $title,
+            link => $url,
+            dc => { creator => $nickname, date => $date }
+        }
+    );
+    return 0 unless ($stored);
+
+    return 1 unless (Irssi::settings_get_bool('urlfeed_provide_bundle'));
+
+    # now do the bundle part; a new bundle feed gets the built-in defaults
+    # as-is (placeholders are not expanded for the bundle)
+    return _urlfeed_store_item(
+        Irssi::settings_get_str('urlfeed_bundle_path'),
+        Irssi::settings_get_int('urlfeed_bundle_max_items'),
+        {
+            title => $rss_title,
+            link => $rss_link,
+            description => $rss_description
+        },
+        {
+            title => $title,
+            link => $url,
+            dc => { creator => $nickname . " on " . $tag, date => $date }
+        }
+    );
+}
+
+# based on urlgrab.pl by David Leadbeater
+#
+# Extract the URLs from one line of text.
+#
+# The text is split on spaces and tabs and every word contributes at most
+# one URL: the first match among the HTTP(S), FTP, NNTP and news patterns
+# or, failing those, a bare "www." address, which gets "http://" prepended.
+#
+# Returns the list of URLs in the order they were found.
+sub urlfeed_find_urls {
+    my ($text) = @_;
+    my @found = ();
+
+    for my $word (split(/[ \t]+/, $text)) {
+        if ($word =~ /($RE{URI}{HTTP}{-scheme => qr#https?#})/ ||
+            $word =~ /($RE{URI}{FTP})/ ||
+            $word =~ /($RE{URI}{NNTP})/ ||
+            $word =~ /($RE{URI}{news})/) {
+            # $1 belongs to whichever alternative matched
+            push(@found, $1);
+            next;
+        }
+
+        if ($word =~ /(www\.[a-zA-Z0-9\/\\\:\?\%\.\&\;=#\-\_\!\+\~\,]+)/i) {
+            push(@found, "http://" . $1);
+        }
+    }
+
+    return @found;
+}
+
+# Feed every URL found in a channel message to the RSS writer.
+#
+# Parameters:
+#   $time   - epoch time of the message
+#   $tag    - server tag
+#   $target - channel name
+#   $nick   - nick of the sender
+#   $text   - message text
+#
+# With urlfeed_debug enabled, the outcome of each addition is printed.
+sub urlfeed_process {
+    my ($time, $tag, $target, $nick, $text) = @_;
+
+    my @links = urlfeed_find_urls($text);
+
+    foreach my $link (@links) {
+        my $added = urlfeed_rss_add($time, $tag, $target, $nick, $text, $link);
+        next unless (Irssi::settings_get_bool('urlfeed_debug'));
+
+        # escape url, in case it needs to be Irssi::print()ed
+        (my $shown = $link) =~ s/\%/\%\%/g;
+        if ($added == 1) {
+            Irssi::print("URLfeed notice: URL $shown (pasted by $nick on $target/$tag) successfully added to RSS feed.");
+        } elsif ($added == 0) {
+            Irssi::print("URLfeed notice: Adding URL $shown (pasted by $nick on $target/$tag) to RSS failed.");
+        }
+    }
+}
+
+# Handler for our own channel messages ('message own_public').
+#
+# Targets that do not start with !, #, & or + are ignored. The channel
+# name is lower-cased and the message is processed under our own nick.
+sub urlfeed_message_own_public {
+    my ($server, $text, $target) = @_;
+
+    if ($target !~ /^[\!\#\&\+]/) {
+        return;
+    }
+
+    # For "!" channels keep the "!" and drop the five characters after it.
+    # NOTE(review): presumably the channel ID of !channels -- confirm.
+    if ($target =~ /^\!/) {
+        $target = '!' . substr($target, 6);
+    }
+
+    urlfeed_process(time, $server->{tag}, lc($target), $server->{nick}, $text);
+}
+
+# Handler for channel messages from other users ('message public').
+#
+# Targets that do not start with !, #, & or + are ignored; otherwise the
+# message is processed with the lower-cased channel name and sender's nick.
+sub urlfeed_message_public {
+    my ($server, $text, $nick, $hostmask, $target) = @_;
+
+    if ($target !~ /^[\!\#\&\+]/) {
+        return;
+    }
+
+    urlfeed_process(time, $server->{tag}, lc($target), $nick, $text);
+}
+
+# Register the script's settings in the 'urlfeed' section, using the
+# built-in defaults declared near the top of this file.
+Irssi::settings_add_bool('urlfeed', 'urlfeed_debug', $debug);
+Irssi::settings_add_bool('urlfeed', 'urlfeed_provide_bundle', $provide_bundle);
+Irssi::settings_add_int ('urlfeed', 'urlfeed_max_items', $max_items);
+Irssi::settings_add_int ('urlfeed', 'urlfeed_bundle_max_items', $bundle_max_items);
+Irssi::settings_add_str ('urlfeed', 'urlfeed_title', $rss_title);
+Irssi::settings_add_str ('urlfeed', 'urlfeed_link', $rss_link);
+Irssi::settings_add_str ('urlfeed', 'urlfeed_description', $rss_description);
+Irssi::settings_add_str ('urlfeed', 'urlfeed_path', $rss_path);
+Irssi::settings_add_str ('urlfeed', 'urlfeed_bundle_path', $rss_bundle_path);
+
+# Hook the handlers for channel messages from others and from ourselves.
+# NOTE(review): signal_add_last presumably runs these after other handlers
+# of the same signal -- confirm against the Irssi documentation.
+Irssi::signal_add_last('message public', 'urlfeed_message_public');
+Irssi::signal_add_last('message own_public', 'urlfeed_message_own_public');