diff options
Diffstat (limited to 'scripts/url_log.pl')
-rw-r--r-- | scripts/url_log.pl | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/scripts/url_log.pl b/scripts/url_log.pl new file mode 100644 index 0000000..8ee9777 --- /dev/null +++ b/scripts/url_log.pl @@ -0,0 +1,399 @@ +# url grabber, yes it sucks +# +# infected with the gpl virus +# +# Thomas Graf <tgraf@europe.com> +# +# version: 0.2 +# +# Commands: +# +# /URL LIST +# /URL CLEAR +# /URL OPEN [<nr>] +# /URL QUOTE [<nr>] +# /URL HEAD [<nr>] !! Blocking !! +# /HEAD <url> !! Blocking !! +# +# Config Values +# +# [url logfile] +# url_log log urls to url_log_file +# url_log_file file to save urls +# url_log_format format in url logfile +# url_log_timestamp format of timestamp in url logfile +# +# [url log in memory] +# url_log_browser command to execute to open url, %f will be replaced with the url +# url_log_size keep that many urls in the list +# +# [http head stuff] +# url_head_format format of HEAD output +# url_auto_head do a head on every url received +# url_auto_head_format format of auto head output +# +# +# Database installation +# - create database and user +# - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, +# time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255)); +# or similiar :) +# +# +# todo: +# +# - fix XXX marks +# - xml output? +# - don't output "bytes" if content-length is not available +# - prefix with http:// if no prefix is given + +use strict; +use Irssi; +use Irssi::Irc; +use vars qw($VERSION %IRSSI); + +$VERSION = "0.3"; +%IRSSI = ( + authors => 'Thomas Graf', + contact => 'irssi@reeler.org', + name => 'url_log', + description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.', + license => 'GNU GPLv2 or later', + url => 'http://irssi.reeler.org/url/', +); + +use LWP; +use LWP::UserAgent; +use HTTP::Status; +use DBI; + +use POSIX qw(strftime); + +my @urls; +my $user_agent = new LWP::UserAgent; + +$user_agent->agent("IrssiUrlLog/0.3"); + +# hmm... stolen.. +# -verbatim- import expand +sub expand { + my ($string, %format) = @_; + my ($exp, $repl); + $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format)); + return $string; +} +# -verbatim- end + +sub print_msg +{ + Irssi::active_win()->print("@_"); +} + +# +# open url in brower using url_log_brower command +# +sub open_url +{ + my ($data) = @_; + + my ($nick, $target, $url) = split(/ /, $data); + + my $pid = fork(); + + if ($pid) { + Irssi::pidwait_add($pid); + } elsif (defined $pid) { # $pid is zero here if defined + my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url); + # XXX use exec + system $data; + POSIX::_exit(1); + } else { + # weird fork error + print_msg "Can't fork: $!"; + } +} + +sub head +{ + my ($url) = @_; + my $req = new HTTP::Request HEAD => $url; + my $res = $user_agent->request($req); + return $res; +} + +# +# do a HEAD +# +sub do_head +{ + my ($url) = @_; + + my $res = head($url); + + if ($res->code ne RC_OK) { + Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" . + $res->status_line()); + } else { + + my $t = expand(Irssi::settings_get_str("url_head_format"), + "u", $url, + "t", scalar $res->content_type, + "l", scalar $res->content_length, + "s", scalar $res->server); + + Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t); + } +} + +# +# called if url is detected, should do a HEAD and print a 1-liner +# +sub do_auto_head +{ + my ($url, $window) = @_; + + return if ($url !~ /^http:\/\//); + + my $res = head($url); + + if ($res->code ne RC_OK) { + $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line()); + } else { + + my $t = expand(Irssi::settings_get_str("url_auto_head_format"), + "u", $url, + "c", $res->code, + "t", scalar $res->content_type, + "l", scalar $res->content_length, + "s", scalar $res->server); + + $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t); + } +} + +# +# log url to file +# +sub log_to_file +{ + my ($nick, $target, $text) = @_; + my ($lfile) = glob Irssi::settings_get_str("url_log_file"); + + if ( open(LFD, ">>", $lfile) ) { + + my %h = { + time => time, + nick => $nick, + target => $target, + url => $text + }; + + print LFD expand(Irssi::settings_get_str("url_log_format"), + "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime), + "n", $nick, + "t", $target, + "u", $text), "\n"; + + close LFD; + } else { + print_msg "Warning: Unable to open file $lfile $!"; + } +} + + +# +# log url to database +# +sub log_to_database +{ + my ($nick, $target, $text) = @_; + + # this is quite expensive, but... + my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"), + Irssi::settings_get_str("url_log_db_user"), + Irssi::settings_get_str("url_log_db_password")) + or print_msg "Can't connect to database " . $DBI::errstr; + + if ($dbh) { + + my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," . + $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")"; + + $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr; + + $dbh->disconnect(); + } +} + +# +# head command handler +# +sub sig_head +{ + my ($cmd_line, $server, $win_item) = @_; + my @args = split(' ', $cmd_line); + + my $url; + + if (@args <= 0) { + + if ($#urls eq 0) { + return; + } + + $url = $urls[$#urls]; + $url =~ s/^.*?\s.*?\s//; + } else { + $url = lc(shift(@args)); + } + + do_head($url); +} + +# +# msg handler +# +sub sig_msg +{ + my ($server, $data, $nick, $address) = @_; + my ($target, $text) = split(/ :/, $data, 2); + + # very special, but better than just \w::/* and www.* + while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) { + + return if ($1 =~ /^\.\./); + + push @urls, "$nick $target $2"; + + # XXX resize correctly if delta is > 1 + if ($#urls >= Irssi::settings_get_int("url_log_size")) { + shift @urls; + } + + my $ischannel = $server->ischannel($target); + my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS; + $target = $nick unless $ischannel; + my $window = $server->window_find_closest($target, $level); + + if ( Irssi::settings_get_bool("url_log_auto_head") ) { + do_auto_head($2, $window); + } + + if ( Irssi::settings_get_bool("url_log") ) { + log_to_file($nick, $target, $2); + } + + if ( Irssi::settings_get_bool("url_log_db") ) { + log_to_database($nick, $target, $2); + } + } +} + +sub print_url_list_item +{ + my ($n, $data) = @_; + my ($src, $dst, $url) = split(/ /, $data); + + Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url); +} + +# +# url command handler +# +sub sig_url +{ + my ($cmd_line, $server, $win_item) = @_; + my @args = split(' ', $cmd_line); + + if (@args <= 0) { + print_msg "URL LIST [<nr>] list all url(s)"; + print_msg " OPEN [<nr>] open url in browser"; + print_msg " QUOTE [<nr>] quote url (print to current channel)"; + print_msg " HEAD send HEAD to server"; + print_msg " CLEAR clear url list"; + return; + } + + my $action = lc(shift(@args)); + + if ($action eq "list") { + + if (@args > 0) { + my $i = shift(@args); + print_url_list_item($i, $urls[$i]); + } else { + my $i = 0; + foreach my $l (@urls) { + print_url_list_item($i, $l); + $i++; + } + } + + } elsif($action eq "open") { + + my $i = $#urls; + if (@args > 0) { + $i = shift(@args); + } + open_url($urls[$i]); + + } elsif ($action eq "quote") { + + my $i = $#urls; + if (@args > 0) { + $i = shift(@args); + } + Irssi::active_win()->command("SAY URL: " . $urls[$i]); + + } elsif ($action eq "clear") { + + splice @urls; + + } elsif ($action eq "head") { + + my $i = $#urls; + if (@args > 0) { + $i = shift(@args); + } + my $url = $urls[$i]; + $url =~ s/^.*?\s.*?\s//; + + do_head($url); + + } else { + print_msg "Unknown action"; + } +} + +Irssi::command_bind('head', 'sig_head'); +Irssi::command_bind('url', 'sig_url'); +Irssi::command_bind('url list', 'sig_url'); +Irssi::command_bind('url clear', 'sig_url'); +Irssi::command_bind('url open', 'sig_url'); +Irssi::command_bind('url quote', 'sig_url'); +Irssi::command_bind('url head', 'sig_url'); +Irssi::signal_add_first('event privmsg', 'sig_msg'); + +Irssi::settings_add_bool("url_log", "url_log", 1); +Irssi::settings_add_bool("url_log", "url_log_auto_head", 1); +Irssi::settings_add_bool("url_log", "url_log_db", 0); +Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost'); +Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url'); +Irssi::settings_add_str("url_log", "url_log_db_password", 'nada'); +Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url"); +Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c'); +Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u'); +Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null'); +Irssi::settings_add_int("url_log", "url_log_size", 25); +Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes'); +Irssi::settings_add_str("url_log", "url_head_format", ' +Content-Type: %t +Length: %l bytes +Server: %s'); + + +Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-', + 'url_auto_head', '[%gHEAD%n] $0-', + 'url_list', '[$0] $1 %W$2%n $3-']); + +# vim:set ts=4 sw=4 expandtab: |