# url grabber, yes it sucks # # infected with the gpl virus # # Thomas Graf # # version: 0.2 # # Commands: # # /URL LIST # /URL CLEAR # /URL OPEN [] # /URL QUOTE [] # /URL HEAD [] !! Blocking !! # /HEAD !! Blocking !! # # Config Values # # [url logfile] # url_log log urls to url_log_file # url_log_file file to save urls # url_log_format format in url logfile # url_log_timestamp format of timestamp in url logfile # # [url log in memory] # url_log_browser command to execute to open url, %f will be replaced with the url # url_log_size keep that many urls in the list # # [http head stuff] # url_head_format format of HEAD output # url_auto_head do a head on every url received # url_auto_head_format format of auto head output # # # Database installation # - create database and user # - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, # time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255)); # or similiar :) # # # todo: # # - fix XXX marks # - xml output? # - don't output "bytes" if content-length is not available # - prefix with http:// if no prefix is given use strict; use Irssi; use Irssi::Irc; use vars qw($VERSION %IRSSI); $VERSION = "0.3"; %IRSSI = ( authors => 'Thomas Graf', contact => 'irssi@reeler.org', name => 'url_log', description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.', license => 'GNU GPLv2 or later', url => 'http://irssi.reeler.org/url/', ); use LWP; use LWP::UserAgent; use HTTP::Status; use DBI; use POSIX qw(strftime); my @urls; my $user_agent = new LWP::UserAgent; $user_agent->agent("IrssiUrlLog/0.3"); # hmm... stolen.. # -verbatim- import expand sub expand { my ($string, %format) = @_; my ($exp, $repl); $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format)); return $string; } # -verbatim- end sub print_msg { Irssi::active_win()->print("@_"); } # # open url in brower using url_log_brower command # sub open_url { my ($data) = @_; my ($nick, $target, $url) = split(/ /, $data); my $pid = fork(); if ($pid) { Irssi::pidwait_add($pid); } elsif (defined $pid) { # $pid is zero here if defined my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url); # XXX use exec system $data; POSIX::_exit(1); } else { # weird fork error print_msg "Can't fork: $!"; } } sub head { my ($url) = @_; my $req = new HTTP::Request HEAD => $url; my $res = $user_agent->request($req); return $res; } # # do a HEAD # sub do_head { my ($url) = @_; my $res = head($url); if ($res->code ne RC_OK) { Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" . $res->status_line()); } else { my $t = expand(Irssi::settings_get_str("url_head_format"), "u", $url, "t", scalar $res->content_type, "l", scalar $res->content_length, "s", scalar $res->server); Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t); } } # # called if url is detected, should do a HEAD and print a 1-liner # sub do_auto_head { my ($url, $window) = @_; return if ($url !~ /^http:\/\//); my $res = head($url); if ($res->code ne RC_OK) { $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line()); } else { my $t = expand(Irssi::settings_get_str("url_auto_head_format"), "u", $url, "c", $res->code, "t", scalar $res->content_type, "l", scalar $res->content_length, "s", scalar $res->server); $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t); } } # # log url to file # sub log_to_file { my ($nick, $target, $text) = @_; my ($lfile) = glob Irssi::settings_get_str("url_log_file"); if ( open(LFD, ">>", $lfile) ) { my %h = { time => time, nick => $nick, target => $target, url => $text }; print LFD expand(Irssi::settings_get_str("url_log_format"), "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime), "n", $nick, "t", $target, "u", $text), "\n"; close LFD; } else { print_msg "Warning: Unable to open file $lfile $!"; } } # # log url to database # sub log_to_database { my ($nick, $target, $text) = @_; # this is quite expensive, but... my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"), Irssi::settings_get_str("url_log_db_user"), Irssi::settings_get_str("url_log_db_password")) or print_msg "Can't connect to database " . $DBI::errstr; if ($dbh) { my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," . $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")"; $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr; $dbh->disconnect(); } } # # head command handler # sub sig_head { my ($cmd_line, $server, $win_item) = @_; my @args = split(' ', $cmd_line); my $url; if (@args <= 0) { if ($#urls eq 0) { return; } $url = $urls[$#urls]; $url =~ s/^.*?\s.*?\s//; } else { $url = lc(shift(@args)); } do_head($url); } # # msg handler # sub sig_msg { my ($server, $data, $nick, $address) = @_; my ($target, $text) = split(/ :/, $data, 2); # very special, but better than just \w::/* and www.* while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) { return if ($1 =~ /^\.\./); push @urls, "$nick $target $2"; # XXX resize correctly if delta is > 1 if ($#urls >= Irssi::settings_get_int("url_log_size")) { shift @urls; } my $ischannel = $server->ischannel($target); my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS; $target = $nick unless $ischannel; my $window = $server->window_find_closest($target, $level); if ( Irssi::settings_get_bool("url_log_auto_head") ) { do_auto_head($2, $window); } if ( Irssi::settings_get_bool("url_log") ) { log_to_file($nick, $target, $2); } if ( Irssi::settings_get_bool("url_log_db") ) { log_to_database($nick, $target, $2); } } } sub print_url_list_item { my ($n, $data) = @_; my ($src, $dst, $url) = split(/ /, $data); Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url); } # # url command handler # sub sig_url { my ($cmd_line, $server, $win_item) = @_; my @args = split(' ', $cmd_line); if (@args <= 0) { print_msg "URL LIST [] list all url(s)"; print_msg " OPEN [] open url in browser"; print_msg " QUOTE [] quote url (print to current channel)"; print_msg " HEAD send HEAD to server"; print_msg " CLEAR clear url list"; return; } my $action = lc(shift(@args)); if ($action eq "list") { if (@args > 0) { my $i = shift(@args); print_url_list_item($i, $urls[$i]); } else { my $i = 0; foreach my $l (@urls) { print_url_list_item($i, $l); $i++; } } } elsif($action eq "open") { my $i = $#urls; if (@args > 0) { $i = shift(@args); } open_url($urls[$i]); } elsif ($action eq "quote") { my $i = $#urls; if (@args > 0) { $i = shift(@args); } Irssi::active_win()->command("SAY URL: " . $urls[$i]); } elsif ($action eq "clear") { splice @urls; } elsif ($action eq "head") { my $i = $#urls; if (@args > 0) { $i = shift(@args); } my $url = $urls[$i]; $url =~ s/^.*?\s.*?\s//; do_head($url); } else { print_msg "Unknown action"; } } Irssi::command_bind('head', 'sig_head'); Irssi::command_bind('url', 'sig_url'); Irssi::command_bind('url list', 'sig_url'); Irssi::command_bind('url clear', 'sig_url'); Irssi::command_bind('url open', 'sig_url'); Irssi::command_bind('url quote', 'sig_url'); Irssi::command_bind('url head', 'sig_url'); Irssi::signal_add_first('event privmsg', 'sig_msg'); Irssi::settings_add_bool("url_log", "url_log", 1); Irssi::settings_add_bool("url_log", "url_log_auto_head", 1); Irssi::settings_add_bool("url_log", "url_log_db", 0); Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost'); Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url'); Irssi::settings_add_str("url_log", "url_log_db_password", 'nada'); Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url"); Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c'); Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u'); Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null'); Irssi::settings_add_int("url_log", "url_log_size", 25); Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes'); Irssi::settings_add_str("url_log", "url_head_format", ' Content-Type: %t Length: %l bytes Server: %s'); Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-', 'url_auto_head', '[%gHEAD%n] $0-', 'url_list', '[$0] $1 %W$2%n $3-']); # vim:set ts=4 sw=4 expandtab: