diff options
Diffstat (limited to 'scripts/urlplot.pl')
-rw-r--r-- | scripts/urlplot.pl | 841 |
1 files changed, 841 insertions, 0 deletions
diff --git a/scripts/urlplot.pl b/scripts/urlplot.pl new file mode 100644 index 0000000..a56d124 --- /dev/null +++ b/scripts/urlplot.pl @@ -0,0 +1,841 @@ +use strict; +#use warnings; # Not a default module in perl 5.005 + +use vars qw($VERSION %IRSSI); + +$VERSION = '1.2'; +%IRSSI = ( + authors => 'bwolf', + contact => 'bwolf@geekmind.org', + name => 'urlplot', + description => 'URL grabber with HTML generation and cmd execution', + license => 'BSD', + url => 'http://www.geekmind.net', + changed => 'Sun Jun 16 14:00:13 CEST 2002' +); + +# To read the documentation you may use one of the following commands: +# +# pod2man urlplot.pl | nroff -man | more +# pod2text urlplot.pl | more +# pod2man urlplot.pl | troff -man -Tps -t > urlplot.ps + +=head1 NAME + +urlplot + +=head1 SYNOPSIS + +All URL loggers suck. This one just sucks less. + +=head1 DESCRIPTION + +urlplot watches your channels for URLs and creates nice HTML logfiles of it. +Actually it parses normal text and topic changes for URLs. Internally it uses +two caches to prevent flooding and logging of duplicate URLs. As an additional +feature urlplot can create CSV datafiles. Logfiles can be created for all +channels and for separate channels. Logging can be allowed and denied on a per +channel/nick basis. A lockfile is used to protect the caches and logfiles from +accessing them by multiple irssi instances. A command allows you to send a +logged URL to your webbrowser of choice. + +The format of the CSV logfiles is as follows: +date nick channel url + +=head1 GETTING STARTED + +Copy urlplot.pl intoF< $HOME/.irssi/scripts> and create the necessary +directories withC< mkdir -p>F< $HOME/.irssi/urlplot/urls>. +Look for the settingsC< url_log_basedir> andC< url_db_basedir> if you want to +change the directories urlplot will populate with files. +Follow the documentation and configure urlplot to fit your needs. 
+ +=head1 COMMANDS + +=head2 /url <integer> + +Executes the commandC< url_command> with an URL from the cache as its +argument. If no number has been specified it defaults to the nth URL logged which +references the most recently logged URL. + +=head2 /url -list + +Displays a list of all logged URLs. + +=head2 /url -clearcache + +Clears the cache databases. + +=head2 /url -showlog + +ExecutesC< url_command> withC< url_navigate> as its argument. It can be used +to display the main logfile in your favourite webbrowser. + +=head1 SETTINGS + +=head2 Pathnames + +Please note that you can't use $HOME or any environment variables in the +settings because irssi/urlplot isn't a shell ;) + +=head2 /set url_command <string> + +Command to be executed to display an URL (see /url). The command string should +contain the sequence C<__URL__> which will be replaced by a certain URL. + +The default is: +C< mozilla -remote "openURL(__URL__)" E<gt> /dev/null 2E<gt>&1 || \ > +C< mozilla "__URL__"& > + +This will send a certain URL to mozilla or it will start mozilla if it is not +already there. The string can be anything. For example I use the following: +C< ssh host /home/user/bin/mozopenurl "'__URL__'" E<gt>/dev/null 2E<gt>&1 & > +where mozopenurl is a shell script that contains similar code as the mozilla +-remote example above. + +=head2 /set url_cache_max <integer> + +Specifies the maximum count of items which will be held in the persistent URL +caches. A value of zero disables automatic cache resizing (round-robin). The +default is to keep the last 90 URLs. + +=head2 /set url_log_basedir <path> + +Specifies the logging base directory used to create the log files beneath it. +The default isF< $HOME/.irssi/urlplot/urls/>. You have to create directories +by yourself:C< mkdir -p>F< $HOME/.irssi/urlplot/urls>. + +=head2 /set url_log_file_name <relative-filename> + +Defines the filename of the full logfile. It will be passed to I< +strftime(3)>. 
This can be useful to create logfiles with a timestamp. +The file will be created relative toC< url_log_basedir>. The default +isF< ircurls.html>. + +=head2 /set url_chan_prefix <string> + +Defines the filename prefix for channel logfiles. The leadingC< # >of the +channel name will be replaced by this prefix. It will be passed to +I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The +default isF< chan_>. + +=head2 /set url_chan_logging <bool> + +Enables or disables channel logging globally. +The default isC< ON>. + +=head2 /set url_log_csv_file_name <relative-filename> + +Defines the filename of the full CSV logfile. It will be passed to +I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The +default isF< ircurls.csv>. + +=head2 /set url_log_csv_file_max_size <integer> + +Defines the maximum size of the full CSV logfile. If it reaches the specified +maximum size in bytes it will be simply resized to zero. The default isC< 30*1024> +bytes. + +=head2 /set url_log_csv_separator <string> + +Defines the separator used as a delimiter for the fields of the CSV files. +The default isC< |>. + +=head2 /set url_csv_logging <bool> + +Conditionally turns on or off CSV logging for the full logfile. The default +isC< OFF>. + +=head2 /set url_csv_chan_logging <bool> + +Conditionally turns on or off CSV logging of the channel logfiles. The default isC< OFF>. + +=head2 /set url_time_format <string> + +Specifies the time format that will be passed toI< strftime(3)> to produce an +ASCII representation of the time/date when an URL was grabbed. It will be used +in the logfiles. The default isC< %Y:%m:%d - %H:%M:%S>. + +=head2 /set url_log_file_max_size <integer> + +Defines the maximum size of the full logfile and the channel logfile. If it +reaches the specified maximum size in bytes it will be simply resized to zero. +The default isC< 30*1024> bytes. 
+ +=head2 /set url_log_file_autoreload_time <integer> + +Interval in seconds used for the HTML logfile header. The logfile reloads +itself every N seconds. The default isC< 120> seconds. + +=head2 /set url_db_basedir <path> + +Specifies the database base directory where two database files and a lockfile +will be created. The default isF< $HOME/.irssi/urlplot>. You have to create +the directory by yourself. + +=head2 /set url_db_cache_a_filename <relative-filename> + +Defines the filename of the index URL database. The file will be created +relative toC< url_db_basedir>. The default isF< a_cache>. + +=head2 /set url_db_cache_h_filename <relative-filename> + +Defines the filename of the hash URL database. The file will be created +relative toC< url_db_basedir>. The default isF< h_cache>. + +=head2 /set url_db_lock_filename <relative-filename> + +Defines the filename of the lockfile used to lock all logfiles and the cache +databases. It will be created relative toC< url_db_basedir>. The default +isF< lockfile>. + +=head2 /set url_policy_default <allow|deny> + +Specifies the default policy that will be used to decide if logging is +permitted for a certain nick or channel. This can be eitherC< allow> +orC< deny>. If you set this toC< deny> you will have to allow explicitly those +channels and nicks for which logging should be permitted. In contrast if you +set it to allow, you can deny logging for certain nicks and channels. +The keysC< url_policy_chans> andC< url_policy_nicks> control the allow, deny +behaviour depending onC< url_policy_default>. The default isC< allow> which +permits logging of all channels and nicks. + +=head2 /set url_policy_chans <string> + +Specifies those channels for which logging is permitted or denied. Multiple +channels may be specified by usingC< ,>C< ;>C< :> or a space to separate the +items. + +=head2 /set url_policy_nicks <string> + +SeeC< url_policy_chans> and replace the word channel by nick. 
=head2 /set url_navigate <string>

ExecutesC< url_command> withC< url_navigate> as its argument. It can be used
to display the main logfile in your favourite webbrowser. Because you may pass
this command at anytime to your webbrowser it will not be passed to strftime.
Thus you can only specify a static file here.

=head1 AUTHOR

Marcus Geiger <bwolf@geekmind.org>

=cut

use integer;
use Irssi;
use POSIX qw(strftime);
use Fcntl qw(:DEFAULT :flock);
use DB_File;

# Regexps
sub URL_SCHEME_REGEX() { '(http|ftp|https|news|irc)' }
sub URL_GUESS_REGEX() { '(www|ftp)' }
sub URL_BASE_REGEX() { '[a-z0-9_\-+\\/:?%.&!~;,=\#<>]' }

# Other
# Minimum number of bytes read from the end of a logfile when searching for
# LOG_FILE_MARKER (see position_log_file).
sub BACKWARD_SEEK_BYTES() { 130 }
sub LOG_FILE_MARKER() { '<!-- bottom-line -->' }

# Keys for settings
sub KEY_URL_COMMAND() { 'url_command' }
sub KEY_URL_CACHE_MAX() { 'url_cache_max' }
sub KEY_URL_LOG_BASEDIR() { 'url_log_basedir' }
sub KEY_URL_LOG_FILE_NAME() { 'url_log_file_name' }
sub KEY_URL_CHAN_PREFIX() { 'url_chan_prefix' }
sub KEY_URL_CHAN_LOGGING() { 'url_chan_logging' }
sub KEY_URL_LOG_CSV_FILE_NAME() { 'url_log_csv_file_name' }
sub KEY_URL_LOG_CSV_FILE_MAX_SIZE() { 'url_log_csv_file_max_size' }
sub KEY_URL_LOG_CSV_SEPARATOR() { 'url_log_csv_separator' }
sub KEY_URL_CSV_LOGGING() { 'url_csv_logging' }
sub KEY_URL_CSV_CHAN_LOGGING() { 'url_csv_chan_logging' }
sub KEY_URL_TIME_FORMAT() { 'url_time_format' }
sub KEY_URL_LOG_FILE_MAX_SIZE() { 'url_log_file_max_size' }
sub KEY_URL_LOG_FILE_AUTORELOAD_TIME() { 'url_log_file_autoreload_time' }
sub KEY_URL_DB_BASEDIR() { 'url_db_basedir' }
sub KEY_URL_DB_CACHE_A_FILENAME() { 'url_db_cache_a_filename' }
sub KEY_URL_DB_CACHE_H_FILENAME() { 'url_db_cache_h_filename' }
sub KEY_URL_DB_LOCK_FILENAME() { 'url_db_lock_filename' }
sub KEY_URL_POLICY_DEFAULT() { 'url_policy_default' }
sub KEY_URL_POLICY_CHANS() { 'url_policy_chans' }
sub KEY_URL_POLICY_NICKS() { 'url_policy_nicks' }
sub KEY_URL_NAVIGATE() { 'url_navigate' }

# Defaults
sub DEF_URL_COMMAND() {
    'mozilla -remote "openURL(__URL__)" > /dev/null 2>&1 || mozilla "__URL__"&' }
sub DEF_URL_CACHE_MAX() { 90 }
sub DEF_URL_LOG_FILE_AUTORELOAD_TIME() { 120 }
sub DEF_URL_TIME_FORMAT() { '%Y:%m:%d - %H:%M:%S' }
sub DEF_URL_DO_FILE_RESIZE() { '0' }
sub DEF_URL_LOG_FILE_MAX_SIZE() { 1024 * 30 }
sub DEF_URL_LOG_BASEDIR() { '.irssi/urlplot/urls/' }
sub DEF_URL_LOG_FILE_NAME() { 'ircurls.html' }
sub DEF_URL_CHAN_PREFIX() { 'chan_' }
sub DEF_URL_CHAN_LOGGING() { '1' }
sub DEF_URL_LOG_CSV_FILE_NAME() { 'ircurls.csv' }
sub DEF_URL_LOG_CSV_FILE_MAX_SIZE() { 1024 * 30 }
sub DEF_URL_LOG_CSV_SEPARATOR() { '|' }
sub DEF_URL_CSV_LOGGING() { '' }
sub DEF_URL_CSV_CHAN_LOGGING() { '' }
sub DEF_URL_DB_BASEDIR() { '.irssi/urlplot/' }
sub DEF_URL_DB_CACHE_A_FILENAME() { 'a_cache' }
sub DEF_URL_DB_CACHE_H_FILENAME() { 'h_cache' }
sub DEF_URL_DB_LOCK_FILENAME() { 'lockfile' }
sub DEF_URL_POLICY_DEFAULT() { 'allow' }
sub DEF_URL_POLICY_CHANS() { '' }
sub DEF_URL_POLICY_NICKS() { '' }
sub DEF_URL_NAVIGATE() { '.irssi/urlplot/urls/ircurls.html' }

# Escape the characters that are significant in (X)HTML text and attribute
# values. Nicks, channel names and URLs come straight from IRC and
# URL_BASE_REGEX explicitly allows '<', '>' and '&', so they must never be
# written into a logfile verbatim.
sub html_escape {
    my $text = shift;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Write the HTML header of the full logfile (everything up to and including
# the table heading row) to $fh. $reload is the auto-refresh interval in
# seconds.
sub print_full_log_file_template {
    my ($fh, $reload) = @_;
    print $fh <<EOT;
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <title>IRC-URLs</title>
    <meta http-equiv="cache-control" content="no-cache" />
    <meta http-equiv="refresh" content="$reload;" />
    <style type="text/css">
      <!--
      .small { font-size: small; }
      .xsmall { font-size: x-small; }
      -->
    </style>
  </head>
  <body>
    <h1>IRC-URLs</h1>
    <p class="xsmall">
      Visit <a href="http://www.geekmind.net">geekmind.net</a>
    </p>
    <p>This page reloads itself every $reload seconds.</p>
    <p>
      <a name="top" />
      <a class="small" href="#bottom">Page bottom</a>
      <br />
      <br />
    </p>
    <table rules="rows" frame="void" width="100%" cellpadding="5">
      <tr align="left">
        <th><b>Date/Time</b></th>
        <th><b>Nick</b></th>
        <th><b>Channel/Nick</b></th>
        <th><b>URL</b></th>
      </tr>
EOT
}

# Write the HTML header of a per-channel logfile to $fh. $channel is shown
# in the title, $full_log is the link target for the complete listing.
# Both are HTML-escaped here.
sub print_chan_log_file_template {
    my ($fh, $reload, $channel, $full_log) = @_;
    my $h_channel  = html_escape($channel);
    my $h_full_log = html_escape($full_log);
    print $fh <<EOT;
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <title>IRC-URLs of $h_channel</title>
    <meta http-equiv="cache-control" content="no-cache" />
    <meta http-equiv="refresh" content="$reload;" />
    <style type="text/css">
      <!--
      .small { font-size: small; }
      .xsmall { font-size: x-small; }
      -->
    </style>
  </head>
  <body>
    <h1>IRC-URLs of $h_channel</h1>
    <p class="xsmall">
      Visit <a href="http://www.geekmind.net">geekmind.net</a>
    </p>
    <p>This page reloads itself every $reload seconds.</p>
    <p><a href="$h_full_log">Complete</a> listing.</p>
    <p>
      <a name="top" />
      <a class="small" href="#bottom">Page bottom</a>
      <br />
      <br />
    </p>
    <table rules="rows" frame="void" width="100%" cellpadding="5">
      <tr align="left">
        <th><b>Date/Time</b></th>
        <th><b>Nick</b></th>
        <th><b>URL</b></th>
      </tr>
EOT
}

# The closing part of every logfile. It starts with LOG_FILE_MARKER on a line
# of its own; position_log_file() looks for that marker so the tail can be
# overwritten by the next entry (which then re-appends the tail).
sub LOG_FILE_TAIL () {
    return <<"EOT";

      @{[ LOG_FILE_MARKER ]}
    </table>
    <p>
      <a class="small" href="#top">Page top</a>
      <a name="bottom" />
    </p>
  </body>
</html>
EOT
}

# Append one table row plus the closing tail to a channel logfile.
# $date is inserted as-is (the caller has already prepared it for HTML);
# $nick and $url are HTML-escaped. $channel is accepted for symmetry with
# print_full_log_file_entry but is not printed.
sub print_chan_log_file_entry {
    my ($fh, $date, $nick, $channel, $url) = @_;
    my $h_nick = html_escape($nick);
    my $h_url  = html_escape($url);
    print $fh <<EOURL;
      <tr>
        <td>$date</td>
        <td><em>$h_nick</em></td>
        <td><a href="$h_url">$h_url</a></td>
      </tr>
EOURL
    print $fh LOG_FILE_TAIL;
}

# Append one table row plus the closing tail to the full logfile.
# $chan_log is the (relative) filename of the channel logfile to link to.
# Everything except $date is HTML-escaped.
sub print_full_log_file_entry {
    my ($fh, $date, $nick, $channel, $chan_log, $url) = @_;
    my $h_nick     = html_escape($nick);
    my $h_channel  = html_escape($channel);
    my $h_chan_log = html_escape($chan_log);
    my $h_url      = html_escape($url);
    print $fh <<EOURL;
      <tr>
        <td>$date</td>
        <td><em>$h_nick</em></td>
        <td><a href="$h_chan_log">$h_channel</a></td>
        <td><a href="$h_url">$h_url</a></td>
      </tr>
EOURL
    print $fh LOG_FILE_TAIL;
}

sub p_error { # Error printing (directly to the current window)
    Irssi::print("urlplot: @_");
}

sub p_normal { # Normal printing (to the msg window)
    Irssi::print("@_", MSGLEVEL_MSGS+MSGLEVEL_NOHILIGHT);
}

# Return the first URL found in $rawtext, or undef if there is none.
# Text with an explicit scheme is returned verbatim. Otherwise a leading
# "www." or "ftp." is taken as a hint and the matching scheme is prepended.
sub scan_url {
    my $rawtext = shift;
    return $1
        if $rawtext =~ m|(@{[ URL_SCHEME_REGEX ]}://@{[ URL_BASE_REGEX ]}+)|io;
    # The URL misses a scheme, try to be smart
    if ($rawtext =~ m|(@{[ URL_GUESS_REGEX ]}\.@{[ URL_BASE_REGEX ]}+)|io) {
        # The match is case-insensitive, so the prefix test must be too;
        # otherwise "WWW.example.com" matches but is dropped.
        my ($preserve, $prefix) = ($1, lc $2);
        return "http://$preserve" if $prefix eq 'www';
        return "ftp://$preserve"  if $prefix eq 'ftp';
    }
    return undef;
}

# Open (creating it if necessary) the lockfile below url_db_basedir and take
# a non-blocking exclusive flock on it. Returns the open filehandle; closing
# it releases the lock. Dies if a setting is missing, the directory is not
# usable, or the lock cannot be taken.
sub aquire_lock {
    my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
        || die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
    my $lockfile = Irssi::settings_get_str(KEY_URL_DB_LOCK_FILENAME)
        || die "missing setting for @{[ KEY_URL_DB_LOCK_FILENAME ]}";

    $db_base .= '/' if $db_base !~ m#/$#;
    $lockfile = "${db_base}${lockfile}";

    die "directory $db_base doesn't exist or isn't readable"
        unless -d $db_base and -r $db_base;

    sysopen(my $fh, $lockfile, O_RDONLY | O_CREAT)
        || die "can't open/create lockfile $lockfile: $!";
    flock($fh, LOCK_EX | LOCK_NB)
        || die "can't exclusively lock $lockfile: $!";
    return $fh;
}

# Tie the two DB_File caches below url_db_basedir and return references to
# them as a list: (\@cache, \%cache). The array keeps the URLs in logging
# order, the hash is used for duplicate detection. Dies on any failure.
sub open_caches {
    my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
        || die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
    my $dbfile_a = Irssi::settings_get_str(KEY_URL_DB_CACHE_A_FILENAME)
        || die "missing setting for @{[ KEY_URL_DB_CACHE_A_FILENAME ]}";
    my $dbfile_h = Irssi::settings_get_str(KEY_URL_DB_CACHE_H_FILENAME)
        || die "missing setting for @{[ KEY_URL_DB_CACHE_H_FILENAME ]}";

    my (@cache, %cache);
    $db_base .= '/' if $db_base !~ m#/$#;
    $dbfile_a = "${db_base}${dbfile_a}";
    $dbfile_h = "${db_base}${dbfile_h}";

    die "directory $db_base doesn't exist or isn't readable"
        unless -d $db_base and -r $db_base;

    tie @cache, 'DB_File', $dbfile_a, O_RDWR | O_CREAT, 0666, $DB_RECNO
        or die "can't tie urlcache db $dbfile_a: $!";
    tie %cache, 'DB_File', $dbfile_h, O_RDWR | O_CREAT, 0666
        or die "can't tie urlcache db $dbfile_h: $!";
    return \(@cache, %cache);
}

# (Re)create the channel logfile $file containing only header and tail.
# An existing file is truncated.
sub create_chan_template {
    my ($full_log, $file, $channel) = @_;
    my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME);
    open(my $fh, ">", $file)
        || die "can't create logfile $file: $!";
    print_chan_log_file_template($fh, $reload, $channel, $full_log);
    print $fh LOG_FILE_TAIL;
    close($fh);
}

# (Re)create the full logfile $file containing only header and tail.
# An existing file is truncated.
sub create_full_template {
    my $file = shift;
    my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME);
    open(my $fh, ">", $file)
        || die "can't create logfile $file: $!";
    print_full_log_file_template($fh, $reload);
    print $fh LOG_FILE_TAIL;
    close($fh);
}

# Create an empty CSV file, or truncate an existing one to zero length.
sub create_csv_file {
    my $file = shift;
    # A lexical handle keeps this from clobbering a global FH.
    open(my $fh, ">", $file)
        || die "can't create $file: $!";
    close $fh;
}

# Append one record to the CSV file $csv_log. The remaining arguments are
# the fields; they are joined with the url_log_csv_separator setting.
sub log_csv {
    my $csv_log = shift;
    my $sep = Irssi::settings_get_str(KEY_URL_LOG_CSV_SEPARATOR);
    my $fields = join $sep, @_;
    open(my $fh, ">>", $csv_log)
        || die "can't open $csv_log: $!";
    print $fh "$fields\n";
    close $fh;
}

# Open the HTML logfile $file read/write and position it at the start of
# the line holding LOG_FILE_MARKER, so the next entry overwrites the old
# tail. Returns the positioned filehandle; dies if the file cannot be
# opened or the marker is not found near its end.
sub position_log_file {
    my $file = shift;
    my $hint = "Conside manual removal of this file";
    sysopen(my $fh, $file, O_RDWR)
        || die "can't open $file: $!";
    my $size = sysseek($fh, 0, 2)
        || die "can't seek to EOF in $file. ${hint}: $!";

    # Every logfile ends with LOG_FILE_TAIL, so the marker lies within the
    # last length(LOG_FILE_TAIL) bytes. Deriving the window from the tail
    # (never less than BACKWARD_SEEK_BYTES) keeps this working when the
    # tail template changes; clamping avoids a negative seek on short files.
    my $window = length(LOG_FILE_TAIL);
    $window = BACKWARD_SEEK_BYTES if $window < BACKWARD_SEEK_BYTES;
    $window = $size if $window > $size;
    my $pos = $size - $window;

    sysseek($fh, $pos, 0)
        || die "can't seek backwards to $pos in $file. ${hint}: $!";
    my $buf = '';
    sysread($fh, $buf, $window)
        || die "can't read rest of $file. ${hint}: $!";

    # Find the last marker in the window, compared case-insensitively.
    my $marker_at = rindex(lc($buf), lc(LOG_FILE_MARKER));
    die "Can't locate @{[ LOG_FILE_MARKER ]} in $file. ${hint}"
        if $marker_at < 0;
    # Back up to the beginning of the marker's line; rindex returns -1 when
    # there is no preceding newline, which gives offset 0.
    my $line_start = rindex($buf, "\n", $marker_at) + 1;

    $pos += $line_start;
    sysseek($fh, $pos, 0)
        || die "Can't seek to $pos in $file. ${hint}: $!";
    # NOTE(review): callers write with buffered print() after this sysseek.
    # No buffered I/O has happened on the handle yet; confirm this mixing is
    # acceptable before adding buffered reads here.
    return $fh;
}

# Write $url, posted by $nick in $channel (or a nick, for private messages),
# to the HTML logfiles and, if enabled, to the CSV logfiles. Logfiles are
# created on demand and reset once they grow beyond the configured maximum.
# Dies on any I/O or configuration problem.
sub log_url {
    my ($nick, $channel, $url) = @_;
    my $log_base = Irssi::settings_get_str(KEY_URL_LOG_BASEDIR)
        || die "missing setting for @{[ KEY_URL_LOG_BASEDIR ]}";
    my $fullfile = Irssi::settings_get_str(KEY_URL_LOG_FILE_NAME)
        || die "missing setting for @{[ KEY_URL_LOG_FILE_NAME ]}";
    my $csvfile = Irssi::settings_get_str(KEY_URL_LOG_CSV_FILE_NAME)
        || die "missing setting for @{[ KEY_URL_LOG_CSV_FILE_NAME ]}";
    my $csv_max = Irssi::settings_get_int(KEY_URL_LOG_CSV_FILE_MAX_SIZE);
    my $csv_logging = Irssi::settings_get_bool(KEY_URL_CSV_LOGGING);
    my $csv_chan_logging = Irssi::settings_get_bool(KEY_URL_CSV_CHAN_LOGGING);
    my $time_fmt = Irssi::settings_get_str(KEY_URL_TIME_FORMAT)
        || die "missing setting for @{[ KEY_URL_TIME_FORMAT ]}";
    my $max = Irssi::settings_get_int(KEY_URL_LOG_FILE_MAX_SIZE);
    my $chan_prefix = Irssi::settings_get_str(KEY_URL_CHAN_PREFIX)
        || die "missing setting for @{[ KEY_URL_CHAN_PREFIX ]}";
    my $chan_logging = Irssi::settings_get_bool(KEY_URL_CHAN_LOGGING);

    my @curr_time = localtime(time());
    $log_base .= '/' if $log_base !~ m#/$#;

    die "directory $log_base doesn't exist or isn't readable"
        unless -d $log_base and -r $log_base;

    # Make channel filename
    my $tmp = POSIX::strftime($chan_prefix, @curr_time);
    my $chan_fname = lc $channel;
    # The channel name comes from the network; a '/' in it would turn the
    # logfile name into a path. Done before the prefix is applied so a '/'
    # in url_chan_prefix is left alone.
    $chan_fname =~ tr{/}{_};
    $chan_fname =~ s/^#/$tmp/;
    my $chan_log = "${log_base}${chan_fname}.html";

    # Make full filename
    $tmp = POSIX::strftime($fullfile, @curr_time);
    my $full_fname = $tmp;
    my $full_log = $log_base . $tmp;

    # Replace spaces in date string to show up as '&nbsp;' to prevent line
    # breaks.
    my $date = POSIX::strftime($time_fmt, @curr_time);
    my $html_date = $date;
    $html_date =~ s/ /&nbsp;/g;

    my $fh;

    # Channel logging
    if ($chan_logging) {
        create_chan_template($full_fname, $chan_log, $channel)
            if not -r $chan_log or ($max > 0 and (stat($chan_log))[7] > $max);
        $fh = position_log_file($chan_log);
        print_chan_log_file_entry($fh, $html_date, $nick, $channel, $url);
        close $fh;
    }

    # Full logging
    create_full_template($full_log)
        if not -r $full_log or ($max > 0 and (stat($full_log))[7] > $max);
    $fh = position_log_file($full_log);
    print_full_log_file_entry($fh, $html_date, $nick, $channel,
                              "${chan_fname}.html", $url);
    close $fh;

    # CSV logging
    if ($csv_logging) {
        $tmp = POSIX::strftime($csvfile, @curr_time);
        my $log = $log_base . $tmp;
        # CSV files are limited by url_log_csv_file_max_size, not by the
        # HTML limit.
        create_csv_file($log)
            if not -r $log or ($csv_max > 0 and (stat($log))[7] > $csv_max);
        log_csv($log, $date, $nick, $channel, $url);
    }

    # CSV channel logging
    if ($csv_chan_logging) {
        my $log = "${log_base}${chan_fname}.csv";
        create_csv_file($log)
            if not -r $log or ($csv_max > 0 and (stat($log))[7] > $csv_max);
        log_csv($log, $date, $nick, $channel, $url);
    }
}

# Prefix a path with the user's home directory (used for setting defaults).
sub mk_home($) {
    my $arg = shift;
    return "$ENV{HOME}/$arg";
}

# Decide whether a URL from $nick in $chan_or_nick may be logged.
# Returns 1 (log), 0 (don't log) or undef if url_policy_default holds
# something other than 'allow' or 'deny' (an error is printed then).
sub logging_permited {
    my ($nick, $chan_or_nick) = @_;
    my $default_policy = Irssi::settings_get_str(KEY_URL_POLICY_DEFAULT)
        || die "missing setting for @{[ KEY_URL_POLICY_DEFAULT ]}";
    my $chans = Irssi::settings_get_str(KEY_URL_POLICY_CHANS);
    my $nicks = Irssi::settings_get_str(KEY_URL_POLICY_NICKS);
    my @policy_chans = split /[,;: ]/, $chans;
    my @policy_nicks = split /[,;: ]/, $nicks;
    my $permit;

    if ($default_policy eq 'deny') {
        # logging must be explicitly permited
        $permit = 0;
        for (@policy_chans) {
            return 1 if $_ eq $chan_or_nick;
        }
        for (@policy_nicks) {
            return 1 if $_ eq $nick;
        }
    } elsif ($default_policy eq 'allow') {
        # logging must be explicitly denied
        $permit = 1;
        for (@policy_chans) {
            return 0 if $_ eq $chan_or_nick;
        }
        for (@policy_nicks) {
            return 0 if $_ eq $nick;
        }
    } else {
        p_error("setting @{[ KEY_URL_POLICY_DEFAULT ]} can be either " .
                "'allow' or 'deny'");
        return undef;
    }
    return $permit;
}

# Run $f->(@args) while holding the exclusive lock. Errors from taking the
# lock or from $f are printed via p_error instead of being propagated.
sub do_locked {
    my $f = shift or die "missing function argument " . caller;
    my $lockf;
    eval { $lockf = aquire_lock() };
    if ($@) {
        p_error("$@");
        return;
    }
    eval { $f->(@_) };
    p_error("$@") if $@;
    eval { close $lockf };
}

# Run $f->($cache_a, $cache_h, @args) with both caches tied and untie them
# afterwards. Errors are printed via p_error instead of being propagated.
sub do_with_caches {
    my $f = shift or die "missing function argument " . caller;
    my ($cache_a, $cache_h) = ();
    eval { ($cache_a, $cache_h) = open_caches() };
    if ($@) {
        p_error("$@");
        eval { untie %$cache_h } if defined $cache_h;
        eval { untie @$cache_a } if defined $cache_a;
        return;
    }
    eval { $f->($cache_a, $cache_h, @_) };
    p_error("$@") if $@;
    eval { untie %$cache_h };
    eval { untie @$cache_a };
}

# Log $url unless it is already in the cache. New URLs are appended to both
# caches; when url_cache_max is exceeded the oldest entry is dropped.
sub url_msg_log {
    my ($cache_a, $cache_h, $nick, $chan_or_nick, $url) = @_;
    my ($cache_size, $tmp);
    my $max_cache = Irssi::settings_get_int(KEY_URL_CACHE_MAX);

    unless (exists $cache_h->{$url}) {
        $cache_size = scalar(@$cache_a) + 1;
        $cache_h->{$url} = '1';
        # push the URL to the end of the file seems to work better on
        # some systems in contrast to unshift.
        push @$cache_a, $url;
        if ($max_cache > 0 && $cache_size > $max_cache) {
            $tmp = shift @$cache_a;
            delete $cache_h->{$tmp};
        }
        log_url($nick, $chan_or_nick, $url);
    }
}

# Handler for 'message topic': reorders its arguments and hands the topic
# text to url_message.
sub url_topic {
    my ($server, $channel, $topic, $nick, $hostmask) = @_;
    url_message($server, $topic, $nick, $hostmask, $channel);
}

# Handler for 'message public' and 'message private': scan the text for a
# URL and, if the policy permits it, log it under lock. When $channel is
# undefined the server's own nick is used in its place.
sub url_message {
    my ($server, $rawtext, $nick, $hostmask, $channel) = @_;
    my ($url, $permit, $chan_or_nick);

    if (defined($url = scan_url($rawtext))) {
        $chan_or_nick = defined $channel ? $channel : $server->{nick};
        if (defined($permit = logging_permited($nick, $chan_or_nick)) && $permit) {
            do_locked(\&do_with_caches, \&url_msg_log, $nick, $chan_or_nick, $url);
        }
    }
}

# /url -list: print all cached URLs with their zero-based index.
sub url_cmd_show {
    my ($cache_a, $cache_h) = @_;
    my $n = 0;
    p_normal("urlplot: total of " . scalar(@$cache_a) . " URLs");
    foreach my $url (@$cache_a) {
        p_normal(sprintf("%02d - %s", $n++, $url));
    }
}

# /url -clearcache: empty both caches.
sub url_cmd_clearcaches {
    my ($cache_a, $cache_h) = @_;
    @$cache_a = ();
    %$cache_h = ();
}

# Substitute $url for every __URL__ in the url_command setting and run the
# result through the shell. Dies if there is no URL, no command, or no
# placeholder in the command.
sub url_cmd_real_navigate {
    my ($url) = @_;
    die 'no URLs captured so far' unless $url;
    my $url_cmd = Irssi::settings_get_str(KEY_URL_COMMAND)
        || die "missing setting for @{[ KEY_URL_COMMAND ]}";
    unless ($url_cmd =~ s/__URL__/$url/g) {
        die "setting url_cmd doesn't contain an URL placeholder '__URL__'";
    }
    # SECURITY: the command line is interpreted by the shell and $url comes
    # from IRC. URL_BASE_REGEX admits ';', '&', '<' and '>' but no quotes,
    # so this is only safe while __URL__ is quoted in url_command (as in
    # the default). Do not remove those quotes.
    system($url_cmd);
}

# /url [n]: open the n-th cached URL (zero-based); without n, the most
# recently logged one.
sub url_cmd_navigate {
    my ($cache_a, $cache_h, $n) = @_;
    my $len = scalar @$cache_a;
    # Checked first so an empty cache gets the specific message rather than
    # "I've only 0".
    die 'no URLs captured so far' unless $len > 0;
    $n = $len - 1 unless defined $n;
    die "no such URL; I've only $len" unless $n < $len;
    my $url = $cache_a->[$n];
    die 'no URLs captured so far' unless $url;
    url_cmd_real_navigate($url);
}

# Dispatcher for the /url command; $data holds the command arguments.
sub url_command {
    my ($data, $server, $witem) = @_;
    # Match on a private copy instead of assigning to the global $_.
    my $args = defined $data ? $data : '';
    if ($args =~ /^-list/) {
        do_locked(\&do_with_caches, \&url_cmd_show);
    } elsif ($args =~ /^-clearcache/) {
        do_locked(\&do_with_caches, \&url_cmd_clearcaches);
    } elsif ($args =~ /^-showlog/) {
        # Report failures the same way as the other sub-commands.
        eval {
            my $nav_url = Irssi::settings_get_str(KEY_URL_NAVIGATE)
                || die "missing setting for @{[ KEY_URL_NAVIGATE ]}";
            url_cmd_real_navigate($nav_url);
        };
        p_error("$@") if $@;
    } else {
        my $n;
        if ($args =~ /^(\d+)/) {
            # \d+ cannot produce a negative number, so no sign check.
            $n = $1;
        } elsif ($args =~ /^$/) {
            $n = undef;
        } else {
            p_error("usage for /url [-list|-showlog|-clearcache|<digit>]");
            return;
        }
        do_locked(\&do_with_caches, \&url_cmd_navigate, $n);
    }
}

Irssi::signal_add_last('message public', 'url_message');
Irssi::signal_add_last('message private', 'url_message');
# Remaining hooks: topic changes are scanned like messages, and /url is the
# user-facing command.
Irssi::signal_add_last('message topic', 'url_topic');
Irssi::command_bind('url', 'url_command');

# Register every setting in section 'misc' with its default value. Each row
# is [ type, key, default ]; the type selects the Irssi registration
# function. Rows are processed in the order written.
{
    my %register = (
        'str'  => \&Irssi::settings_add_str,
        'int'  => \&Irssi::settings_add_int,
        'bool' => \&Irssi::settings_add_bool,
    );

    my @settings = (
        [ 'str',  KEY_URL_COMMAND,                  DEF_URL_COMMAND ],
        [ 'int',  KEY_URL_CACHE_MAX,                DEF_URL_CACHE_MAX ],
        [ 'str',  KEY_URL_LOG_BASEDIR,              mk_home(DEF_URL_LOG_BASEDIR) ],
        [ 'str',  KEY_URL_LOG_FILE_NAME,            DEF_URL_LOG_FILE_NAME ],
        [ 'str',  KEY_URL_CHAN_PREFIX,              DEF_URL_CHAN_PREFIX ],
        [ 'bool', KEY_URL_CHAN_LOGGING,             DEF_URL_CHAN_LOGGING ],
        [ 'str',  KEY_URL_LOG_CSV_FILE_NAME,        DEF_URL_LOG_CSV_FILE_NAME ],
        [ 'int',  KEY_URL_LOG_CSV_FILE_MAX_SIZE,    DEF_URL_LOG_CSV_FILE_MAX_SIZE ],
        [ 'str',  KEY_URL_LOG_CSV_SEPARATOR,        DEF_URL_LOG_CSV_SEPARATOR ],
        [ 'bool', KEY_URL_CSV_LOGGING,              DEF_URL_CSV_LOGGING ],
        [ 'bool', KEY_URL_CSV_CHAN_LOGGING,         DEF_URL_CSV_CHAN_LOGGING ],
        [ 'str',  KEY_URL_TIME_FORMAT,              DEF_URL_TIME_FORMAT ],
        [ 'int',  KEY_URL_LOG_FILE_MAX_SIZE,        DEF_URL_LOG_FILE_MAX_SIZE ],
        [ 'int',  KEY_URL_LOG_FILE_AUTORELOAD_TIME, DEF_URL_LOG_FILE_AUTORELOAD_TIME ],
        [ 'str',  KEY_URL_DB_BASEDIR,               mk_home(DEF_URL_DB_BASEDIR) ],
        [ 'str',  KEY_URL_DB_CACHE_A_FILENAME,      DEF_URL_DB_CACHE_A_FILENAME ],
        [ 'str',  KEY_URL_DB_CACHE_H_FILENAME,      DEF_URL_DB_CACHE_H_FILENAME ],
        [ 'str',  KEY_URL_DB_LOCK_FILENAME,         DEF_URL_DB_LOCK_FILENAME ],
        [ 'str',  KEY_URL_POLICY_DEFAULT,           DEF_URL_POLICY_DEFAULT ],
        [ 'str',  KEY_URL_POLICY_CHANS,             DEF_URL_POLICY_CHANS ],
        [ 'str',  KEY_URL_POLICY_NICKS,             DEF_URL_POLICY_NICKS ],
        [ 'str',  KEY_URL_NAVIGATE,                 'file://' . mk_home(DEF_URL_NAVIGATE) ],
    );

    foreach my $row (@settings) {
        my ($type, $key, $default) = @$row;
        $register{$type}->('misc', $key, $default);
    }
}

#
# $Log$
#