summaryrefslogtreecommitdiffstats
path: root/scripts/urlplot.pl
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/urlplot.pl')
-rw-r--r--scripts/urlplot.pl841
1 files changed, 841 insertions, 0 deletions
diff --git a/scripts/urlplot.pl b/scripts/urlplot.pl
new file mode 100644
index 0000000..a56d124
--- /dev/null
+++ b/scripts/urlplot.pl
@@ -0,0 +1,841 @@
+use strict;
+#use warnings; # Not a default module in perl 5.005
+
+use vars qw($VERSION %IRSSI);
+
+$VERSION = '1.2';
+%IRSSI = (
+ authors => 'bwolf',
+ contact => 'bwolf@geekmind.org',
+ name => 'urlplot',
+ description => 'URL grabber with HTML generation and cmd execution',
+ license => 'BSD',
+ url => 'http://www.geekmind.net',
+ changed => 'Sun Jun 16 14:00:13 CEST 2002'
+);
+
+# To read the documentation you may use one of the following commands:
+#
+# pod2man urlplot.pl | nroff -man | more
+# pod2text urlplot.pl | more
+# pod2man urlplot.pl | troff -man -Tps -t > urlplot.ps
+
+=head1 NAME
+
+urlplot
+
+=head1 SYNOPSIS
+
+All URL loggers suck. This one just sucks less.
+
+=head1 DESCRIPTION
+
+urlplot watches your channels for URLs and creates nice HTML logfiles of it.
+Actually it parses normal text and topic changes for URLs. Internally it uses
+two caches to prevent flooding and logging of duplicate URLs. As an additional
+feature urlplot can create CSV datafiles. Logfiles can be created for all
+channels and for separate channels. Logging can be allowed and denied on a per
+channel/nick basis. A lockfile is used to protect the caches and logfiles from
+accessing them by multiple irssi instances. A command allows you to send a
+logged URL to your webbrowser of choice.
+
+The format of the CSV logfiles is as follows:
+date nick channel url
+
+=head1 GETTING STARTED
+
+Copy urlplot.pl intoF< $HOME/.irssi/scripts> and create the necessary
+directories withC< mkdir -p>F< $HOME/.irssi/urlplot/urls>.
+Look for the settingsC< url_log_basedir> andC< url_db_basedir> if you want to
+change the directories urlplot will populate with files.
+Follow the documentation and configure urlplot to fit your needs.
+
+=head1 COMMANDS
+
+=head2 /url <integer>
+
+Executes the commandC< url_command> with an URL from the cache as its
+argument. If no number has been specified it defaults to nth URL logged which
+references the most recently logged URL.
+
+=head2 /url -list
+
+Displays a list of all logged URLs.
+
+=head2 /url -clearcache
+
+Clears the cache databases.
+
+=head /url -showlog
+
+ExecutesC< url_command> withC< url_navigate> as its argument. It can be used
+to display the main logfile in your favourite webbrowser.
+
+=head1 SETTINGS
+
+=head2 Pathnames
+
+Please note that you can't use $HOME or any environment variables in the
+settings because irssi/urlplot isn't a shell ;)
+
+=head2 /set url_command <string>
+
+Command to be executed to display an URL (see /url). The command string should
+contain the sequence C<__URL__> which will be replaced by a certain URL.
+
+The default is:
+C< mozilla -remote "openURL(__URL__)" E<gt> /dev/null 2E<gt>&1 || \ >
+C< mozilla "__URL__"& >
+
+This will send a certain URL to mozilla or it will start mozilla if it is not
+already there. The string can be anything. For example I use the following:
+C< ssh host /home/user/bin/mozopenurl "'__URL__'" >/dev/null 2>&1 &>
+where mozopenurl is a shell script that contains similar code as the mozilla
+-remote example above.
+
+=head2 /set url_cache_max <integer>
+
+Specifies the maximum count of items which will be held in the persisten URL
+caches. A value of zero disables automatic cache resizing (round-robbin). The
+default is to keep the last 90 URLs.
+
+=head2 /set url_log_basedir <path>
+
+Specifies the logging base directory used to create the log files beneath it.
+The default isF< $HOME/.irssi/urlplot/urls/>. You have to create directories
+by yourself:C< mkdir -p>F< $HOME/.irssi/urlplot/urls>.
+
+=head2 /set url_log_file_name <relative-filename>
+
+Defines the filename of the full logfile. It will be passed to I<
+strftime(3)>. This can be usefull to create logfiles with a timestamp.
+The file will be created relative toC< url_log_basedir>. The default
+isF< ircurls.html>.
+
+=head2 /set url_chan_prefix <string>
+
+Defines the filename prefix for channel logfiles. The leadingC< # >of the
+channel name will be replaced by this prefix. It will be passed to
+I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The
+default isF< chan_>.
+
+=head2 /set url_chan_logging <bool>
+
+Enables or disable channel logging globally.
+The default isC< ON>.
+
+=head2 /set url_log_csv_file_name <relative-filename>
+
+Defines the filename of the full CSV logfile. It will be passed to
+I<strftime(3)>. The file will be created relative toC< url_log_basedir>. The
+default isF< ircurls.csv>.
+
+=head2 /set url_log_csv_file_max_size <integer>
+
+Defines the maximum size of the full CSV logfile. If it reaches the specified
+maximum size in bytes it will be simply resized to zero. The default isC< 30*1024>
+bytes.
+
+=head2 /set url_log_csv_separator <string>
+
+Defines the separator used as a delimeter for the fields of the CSV files.
+The default isC< |>.
+
+=head2 /set url_csv_logging <bool>
+
+Conditionally turns on or off CSV logging for the full logfile. The default
+isC< OFF>.
+
+=head2 /set url_csv_chan_logging <bool>
+
+Conditionally turns on or off CSV logging of the channel logfiles. The default isC< OFF>.
+
+=head2 /set url_time_format <string>
+
+Specifies the time format that will be passed toI< strftime(3)> to produce an
+ASCII representation of the time/date when an URL was grabbed. It will be used
+in the logfiles. The default isC< %Y:%m:%d - %H:%M:%S>.
+
+=head2 /set url_log_file_max_size <integer>
+
+Defines the maximum size of the full logfile and the channel logfile. If it
+reaches the specified maximum size in bytes it will be simply resized to zero.
+The default isC< 30*1024> bytes.
+
+=head2 /set url_log_file_autoreload_time <integer>
+
+Intervall in seconds used for the HTML logfile header. The logfile reloads
+itself every N seconds. The default isC< 90> seconds.
+
+=head2 /set url_db_basedir <path>
+
+Specifies the database base directory where two database files and a lockfile
+will be created. The default isF< $HOME/.irssi/urlplot>. You have to create
+the directory by yourself.
+
+=head2 /set url_db_cache_a_filename <relative-filename>
+
+Defines the filename of the index URL database. The file will be created
+relative toC< url_db_basedir>. The default isF< a_cache>.
+
+=head2 /set url_db_cache_h_filename <relative-filename>
+
+Defines the filename of the hash URL database. The file will be created
+relative toC< url_db_basedir>. The default isF< h_cache>.
+
+=head2 /set url_db_lock_filename <relative-filename>
+
+Defines the filename of the lockfile used to lock all logfiles and the cache
+databases. It will be created relative toC< url_db_basedir>. The default
+isF< lockfile>.
+
+=head2 /set url_policy_default <allow|deny>
+
+Specifies the default policy that will be used to decide if logging ist
+permitted for a certain nick or channel. This can be eitherC< allow>
+orC< deny>. If you set this toC< deny> you will have to allow explicitly those
+channels and nicks for which logging should be permitted. In contrast if you
+set it to allow, you can deny logging for certain nicks and channels.
+The keysC< url_policy_chans> andC< url_policy_nicks> control the allow, deny
+behaviour depending onC< url_policy_default>. The default isC< allow> which
+permits logging of all channels and nicks.
+
+=head2 /set url_policy_chans <string>
+
+Specifies those channels for whoom logging is permitted or denied. Multiple
+channels may be specified by usingC< ,>C< ;>C< :> or a space to separate the
+items.
+
+=head2 /set url_policy_nicks <string>
+
+SeeC< url_policy_chans> and replace the word channel by nick.
+
+=head2 /set url_navigate <string>
+
+ExecutesC< url_command> withC< url_navigate> as its argument. It can be used
+to display the main logfile in your favourite webbrowser. Because you may pass
+this command at anytime to your webbrowser it will not be passed to strftime.
+Thus you can only specify a static file here.
+
+=head1 AUTHOR
+
+Marcus Geiger <bwolf@geekmind.org>
+
+=cut
+
+use integer;
+use Irssi;
+use POSIX qw(strftime);
+use Fcntl qw(:DEFAULT :flock);
+use DB_File;
+
+# Regexps
+sub URL_SCHEME_REGEX() { '(http|ftp|https|news|irc)' }
+sub URL_GUESS_REGEX() { '(www|ftp)' }
+sub URL_BASE_REGEX() { '[a-z0-9_\-+\\/:?%.&!~;,=\#<>]' }
+
+# Other
+sub BACKWARD_SEEK_BYTES() { 130 }
+sub LOG_FILE_MARKER() { '<!-- bottom-line -->' }
+
+# Keys for settings
+sub KEY_URL_COMMAND() { 'url_command' }
+sub KEY_URL_CACHE_MAX() { 'url_cache_max' }
+sub KEY_URL_LOG_BASEDIR() { 'url_log_basedir' }
+sub KEY_URL_LOG_FILE_NAME() { 'url_log_file_name' }
+sub KEY_URL_CHAN_PREFIX() { 'url_chan_prefix' }
+sub KEY_URL_CHAN_LOGGING() { 'url_chan_logging' }
+sub KEY_URL_LOG_CSV_FILE_NAME() { 'url_log_csv_file_name' }
+sub KEY_URL_LOG_CSV_FILE_MAX_SIZE() { 'url_log_csv_file_max_size' }
+sub KEY_URL_LOG_CSV_SEPARATOR() { 'url_log_csv_separator' }
+sub KEY_URL_CSV_LOGGING() { 'url_csv_logging' }
+sub KEY_URL_CSV_CHAN_LOGGING() { 'url_csv_chan_logging' }
+sub KEY_URL_TIME_FORMAT() { 'url_time_format' }
+sub KEY_URL_LOG_FILE_MAX_SIZE() { 'url_log_file_max_size' }
+sub KEY_URL_LOG_FILE_AUTORELOAD_TIME() { 'url_log_file_autoreload_time' }
+sub KEY_URL_DB_BASEDIR() { 'url_db_basedir' }
+sub KEY_URL_DB_CACHE_A_FILENAME() { 'url_db_cache_a_filename' }
+sub KEY_URL_DB_CACHE_H_FILENAME() { 'url_db_cache_h_filename' }
+sub KEY_URL_DB_LOCK_FILENAME() { 'url_db_lock_filename' }
+sub KEY_URL_POLICY_DEFAULT() { 'url_policy_default' }
+sub KEY_URL_POLICY_CHANS() { 'url_policy_chans' }
+sub KEY_URL_POLICY_NICKS() { 'url_policy_nicks' }
+sub KEY_URL_NAVIGATE() { 'url_navigate' }
+
+# Defaults
+sub DEF_URL_COMMAND() {
+ 'mozilla -remote "openURL(__URL__)" > /dev/null 2>&1 || mozilla "__URL__"&' }
+sub DEF_URL_CACHE_MAX() { 90 }
+sub DEF_URL_LOG_FILE_AUTORELOAD_TIME() { 120 }
+sub DEF_URL_TIME_FORMAT() { '%Y:%m:%d - %H:%M:%S' }
+sub DEF_URL_DO_FILE_RESIZE() { '0' }
+sub DEF_URL_LOG_FILE_MAX_SIZE() { 1024 * 30 }
+sub DEF_URL_LOG_BASEDIR() { '.irssi/urlplot/urls/' }
+sub DEF_URL_LOG_FILE_NAME() { 'ircurls.html' }
+sub DEF_URL_CHAN_PREFIX() { 'chan_' }
+sub DEF_URL_CHAN_LOGGING() { '1' }
+sub DEF_URL_LOG_CSV_FILE_NAME() { 'ircurls.csv' }
+sub DEF_URL_LOG_CSV_FILE_MAX_SIZE() { 1024 * 30 }
+sub DEF_URL_LOG_CSV_SEPARATOR() { '|' }
+sub DEF_URL_CSV_LOGGING() { '' }
+sub DEF_URL_CSV_CHAN_LOGGING() { '' }
+sub DEF_URL_DB_BASEDIR() { '.irssi/urlplot/' }
+sub DEF_URL_DB_CACHE_A_FILENAME() { 'a_cache' }
+sub DEF_URL_DB_CACHE_H_FILENAME() { 'h_cache' }
+sub DEF_URL_DB_LOCK_FILENAME() { 'lockfile' }
+sub DEF_URL_POLICY_DEFAULT() { 'allow' }
+sub DEF_URL_POLICY_CHANS() { '' }
+sub DEF_URL_POLICY_NICKS() { '' }
+sub DEF_URL_NAVIGATE() { '.irssi/urlplot/urls/ircurls.html' }
+
+sub print_full_log_file_template {
+ my ($fh, $reload) = @_;
+ print $fh <<EOT;
+<?xml version="1.0" encoding="iso-8859-1"?>
+ <!DOCTYPE html
+ PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+ <head>
+ <title>IRC-URLs</title>
+ <meta http-equiv="cache-control" content="no-cache" />
+ <meta http-equiv="refresh" content="$reload;" />
+ <style type="text/css">
+ <!--
+ .small { font-size: small; }
+ .xsmall { font-size: x-small; }
+ -->
+ </style>
+ </head>
+ <body>
+ <h1>IRC-URLs</h1>
+ <p class="xsmall">
+ Visit <a href="http://www.geekmind.net">geekmind.net</a>
+ </p>
+ <p>This page reloads itself every $reload seconds.</p>
+ <p>
+ <a name="top" />
+ <a class="small" href="#bottom">Page bottom</a>
+ <br />
+ <br />
+ </p>
+ <table rules="rows" frame="void" width="100%" cellpadding="5">
+ <tr align="left">
+ <th><b>Date/Time</b></th>
+ <th><b>Nick</b></th>
+ <th><b>Channel/Nick</b></th>
+ <th><b>URL</b></th>
+ </tr>
+EOT
+}
+
+sub print_chan_log_file_template {
+ my ($fh, $reload, $channel, $full_log) = @_;
+ print $fh <<EOT;
+<?xml version="1.0" encoding="iso-8859-1"?>
+ <!DOCTYPE html
+ PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+ "DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+ <head>
+ <title>IRC-URLs of $channel</title>
+ <meta http-equiv="cache-control" content="no-cache" />
+ <meta http-equiv="refresh" content="$reload;" />
+ <style type="text/css">
+ <!--
+ .small { font-size: small; }
+ .xsmall { font-size: x-small; }
+ -->
+ </style>
+ </head>
+ <body>
+ <h1>IRC-URLs of $channel</h1>
+ <p class="xsmall">
+ Visit <a href="http://www.geekmind.net">geekmind.net</a>
+ </p>
+ <p>This page reloads itself every $reload seconds.</p>
+ <p><a href="$full_log">Complete</a> listing.</p>
+ <p>
+ <a name="top" />
+ <a class="small" href="#bottom">Page bottom</a>
+ <br />
+ <br />
+ </p>
+ <table rules="rows" frame="void" width="100%" cellpadding="5">
+ <tr align="left">
+ <th><b>Date/Time</b></th>
+ <th><b>Nick</b></th>
+ <th><b>URL</b></th>
+ </tr>
+EOT
+}
+
+sub LOG_FILE_TAIL () {
+ return <<"EOT";
+
+ @{[ LOG_FILE_MARKER ]}
+ </table>
+ <p>
+ <a class="small" href="#top">Page top</a>
+ <a name="bottom" />
+ </p>
+ </body>
+</html>
+EOT
+}
+
+sub print_chan_log_file_entry {
+ my ($fh, $date, $nick, $channel, $url) = @_;
+ print $fh <<EOURL;
+ <tr>
+ <td>$date</td>
+ <td><em>$nick</em></td>
+ <td><a href=\"$url\">$url</a></td>
+ </tr>
+EOURL
+ print $fh LOG_FILE_TAIL;
+};
+
+sub print_full_log_file_entry {
+ my ($fh, $date, $nick, $channel, $chan_log, $url) = @_;
+ print $fh <<EOURL;
+ <tr>
+ <td>$date</td>
+ <td><em>$nick</em></td>
+ <td><a href="$chan_log">$channel</a></td>
+ <td><a href=\"$url\">$url</a></td>
+ </tr>
+EOURL
+ print $fh LOG_FILE_TAIL;
+}
+
+sub p_error { # Error printing (directly to the current window)
+ Irssi::print("urlplot: @_");
+}
+
+sub p_normal { # Normal printing (to the msg window)
+ Irssi::print("@_", MSGLEVEL_MSGS+MSGLEVEL_NOHILIGHT);
+}
+
+sub scan_url {
+ my $rawtext = shift;
+ return $1 if $rawtext =~ m|(@{[ URL_SCHEME_REGEX ]}://@{[ URL_BASE_REGEX ]}+)|io;
+ # The URL misses a scheme, try to be smart
+ if ($rawtext =~ m|@{[ URL_GUESS_REGEX ]}\.@{[ URL_BASE_REGEX ]}+|io) {
+ my $preserve = $&;
+ return "http://$preserve" if $1 =~ /^www/;
+ return "ftp://$preserve" if $1 =~ /^ftp/;
+ }
+ return undef;
+}
+
+sub aquire_lock {
+ my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
+ || die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
+ my $lockfile = Irssi::settings_get_str(KEY_URL_DB_LOCK_FILENAME)
+ || die "missing setting for @{[ KEY_URL_DB_LOCK_FILENAME ]}";
+
+ local *LOCK_F;
+ my $fh;
+ $db_base .= '/' if $db_base !~ m#/$#;
+ $lockfile = "${db_base}${lockfile}";
+
+ die "directory $db_base doesn't exist or isn't readable"
+ unless -d $db_base and -r $db_base;
+
+ sysopen(LOCK_F, $lockfile, O_RDONLY | O_CREAT)
+ || die "can't open/create lockfile $lockfile: $!";
+ flock(LOCK_F, LOCK_EX | LOCK_NB)
+ || die "can't exclusively lock $lockfile: $!";
+ # Can't pass back localized typeglob reference
+ $fh = *LOCK_F;
+ return $fh;
+}
+
+sub open_caches {
+ my $db_base = Irssi::settings_get_str(KEY_URL_DB_BASEDIR)
+ || die "missing setting for @{[ KEY_URL_DB_BASEDIR ]}";
+ my $dbfile_a = Irssi::settings_get_str(KEY_URL_DB_CACHE_A_FILENAME)
+ || die "missing setting for @{[ KEY_URL_DB_CACHE_A_FILENAME ]}";
+ my $dbfile_h = Irssi::settings_get_str(KEY_URL_DB_CACHE_H_FILENAME)
+ || die "missing setting for @{[ KEY_URL_DB_CACHE_H_FILENAME ]}";
+
+ my (@cache, %cache);
+ $db_base .= '/' if $db_base !~ m#/$#;
+ $dbfile_a = "${db_base}${dbfile_a}";
+ $dbfile_h = "${db_base}${dbfile_h}";
+
+ die "directory $db_base doesn't exist or isn't readable"
+ unless -d $db_base and -r $db_base;
+
+ tie @cache, 'DB_File', $dbfile_a, O_RDWR | O_CREAT, 0666, $DB_RECNO
+ or die "can't tie urlcache db $dbfile_a: $!";
+ tie %cache, 'DB_File', $dbfile_h, O_RDWR | O_CREAT, 0666
+ or die "can't tie urlcache db $dbfile_h: $!";
+ return \(@cache, %cache);
+}
+
+sub create_chan_template {
+ my ($full_log, $file, $channel) = @_;
+ my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME);
+ local *FH;
+ open(FH, ">", $file)
+ || die "can't create logfile $file: $!";
+ print_chan_log_file_template(\*FH, $reload, $channel, $full_log);
+ print FH LOG_FILE_TAIL;
+ close(FH);
+}
+
+sub create_full_template {
+ my $file = shift;
+ my $reload = Irssi::settings_get_int(KEY_URL_LOG_FILE_AUTORELOAD_TIME);
+ local *FH;
+ open(FH, ">", $file)
+ || die "can't create logfile $file: $!";
+ print_full_log_file_template(\*FH, $reload);
+ print FH LOG_FILE_TAIL;
+ close(FH);
+}
+
+sub create_csv_file {
+ my $file = shift;
+ open(FH, ">", $file)
+ || die "can't create $file: $!";
+ close FH;
+}
+
+sub log_csv {
+ my $csv_log = shift;
+ my $sep = Irssi::settings_get_str(KEY_URL_LOG_CSV_SEPARATOR);
+ my $fields = join $sep, @_;
+ local *FH;
+ open(FH, ">>", $csv_log)
+ || die "can't open $csv_log: $!";
+ print FH "$fields\n";
+ close FH;
+}
+
+sub position_log_file {
+ my $file = shift;
+ my ($fh, $pos, $buf, @lines, $off, $got_it);
+ local *FH;
+ my $hint = "Conside manual removal of this file";
+ sysopen(FH, $file, O_RDWR)
+ || die "can't open $file: $!";
+ $pos = sysseek(FH, 0, 2)
+ || die "can't seek to EOF in $file. ${hint}: $!";
+ $pos -= BACKWARD_SEEK_BYTES;
+ sysseek(FH, $pos, 0)
+ || die "can't seek backwards to $pos in $file. ${hint}: $!";
+ sysread(FH, $buf, 2048)
+ || die "can't read rest of $file. ${hint}: $!";
+ $off = 0;
+ @lines = split /\n/, $buf;
+ for (@lines) {
+ $off += length;
+ $off += 1;
+ chomp;
+ next if /^$/;
+ if (/@{[ LOG_FILE_MARKER ]}/io) {
+ $got_it = 1;
+ $off -= length;
+ $off -= 1;
+ last;
+ }
+ }
+ die "Can't locate @{[ LOG_FILE_MARKER ]} in $file. ${hint}"
+ unless $got_it;
+ $pos += $off;
+ sysseek(FH, $pos, 0)
+ || die "Can't seek to $pos in $file. ${hint}: $!";
+ # Can't pass back localized typeglob reference
+ $fh = *FH;
+ return $fh;
+}
+
+sub log_url {
+ my ($nick, $channel, $url) = @_;
+ my $log_base = Irssi::settings_get_str(KEY_URL_LOG_BASEDIR)
+ || die "missing setting for @{[ KEY_URL_LOG_BASEDIR ]}";
+ my $fullfile = Irssi::settings_get_str(KEY_URL_LOG_FILE_NAME)
+ || die "missing setting for @{[ KEY_URL_LOG_FILE_NAME ]}";
+ my $csvfile = Irssi::settings_get_str(KEY_URL_LOG_CSV_FILE_NAME)
+ || die "missing setting for @{[ KEY_URL_LOG_CSV_FILE_NAME ]}";
+ my $csv_max = Irssi::settings_get_int(KEY_URL_LOG_CSV_FILE_MAX_SIZE);
+ my $csv_logging = Irssi::settings_get_bool(KEY_URL_CSV_LOGGING);
+ my $csv_chan_logging = Irssi::settings_get_bool(KEY_URL_CSV_CHAN_LOGGING);
+ my $time_fmt = Irssi::settings_get_str(KEY_URL_TIME_FORMAT)
+ || die "missing setting for @{[ KEY_URL_TIME_FORMAT ]}";
+ my $max = Irssi::settings_get_int(KEY_URL_LOG_FILE_MAX_SIZE);
+ my $chan_prefix = Irssi::settings_get_str(KEY_URL_CHAN_PREFIX)
+ || die "missing setting for @{[ KEY_URL_CHAN_PREFIX ]}";
+ my $chan_logging = Irssi::settings_get_bool(KEY_URL_CHAN_LOGGING);
+
+ my @curr_time = localtime(time());
+ $log_base .= '/' if $log_base !~ m#/$#;
+
+ die "directory $log_base doesn't exist or isn't readable"
+ unless -d $log_base and -r $log_base;
+
+ # Make channel filename
+ my $tmp = POSIX::strftime($chan_prefix, @curr_time);
+ my $chan_fname = lc $channel;
+ $chan_fname =~ s/^#/$tmp/;
+ my $chan_log = "${log_base}${chan_fname}.html";
+
+ # Make full filename
+ $tmp = POSIX::strftime($fullfile, @curr_time);
+ my $full_fname = $tmp;
+ my $full_log = $log_base . $tmp;
+
+ # Replace spaces in date string to show up as '&#160;' to prevent line
+ # breaks.
+ my $date = POSIX::strftime($time_fmt, @curr_time);
+ my $html_date = $date;
+ $html_date =~ s/ /\&#160;/g;
+
+ my $fh;
+
+ # Channel logging
+ if ($chan_logging) {
+ create_chan_template $full_fname, $chan_log, $channel
+ if not -r $chan_log or ($max > 0 and (stat($chan_log))[7] > $max);
+ $fh = undef;
+ $fh = position_log_file $chan_log;
+ print_chan_log_file_entry($fh, $html_date, $nick, $channel, $url);
+ close $fh;
+ }
+
+ # Full logging
+ create_full_template $full_log
+ if not -r $full_log or ($max > 0 and (stat($full_log))[7] > $max);
+ $fh = undef;
+ $fh = position_log_file $full_log;
+ print_full_log_file_entry($fh, $html_date, $nick, $channel,
+ "${chan_fname}.html", $url);
+ close $fh;
+
+ # CSV logging
+ if ($csv_logging) {
+ $tmp = POSIX::strftime($csvfile, @curr_time);
+ my $log = $log_base . $tmp;
+ create_csv_file $log
+ if not -r $log or ($csv_max > 0 and (stat($log))[7] > $max);
+ log_csv($log, $date, $nick, $channel, $url);
+ }
+
+ # CSV channel logging
+ if ($csv_chan_logging) {
+ my $log = "${log_base}${chan_fname}.csv";
+ create_csv_file $log
+ if not -r $log or ($csv_max > 0 and (stat($log))[7] > $max);
+ log_csv($log, $date, $nick, $channel, $url);
+ }
+}
+
+sub mk_home($) {
+ my $arg = shift;
+ return "$ENV{HOME}/$arg";
+}
+
+sub logging_permited {
+ my ($nick, $chan_or_nick) = @_;
+ my $default_policy = Irssi::settings_get_str(KEY_URL_POLICY_DEFAULT)
+ || die "missing setting for @{[ KEY_URL_POLICY_DEFAULT ]}";
+ my $chans = Irssi::settings_get_str(KEY_URL_POLICY_CHANS);
+ my $nicks = Irssi::settings_get_str(KEY_URL_POLICY_NICKS);
+ my @policy_chans = split /[,;: ]/, $chans;
+ my @policy_nicks = split /[,;: ]/, $nicks;
+ my $permit;
+
+ if ($default_policy eq 'deny') {
+ # logging must be explicitly permited
+ $permit = 0;
+ for (@policy_chans) {
+ return 1 if $_ eq $chan_or_nick;
+ }
+ for (@policy_nicks) {
+ return 1 if $_ eq $nick;
+ }
+ } elsif ($default_policy eq 'allow') {
+ # logging must be explicitly denied
+ $permit = 1;
+ for (@policy_chans) {
+ return 0 if $_ eq $chan_or_nick;
+ }
+ for (@policy_nicks) {
+ return 0 if $_ eq $nick;
+ }
+ } else {
+ p_error("setting @{[ KEY_URL_POLICY_DEFAULT ]} can be either " .
+ "'allow' or 'deny'");
+ return undef;
+ }
+ return $permit;
+}
+
+sub do_locked {
+ my $f = shift or die "missing function argument " . caller;
+ my $lockf;
+ eval { $lockf = aquire_lock };
+ if ($@) {
+ p_error("$@");
+ return;
+ }
+ eval { $f->(@_) };
+ p_error("$@") if $@;
+ eval { close $lockf };
+}
+
+sub do_with_caches {
+ my $f = shift or die "missing function argument " . caller;
+ my ($cache_a, $cache_h) = ();
+ eval { ($cache_a, $cache_h) = open_caches };
+ if ($@) {
+ p_error("$@");
+ eval { untie %$cache_h } if defined $cache_h;
+ eval { untie @$cache_a } if defined $cache_a;
+ return;
+ }
+ eval { $f->($cache_a, $cache_h, @_) };
+ p_error("$@") if $@;
+ eval { untie %$cache_h };
+ eval { untie @$cache_a };
+}
+
+sub url_msg_log {
+ my ($cache_a, $cache_h, $nick, $chan_or_nick, $url) = @_;
+ my ($cache_size, $tmp);
+ my $max_cache = Irssi::settings_get_int(KEY_URL_CACHE_MAX);
+
+ unless (exists $cache_h->{$url}) {
+ $cache_size = scalar(@$cache_a) + 1;
+ $cache_h->{$url} = '1';
+ # push the URL to the end of the file seems to work better on
+ # some systems in contrast to unshift.
+ push @$cache_a, $url;
+ if ($max_cache > 0 && $cache_size > $max_cache) {
+ $tmp = shift @$cache_a;
+ delete $cache_h->{$tmp};
+ }
+ log_url($nick, $chan_or_nick, $url);
+ }
+}
+
+sub url_topic {
+ my ($server, $channel, $topic, $nick, $hostmask) = @_;
+ url_message($server, $topic, $nick, $hostmask, $channel);
+}
+
+sub url_message {
+ my ($server, $rawtext, $nick, $hostmask, $channel) = @_;
+ my ($url, $permit, $chan_or_nick);
+
+ if (defined($url = scan_url($rawtext))) {
+ $chan_or_nick = defined $channel ? $channel : $server->{nick};
+ if (defined($permit = logging_permited($nick, $chan_or_nick)) && $permit) {
+ do_locked(\&do_with_caches, \&url_msg_log, $nick, $chan_or_nick, $url);
+ }
+ }
+}
+
+sub url_cmd_show {
+ my ($cache_a, $cache_h) = @_;
+ my $n = 0;
+ p_normal("urlplot: total of " . scalar(@$cache_a) . " URLs");
+ foreach my $url (@$cache_a) {
+ p_normal(sprintf("%02d - %s", $n++, $url));
+ }
+}
+
+sub url_cmd_clearcaches {
+ my ($cache_a, $cache_h) = @_;
+ @$cache_a = ();
+ %$cache_h = ();
+}
+
+sub url_cmd_real_navigate {
+ my ($url) = @_;
+ die 'no URLs captured so far' unless $url;
+ my $url_cmd = Irssi::settings_get_str(KEY_URL_COMMAND)
+ || die "missing setting for @{[ KEY_URL_COMMAND ]}";
+ unless ($url_cmd =~ s/__URL__/$url/g) {
+ die "setting url_cmd doesn't contain an URL placeholder '__URL__'";
+ }
+ system($url_cmd);
+}
+
+sub url_cmd_navigate {
+ my ($cache_a, $cache_h, $n) = @_;
+ my ($len, $url) = scalar @$cache_a;
+ unless (defined $n) {
+ $n = $len > 0 ? $len - 1 : $len;
+ }
+ die "no such URL; I've only $len" unless $n < $len;
+ $url = $cache_a->[$n];
+ die 'no URLs captured so far' unless $url;
+ url_cmd_real_navigate $url;
+}
+
+sub url_command {
+ my ($data, $server, $witem) = @_;
+ $_ = $data;
+ if (/^-list/) {
+ do_locked(\&do_with_caches, \&url_cmd_show);
+ } elsif (/^-clearcache/) {
+ do_locked(\&do_with_caches, \&url_cmd_clearcaches);
+ } elsif (/^-showlog/) {
+ my $nav_url = Irssi::settings_get_str(KEY_URL_NAVIGATE)
+ || die "missing setting for @{[ KEY_URL_NAVIGATE ]}";
+ url_cmd_real_navigate $nav_url;
+ } else {
+ my $n;
+ if (/^(\d+)/) {
+ $n = $1;
+ if ($n < 0) {
+ p_error("argument must be a positive integer");
+ return;
+ }
+ } elsif (/^$/) {
+ $n = undef;
+ } else {
+ p_error("usage for /url [-list|-showlog|-clearcache|<digit>]");
+ return;
+ }
+ do_locked(\&do_with_caches, \&url_cmd_navigate, $n);
+ }
+}
+
+Irssi::signal_add_last('message public', 'url_message');
+Irssi::signal_add_last('message private', 'url_message');
+Irssi::signal_add_last('message topic', 'url_topic');
+Irssi::command_bind('url', 'url_command');
+
+Irssi::settings_add_str('misc', KEY_URL_COMMAND, DEF_URL_COMMAND);
+Irssi::settings_add_int('misc', KEY_URL_CACHE_MAX, DEF_URL_CACHE_MAX);
+Irssi::settings_add_str('misc', KEY_URL_LOG_BASEDIR, mk_home(DEF_URL_LOG_BASEDIR));
+Irssi::settings_add_str('misc', KEY_URL_LOG_FILE_NAME, DEF_URL_LOG_FILE_NAME);
+Irssi::settings_add_str('misc', KEY_URL_CHAN_PREFIX, DEF_URL_CHAN_PREFIX);
+Irssi::settings_add_bool('misc', KEY_URL_CHAN_LOGGING, DEF_URL_CHAN_LOGGING);
+Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_FILE_NAME, DEF_URL_LOG_CSV_FILE_NAME);
+Irssi::settings_add_int('misc', KEY_URL_LOG_CSV_FILE_MAX_SIZE, DEF_URL_LOG_CSV_FILE_MAX_SIZE);
+Irssi::settings_add_str('misc', KEY_URL_LOG_CSV_SEPARATOR, DEF_URL_LOG_CSV_SEPARATOR);
+Irssi::settings_add_bool('misc', KEY_URL_CSV_LOGGING, DEF_URL_CSV_LOGGING);
+Irssi::settings_add_bool('misc', KEY_URL_CSV_CHAN_LOGGING, DEF_URL_CSV_CHAN_LOGGING);
+Irssi::settings_add_str('misc', KEY_URL_TIME_FORMAT, DEF_URL_TIME_FORMAT);
+Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_MAX_SIZE, DEF_URL_LOG_FILE_MAX_SIZE);
+Irssi::settings_add_int('misc', KEY_URL_LOG_FILE_AUTORELOAD_TIME,
+ DEF_URL_LOG_FILE_AUTORELOAD_TIME);
+Irssi::settings_add_str('misc', KEY_URL_DB_BASEDIR, mk_home(DEF_URL_DB_BASEDIR));
+Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_A_FILENAME, DEF_URL_DB_CACHE_A_FILENAME);
+Irssi::settings_add_str('misc', KEY_URL_DB_CACHE_H_FILENAME, DEF_URL_DB_CACHE_H_FILENAME);
+Irssi::settings_add_str('misc', KEY_URL_DB_LOCK_FILENAME, DEF_URL_DB_LOCK_FILENAME);
+
+Irssi::settings_add_str('misc', KEY_URL_POLICY_DEFAULT, DEF_URL_POLICY_DEFAULT);
+Irssi::settings_add_str('misc', KEY_URL_POLICY_CHANS, DEF_URL_POLICY_CHANS);
+Irssi::settings_add_str('misc', KEY_URL_POLICY_NICKS, DEF_URL_POLICY_NICKS);
+Irssi::settings_add_str('misc', KEY_URL_NAVIGATE, 'file://' . mk_home(DEF_URL_NAVIGATE));
+
+#
+# $Log$
+#