summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--.gitignore12
-rw-r--r--COPYING27
-rw-r--r--Makefile118
-rw-r--r--NEWS181
-rw-r--r--README24
-rw-r--r--control29
-rwxr-xr-xfilter-example.py13
-rwxr-xr-xfilter-test.py35
-rw-r--r--hacking.adoc78
-rw-r--r--install.adoc110
-rwxr-xr-xirk65
-rw-r--r--irk.xml84
-rwxr-xr-xirkerd1058
-rw-r--r--irkerd.service16
-rw-r--r--irkerd.xml249
-rwxr-xr-xirkerhook.py581
-rw-r--r--irkerhook.xml417
-rw-r--r--org.catb.irkerd.plist20
-rw-r--r--requirements.txt1
-rw-r--r--security.adoc268
20 files changed, 3386 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a001f85
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+# Git clutter
+*.orig
+
+# Python bits
+/*.pyc
+
+# Man Pages
+/*.8
+/*.1
+
+# HTML Docs
+/*.html
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..0498485
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,27 @@
+ BSD LICENSE
+
+Copyright (c) 2015, Eric S. Raymond
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..50da117
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,118 @@
+# Makefile for the irker relaying daemon
+
+# Release version, scraped from the `version = "..."` assignment in irkerd.
+# `..*` is used rather than the GNU-only `.\+` so that BSD/macOS sed works too.
+VERS := $(shell sed -n 's/version = "\(..*\)"/\1/p' irkerd)
+# systemd unit directory.  Left empty when pkg-config or systemd is missing
+# (errors are silenced); the install/uninstall rules then skip the unit file.
+SYSTEMDSYSTEMUNITDIR := $(shell pkg-config --variable=systemdsystemunitdir systemd 2>/dev/null)
+
+# `prefix`, `mandir` & `DESTDIR` can and should be set on the command
+# line to control installation locations.  Note that `mandir` is appended
+# to `$(DESTDIR)$(prefix)`, so it is a path relative to the prefix.
+prefix ?= /usr
+mandir ?= /share/man
+target = $(DESTDIR)$(prefix)
+
+# Default goal: build every manual page and its HTML rendering.
+.PHONY: docs
+docs: irkerd.html irkerd.8 irkerhook.html irkerhook.1 irk.html irk.1
+
+# Manual pages.  xmlto picks the output file name itself, so each target
+# has to match the name it writes for the corresponding DocBook source.
+irkerd.8: irkerd.xml
+	xmlto man $<
+irkerhook.1: irkerhook.xml
+	xmlto man $<
+irk.1: irk.xml
+	xmlto man $<
+
+# HTML renderings of the DocBook manual pages.
+irkerd.html irkerhook.html irk.html: %.html: %.xml
+	xmlto html-nochunks $<
+
+# HTML renderings of the AsciiDoc guides.
+install.html security.html hacking.html: %.html: %.adoc
+	asciidoc -o $@ $<
+
+# Install the daemon, the systemd unit (only when a unit directory was
+# detected) and the manual pages.  Any previous installation is removed
+# first through the `uninstall` prerequisite.
+.PHONY: install
+install: irk.1 irkerd.8 irkerhook.1 uninstall
+	install -m 755 -o 0 -g 0 -d "$(target)/bin"
+	install -m 755 -o 0 -g 0 irkerd "$(target)/bin/irkerd"
+ifneq ($(strip $(SYSTEMDSYSTEMUNITDIR)),)
+	install -m 755 -o 0 -g 0 -d "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)"
+	install -m 644 -o 0 -g 0 irkerd.service "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)"
+endif
+	install -m 755 -o 0 -g 0 -d "$(target)$(mandir)/man8"
+	# Manual pages are data, not programs: install them without the exec bit.
+	install -m 644 -o 0 -g 0 irkerd.8 "$(target)$(mandir)/man8/irkerd.8"
+	install -m 755 -o 0 -g 0 -d "$(target)$(mandir)/man1"
+	install -m 644 -o 0 -g 0 irkerhook.1 "$(target)$(mandir)/man1/irkerhook.1"
+	install -m 644 -o 0 -g 0 irk.1 "$(target)$(mandir)/man1/irk.1"
+
+# Remove everything the `install` rule puts in place.  Declared phony so a
+# stray file named `uninstall` cannot turn the rule into a no-op.
+.PHONY: uninstall
+uninstall:
+	rm -f "$(target)/bin/irkerd"
+ifneq ($(strip $(SYSTEMDSYSTEMUNITDIR)),)
+	rm -f "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)/irkerd.service"
+endif
+	rm -f "$(target)$(mandir)/man8/irkerd.8"
+	rm -f "$(target)$(mandir)/man1/irkerhook.1"
+	rm -f "$(target)$(mandir)/man1/irk.1"
+
+# Delete generated manual pages, HTML, release tarballs and editor backups.
+.PHONY: clean
+clean:
+	rm -f irkerd.8 irkerhook.1 irk.1 irker-*.tar.gz *~ *.html
+
+# Options shared by both pylint runs: no rc file, no summary report, a
+# one-line message format, and `_`-prefixed names treated as dummies.
+PYLINTOPTS = --rcfile=/dev/null --reports=n \
+	--msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
+	--dummy-variables-rgx='^_'
+# Message IDs that are deliberately not reported for this code base.
+SUPPRESSIONS = "C0103,C0111,C0301,C0302,C0330,C1001,R0201,R0902,R0903,R0912,R0913,R0914,R0915,E1101,W0142,W0201,W0212,W0621,W0702,W0703,W1201,F0401,E0611"
+# Audit the daemon and the hook script; a failing first run stops the rule.
+.PHONY: pylint
+pylint:
+	@pylint $(PYLINTOPTS) --disable=$(SUPPRESSIONS) irkerd
+	@pylint $(PYLINTOPTS) --disable=$(SUPPRESSIONS) irkerhook.py
+
+# Report raw line counts, then the count of lines that are neither blank
+# nor comment-only.  printf replaces `echo -n`, whose handling of -n
+# differs between shells.
+.PHONY: loc
+loc:
+	@echo "LOC:"; wc -l irkerd irkerhook.py
+	@printf 'LLOC: '; grep -vE '(^ *#|^ *$$)' irkerd irkerhook.py | wc -l
+
+# Documentation sources shipped in the release tarball.  The last entry
+# carries no continuation backslash, so the list does not depend on the
+# blank line that follows it.
+DOCS = \
+	README \
+	COPYING \
+	NEWS \
+	install.adoc \
+	security.adoc \
+	hacking.adoc \
+	irkerhook.xml \
+	irkerd.xml \
+	irk.xml
+
+# Everything the tarball rule depends on and copies.
+SOURCES = \
+	$(DOCS) \
+	irkerd \
+	irkerhook.py \
+	filter-example.py \
+	filter-test.py \
+	irk \
+	Makefile
+
+# Files copied into the tarball without being prerequisites of it.
+EXTRA_DIST = \
+	org.catb.irkerd.plist \
+	irkerd.service \
+	irker-logo.png
+
+# Print the release version scraped from irkerd.
+.PHONY: version
+version:
+	@echo $(VERS)
+
+# Build the release tarball by staging the files in a scratch directory
+# named after the release, archiving it, and removing it again.
+irker-$(VERS).tar.gz: $(SOURCES) irkerd.8 irkerhook.1 irk.1
+	mkdir irker-$(VERS)
+	cp -pR $(SOURCES) $(EXTRA_DIST) irker-$(VERS)/
+	@COPYFILE_DISABLE=1 tar -cvzf irker-$(VERS).tar.gz irker-$(VERS)
+	rm -fr irker-$(VERS)
+
+# The checksum is computed from the tarball, so it must be listed as a
+# prerequisite: that keeps the checksum fresh when the tarball is rebuilt
+# and orders the two recipes correctly under `make -j`.
+irker-$(VERS).md5: irker-$(VERS).tar.gz
+	@md5sum irker-$(VERS).tar.gz >irker-$(VERS).md5
+
+.PHONY: dist
+dist: irker-$(VERS).tar.gz irker-$(VERS).md5
+
+# HTML pages that accompany a release.
+WEBDOCS = irkerd.html irk.html irkerhook.html install.html security.html hacking.html
+
+# Both rules pipe the commands emitted by shipper into a shell that echoes
+# each one and stops at the first failure.
+.PHONY: release refresh
+release: irker-$(VERS).tar.gz irker-$(VERS).md5 $(WEBDOCS)
+	shipper version=$(VERS) | sh -e -x
+
+refresh: $(WEBDOCS)
+	shipper -N -w version=$(VERS) | sh -e -x
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..82f1823
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,181 @@
+ irker history
+
+2.19: 2020-06-29
+ Codebase is now fully forward-ported to Python 3.
+
+2.18: 2016-06-02
+ Add the ability to set the notification-message template (Debian bug #824512)
+
+2.17: 2016-03-14
+ Add a reconnect delay (Debian bug #749650).
+ Add proxy support (requires setting some variables in the source file).
+ Use git abbreviated hash to address Debian complaints.
+
+2.16: 2016-02-18
+ Code now runs under either Python 2 or Python 3
+
+2.15: 2016-01-12
+ Emergency backout of getaddrinfo, it randomly hangs.
+
+2.14: 2016-01-12
+ Lookup with getaddrinfo allows use with IPv6.
+ Documentation improvements.
+
+2.13: 2015-06-14
+ SSL validation fix.
+ Hardening against Unicode decode errors.
+ irk becomes a library so it can be re-used.
+
+2.12: 2014-10-22
+ Catch erroneous UTF-8 or non-UTF-8 from servers.
+ Also autodetect the right logging device under FreeBSD: /var/run/syslog
+
+2.11: 2014-06-20
+ With -i, message string argument now optional, stdin is read if it is absent.
+ Auto-adapt to BSD & OS X log device as well as Linux's.
+
+2.10: 2014-06-19
+ irk no longer fails on ircs channel URLs.
+
+2.9: 2014-06-01
+ If irkerd is running in background, log to /dev/syslog (facility daemon).
+ New -H option to set host listening address.
+ Add support for using CertFP to auth to the IRC server, and document it.
+
+2.8: 2014-05-30
+ Various minor improvements to irk.
+ Cope better with branch names containing slashes.
+
+2.7: 2014-03-15
+ Add support for ircs:// and SSL/TLS connections to IRC servers.
+ Add support for per-URL usernames and passwords.
+
+2.6: 2014-02-04
+ Fix for an infinite loop on failing to connect to IRC
+
+2.5: 2013-12-24
+ Bug fix - remove a deadlock we inherited from irclib.
+
+2.4: 2013-12-03
+ Bug fix release - some users reported failure to connect with 2.3.
+ Also prevent a crash if Unicode shows up in the wrong place.
+
+2.3: 2013-11-30
+ -i option enables immediate sending of one line in foreground.
+
+2.2: 2013-11-29
+ Fixed Unicode processing - got busted in 2.0 when irclib was removed.
+ Show Python traceback on higher debug levels.
+
+2.1: 2013-11-26
+ A performance improvement in the git repository hook.
+ Documentation polishing.
+
+2.0: 2013-11-16
+ The dependency on irclib is gone.
+ An email delivery method, suitable for use on SourceForge.
+ irkerhook can now be used as a hg changegroup hook.
+ Prevent misbehavior on UTF-8 in commit metadata.
+ Fix a crash bug on invalid hostnames.
+
+1.20: 2013-05-17
+ Compatibility back to Python 2.4 (provided simplejson is present).
+ Increased anti-flood delay to avoid trouble with freenode.
+
+1.19: 2013-05-06
+ Fixed a minor bug in argument processing
+
+1.18: 2013-04-16
+ Added -l option; irker can now be used as a channel monitor.
+ Added -n and -p option: the nick can be forced and authenticated.
+
+1.17: 2013-02-03
+ Various minor fixes and bulletproofing.
+
+1.16: 2013-01-24
+ Deal gracefully with non-ASCII author names and '|' in the command line.
+
+1.15: 2012-12-08
+ Don't append an extra newline in the Subversion hook.
+
+1.14: 2012-11-26
+ irclib 5.0 and urlparse compatibility fixes.
+
+1.13: 2012-11-06
+ Fix for a very rare thread race found by AI0867.
+ Work around a misdesign in the IRC library.
+
+1.12: 2012-10-11
+ Emergency workaround for a Unicode-handling error buried deep in irclib.
+ The IRC library at version 3.2 or later is required for this version!
+ Only ship to freenode #commits by default.
+
+1.11: 2012-10-10
+ Code is now fully Unicode-safe.
+ A 'cialike' option emulates the file-summary behavior on the old CIA service.
+
+1.10: 2012-10-09
+ Expire disconnected connections if they aren't needed or can't reconnect.
+ Eventlet support removed - didn't play well with the library mutex.
+
+1.9: 2012-10-08
+ Proper mutex locks prevent an occasional thread crash on session timeout.
+ There's now systemd installation support for irkerd.
+
+1.8: 2012-10-06
+ It's now possible to send to nick URLs.
+ Cope gracefully if an IRC server dies or hangs during the nick handshake.
+
+1.7: 2012-10-05
+ Optional metadata filtering with a user-specified command.
+ irkerd code is now armored against IRC library errors in the delivery threads.
+
+1.6: 2012-10-04
+ In 1.5 trying to appease pylint broke the Mercurial hook.
+ Added credits for contributors in hacking.txt.
+ Fix the aging out of connections when we hit a resource limit.
+
+1.5: 2012-10-03
+ Mercurial support.
+ Shorten nick negotiation by choosing a random nick base from a large range.
+ Make irkerd exit cleanly on control-C.
+
+1.4: 2012-10-02
+ Graceful handling of server disconnects and kicks.
+ Distribution now includes an installable irkerd plist for Mac OS/X.
+ The color variable is no longer boolean; may be mIRC or ANSI.
+ The installation instructions for irkerhook.py have changed!
+
+1.3: 2012-10-01
+ Support for an irker.conf file to set irkerhook variables under Subversion.
+ Color highlighting of notification fields can be enabled.
+ irkerhook.py now has its own manual page.
+ Added channelmax variable for rate-limiting.
+ irkerd now uses green threads, with much lower overhead.
+ Fix a bug in handling of channel names with no prefix.
+
+1.2: 2012-09-30
+ All segments of a message with embedded newlines are now transmitted.
+ Message reduction - irkerhook drops the filelist on excessively long ones.
+ Shell quote hardening in irkerhook.py and some anti-DoS logic.
+
+1.1: 2012-09-28
+ Add a delay to avoid threads spinning on the empty-queue-check, eating CPU.
+ Fix a bug in reporting of multi-file commits.
+
+1.0: 2012-09-27
+ First production version, somewhat rushed by the sudden death of cia.vc
+ on 24 September.
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/README b/README
new file mode 100644
index 0000000..7cd810b
--- /dev/null
+++ b/README
@@ -0,0 +1,24 @@
+ irker - submission tools for IRC notifications
+
+irkerd is a specialized IRC client that runs as a daemon, allowing
+other programs to ship IRC notifications by sending JSON objects to a
+listening socket.
+
+It is meant to be used by hook scripts in version-control
+repositories, allowing them to send commit notifications to project
+IRC channels. A hook script, irkerhook.py, supporting git, hg, and
+Subversion is included in the distribution; see the install.adoc file
+for installation instructions.
+
+The advantage of using this daemon over individual scripted sends
+is that it can maintain connection state for multiple channels,
+avoiding obnoxious join/leave spam.
+
+The file install.adoc describes how to install the software safely, so
+it can't be used as a spam conduit.
+
+Please read the files security.adoc and hacking.adoc before modifying
+this code.
+
+ Eric S. Raymond
+ September 2012
diff --git a/control b/control
new file mode 100644
index 0000000..1ff3344
--- /dev/null
+++ b/control
@@ -0,0 +1,29 @@
+# This is not a real Debian control file, though the syntax is compatible.
+# It's project metadata for the shipper tool
+
+Package: irker
+
+Description: An IRC client that runs as a daemon accepting notification requests.
+ You present it with JSON objects sent to a listening socket. It is
+ meant to be used by hook scripts in version-control repositories,
+ allowing them to send commit notifications to project IRC channels.
+ A hook script that works with git, hg, and svn is included in the
+ distribution.
+
+XBS-Destinations: mailto:ubuntu-devel-discuss@lists.ubuntu.com
+
+Homepage: mailto:packages@qa.debian.org
+
+XBS-HTML-Target: index.html
+
+XBS-Repository-URL: https://gitlab.com/esr/irker
+
+XBS-Debian-Packages: irker
+
+XBS-OpenHub-URL: http://www.openhub.net/p/irker
+
+XBS-IRC-Channel: irc://chat.freenode.net/#irker
+
+XBS-Logo: irker-logo.png
+
+XBS-VC-Tag-Template: %(version)s
diff --git a/filter-example.py b/filter-example.py
new file mode 100755
index 0000000..12908b4
--- /dev/null
+++ b/filter-example.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+# This is a trivial example of a metadata filter.
+# All it does is change the name of the commit's author.
+# It could do other things, including modifying the
+# channels list
+#
+import sys, json
+metadata = json.loads(sys.argv[1])
+
+metadata['author'] = "The Great and Powerful Oz"
+
+print json.dumps(metadata)
+# end
diff --git a/filter-test.py b/filter-test.py
new file mode 100755
index 0000000..030d3d4
--- /dev/null
+++ b/filter-test.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+#
+# Test hook to launch an irker instance (if it doesn't already exist)
+# just before shipping the notification. We start it in in another terminal
+# so you can watch the debug messages. Intended to be used in the root
+# directory of the irker repo. Probably only of interest only to irker
+# developers
+#
+# To use this, set up irkerhook.py to fire on each commit. Creating a
+# .git/hooks/post-commit file containing the line "irkerhook.py"; be
+# sure to make the opos-commit file executable. Then set the
+# filtercmd variable in your repo config as follows:
+#
+# [irker]
+# filtercmd = filter-test.py
+
+import os, sys, json, subprocess, time
+metadata = json.loads(sys.argv[1])
+
+ps = subprocess.Popen("ps -U %s uh" % os.getenv("LOGNAME"),
+ shell=True,
+ stdout=subprocess.PIPE)
+data = ps.stdout.read()
+irkerd_count = len([x for x in data.split("\n") if x.find("irkerd") != -1])
+
+if irkerd_count:
+ sys.stderr.write("Using a running irker instance...\n")
+else:
+ sys.stderr.write("Launching a new irker instance...\n")
+ os.system("gnome-terminal --title 'irkerd' -e 'irkerd -d 2' &")
+
+time.sleep(1.5) # Avoid a race condition
+
+print json.dumps(metadata)
+# end
diff --git a/hacking.adoc b/hacking.adoc
new file mode 100644
index 0000000..8e121d4
--- /dev/null
+++ b/hacking.adoc
@@ -0,0 +1,78 @@
+= Hacker's Guide to irker =
+
+== Design philosophy ==
+
+Points to you if some of this seems familiar from GPSD...
+
+=== Keep mechanism and policy separate ===
+
+Mechanism goes in irkerd. Policy goes in irkerhook.py
+
+irkerd is intended to be super-simple and completely indifferent to
+what content passes through it. It doesn't know, in any sense, that
+the use-case it was designed for is broadcasting notifications from
+version control systems. irkerhook.py is the part that knows about how
+to mine data from repositories and sets the format of notifications.
+
+=== If you think the mechanism needs an option, think again ===
+
+Because irkerhook.py does policy, it takes policy options. Because
+irkerd is pure mechanism, it shouldn't need any. If you think it
+does, you have almost certainly got a bug in your thinking. Fix
+that before you modify code.
+
+=== Never configure what you can autoconfigure ===
+
+Human attention is more expensive than machine time. Humans are
+careless and failure-prone. Therefore, whenever you make a user tell
+your code something the code can deduce for itself, you are
+introducing unnecessary inefficiency and unnecessary failure modes.
+
+This, in particular, is why irkerhook.py doesn't have a repository
+type switch. It can deduce the repo type by looking, so it should.
+
+== Release procedure ==
+
+1. Check for merge requests at the repository.
+
+2. Do 'make pylint' to audit the code.
+
+3. Run irk with a sample message; look at #irker on freenode to verify.
+
+4. Bump the version numbers in irkerd and irkerhook.py
+
+5. Update the NEWS file
+
+6. git commit -a
+
+7. make release
+
+== Thanks where due ==
+
+Alexander van Gessel (AI0867) <ai0867@gmail.com> contributed the
+Subversion support in irkerhook.py. Since the 1.0 release he has
+kept as close an eye on the code as the author and has fixed at least
+as many bugs.
+
+//W. here causes asciidoc to see this as a list entry.
+W Trevor King <wking@tremily.us> added SSL/TLS support and did
+significant refactoring work.
+
+Daniel Franke <dfoxfranke@gmail.com> performed a security audit of irkerd.
+
+Georg Brandl <georg@python.org> contributed the Mercurial support in
+irkerhook.py and explained how to make Control-C work right.
+
+Laurent Bachelier <laurent@bachelier.name> fixed the Makefile so it
+wouldn't break stuff and wrote the first version of the external
+filtering option.
+
+dak180 (name withheld by request) wrote the OS X launchd plist.
+
+Wulf C. Krueger <philantrop@exherbo.org> wrote the systemd
+installation support.
+
+Other people on the freenode #irker channel (Kingpin, fpcfan,
+shadowm, Rick) smoked out bugs in irkerd before they could seriously
+bug anybody.
+
diff --git a/install.adoc b/install.adoc
new file mode 100644
index 0000000..93c20bb
--- /dev/null
+++ b/install.adoc
@@ -0,0 +1,110 @@
+= Forge installation instructions =
+
+irker and irkerhook.py are intended to be installed on forge sites
+such as SourceForge, GitHub, GitLab, Gna, and Savannah. This
+file explains the theory of operation, how to install the code,
+and how to test it.
+
+== Theory of operation ==
+
+irkerhook.py creates JSON notification requests and ships them to
+irkerd's listener socket. irkerd runs as a daemon in order to maintain
+all the client state required to post multiple notifications while generating
+a minimum of join/leave messages (which, from the point of view of
+humans watching irkerd's output, are mere spam).
+
+See the security.adoc document for a detailed discussion of security
+and DoS vulnerabilities related to irker. The short version: as
+long as your firewall blocks port 6659 and irkerd is running inside
+it, you should be fine.
+
+== Prerequisites ==
+
+You will need either
+
+1. Python at version 2.6 or later, which has JSON built in
+
+2. Python at version no older than 2.4, and a version of the
+ simplejson library installed that it can use. Some newer
+ versions of simplejson discard 2.4 compatibility; 2.0.9
+ is known to work.
+
+== Installing irkerd ==
+
+irker needs to run constantly, watching for TCP and UDP traffic on
+port 6659. Install it accordingly. It has no config file; you can
+just start it up with no arguments. If you want to see what it's
+doing, give it command-line options -d info for sparse messages and
+-d debug to show all traffic with IRC servers.
+
+You should *not* make irker visible from outside the site firewall, as
+it can be used to spam IRC channels while masking the source address.
+The firewall should block port 6659.
+
+The design of irker assumes the machine on which it is running is also
+inside the firewall, so that repository hooks can reach port 6659.
+
+The file org.catb.irkerd.plist is a Mac OS/X plist that can be
+installed to launch irkerd as a boot-time service on that system.
+
+irkerd.service is a systemd unit that can run irkerd as a boot-time
+service on systems that support systemd. This is configured to
+run irkerd under a separate user account (irker), so this needs to
+be set up before starting irker, or the unit needs to be modified
+to use a different user.
+
+== Installing irkerhook.py ==
+
+Under git, a call to irkerhook.py should be installed in the update
+hook script of your repo. Under Subversion, the call goes in your
+repo's post-commit script. Under Mercurial there are two different
+ways to install it. See the irkerhook manual page for details; the
+source is irkerhook.xml in this distribution.
+
+Note that if you were using the CIA service and have ciabot.py in your
+git update script, you can simply replace this
+
+/path/to/ciabot.py ${refname} $(git rev-list ${oldhead}..${newhead} | tac)
+
+with this:
+
+/path/to/irkerhook.py --refname=${refname} $(git rev-list ${oldhead}..${newhead} | tac)
+
+SourceForge is a special case: see
+
+https://github.com/AI0867/sf-git-irker-pipeline
+
+for tools and instructions on how to work around its limitations.
+
+== Testing ==
+
+To verify that your repo produces well-formed JSON notifications,
+you can run irkerhook.py in the repo directory using the -n switch,
+which emits JSON to standard output rather than attempting to ship
+to an irkerd instance.
+
+Then, start irkerd and call irkerhook.py while watching the freenode
+#commits channel.
+
+The 'irk' script is a little test tool that takes two arguments,
+a channel and a message, and does what you'd expect.
+
+If you need help, there's a project chat channel at
+
+ irc://chat.freenode.net/#irker
+
+== Read-only access ==
+
+If, for whatever reason, you can't modify the hook scripts in your
+repository, there is still hope.
+
+There's a proxy that takes CIA XML-RPC notifications
+and passes them to a local irker instance. Find it here:
+
+ https://github.com/nenolod/irker-cia-proxy
+
+There's also a poller daemon that can watch activity in a Subversion
+repository and ship notifications via an irker instance.
+
+ https://github.com/shikadilord/irker-svnpoller
+
diff --git a/irk b/irk
new file mode 100755
index 0000000..fc9e153
--- /dev/null
+++ b/irk
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# Illustrates how to test irkerd.
+#
+# First argument must be a channel URL. If it does not begin with "irc",
+# the base URL for freenode is prepended.
+#
+# Second argument must be a payload string. Standard C-style escapes
+# such as \n and \t are decoded.
+#
+# SPDX-License-Identifier: BSD-2-Clause
+import json
+import socket
+import sys
+import fileinput
+
+DEFAULT_SERVER = ("localhost", 6659)
+
+def connect(server = DEFAULT_SERVER):
+ return socket.create_connection(server)
+
+def send(s, target, message):
+ data = {"to": target, "privmsg" : message}
+ dump = json.dumps(data)
+ if not isinstance(dump, bytes):
+ dump = dump.encode('ascii')
+ s.sendall(dump)
+
+def irk(target, message, server = DEFAULT_SERVER):
+ s = connect(server)
+ if "irc:" not in target and "ircs:" not in target:
+ target = "irc://chat.freenode.net/{0}".format(target)
+ if message == '-':
+ for line in fileinput.input('-'):
+ send(s, target, line.rstrip('\n'))
+ else:
+ send(s, target, message)
+ s.close()
+
+def main():
+ if len(sys.argv) < 2:
+ sys.stderr.write("irk: a URL argument is required\n")
+ sys.exit(1)
+ target = sys.argv[1]
+ message = " ".join(sys.argv[2:])
+ # Allows pretty formatting of irker messages
+ if str == bytes:
+ message = message.decode('string_escape')
+
+ # The actual IRC limit is 512. Avoid any off-by-ones
+ chunksize = 511
+ try:
+ while message[:chunksize]:
+ irk(target, message[:chunksize])
+ message = message[chunksize:]
+ except socket.error as e:
+ sys.stderr.write("irk: write to server failed: %r\n" % e)
+ sys.exit(1)
+
+if __name__ == '__main__':
+ main()
+
+# The following sets edit modes for GNU EMACS
+# Local Variables:
+# mode:python
+# End:
diff --git a/irk.xml b/irk.xml
new file mode 100644
index 0000000..afa836f
--- /dev/null
+++ b/irk.xml
@@ -0,0 +1,84 @@
+<!DOCTYPE refentry PUBLIC
+ "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "docbook/docbookx.dtd">
+<refentry id='irk.1'>
+<refmeta>
+<refentrytitle>irk</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class='date'>Apr 30 2014</refmiscinfo>
+<refmiscinfo class='source'>irker</refmiscinfo>
+<refmiscinfo class='product'>irker</refmiscinfo>
+<refmiscinfo class='manual'>Commands</refmiscinfo>
+</refmeta>
+<refnamediv id='name'>
+<refname>irk</refname>
+<refpurpose>test program for irkerd</refpurpose>
+</refnamediv>
+<refsynopsisdiv id='synopsis'>
+
+<cmdsynopsis>
+ <command>irk</command>
+ <arg><replaceable>target</replaceable></arg>
+ <arg choice='opt'><replaceable>message text</replaceable></arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1 id='description'><title>DESCRIPTION</title>
+
+<para><application>irk</application> is a simple test program for
+<citerefentry><refentrytitle>irkerd</refentrytitle><manvolnum>8</manvolnum></citerefentry>. It
+will construct a simple JSON object and pass it to the daemon running
+on localhost.</para>
+</refsect1>
+
+<refsect1 id='options'><title>OPTIONS</title>
+
+<para><application>irk</application> takes the following options:</para>
+
+<variablelist>
+<varlistentry>
+<term>target</term>
+<listitem><para>Which server and channel to join to announce the
+message. If not prefixed with "irc:", it will prefix
+"irc://chat.freenode.net/" to the argument before passing it directly
+to irkerd. This argument is passed as the "to" parameter in the JSON
+object.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>message</term>
+<listitem><para>Which message to send to the target specified
+above. If the string "-", the message will be read from standard
+input, with newlines stripped.</para></listitem>
+</varlistentry>
+</variablelist>
+
+</refsect1>
+
+<refsect1 id='limitations'><title>LIMITATIONS</title>
+
+<para><application>irk</application> has no commandline usage and may
+be riddled with bugs.</para>
+
+<para><application>irk</application> doesn't know how to talk to your
+favorite VCS. You will generally want to use
+<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+instead.</para>
+
+<para><application>irk</application> has also all the limitations of
+<application>irkerd</application>.</para>
+</refsect1>
+
+<refsect1 id='see_also'><title>SEE ALSO</title>
+<para>
+<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>
+</para>
+</refsect1>
+
+<refsect1 id='authors'><title>AUTHOR</title>
+<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the
+project page at <ulink
+url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink>
+for updates and other resources, including an installable repository
+hook script.</para>
+</refsect1>
+</refentry>
diff --git a/irkerd b/irkerd
new file mode 100755
index 0000000..d42f56e
--- /dev/null
+++ b/irkerd
@@ -0,0 +1,1058 @@
+#!/usr/bin/env python
+"""
+irkerd - a simple IRC multiplexer daemon
+
+Listens for JSON objects of the form {'to':<irc-url>, 'privmsg':<text>}
+and relays messages to IRC channels. Each request must be followed by
+a newline.
+
+The <text> must be a string. The value of the 'to' attribute can be a
+string containing an IRC URL (e.g. 'irc://chat.freenet.net/botwar') or
+a list of such strings; in the latter case the message is broadcast to
+all listed channels. Note that the channel portion of the URL need
+*not* have a leading '#' unless the channel name itself does.
+
+Design and code by Eric S. Raymond <esr@thyrsus.com>. See the project
+resource page at <http://www.catb.org/~esr/irker/>.
+
+Requires Python 2.7, or:
+* 2.6 with the argparse package installed.
+* Any 3.x
+
+"""
+# SPDX-License-Identifier: BSD-2-Clause
+
+# These things might need tuning
+
+# Default address to listen on (overridable with -H/--host) and the
+# port on which both the TCP and the UDP request listeners are opened.
+HOST = "localhost"
+PORT = 6659
+
+# Proxy settings. They are only consulted when the optional "socks"
+# module could be imported and PROXY_TYPE is set.
+PROXY_TYPE = None # Use proxy if set 1: SOCKS4, 2: SOCKS5, 3: HTTP
+PROXY_HOST = ""
+PROXY_PORT = 1080
+
+# Timeouts, all in seconds. Except for PING_TTL and CHANNEL_TTL they
+# are measured against a connection's last_xmit timestamp, which is
+# bumped on every successful connect and on every transmission.
+XMIT_TTL = (3 * 60 * 60) # Idle connection is dropped this long after last transmit
+PING_TTL = (15 * 60) # Idle connection is dropped this long after last PING
+HANDSHAKE_TTL = 60 # Time to live, seconds from nick transmit
+CHANNEL_TTL = (3 * 60 * 60) # Channel may be scavenged this long after its last transmit
+DISCONNECT_TTL = (24 * 60 * 60) # Time to live, seconds from last connect
+UNSEEN_TTL = 60 # Time to live, seconds since first request
+CHANNEL_MAX = 18 # Max channels open per socket (default)
+ANTI_FLOOD_DELAY = 1.0 # Anti-flood delay after transmissions, seconds
+ANTI_BUZZ_DELAY = 0.09 # Anti-buzz delay after queue-empty check
+CONNECTION_MAX = 200 # To avoid hitting a thread limit
+RECONNECT_DELAY = 3 # Don't spam servers with connection attempts
+
+# No user-serviceable parts below this line
+
+version = "2.19"
+
+import argparse
+import logging
+import logging.handlers
+import json
+import os
+import os.path
+try: # Python 3
+ import queue
+except ImportError: # Python 2
+ import Queue as queue
+import random
+import re
+import select
+import signal
+import socket
+try:
+ import socks
+ socks_on = True
+except ImportError:
+ socks_on = False
+try: # Python 3
+ import socketserver
+except ImportError: # Python 2
+ import SocketServer as socketserver
+import ssl
+import sys
+import threading
+import time
+import traceback
+try: # Python 3
+ import urllib.parse as urllib_parse
+except ImportError: # Python 2
+ import urlparse as urllib_parse
+
+
+LOG = logging.getLogger(__name__)
+LOG.setLevel(logging.ERROR)
+LOG_LEVELS = ['critical', 'error', 'warning', 'info', 'debug']
+
+try: # Python 2
+ UNICODE_TYPE = unicode
+except NameError: # Python 3
+ UNICODE_TYPE = str
+
+
+# Sketch of implementation:
+#
+# One Irker object manages multiple IRC sessions. It holds a map of
+# Dispatcher objects, one per (server, port) combination, which are
+# responsible for routing messages to one of any number of Connection
+# objects that do the actual socket conversations. The reason for the
+# Dispatcher layer is that IRC daemons limit the number of channels a
+# client (that is, from the daemon's point of view, a socket) can be
+# joined to, so each session to a server needs a flock of Connection
+# instances each with its own socket.
+#
+# Connections are timed out and removed when either they haven't seen a
+# PING for a while (indicating that the server may be stalled or down)
+# or there has been no message traffic to them for a while, or
+# even if the queue is nonempty but efforts to connect have failed for
+# a long time.
+#
+# There are multiple threads. One accepts incoming traffic from all
+# servers. Each Connection also has a consumer thread and a
+# thread-safe message queue. The program main appends messages to
+# queues as JSON requests are received; the consumer threads try to
+# ship them to servers. When a socket write stalls, it only blocks an
+# individual consumer thread; if it stalls long enough, the session
+# will be timed out. This solves the biggest problem with a
+# single-threaded implementation, which is that you can't count on a
+# single stalled write not hanging all other traffic - you're at the
+# mercy of the length of the buffers in the TCP/IP layer.
+#
+# Message delivery is thus not reliable in the face of network stalls,
+# but this was considered acceptable because IRC (notoriously) has the
+# same problem - there is little point in reliable delivery to a relay
+# that is down or unreliable.
+#
+# This code uses only NICK, JOIN, PART, MODE, PRIVMSG, USER, and QUIT.
+# It is strictly compliant to RFC1459, except for the interpretation and
+# use of the DEAF and CHANLIMIT and (obsolete) MAXCHANNELS features.
+#
+# CHANLIMIT is as described in the Internet RFC draft
+# draft-brocklesby-irc-isupport-03 at <http://www.mirc.com/isupport.html>.
+# The ",isnick" feature is as described in
+# <http://ftp.ics.uci.edu/pub/ietf/uri/draft-mirashi-url-irc-01.txt>.
+
+# Historical note: the IRCClient and IRCServerConnection classes
+# (~270LOC) replace the overweight, overcomplicated 3KLOC mass of
+# irclib code that irker formerly used as a service library. They
+# still look similar to parts of irclib because I contributed to that
+# code before giving up on it.
+
+class IRCError(Exception):
+    """Base class for errors raised by the IRC client code.
+
+    Derives from Exception rather than BaseException: BaseException
+    is reserved for interpreter-level exits (SystemExit,
+    KeyboardInterrupt, ...), and an application error derived from it
+    would slip past generic ``except Exception`` handlers.
+    """
+
+class InvalidRequest(ValueError):
+    """Signals a relay request that is malformed or incomplete."""
+
+class IRCClient():
+    """An IRC client session to one or more servers.
+
+    Holds the list of server connections, a table of event handlers
+    shared by all of them, and a re-entrant lock guarding both.
+    """
+    def __init__(self):
+        self.mutex = threading.RLock()
+        self.server_connections = []
+        self.event_handlers = {}
+        # Answer every server PING with a PONG echoing the ping target.
+        self.add_event_handler("ping",
+                               lambda c, e: c.ship("PONG %s" % e.target))
+
+    def newserver(self):
+        "Create a new server-connection object, register it and return it."
+        conn = IRCServerConnection(self)
+        with self.mutex:
+            self.server_connections.append(conn)
+        return conn
+
+    def spin(self, timeout=0.2):
+        """Process incoming data from all connected sockets forever.
+
+        timeout is both the select() timeout and the sleep interval
+        used while no socket is connected.
+        """
+        # Outer loop should specifically *not* be mutex-locked.
+        # Otherwise no other thread would ever be able to change
+        # the shared state of an IRC object running this function.
+        while True:
+            nextsleep = 0
+            with self.mutex:
+                connected = [x for x in self.server_connections
+                             if x is not None and x.socket is not None]
+                sockets = [x.socket for x in connected]
+                if sockets:
+                    connmap = dict([(c.socket.fileno(), c) for c in connected])
+                    (insocks, _o, _e) = select.select(sockets, [], [], timeout)
+                    for s in insocks:
+                        try:
+                            connmap[s.fileno()].consume()
+                        except UnicodeDecodeError as e:
+                            # Logger.warn() is a deprecated alias that newer
+                            # Python versions no longer provide.
+                            LOG.warning('{0}: invalid encoding ({1})'.format(
+                                self, e))
+                else:
+                    nextsleep = timeout
+            time.sleep(nextsleep)
+
+    def add_event_handler(self, event, handler):
+        """Register handler to be called for events of type `event`.
+
+        Handlers are called as handler(connection, event), in
+        registration order.
+        """
+        with self.mutex:
+            event_handlers = self.event_handlers.setdefault(event, [])
+            event_handlers.append(handler)
+
+    def handle_event(self, connection, event):
+        """Call the "all_events" handlers, then those for event.type."""
+        with self.mutex:
+            h = self.event_handlers
+            th = h.get("all_events", []) + h.get(event.type, [])
+            for handler in th:
+                handler(connection, event)
+
+    def drop_connection(self, connection):
+        """Forget a server connection; a no-op if it is already gone.
+
+        Closing a connection fires a "disconnect" event whose handler
+        may close the same connection again, so this can be reached
+        twice for one connection.  An unguarded list.remove() would
+        raise ValueError on the second call.
+        """
+        with self.mutex:
+            if connection in self.server_connections:
+                self.server_connections.remove(connection)
+
+
+class LineBufferedStream():
+    """Accumulate raw bytes and hand them back one complete line at a time.
+
+    Lines end in LF or CRLF.  Whatever follows the last terminator
+    stays in the buffer until more data arrives.
+    """
+    _crlf_re = re.compile(b'\r?\n')
+
+    def __init__(self):
+        self.buffer = b''
+
+    def append(self, newbytes):
+        "Add freshly received bytes to the end of the buffer."
+        self.buffer = self.buffer + newbytes
+
+    def lines(self):
+        "Return an iterator over the complete lines, removing them from the buffer."
+        pieces = self._crlf_re.split(self.buffer)
+        # The final piece is the (possibly empty) unterminated tail.
+        self.buffer = pieces[-1]
+        return iter(pieces[:-1])
+
+    def __iter__(self):
+        return self.lines()
+
+class IRCServerConnectionError(IRCError):
+    """Raised when a connection to an IRC server cannot be set up."""
+
+class IRCServerConnection():
+    """A single socket-level session with one IRC server.
+
+    Owns the (optionally TLS-wrapped) socket, parses incoming lines
+    into Event objects and offers thin helpers for the few IRC
+    commands this program sends.  Every event is reported to the
+    owning IRCClient (``master``) first and then to handlers
+    registered on this connection.
+    """
+    command_re = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?")
+    # The full list of numeric-to-event mappings is in Perl's Net::IRC.
+    # We only need to ensure that if some ancient server throws numerics
+    # for the ones we actually want to catch, they're mapped.
+    codemap = {
+        "001": "welcome",
+        "005": "featurelist",
+        "432": "erroneusnickname",
+        "433": "nicknameinuse",
+        "436": "nickcollision",
+        "437": "unavailresource",
+    }
+
+    def __init__(self, master):
+        self.master = master
+        self.socket = None
+
+    # PROTOCOL_SSLv23 selects the highest version that both client and server support
+    def _wrap_socket(self, socket, target, certfile=None, cafile=None,
+                     protocol=ssl.PROTOCOL_SSLv23):
+        """Wrap `socket` for TLS with mandatory certificate validation.
+
+        certfile is an optional client certificate, cafile an optional
+        file of trusted CA certificates (the system defaults are used
+        when it is absent).  Stores the wrapped socket in self.socket
+        and returns it.
+        """
+        try: # Python 3.2 and greater
+            ssl_context = ssl.SSLContext(protocol)
+        except AttributeError: # Python < 3.2
+            self.socket = ssl.wrap_socket(
+                socket, certfile=certfile, cert_reqs=ssl.CERT_REQUIRED,
+                ssl_version=protocol, ca_certs=cafile)
+        else:
+            ssl_context.verify_mode = ssl.CERT_REQUIRED
+            if certfile:
+                ssl_context.load_cert_chain(certfile)
+            if cafile:
+                ssl_context.load_verify_locations(cafile=cafile)
+            else:
+                ssl_context.set_default_verify_paths()
+            kwargs = {}
+            if ssl.HAS_SNI:
+                kwargs['server_hostname'] = target.servername
+                # Let the TLS handshake itself verify the hostname
+                # where the context supports it.  This does not depend
+                # on ssl.match_hostname(), which newer Python versions
+                # no longer provide.  It must be set after verify_mode
+                # and needs server_hostname to be passed below.
+                if hasattr(ssl_context, 'check_hostname'):
+                    ssl_context.check_hostname = True
+            self.socket = ssl_context.wrap_socket(socket, **kwargs)
+        return self.socket
+
+    def _check_hostname(self, target):
+        """Verify that the peer certificate matches target.servername.
+
+        Raises IRCServerConnectionError on a mismatch.  Does nothing
+        when the hostname was already verified during the handshake.
+        """
+        context = getattr(self.socket, 'context', None)
+        if getattr(context, 'check_hostname', False):
+            # Already verified as part of the TLS handshake.
+            return
+        if hasattr(ssl, 'match_hostname'): # Python >= 3.2
+            cert = self.socket.getpeercert()
+            try:
+                ssl.match_hostname(cert, target.servername)
+            except ssl.CertificateError as e:
+                raise IRCServerConnectionError(
+                    'Invalid SSL/TLS certificate: %s' % e)
+        else: # Python < 3.2
+            LOG.warning(
+                'cannot check SSL/TLS hostname with Python %s' % sys.version)
+
+    def connect(self, target, nickname, username=None, realname=None,
+                **kwargs):
+        """Open the socket to `target` and start the IRC registration.
+
+        Sends PASS (when the target carries a password), NICK and
+        USER.  Extra keyword arguments are forwarded to _wrap_socket()
+        for TLS targets.  Returns self; raises
+        IRCServerConnectionError when the socket cannot be connected
+        or the TLS hostname check fails.
+        """
+        LOG.debug("connect(server=%r, port=%r, nickname=%r, ...)" % (
+            target.servername, target.port, nickname))
+        if self.socket is not None:
+            self.disconnect("Changing servers")
+
+        self.buffer = LineBufferedStream()
+        self.event_handlers = {}
+        self.real_server_name = ""
+        self.target = target
+        self.nickname = nickname
+        try:
+            if socks_on and PROXY_TYPE:
+                self.socket = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
+                self.socket.set_proxy(PROXY_TYPE, PROXY_HOST, PROXY_PORT)
+            else:
+                self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            if target.ssl:
+                self.socket = self._wrap_socket(
+                    socket=self.socket, target=target, **kwargs)
+            self.socket.bind(('', 0))
+            self.socket.connect((target.servername, target.port))
+        except socket.error as err:
+            raise IRCServerConnectionError("Couldn't connect to socket: %s" % err)
+
+        if target.ssl:
+            self._check_hostname(target=target)
+        if target.password:
+            self.ship("PASS " + target.password)
+        self.nick(self.nickname)
+        self.user(
+            username=target.username or username or 'irker',
+            realname=realname or 'irker relaying client')
+        return self
+
+    def close(self):
+        "Disconnect and remove this connection from its master."
+        # Without this thread lock, there is a window during which
+        # select() can find a closed socket, leading to an EBADF error.
+        with self.master.mutex:
+            self.disconnect("Closing object")
+            self.master.drop_connection(self)
+
+    def consume(self):
+        """Read pending data from the socket and fire one event per line.
+
+        Each non-empty line first produces an "every_raw_message"
+        event and is then parsed into prefix, command and arguments.
+        A line that does not parse is logged and skipped, and commands
+        that arrive without arguments are tolerated, so a server
+        cannot kill the calling thread with an unexpected line.
+        """
+        try:
+            incoming = self.socket.recv(16384)
+        except socket.error:
+            # Server hung up on us.
+            self.disconnect("Connection reset by peer")
+            return
+        if not incoming:
+            # Dead air also indicates a connection reset.
+            self.disconnect("Connection reset by peer")
+            return
+
+        self.buffer.append(incoming)
+
+        for line in self.buffer:
+            if not isinstance(line, UNICODE_TYPE):
+                line = UNICODE_TYPE(line, 'utf-8')
+            LOG.debug("FROM: %s" % line)
+
+            if not line:
+                continue
+
+            self.handle_event(Event("every_raw_message",
+                                    self.real_server_name,
+                                    None,
+                                    [line]))
+
+            m = IRCServerConnection.command_re.match(line)
+            if m is None:
+                LOG.warning("irkerd: ignoring unparseable line from %s: %r" % (
+                    self.target, line))
+                continue
+
+            prefix = None
+            command = None
+            arguments = []
+            if m.group("prefix"):
+                prefix = m.group("prefix")
+                # The first prefix seen is remembered as the server's name.
+                if not self.real_server_name:
+                    self.real_server_name = prefix
+            if m.group("command"):
+                command = m.group("command").lower()
+            if m.group("argument"):
+                # Everything after " :" is one trailing argument that
+                # may itself contain spaces.
+                a = m.group("argument").split(" :", 1)
+                arguments = a[0].split()
+                if len(a) == 2:
+                    arguments.append(a[1])
+
+            command = IRCServerConnection.codemap.get(command, command)
+            target = None
+            if command in ["privmsg", "notice"]:
+                if arguments:
+                    target = arguments.pop(0)
+            elif command == "quit":
+                arguments = arguments[:1]
+            elif arguments:
+                target = arguments[0]
+                # PING keeps its argument list; everything else drops
+                # the target from it.
+                if command != "ping":
+                    arguments = arguments[1:]
+
+            LOG.debug("command: %s, source: %s, target: %s, arguments: %s" % (
+                command, prefix, target, arguments))
+            self.handle_event(Event(command, prefix, target, arguments))
+
+    def handle_event(self, event):
+        "Pass event to the master's handlers, then to this connection's own."
+        self.master.handle_event(self, event)
+        if event.type in self.event_handlers:
+            for fn in self.event_handlers[event.type]:
+                fn(self, event)
+
+    def is_connected(self):
+        "Does this connection currently hold a socket?"
+        return self.socket is not None
+
+    def disconnect(self, message=""):
+        """Close the socket and fire a "disconnect" event carrying message.
+
+        Does nothing when there is no socket.
+        """
+        if self.socket is None:
+            return
+        # Don't send a QUIT here - causes infinite loop!
+        try:
+            self.socket.shutdown(socket.SHUT_WR)
+        except socket.error:
+            # E.g. the socket never got connected; still close it below.
+            pass
+        try:
+            self.socket.close()
+        except socket.error:
+            pass
+        self.socket = None
+        self.handle_event(
+            Event("disconnect", self.target.servername, "", [message]))
+
+    def join(self, channel, key=""):
+        "Send JOIN for channel, with the channel key if one is given."
+        self.ship("JOIN %s%s" % (channel, (key and (" " + key))))
+
+    def mode(self, target, command):
+        "Send a MODE command for target."
+        self.ship("MODE %s %s" % (target, command))
+
+    def nick(self, newnick):
+        "Send NICK to claim or change the nickname."
+        self.ship("NICK " + newnick)
+
+    def part(self, channel, message=""):
+        "Send PART for channel, with an optional parting message."
+        cmd_parts = ['PART', channel]
+        if message:
+            # The message is a trailing parameter: without the colon
+            # only its first word would be taken as the message.
+            cmd_parts.append(':' + message)
+        self.ship(' '.join(cmd_parts))
+
+    def privmsg(self, target, text):
+        "Send text to a channel or nick."
+        self.ship("PRIVMSG %s :%s" % (target, text))
+
+    def quit(self, message=""):
+        "Send QUIT, with an optional quit message."
+        self.ship("QUIT" + (message and (" :" + message)))
+
+    def user(self, username, realname):
+        "Send the USER registration command."
+        self.ship("USER %s 0 * :%s" % (username, realname))
+
+    def ship(self, string):
+        "Ship a command to the server, appending CR/LF"
+        try:
+            self.socket.send(string.encode('utf-8') + b'\r\n')
+            LOG.debug("TO: %s" % string)
+        except socket.error:
+            self.disconnect("Connection reset by peer.")
+
+class Event(object):
+    """Plain record describing one IRC event.
+
+    Carries the event type, its source and target, and a list of
+    arguments (a fresh empty list when none is supplied).
+    """
+    def __init__(self, evtype, source, target, arguments=None):
+        self.type, self.source, self.target = evtype, source, target
+        self.arguments = [] if arguments is None else arguments
+
+def is_channel(string):
+    "Does the string start with one of the channel prefix characters?"
+    if not string:
+        # Empty or None: hand the falsy value back unchanged.
+        return string
+    return string[0] in "#&+!"
+
+class Connection:
+ "One IRC session to a target server, with a message queue and a consumer thread."
+ # The status attribute moves through these values: None (fresh),
+ # "unseen" (consumer thread started, never connected), "handshaking",
+ # "ready", "disconnected" and "expired".
+ def __init__(self, irker, target, nick_template, nick_needs_number=False,
+ password=None, **kwargs):
+ self.irker = irker
+ self.target = target
+ self.nick_template = nick_template
+ self.nick_needs_number = nick_needs_number
+ self.password = password
+ # Remaining keyword arguments are passed on to connect().
+ self.kwargs = kwargs
+ self.nick_trial = None
+ self.connection = None
+ self.status = None
+ self.last_xmit = time.time()
+ self.last_ping = time.time()
+ # Maps channel name to the time of the last transmission to it.
+ self.channels_joined = {}
+ # Maps channel prefix character to the server's announced limit.
+ self.channel_limits = {}
+ # The consumer thread
+ self.queue = queue.Queue()
+ self.thread = None
+ def nickname(self, n=None):
+ "Return a name for the nth server connection."
+ if n is None:
+ n = self.nick_trial
+ if self.nick_needs_number:
+ return self.nick_template % n
+ else:
+ return self.nick_template
+ def handle_ping(self):
+ "Register the fact that the server has pinged this connection."
+ self.last_ping = time.time()
+ def handle_welcome(self):
+ "The server says we're OK, with a non-conflicting nick."
+ self.status = "ready"
+ LOG.info("nick %s accepted" % self.nickname())
+ if self.password:
+ self.connection.privmsg("nickserv", "identify %s" % self.password)
+ def handle_badnick(self):
+ "The server says our nick is ill-formed or has a conflict."
+ LOG.info("nick %s rejected" % self.nickname())
+ if self.nick_needs_number:
+ # Randomness prevents a malicious user or bot from
+ # anticipating the next trial name in order to block us
+ # from completing the handshake.
+ self.nick_trial += random.randint(1, 3)
+ self.last_xmit = time.time()
+ self.connection.nick(self.nickname())
+ # Otherwise fall through, it might be possible to
+ # recover manually.
+ def handle_disconnect(self):
+ "Server disconnected us for flooding or some other reason."
+ self.connection = None
+ if self.status != "expired":
+ self.status = "disconnected"
+ # Avoid flooding the server if it disconnects
+ # immediately on successful login.
+ time.sleep(RECONNECT_DELAY)
+ def handle_kick(self, outof):
+ "We've been kicked from channel outof: forget it and drop its queued messages."
+ self.status = "handshaking"
+ try:
+ del self.channels_joined[outof]
+ except KeyError:
+ LOG.error("irkerd: kicked by %s from %s that's not joined" % (
+ self.target, outof))
+ # Drain the queue, keeping only the entries for other channels,
+ # then put the survivors back in their original order.
+ qcopy = []
+ while not self.queue.empty():
+ (channel, message, key) = self.queue.get()
+ if channel != outof:
+ qcopy.append((channel, message, key))
+ for (channel, message, key) in qcopy:
+ self.queue.put((channel, message, key))
+ self.status = "ready"
+ def enqueue(self, channel, message, key, quit_after=False):
+ "Enqueue a message for transmission, starting the consumer thread if needed."
+ if self.thread is None or not self.thread.is_alive():
+ self.status = "unseen"
+ self.thread = threading.Thread(target=self.dequeue)
+ self.thread.setDaemon(True)
+ self.thread.start()
+ self.queue.put((channel, message, key))
+ if quit_after:
+ # A None message is the consumer's cue to send QUIT.
+ self.queue.put((channel, None, key))
+ def dequeue(self):
+ "Try to ship pending messages from the queue."
+ try:
+ while True:
+ # We want to be kind to the IRC servers and not hold unused
+ # sockets open forever, so they have a time-to-live. The
+ # loop is coded this particular way so that we can drop
+ # the actual server connection when its time-to-live
+ # expires, then reconnect and resume transmission if the
+ # queue fills up again.
+ if self.queue.empty():
+ # Queue is empty, at some point we want to time out
+ # the connection rather than holding a socket open in
+ # the server forever.
+ now = time.time()
+ xmit_timeout = now > self.last_xmit + XMIT_TTL
+ ping_timeout = now > self.last_ping + PING_TTL
+ if self.status == "disconnected":
+ # If the queue is empty, we can drop this connection.
+ self.status = "expired"
+ break
+ elif xmit_timeout or ping_timeout:
+ LOG.info((
+ "timing out connection to %s at %s "
+ "(ping_timeout=%s, xmit_timeout=%s)") % (
+ self.target, time.asctime(), ping_timeout,
+ xmit_timeout))
+ with self.irker.irc.mutex:
+ # Detach first so that events from the dying server
+ # connection are no longer routed to this object.
+ self.connection.context = None
+ self.connection.quit("transmission timeout")
+ self.connection = None
+ self.status = "disconnected"
+ else:
+ # Prevent this thread from hogging the CPU by pausing
+ # for just a little bit after the queue-empty check.
+ # As long as this is less than the duration of a human
+ # reflex arc it is highly unlikely any human will ever
+ # notice.
+ time.sleep(ANTI_BUZZ_DELAY)
+ elif self.status == "disconnected" \
+ and time.time() > self.last_xmit + DISCONNECT_TTL:
+ # Queue is nonempty, but the IRC server might be
+ # down. Letting failed connections retain queue
+ # space forever would be a memory leak.
+ self.status = "expired"
+ break
+ elif not self.connection and self.status != "expired":
+ # Queue is nonempty but server isn't connected.
+ with self.irker.irc.mutex:
+ self.connection = self.irker.irc.newserver()
+ self.connection.context = self
+ # Try to avoid colliding with other instances
+ self.nick_trial = random.randint(1, 990)
+ self.channels_joined = {}
+ try:
+ # This will throw
+ # IRCServerConnectionError on failure
+ self.connection.connect(
+ target=self.target,
+ nickname=self.nickname(),
+ **self.kwargs)
+ self.status = "handshaking"
+ LOG.info("XMIT_TTL bump (%s connection) at %s" % (
+ self.target, time.asctime()))
+ self.last_xmit = time.time()
+ self.last_ping = time.time()
+ except IRCServerConnectionError as e:
+ LOG.error("irkerd: %s" % e)
+ self.status = "expired"
+ break
+ elif self.status == "handshaking":
+ if time.time() > self.last_xmit + HANDSHAKE_TTL:
+ self.status = "expired"
+ break
+ else:
+ # Don't buzz on the empty-queue test while we're
+ # handshaking
+ time.sleep(ANTI_BUZZ_DELAY)
+ elif self.status == "unseen" \
+ and time.time() > self.last_xmit + UNSEEN_TTL:
+ # Nasty people could attempt a denial-of-service
+ # attack by flooding us with requests with invalid
+ # servernames. We guard against this by rapidly
+ # expiring connections that have a nonempty queue but
+ # have never had a successful open.
+ self.status = "expired"
+ break
+ elif self.status == "ready":
+ (channel, message, key) = self.queue.get()
+ if channel not in self.channels_joined:
+ self.connection.join(channel, key=key)
+ LOG.info("joining %s on %s." % (channel, self.target))
+ # None is magic - it's a request to quit the server
+ if message is None:
+ self.connection.quit()
+ # An empty message might be used as a keepalive or
+ # to join a channel for logging, so suppress the
+ # privmsg send unless there is actual traffic.
+ elif message:
+ for segment in message.split("\n"):
+ # Truncate the message if it's too long,
+ # but we're working with characters here,
+ # not bytes, so we could be off.
+ # 500 = 512 - CRLF - 'PRIVMSG ' - ' :'
+ maxlength = 500 - len(channel)
+ if len(segment) > maxlength:
+ segment = segment[:maxlength]
+ try:
+ self.connection.privmsg(channel, segment)
+ except ValueError as err:
+ LOG.warning((
+ "rejected a message to %s on %s "
+ "because: %s") % (
+ channel, self.target, UNICODE_TYPE(err)))
+ LOG.debug(traceback.format_exc())
+ time.sleep(ANTI_FLOOD_DELAY)
+ # Record the transmit time for the connection and the channel.
+ self.last_xmit = self.channels_joined[channel] = time.time()
+ LOG.info("XMIT_TTL bump (%s transmission) at %s" % (
+ self.target, time.asctime()))
+ self.queue.task_done()
+ elif self.status == "expired":
+ LOG.error(
+ "irkerd: we're expired but still running! This is a bug.")
+ break
+ except Exception as e:
+ LOG.error("irkerd: exception %s in thread for %s" % (e, self.target))
+ # Maybe this should have its own status?
+ self.status = "expired"
+ LOG.debug(traceback.format_exc())
+ finally:
+ # Make sure we don't leave any zombies behind
+ if self.connection:
+ self.connection.close()
+ def live(self):
+ "Should this connection not be scavenged?"
+ return self.status != "expired"
+ def joined_to(self, channel):
+ "Is this connection joined to the specified channel?"
+ return channel in self.channels_joined
+ def accepting(self, channel):
+ "Can this connection accept a join of this channel?"
+ if self.channel_limits:
+ match_count = 0
+ for already in self.channels_joined:
+ # This obscure code is because the RFCs allow separate limits
+ # by channel type (indicated by the first character of the name)
+ # a feature that is almost never actually used.
+ if already[0] == channel[0]:
+ match_count += 1
+ return match_count < self.channel_limits.get(channel[0], CHANNEL_MAX)
+ else:
+ return len(self.channels_joined) < CHANNEL_MAX
+
+class Target():
+    """A transmission target parsed from an IRC URL.
+
+    Exposes ssl, username, password, servername, port, channel and
+    key attributes taken from the URL.
+    """
+    def __init__(self, url):
+        self.url = url
+        parts = urllib_parse.urlparse(url)
+        self.ssl = (parts.scheme == 'ircs')
+        self.username = parts.username
+        self.password = parts.password
+        self.servername = parts.hostname
+        # Without an explicit port, use the usual TLS or plain IRC port.
+        self.port = parts.port or (6697 if self.ssl else 6667)
+        # IRC channel names are case-insensitive.  If we don't smash
+        # case here we may run into problems later. There was a bug
+        # observed on irc.rizon.net where an irkerd user specified #Channel,
+        # got kicked, and irkerd crashed because the server returned
+        # "#channel" in the notification that our kick handler saw.
+        channel = parts.path.lstrip('/').lower()
+        # This deals with a tweak in recent versions of urlparse.
+        if parts.fragment:
+            channel += "#" + parts.fragment
+        # A trailing ",isnick" marks a nick rather than a channel;
+        # only channels get a default "#" prefix.
+        suffix = ",isnick"
+        names_a_nick = channel.endswith(suffix)
+        if names_a_nick:
+            channel = channel[:-len(suffix)]
+        elif channel and channel[0] not in "#&+":
+            channel = "#" + channel
+        self.channel = channel
+        # support both channel?secret and channel?key=secret
+        if parts.query:
+            self.key = re.sub("^key=", "", parts.query)
+        else:
+            self.key = ""
+
+    def __str__(self):
+        "Represent this instance as a string"
+        for candidate in (self.servername, self.url):
+            if candidate:
+                return candidate
+        return repr(self)
+
+    def validate(self):
+        "Raise InvalidRequest if the URL is missing a critical component"
+        required = (
+            (self.servername, 'target URL missing a servername: %r'),
+            (self.channel, 'target URL missing a channel: %r'),
+        )
+        for (value, complaint) in required:
+            if not value:
+                raise InvalidRequest(complaint % self.url)
+
+    def server(self):
+        "Return a hashable tuple representing the destination server."
+        return (self.servername, self.port)
+
+class Dispatcher:
+    "Manage connections to a particular server-port combination."
+    def __init__(self, irker, **kwargs):
+        self.irker = irker
+        # Passed on unchanged to every Connection this dispatcher creates.
+        self.kwargs = kwargs
+        self.connections = []
+
+    def dispatch(self, channel, message, key, quit_after=False):
+        """Dispatch a message for our server-port combination.
+
+        Tries, in order: a live connection already joined to the
+        channel; a live connection with room for one more channel; a
+        live connection holding a channel idle for longer than
+        CHANNEL_TTL, which is parted to make room; and finally a new
+        connection.
+        """
+        # First, check if there is room for another channel
+        # on any of our existing connections.
+        connections = [x for x in self.connections if x.live()]
+        eligibles = [x for x in connections if x.joined_to(channel)] \
+                    or [x for x in connections if x.accepting(channel)]
+        if eligibles:
+            eligibles[0].enqueue(channel, message, key, quit_after)
+            return
+        # All connections are full up. Look for one old enough to be
+        # scavenged.
+        cutoff = time.time() - CHANNEL_TTL
+        ancients = []
+        for connection in connections:
+            # channels_joined maps each channel to its last transmit time.
+            for (chan, last_used) in connection.channels_joined.items():
+                if last_used < cutoff:
+                    ancients.append((connection, chan, last_used))
+        if ancients:
+            ancients.sort(key=lambda x: x[2])
+            (found_connection, drop_channel, _last_used) = ancients[0]
+            # The PART has to go through the underlying server
+            # connection, which is None while disconnected.
+            if found_connection.connection is not None:
+                found_connection.connection.part(
+                    drop_channel, "scavenged by irkerd")
+            del found_connection.channels_joined[drop_channel]
+            found_connection.enqueue(channel, message, key, quit_after)
+            return
+        # All existing channels had recent activity
+        newconn = Connection(self.irker, **self.kwargs)
+        self.connections.append(newconn)
+        newconn.enqueue(channel, message, key, quit_after)
+
+    def live(self):
+        "Does this server-port combination have any live connections?"
+        # Expired connections are pruned as a side effect.
+        self.connections = [x for x in self.connections if x.live()]
+        return len(self.connections) > 0
+
+    def pending(self):
+        "Return all connections with pending traffic."
+        return [x for x in self.connections if not x.queue.empty()]
+
+    def last_xmit(self):
+        "Return the time of the most recent transmission."
+        return max(x.last_xmit for x in self.connections)
+
+class Irker:
+    """Persistent IRC multiplexer.
+
+    Owns the IRCClient, wires IRC events to the Connection objects
+    they concern, and routes parsed JSON requests to one Dispatcher
+    per (servername, port) pair.
+    """
+    def __init__(self, logfile=None, **kwargs):
+        self.logfile = logfile
+        # Passed on unchanged to every Dispatcher.
+        self.kwargs = kwargs
+        self.irc = IRCClient()
+        self.irc.add_event_handler("ping", self._handle_ping)
+        self.irc.add_event_handler("welcome", self._handle_welcome)
+        self.irc.add_event_handler("erroneusnickname", self._handle_badnick)
+        self.irc.add_event_handler("nicknameinuse", self._handle_badnick)
+        self.irc.add_event_handler("nickcollision", self._handle_badnick)
+        self.irc.add_event_handler("unavailresource", self._handle_badnick)
+        self.irc.add_event_handler("featurelist", self._handle_features)
+        self.irc.add_event_handler("disconnect", self._handle_disconnect)
+        self.irc.add_event_handler("kick", self._handle_kick)
+        self.irc.add_event_handler("every_raw_message", self._handle_every_raw_message)
+        # Maps (servername, port) to the Dispatcher for that server.
+        self.servers = {}
+
+    def thread_launch(self):
+        "Run the IRC client's receive loop in a daemon thread."
+        thread = threading.Thread(target=self.irc.spin)
+        thread.setDaemon(True)
+        self.irc._thread = thread
+        thread.start()
+
+    def _handle_ping(self, connection, _event):
+        "PING arrived, bump the last-received time for the connection."
+        if connection.context:
+            connection.context.handle_ping()
+
+    def _handle_welcome(self, connection, _event):
+        "Welcome arrived, nick accepted for this connection."
+        if connection.context:
+            connection.context.handle_welcome()
+
+    def _handle_badnick(self, connection, _event):
+        "Nick not accepted for this connection."
+        if connection.context:
+            connection.context.handle_badnick()
+
+    def _handle_features(self, connection, event):
+        """Act on the server's feature list.
+
+        Sets deaf mode where the server offers it (unless traffic is
+        being logged) and records the per-prefix channel limits
+        announced through MAXCHANNELS or CHANLIMIT.
+        """
+        if connection.context:
+            cxt = connection.context
+            arguments = event.arguments
+            for lump in arguments:
+                if lump.startswith("DEAF="):
+                    if not self.logfile:
+                        connection.mode(cxt.nickname(), "+"+lump[5:])
+                elif lump.startswith("MAXCHANNELS="):
+                    try:
+                        m = int(lump[12:])
+                    except ValueError:
+                        # A bad value must not raise into the thread
+                        # that reads from the server.
+                        LOG.error("irkerd: ill-formed MAXCHANNELS property")
+                        continue
+                    for pref in "#&+":
+                        cxt.channel_limits[pref] = m
+                    LOG.info("%s maxchannels is %d" % (connection.target, m))
+                elif lump.startswith("CHANLIMIT=#:"):
+                    # The value is a comma-separated list of
+                    # <prefixes>:<limit> pairs.
+                    limits = lump[10:].split(",")
+                    try:
+                        for token in limits:
+                            (prefixes, limit) = token.split(":")
+                            limit = int(limit)
+                            for c in prefixes:
+                                cxt.channel_limits[c] = limit
+                        LOG.info("%s channel limit map is %s" % (
+                            connection.target, cxt.channel_limits))
+                    except ValueError:
+                        LOG.error("irkerd: ill-formed CHANLIMIT property")
+
+    def _handle_disconnect(self, connection, _event):
+        "Server hung up the connection."
+        LOG.info("server %s disconnected" % connection.target)
+        connection.close()
+        if connection.context:
+            connection.context.handle_disconnect()
+
+    def _handle_kick(self, connection, event):
+        "We have been kicked out of a channel."
+        target = event.target
+        LOG.info("irker has been kicked from %s on %s" % (
+            target, connection.target))
+        if connection.context:
+            connection.context.handle_kick(target)
+
+    def _handle_every_raw_message(self, _connection, event):
+        "Log all messages when in watcher mode."
+        if self.logfile:
+            with open(self.logfile, "ab") as logfp:
+                message = u"%03f|%s|%s\n" % \
+                    (time.time(), event.source, event.arguments[0])
+                logfp.write(message.encode('utf-8'))
+
+    def pending(self):
+        "Do we have any pending message traffic?"
+        return [k for (k, v) in self.servers.items() if v.pending()]
+
+    def _parse_request(self, line):
+        """Request-parsing helper for the handle() method.
+
+        Returns (targets, message).  Raises InvalidRequest for a
+        structurally bad request, or ValueError if the line is not
+        JSON at all.  Individual bad URLs are logged and skipped.
+        """
+        request = json.loads(line.strip())
+        if not isinstance(request, dict):
+            raise InvalidRequest(
+                "request is not a JSON dictionary: %r" % request)
+        if "to" not in request or "privmsg" not in request:
+            raise InvalidRequest(
+                "malformed request - 'to' or 'privmsg' missing: %r" % request)
+        channels = request['to']
+        message = request['privmsg']
+        if not isinstance(channels, (list, UNICODE_TYPE)):
+            raise InvalidRequest(
+                "malformed request - unexpected channel type: %r" % channels)
+        if not isinstance(message, UNICODE_TYPE):
+            raise InvalidRequest(
+                "malformed request - unexpected message type: %r" % message)
+        if not isinstance(channels, list):
+            channels = [channels]
+        targets = []
+        for url in channels:
+            try:
+                if not isinstance(url, UNICODE_TYPE):
+                    raise InvalidRequest(
+                        "malformed request - URL has unexpected type: %r" %
+                        url)
+                target = Target(url)
+                target.validate()
+            except InvalidRequest as e:
+                LOG.error("irkerd: " + UNICODE_TYPE(e))
+            else:
+                targets.append(target)
+        return (targets, message)
+
+    def handle(self, line, quit_after=False):
+        """Perform a JSON relay request.
+
+        line is the request text.  With quit_after set, each target's
+        server is asked to QUIT once the message has been queued.
+        Errors in the request are logged, never raised.
+        """
+        try:
+            targets, message = self._parse_request(line=line)
+            for target in targets:
+                if target.server() not in self.servers:
+                    self.servers[target.server()] = Dispatcher(
+                        self, target=target, **self.kwargs)
+                self.servers[target.server()].dispatch(
+                    target.channel, message, target.key, quit_after=quit_after)
+                # GC dispatchers with no active connections.  Iterate
+                # over a snapshot of the keys: deleting from a dict
+                # while iterating over its live key view raises
+                # RuntimeError on Python 3.
+                for servername in list(self.servers.keys()):
+                    if not self.servers[servername].live():
+                        del self.servers[servername]
+                # If we might be pushing a resource limit even
+                # after garbage collection, remove a session.  The
+                # goal here is to head off DoS attacks that aim at
+                # exhausting thread space or file descriptors.
+                # The cost is that attempts to DoS this service
+                # will cause lots of join/leave spam as we
+                # scavenge old channels after connecting to new
+                # ones. The particular method used for selecting a
+                # session to be terminated doesn't matter much; we
+                # choose the one longest idle on the assumption
+                # that message activity is likely to be clumpy.
+                if len(self.servers) >= CONNECTION_MAX:
+                    oldest = min(
+                        self.servers.keys(),
+                        key=lambda name: self.servers[name].last_xmit())
+                    del self.servers[oldest]
+        except InvalidRequest as e:
+            LOG.error("irkerd: " + UNICODE_TYPE(e))
+        except ValueError:
+            LOG.error("irkerd: " + "can't recognize JSON on input: %r" % line)
+        except RuntimeError:
+            LOG.error("irkerd: " + "wildly malformed JSON blew the parser stack.")
+
+class IrkerTCPHandler(socketserver.StreamRequestHandler):
+    "Feed each line received on a TCP connection to the global irker."
+    def handle(self):
+        line = self.rfile.readline()
+        # An empty read means the peer has closed the connection.
+        while line:
+            if not isinstance(line, UNICODE_TYPE):
+                line = UNICODE_TYPE(line, 'utf-8')
+            irker.handle(line=line.strip())
+            line = self.rfile.readline()
+
+class IrkerUDPHandler(socketserver.BaseRequestHandler):
+    "Feed the payload of one UDP datagram to the global irker."
+    def handle(self):
+        # self.request is a pair; its first element is the datagram
+        # payload, and the second element is not used here.
+        payload = self.request[0].strip()
+        if isinstance(payload, UNICODE_TYPE):
+            text = payload
+        else:
+            text = UNICODE_TYPE(payload, 'utf-8')
+        irker.handle(line=text.strip())
+
+def in_background():
+    "Is this process running in background?"
+    try:
+        own_group = os.getpgrp()
+        # Process group in the foreground of the terminal on
+        # descriptor 1, if there is one.
+        foreground_group = os.tcgetpgrp(1)
+    except OSError:
+        return True
+    return own_group != foreground_group
+
+# Script entry point: parse the command line, set up logging, then either
+# send one message and exit (--immediate) or serve requests until interrupted.
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__.strip().splitlines()[0])
+    parser.add_argument(
+        '-c', '--ca-file', metavar='PATH',
+        help='file of trusted certificates for SSL/TLS')
+    parser.add_argument(
+        '-e', '--cert-file', metavar='PATH',
+        help='pem file used to authenticate to the server')
+    parser.add_argument(
+        '-d', '--log-level', metavar='LEVEL', choices=LOG_LEVELS,
+        help='how much to log to the log file (one of %(choices)s)')
+    parser.add_argument(
+        '-H', '--host', metavar='ADDRESS', default=HOST,
+        help='IP address to listen on')
+    parser.add_argument(
+        '-l', '--log-file', metavar='PATH',
+        help='file for saving captured message traffic')
+    parser.add_argument(
+        '-n', '--nick', metavar='NAME', default='irker%03d',
+        help="nickname (optionally with a '%%.*d' server connection marker)")
+    parser.add_argument(
+        '-p', '--password', metavar='PASSWORD',
+        help='NickServ password')
+    parser.add_argument(
+        '-i', '--immediate', metavar='IRC-URL',
+        help=(
+            'send a single message to IRC-URL and exit. The message is the '
+            'first positional argument.'))
+    parser.add_argument(
+        '-V', '--version', action='version',
+        version='%(prog)s {0}'.format(version))
+    parser.add_argument(
+        'message', metavar='MESSAGE', nargs='?',
+        help='message for --immediate mode')
+    args = parser.parse_args()
+
+    if not args.log_file and in_background():
+        # There's a case for falling back to address = ('localhost', 514)
+        # But some systems (including OS X) disable this for security reasons.
+        handler = logging.handlers.SysLogHandler(facility='daemon')
+    else:
+        handler = logging.StreamHandler()
+
+    LOG.addHandler(handler)
+    if args.log_level:
+        log_level = getattr(logging, args.log_level.upper())
+        LOG.setLevel(log_level)
+
+    irker = Irker(
+        logfile=args.log_file,
+        nick_template=args.nick,
+        # Truthy when the nick template contains a %d-style marker.
+        nick_needs_number=re.search('%.*d', args.nick),
+        password=args.password,
+        cafile=args.ca_file,
+        certfile=args.cert_file,
+        )
+    LOG.info("irkerd version %s" % version)
+    if args.immediate:
+        if not args.message:
+            # Text read from stdin is spliced into the JSON request
+            # below, so escape it as the contents of a JSON string:
+            # newline becomes '\n', tab becomes '\t', and quotes and
+            # backslashes are protected.  The JSON decoder undoes
+            # this.  json.dumps() is used because the "string_escape"
+            # codec exists only on Python 2 and leaves double quotes
+            # unescaped; [1:-1] strips the surrounding quotes.
+            args.message = json.dumps(sys.stdin.read())[1:-1]
+        irker.irc.add_event_handler("quit", lambda _c, _e: sys.exit(0))
+        # A message given on the command line is spliced in verbatim,
+        # so JSON escapes such as \n in it are interpreted.
+        irker.handle('{"to":"%s","privmsg":"%s"}' % (
+            args.immediate, args.message), quit_after=True)
+        irker.irc.spin()
+    else:
+        if args.message:
+            LOG.error(
+                'irkerd: message argument given (%r), but --immediate not set' % (
+                    args.message))
+            raise SystemExit(1)
+        irker.thread_launch()
+        try:
+            tcpserver = socketserver.TCPServer((args.host, PORT), IrkerTCPHandler)
+            udpserver = socketserver.UDPServer((args.host, PORT), IrkerUDPHandler)
+            for server in [tcpserver, udpserver]:
+                # One daemon thread per listener; the main thread only
+                # waits for a signal.
+                listener = threading.Thread(target=server.serve_forever)
+                listener.daemon = True
+                listener.start()
+            try:
+                signal.pause()
+            except KeyboardInterrupt:
+                raise SystemExit(1)
+        except socket.error as e:
+            LOG.error("irkerd: server launch failed: %r\n" % e)
+
+# end
diff --git a/irkerd.service b/irkerd.service
new file mode 100644
index 0000000..4e75ae2
--- /dev/null
+++ b/irkerd.service
@@ -0,0 +1,16 @@
+# Copyright 2012 Wulf C. Krueger <philantrop@exherbo.org>
+# Distributed under the terms of the BSD LICENSE
+
+[Unit]
+Description=Internet Relay Chat (IRC) notification daemon
+Requires=network.target
+Documentation=man:irkerd(8) man:irkerhook(1) man:irk(1)
+
+[Service]
+# Run the daemon as the unprivileged "irker" account.
+User=irker
+ExecStart=/usr/bin/irkerd
+
+[Install]
+WantedBy=multi-user.target
+Alias=irker.service
diff --git a/irkerd.xml b/irkerd.xml
new file mode 100644
index 0000000..59b7dae
--- /dev/null
+++ b/irkerd.xml
@@ -0,0 +1,249 @@
+<!DOCTYPE refentry PUBLIC
+ "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "docbook/docbookx.dtd">
+<refentry id='irkerd.8'>
+<refmeta>
+<refentrytitle>irkerd</refentrytitle>
+<manvolnum>8</manvolnum>
+<refmiscinfo class='date'>Aug 27 2012</refmiscinfo>
+<refmiscinfo class='source'>irker</refmiscinfo>
+<refmiscinfo class='product'>irker</refmiscinfo>
+<refmiscinfo class='manual'>Commands</refmiscinfo>
+</refmeta>
+<refnamediv id='name'>
+<refname>irkerd</refname>
+<refpurpose>relay for shipping notifications to IRC servers</refpurpose>
+</refnamediv>
+<refsynopsisdiv id='synopsis'>
+
+<cmdsynopsis>
+ <command>irkerd</command>
+ <arg>-c <replaceable>ca-file</replaceable></arg>
+ <arg>-d <replaceable>debuglevel</replaceable></arg>
+ <arg>-e <replaceable>cert-file</replaceable></arg>
+ <arg>-l <replaceable>logfile</replaceable></arg>
+ <arg>-H <replaceable>host</replaceable></arg>
+ <arg>-n <replaceable>nick</replaceable></arg>
+ <arg>-p <replaceable>password</replaceable></arg>
+ <arg>-i <replaceable>IRC-URL</replaceable></arg>
+ <arg>-V</arg>
+ <arg>-h</arg>
+ <arg choice='opt'><replaceable>message text</replaceable></arg>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1 id='description'><title>DESCRIPTION</title>
+
+<para><application>irkerd</application> is a specialized write-only IRC
+client intended to be used for shipping notification messages to IRC
+channels. The use case in mind when it was designed was broadcasting
+notifications from commit hooks in version-control systems.</para>
+
+<para>The main advantage of relaying through this daemon over
+individual scripted sends from applications is that it can maintain
+connection state for multiple channels, rather than producing obnoxious
+join/leave channel spam on every message.</para>
+
+<para><application>irkerd</application> is a socket server that
+listens on port 6659 for UDP or TCP packets carrying textual request
+lines containing JSON objects and terminated by a newline. Each JSON
+object must have two members: "to" specifying a destination or
+destination list, and "privmsg" specifying the message text.
+Examples:
+
+<programlisting>
+{"to":"irc://chat.freenode.net/git-ciabot", "privmsg":"Hello, world!"}
+{"to":["irc://chat.freenode.net/#git-ciabot","irc://chat.freenode.net/#gpsd"],"privmsg":"Multichannel test"}
+{"to":"irc://chat.hypothetical.net:6668/git-ciabot", "privmsg":"Hello, world!"}
+{"to":"ircs://chat.hypothetical.net/git-private?key=topsecret", "privmsg":"Keyed channel test"}
+{"to":"ircs://:topsecret@chat.example.net/git-private", "privmsg":"Password-protected server test"}
+</programlisting></para>
+
+<para>If the channel part of the URL does not have one of the prefix
+characters <quote>#</quote>, <quote>&amp;</quote>, or
+<quote>+</quote>, a <quote>#</quote> will be prepended to it before
+shipping - <emphasis>unless</emphasis> the channel part has the suffix
+",isnick" (which is unconditionally removed).</para>
+
+<para>The host part of the URL may have a port-number suffix separated by a
+colon, as shown in the third example; otherwise
+<application>irkerd</application> sends plaintext messages to the default
+6667 IRC port of each server, and SSL/TLS messages to 6697.</para>
+
+<para>The password for password-protected servers can be set using the
+usual <quote>[{username}:{password}@]{host}:{port}</quote> defined in
+RFC 3986, as shown in the fifth example. Non-empty URL usernames
+override the default <quote>irker</quote> username.</para>
+
+<para>When the <quote>to</quote> URL uses the <quote>ircs</quote>
+scheme (as shown in the fourth and fifth examples), the connection to
+the IRC server is made via SSL/TLS (vs. a plaintext connection with the
+<quote>irc</quote> scheme). To connect via SSL/TLS with Python 2.x,
+you need to explicitly declare the certificate authority file used to
+verify server certificates. For example, <quote>-c
+/etc/ssl/certs/ca-certificates.crt</quote>. In Python 3.2 and later,
+you can still set this option to declare a custom CA file, but if you
+don't set it <application>irkerd</application> will use OpenSSL's
+default file
+(using Python's
+<quote>ssl.SSLContext.set_default_verify_paths</quote>). In Python
+3.2 and later, <quote>ssl.match_hostname</quote> is used to ensure the
+server certificate belongs to the intended host, as well as being
+signed by a trusted CA.</para>
+
+<para>To join password-protected (mode +k) channels, the channel part of the
+URL may be followed with a query-string indicating the channel key, of the
+form <quote>?secret</quote> or <quote>?key=secret</quote>, where
+<quote>secret</quote> is the channel key.</para>
+
+<para>An empty message is legal and will cause
+<application>irkerd</application> to join or maintain a connection to
+the target channels without actually emitting a message. This may be
+useful for advertising that an instance is up and running, or for
+joining a channel to log its traffic.</para>
+</refsect1>
+
+<refsect1 id='options'><title>OPTIONS</title>
+
+<para><application>irkerd</application> takes the following options:</para>
+
+<variablelist>
+<varlistentry>
+<term>-d</term>
+<listitem>
+ <para>
+ Takes a following value, setting the debugging level from it;
+ possible values are 'critical', 'error', 'warning', 'info',
+ 'debug'. This option will generally only be of interest to
+ developers, as the logs are designed to help trace
+ <application>irkerd</application>'s internal state. These tracing
+ logs are independent of the traffic logs controlled by
+ <quote>-l</quote>.
+ </para>
+ <para>
+ Logging will be to standard error (if
+ <application>irkerd</application> is running in the foreground) or
+ to <quote>/dev/syslog</quote> with facility "daemon" (if
+ <application>irkerd</application> is running in the background).
+ The background-ness of <application>irkerd</application> is
+ determined by comparing the process group id with the process
+ group associated with the terminal attached to stdout (with
+ non-matches for background processes). We assume you aren't
+ running <application>irkerd</application> in Windows or another OS
+ that doesn't support <quote>os.getpgrp</quote> or
+ <quote>tcgetpgrp</quote>. We assume that if stdout is attached to
+ a TTY associated with the same process group as
+ <application>irkerd</application>, you do intend to log to stderr
+ and not syslog.
+ </para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>-c</term>
+<listitem><para>Takes a following filename and uses it as the
+certificate authority file for verifying server certificates on
+SSL/TLS (<quote>ircs</quote>) connections, for example
+<quote>/etc/ssl/certs/ca-certificates.crt</quote>.
+</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-e</term>
+<listitem><para>Takes a following filename in pem format and uses it
+to authenticate to the IRC server. You must be connecting to the IRC
+server over SSL for this to function properly. This is commonly known
+as <quote>CertFP.</quote></para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>-l</term>
+<listitem><para>Takes a following filename, logs traffic to that file.
+Each log line consists of three |-separated fields; a numeric
+timestamp in Unix time, the FQDN of the sending server, and the
+message data.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-H</term>
+<listitem><para>Takes a following hostname, and binds to that address
+when listening for messages. <application>irkerd</application> binds
+to localhost by default, but you may want to use your host's public
+address to listen on a local network. Listening on a public interface
+is not recommended, as it makes spamming IRC channels very
+easy.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-n</term>
+<listitem><para>Takes a following value, setting the nick
+to be used. If the nick contains a numeric format element
+(such as %03d) it is used to generate suffixed fallback names
+in the event of a nick collision.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-p</term>
+<listitem><para>Takes a following value, setting a nickserv
+password to be used. If given, this password is shipped to
+authenticate the nick on receipt of a welcome message.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-i</term>
+<listitem><para>Immediate mode, to be run in foreground. Takes a
+following value interpreted as a channel URL. May take a second
+argument giving a message string; if the second argument is absent the
+message is read from standard input (and may contain newlines).
+Sends the message, then quits.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-V</term>
+<listitem><para>Write the program version to stdout and
+terminate.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-h</term>
+<listitem><para>Print usage instructions and terminate.</para></listitem>
+</varlistentry>
+</variablelist>
+</refsect1>
+
+<refsect1 id='limitations'><title>LIMITATIONS</title>
+<para>Sending requests via UDP optimizes for lowest latency and network load
+by avoiding TCP connection setup time; the cost is that delivery is
+not reliable in the face of packet loss.</para>
+
+<para>An <application>irkerd</application> instance with a
+publicly-accessible request socket could complicate blocking of IRC
+spam by making it easy for spammers to submit while hiding their IP
+addresses; the better way to deploy, then, is on places like
+project-hosting sites where the <application>irkerd</application>
+socket can be visible from commit-hook code but not exposed to the
+outside world. Priming your firewall with blocklists of IP addresses
+known to spew spam is always a good idea.</para>
+
+<para>The absence of any option to set the service port is deliberate.
+If you think you need to do that, you have a problem better solved at
+your firewall.</para>
+
+<para>IRC has a message length limit of 510 bytes; generate your
+privmsg attribute values with appropriate care.</para>
+
+<para>IRC ignores any text after an embedded newline. Be aware that
+<application>irkerd</application> will turn payload strings with
+embedded newlines into multiple IRC sends to avoid having message data
+discarded. </para>
+
+<para>Due to a bug in Python URL parsing, IRC urls with both a # and a
+key part may fail unexpectedly. The workaround is to remove the #.</para>
+</refsect1>
+
+<refsect1 id='see_also'><title>SEE ALSO</title>
+<para>
+<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
+</para>
+</refsect1>
+
+<refsect1 id='authors'><title>AUTHOR</title>
+<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the
+project page at <ulink
+url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink>
+for updates and other resources, including an installable repository
+hook script.</para>
+</refsect1>
+</refentry>
diff --git a/irkerhook.py b/irkerhook.py
new file mode 100755
index 0000000..7dcd6b2
--- /dev/null
+++ b/irkerhook.py
@@ -0,0 +1,581 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 Eric S. Raymond <esr@thyrsus.com>
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# This script contains git porcelain and porcelain byproducts.
+# Requires either Python 2.6, or 2.5 with the simplejson library installed
+# or Python 3.x.
+#
+# usage: irkerhook.py [-V] [-n] [--variable=value...] [commit_id...]
+#
+# This script is meant to be run in an update or post-commit hook.
+# Try it with -n to see the notification dumped to stdout and verify
+# that it looks sane. With -V this script dumps its version and exits.
+#
+# See the irkerhook manual page in the distribution for a detailed
+# explanation of how to configure this hook.
+from __future__ import print_function
+
+# The default location of the irker proxy, if the project configuration
+# does not override it.
+#
+# SPDX-License-Identifier: BSD-2-Clause
+from __future__ import print_function, absolute_import
+
+# Host and port of the irkerd relay that notifications are shipped to
+# when the extractor has no server configured.
+default_server = "localhost"
+IRKER_PORT = 6659
+
+# The default service used to turn your web-view URL into a tinyurl so it
+# will take up less space on the IRC notification line.
+default_tinyifier = u"http://tinyurl.com/api-create.php?url="
+
+# Map magic urlprefix values to actual URL prefixes.
+# Each prefix is a %-template expanded against the commit's attributes
+# (host, repo, ...); the commit ID is appended to the result.
+urlprefixmap = {
+ "viewcvs": "http://%(host)s/viewcvs/%(repo)s?view=revision&revision=",
+ "gitweb": "http://%(host)s/cgi-bin/gitweb.cgi?p=%(repo)s;a=commit;h=",
+ "cgit": "http://%(host)s/cgi-bin/cgit.cgi/%(repo)s/commit/?id=",
+ }
+
+# By default, ship to the freenode #commits list
+default_channels = u"irc://chat.freenode.net/#commits"
+
+#
+# No user-serviceable parts below this line:
+#
+
+version = "2.19"
+
+import os, sys, socket, subprocess, locale, datetime, re
+from pipes import quote as shellquote
+
+try:
+ from urllib2 import urlopen, HTTPError
+except ImportError:
+ from urllib.error import HTTPError
+ from urllib.request import urlopen
+
+try:
+ import simplejson as json # Faster, also makes us Python-2.5-compatible
+except ImportError:
+ import json
+
+if sys.version_info.major == 2:
+ string_type = unicode
+else:
+ string_type = str
+
+try:
+ getstatusoutput = subprocess.getstatusoutput
+except AttributeError:
+ import commands
+ getstatusoutput = commands.getstatusoutput
+
+def do(command):
+ if sys.version_info.major == 2:
+ return string_type(getstatusoutput(command)[1], locale.getlocale()[1] or 'UTF-8')
+ else:
+ return getstatusoutput(command)[1]
+
+# One commit's worth of notification data, plus the logic that renders
+# it into the text line shipped to IRC.
+class Commit:
+ def __init__(self, extractor, commit):
+ "Per-commit data."
+ self.commit = commit
+ self.branch = None
+ self.rev = None
+ self.mail = None
+ self.author = None
+ self.files = None
+ self.logmsg = None
+ self.url = None
+ self.author_date = None
+ self.commit_date = None
+ # Copy every extractor attribute onto this object so the template
+ # expansion in __str__ can refer to them by name.
+ self.__dict__.update(extractor.__dict__)
+
+ if sys.version_info.major == 2:
+ # Convert __str__ to __unicode__ for python 2
+ self.__unicode__ = self.__str__
+ # Not really needed, but maybe useful for debugging
+ self.__str__ = lambda x: x.__unicode__().encode('utf-8')
+
+ def __str__(self):
+ "Produce a notification string from this commit."
+ # A urlprefix of "none" (any case) suppresses the URL entirely.
+ if self.urlprefix.lower() == "none":
+ self.url = ""
+ else:
+ # Magic names are looked up in urlprefixmap; any other value is
+ # used as the prefix template itself.
+ urlprefix = urlprefixmap.get(self.urlprefix, self.urlprefix)
+ webview = (urlprefix % self.__dict__) + self.commit
+ try:
+ # See if the URL is accessible
+ res = urlopen(webview)
+ if self.tinyifier and self.tinyifier.lower() != "none":
+ try:
+ # Didn't get a retrieval error on the web
+ # view, so try to tinyify a reference to it.
+ self.url = urlopen(self.tinyifier + webview).read()
+ try:
+ self.url = self.url.decode('UTF-8')
+ except UnicodeError:
+ pass
+ except IOError:
+ # Tinyifier unreachable: fall back to the full web-view URL.
+ self.url = webview
+ else:
+ self.url = webview
+ except HTTPError as e:
+ if e.code == 401:
+ # Authentication error, so we assume the view is valid
+ self.url = webview
+ else:
+ self.url = ""
+ except IOError:
+ # Any other retrieval failure: ship the notification without a URL.
+ self.url = ""
+ # Expand the template against every attribute of this object.
+ res = self.template % self.__dict__
+ return string_type(res, 'UTF-8') if not isinstance(res, string_type) else res
+
+class GenericExtractor:
+ "Generic class for encapsulating data from a VCS."
+ booleans = ["tcp"]
+ numerics = ["maxchannels"]
+ strings = ["email"]
+ def __init__(self, arguments):
+ self.arguments = arguments
+ self.project = None
+ self.repo = None
+ # These aren't really repo data but they belong here anyway...
+ self.email = None
+ self.tcp = True
+ self.tinyifier = default_tinyifier
+ self.server = None
+ self.channels = None
+ self.maxchannels = 0
+ self.template = None
+ self.urlprefix = None
+ self.host = socket.getfqdn()
+ self.cialike = None
+ self.filtercmd = None
+ # Color highlighting is disabled by default.
+ self.color = None
+ self.bold = self.green = self.blue = self.yellow = ""
+ self.brown = self.magenta = self.cyan = self.reset = ""
+ def activate_color(self, style):
+ "IRC color codes."
+ if style == 'mIRC':
+ # mIRC colors are mapped as closely to the ANSI colors as
+ # possible. However, bright colors (green, blue, red,
+ # yellow) have been made their dark counterparts since
+ # ChatZilla does not properly darken mIRC colors in the
+ # Light Motif color scheme.
+ self.bold = '\x02'
+ self.green = '\x0303'
+ self.blue = '\x0302'
+ self.red = '\x0305'
+ self.yellow = '\x0307'
+ self.brown = '\x0305'
+ self.magenta = '\x0306'
+ self.cyan = '\x0310'
+ self.reset = '\x0F'
+ if style == 'ANSI':
+ self.bold = '\x1b[1m'
+ self.green = '\x1b[1;32m'
+ self.blue = '\x1b[1;34m'
+ self.red = '\x1b[1;31m'
+ self.yellow = '\x1b[1;33m'
+ self.brown = '\x1b[33m'
+ self.magenta = '\x1b[35m'
+ self.cyan = '\x1b[36m'
+ self.reset = '\x1b[0m'
+ def load_preferences(self, conf):
+ "Load preferences from a file in the repository root."
+ if not os.path.exists(conf):
+ return
+ ln = 0
+ for line in open(conf):
+ ln += 1
+ if line.startswith("#") or not line.strip():
+ continue
+ elif line.count('=') != 1:
+ sys.stderr.write('"%s", line %d: missing = in config line\n' \
+ % (conf, ln))
+ continue
+ fields = line.split('=')
+ if len(fields) != 2:
+ sys.stderr.write('"%s", line %d: too many fields in config line\n' \
+ % (conf, ln))
+ continue
+ variable = fields[0].strip()
+ value = fields[1].strip()
+ if value.lower() == "true":
+ value = True
+ elif value.lower() == "false":
+ value = False
+ # User cannot set maxchannels - only a command-line arg can do that.
+ if variable == "maxchannels":
+ return
+ setattr(self, variable, value)
+ def do_overrides(self):
+ "Make command-line overrides possible."
+ for tok in self.arguments:
+ for key in self.__dict__:
+ if tok.startswith("--" + key + "="):
+ val = tok[len(key)+3:]
+ setattr(self, key, val)
+ for (key, val) in self.__dict__.items():
+ if key in GenericExtractor.booleans:
+ if type(val) == type("") and val.lower() == "true":
+ setattr(self, key, True)
+ elif type(val) == type("") and val.lower() == "false":
+ setattr(self, key, False)
+ elif key in GenericExtractor.numerics:
+ setattr(self, key, int(val))
+ elif key in GenericExtractor.strings:
+ setattr(self, key, val)
+ if not self.project:
+ sys.stderr.write("irkerhook.py: no project name set!\n")
+ raise SystemExit(1)
+ if not self.repo:
+ self.repo = self.project.lower()
+ if not self.channels:
+ self.channels = default_channels % self.__dict__
+ if self.color and self.color.lower() != "none":
+ self.activate_color(self.color)
+
+def has(dirname, paths):
+ "Test for existence of a list of paths."
+ # all() is a python2.5 construct
+ for exists in [os.path.exists(os.path.join(dirname, x)) for x in paths]:
+ if not exists:
+ return False
+ return True
+
+# VCS-dependent code begins here
+
+class GitExtractor(GenericExtractor):
+ "Metadata extraction for the git version control system."
+ @staticmethod
+ def is_repository(dirname):
+ # Must detect both ordinary and bare repositories
+ return has(dirname, [".git"]) or \
+ has(dirname, ["HEAD", "refs", "objects"])
+ def __init__(self, arguments):
+ GenericExtractor.__init__(self, arguments)
+ # Get all global config variables
+ # (the "or" fallbacks below apply whenever do() returns an empty
+ # string - presumably what git prints for an unset variable)
+ self.project = do("git config --get irker.project")
+ self.repo = do("git config --get irker.repo")
+ self.server = do("git config --get irker.server")
+ self.channels = do("git config --get irker.channels")
+ self.email = do("git config --get irker.email")
+ self.tcp = do("git config --bool --get irker.tcp")
+ self.template = do("git config --get irker.template") or u'%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s'
+ self.tinyifier = do("git config --get irker.tinyifier") or default_tinyifier
+ self.color = do("git config --get irker.color")
+ self.urlprefix = do("git config --get irker.urlprefix") or u"gitweb"
+ self.cialike = do("git config --get irker.cialike")
+ self.filtercmd = do("git config --get irker.filtercmd")
+ # These are git-specific
+ self.refname = do("git symbolic-ref HEAD 2>/dev/null")
+ self.revformat = do("git config --get irker.revformat")
+ # The project variable defaults to the name of the repository toplevel.
+ if not self.project:
+ bare = do("git config --bool --get core.bare")
+ if bare.lower() == "true":
+ keyfile = "HEAD"
+ else:
+ keyfile = ".git/HEAD"
+ # Walk upward from the current directory until the key file is
+ # found; the directory holding it names the project.
+ here = os.getcwd()
+ while True:
+ if os.path.exists(os.path.join(here, keyfile)):
+ self.project = os.path.basename(here)
+ if self.project.endswith('.git'):
+ self.project = self.project[0:-4]
+ break
+ elif here == '/':
+ sys.stderr.write("irkerhook.py: no git repo below root!\n")
+ sys.exit(1)
+ here = os.path.dirname(here)
+ # Get overrides
+ self.do_overrides()
+ def head(self):
+ "Return a symbolic reference to the tip commit of the current branch."
+ return "HEAD"
+ def commit_factory(self, commit_id):
+ "Make a Commit object holding data for a specified commit ID."
+ commit = Commit(self, commit_id)
+ # Strip the leading refs/<kind>/ component from the symbolic ref.
+ commit.branch = re.sub(r"^refs/[^/]*/", "", self.refname)
+ # Compute a description for the revision
+ if self.revformat == 'raw':
+ commit.rev = commit.commit
+ elif self.revformat == 'short':
+ commit.rev = ''
+ else: # self.revformat == 'describe'
+ commit.rev = do("git describe %s 2>/dev/null" % shellquote(commit.commit))
+ if not commit.rev:
+ # Query git for the abbreviated hash
+ commit.rev = do("git log -1 '--pretty=format:%h' " + shellquote(commit.commit))
+ if self.urlprefix in ('gitweb', 'cgit'):
+ # Also truncate the commit used for the announced urls
+ commit.commit = commit.rev
+ # Extract the meta-information for the commit
+ commit.files = do("git diff-tree -r --name-only " + shellquote(commit.commit))
+ # Drop the first output line (presumably the commit ID echoed by
+ # diff-tree - TODO confirm) and join the file names with spaces.
+ commit.files = " ".join(commit.files.strip().split("\n")[1:])
+ # Design choice: for git we ship only the first message line, which is
+ # conventionally supposed to be a summary of the commit. Under
+ # other VCSes a different choice may be appropriate.
+ commit.author_name, commit.mail, commit.logmsg = \
+ do("git log -1 '--pretty=format:%an%n%ae%n%s' " + shellquote(commit.commit)).split("\n")
+ # This discards the part of the author's address after @.
+ # Might be nice to ship the full email address, if not
+ # for spammers' address harvesters - getting this wrong
+ # would make the freenode #commits channel into harvester heaven.
+ commit.author = commit.mail.split("@")[0]
+ # One git call returns author date and commit date separated by "|".
+ commit.author_date, commit.commit_date = \
+ do("git log -1 '--pretty=format:%ai|%ci' " + shellquote(commit.commit)).split("|")
+ return commit
+
+class SvnExtractor(GenericExtractor):
+ "Metadata extraction for the svn version control system."
+ @staticmethod
+ def is_repository(dirname):
+ return has(dirname, ["format", "hooks", "locks"])
+ def __init__(self, arguments):
+ GenericExtractor.__init__(self, arguments)
+ # Some things we need to have before metadata queries will work
+ self.repository = '.'
+ for tok in arguments:
+ if tok.startswith("--repository="):
+ self.repository = tok[13:]
+ self.project = os.path.basename(self.repository)
+ self.template = '%(bold)s%(project)s%(reset)s: %(green)s%(author)s%(reset)s %(repo)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s'
+ self.urlprefix = "viewcvs"
+ self.load_preferences(os.path.join(self.repository, "irker.conf"))
+ self.do_overrides()
+ def head(self):
+ sys.stderr.write("irker: under svn, hook requires a commit argument.\n")
+ raise SystemExit(1)
+ def commit_factory(self, commit_id):
+ self.id = commit_id
+ commit = Commit(self, commit_id)
+ commit.branch = ""
+ commit.rev = "r%s" % self.id
+ commit.author = self.svnlook("author")
+ commit.commit_date = self.svnlook("date").partition('(')[0]
+ commit.files = self.svnlook("dirs-changed").strip().replace("\n", " ")
+ commit.logmsg = self.svnlook("log").strip()
+ return commit
+ def svnlook(self, info):
+ return do("svnlook %s %s --revision %s" % (shellquote(info), shellquote(self.repository), shellquote(self.id)))
+
+class HgExtractor(GenericExtractor):
+ "Metadata extraction for the Mercurial version control system."
+ @staticmethod
+ def is_repository(directory):
+ return has(directory, [".hg"])
+ def __init__(self, arguments):
+ # This fiddling with arguments is necessary since the Mercurial hook can
+ # be run in two different ways: either directly via Python (in which
+ # case hg should be pointed to the hg_hook function below) or as a
+ # script (in which case the normal __main__ block at the end of this
+ # file is exercised). In the first case, we already get repository and
+ # ui objects from Mercurial, in the second case, we have to create them
+ # from the root path.
+ self.repository = None
+ # hg_hook passes a single (ui, repo) tuple as the first argument.
+ if arguments and type(arguments[0]) == type(()):
+ # Called from hg_hook function
+ ui, self.repository = arguments[0]
+ arguments = [] # Should not be processed further by do_overrides
+ else:
+ # Called from command line: create repo/ui objects
+ from mercurial import hg, ui as uimod
+
+ repopath = '.'
+ for tok in arguments:
+ if tok.startswith('--repository='):
+ repopath = tok[13:]
+ ui = uimod.ui()
+ ui.readconfig(os.path.join(repopath, '.hg', 'hgrc'), repopath)
+ self.repository = hg.repository(ui, repopath)
+
+ GenericExtractor.__init__(self, arguments)
+ # Extract global values from the hg configuration file(s)
+ self.project = ui.config('irker', 'project')
+ self.repo = ui.config('irker', 'repo')
+ self.server = ui.config('irker', 'server')
+ self.channels = ui.config('irker', 'channels')
+ self.email = ui.config('irker', 'email')
+ self.tcp = str(ui.configbool('irker', 'tcp')) # converted to bool again in do_overrides
+ self.template = ui.config('irker', 'template') or '%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s'
+ self.tinyifier = ui.config('irker', 'tinyifier') or default_tinyifier
+ self.color = ui.config('irker', 'color')
+ # Prefer the irker-specific prefix, then the web base URL, else none.
+ self.urlprefix = (ui.config('irker', 'urlprefix') or
+ ui.config('web', 'baseurl') or '')
+ if self.urlprefix:
+ # self.commit is appended to this by do_overrides
+ # NOTE(review): in this file the commit ID is appended in
+ # Commit.__str__, not do_overrides - confirm and reword.
+ self.urlprefix = self.urlprefix.rstrip('/') + '/rev/'
+ self.cialike = ui.config('irker', 'cialike')
+ self.filtercmd = ui.config('irker', 'filtercmd')
+ # Fall back to the name of the repository root directory.
+ if not self.project:
+ self.project = os.path.basename(self.repository.root.rstrip('/'))
+ self.do_overrides()
+ def head(self):
+ "Return a symbolic reference to the tip commit of the current branch."
+ return "-1"
+ def commit_factory(self, commit_id):
+ "Make a Commit object holding data for a specified commit ID."
+ from mercurial.node import short
+ from mercurial.templatefilters import person
+ node = self.repository.lookup(commit_id)
+ commit = Commit(self, short(node))
+ # Extract commit-specific values from a "context" object
+ ctx = self.repository.changectx(node)
+ commit.rev = '%d:%s' % (ctx.rev(), commit.commit)
+ commit.branch = ctx.branch()
+ commit.author = person(ctx.user())
+ # Render the first element of ctx.date() as YYYY-MM-DD HH:MM:SS.
+ commit.author_date = \
+ datetime.datetime.fromtimestamp(ctx.date()[0]).strftime('%Y-%m-%d %H:%M:%S')
+ commit.logmsg = ctx.description()
+ # Extract changed files from status against first parent
+ st = self.repository.status(ctx.p1().node(), ctx.node())
+ commit.files = ' '.join(st.modified + st.added + st.removed)
+ return commit
+
+def hg_hook(ui, repo, **kwds):
+ # To be called from a Mercurial "commit", "incoming" or "changegroup" hook.
+ # Example configuration:
+ # [hooks]
+ # incoming.irker = python:/path/to/irkerhook.py:hg_hook
+ extractor = HgExtractor([(ui, repo)])
+ start = repo[kwds['node']].rev()
+ end = len(repo)
+ if start != end:
+ # changegroup with multiple commits, so we generate a notification
+ # for each one
+ for rev in range(start, end):
+ ship(extractor, rev, False)
+ else:
+ ship(extractor, kwds['node'], False)
+
+# The files we use to identify a Subversion repo might occur as content
+# in a git or hg repo, but the special subdirectories for those are more
+# reliable indicators. So test for Subversion last.
+extractors = [GitExtractor, HgExtractor, SvnExtractor]
+
+# VCS-dependent code ends here
+
+def convert_message(message):
+ """Convert the message to bytes to send to the socket"""
+ return message.encode(locale.getlocale()[1] or 'UTF-8') + b'\n'
+
+def ship(extractor, commit, debug):
+ "Ship a notification for the specified commit."
+ metadata = extractor.commit_factory(commit)
+
+ # This is where we apply filtering
+ if extractor.filtercmd:
+ cmd = '%s %s' % (shellquote(extractor.filtercmd),
+ shellquote(json.dumps(metadata.__dict__)))
+ data = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout.read()
+ try:
+ metadata.__dict__.update(json.loads(data))
+ except ValueError:
+ sys.stderr.write("irkerhook.py: could not decode JSON: %s\n" % data)
+ raise SystemExit(1)
+
+ # Rewrite the file list if too long. The objective here is only
+ # to be easier on the eyes.
+ if extractor.cialike \
+ and extractor.cialike.lower() != "none" \
+ and len(metadata.files) > int(extractor.cialike):
+ files = metadata.files.split()
+ dirs = set([d.rpartition('/')[0] for d in files])
+ if len(dirs) == 1:
+ metadata.files = "(%s files)" % (len(files),)
+ else:
+ metadata.files = "(%s files in %s dirs)" % (len(files), len(dirs))
+ # Message reduction. The assumption here is that IRC can't handle
+ # lines more than 510 characters long. If we exceed that length, we
+ # try knocking out the file list, on the theory that for notification
+ # purposes the commit text is more important. If it's still too long
+ # there's nothing much can be done other than ship it expecting the IRC
+ # server to truncate.
+ privmsg = string_type(metadata)
+ if len(privmsg) > 510:
+ metadata.files = ""
+ privmsg = string_type(metadata)
+
+ # Anti-spamming guard. It's deliberate that we get maxchannels not from
+ # the user-filtered metadata but from the extractor data - means repo
+ # administrators can lock in that setting.
+ channels = metadata.channels.split(",")
+ if extractor.maxchannels != 0:
+ channels = channels[:extractor.maxchannels]
+
+ # Ready to ship.
+ message = json.dumps({"to": channels, "privmsg": privmsg})
+ if debug:
+ print(message)
+ elif channels:
+ try:
+ if extractor.email:
+ # We can't really figure out what our SF username is without
+ # exploring our environment. The mail pipeline doesn't care
+ # about who sent the mail, other than being from sourceforge.
+ # A better way might be to simply call mail(1)
+ sender = "irker@users.sourceforge.net"
+ msg = """From: %(sender)s
+Subject: irker json
+
+%(message)s""" % {"sender":sender, "message":message}
+ import smtplib
+ smtp = smtplib.SMTP()
+ smtp.connect()
+ smtp.sendmail(sender, extractor.email, msg)
+ smtp.quit()
+ elif extractor.tcp:
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ sock.connect((extractor.server or default_server, IRKER_PORT))
+ sock.sendall(convert_message(message))
+ finally:
+ sock.close()
+ else:
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ sock.sendto(convert_message(message), (extractor.server or default_server, IRKER_PORT))
+ finally:
+ sock.close()
+ except socket.error as e:
+ sys.stderr.write("%s\n" % e)
+
+# Script entry point: parse the command line, pick the extractor that
+# matches the repository, and ship one notification per commit.
+if __name__ == "__main__":
+ notify = True
+ repository = os.getcwd()
+ commits = []
+ for arg in sys.argv[1:]:
+ if arg == '-n':
+ # Dry run: the notification is printed instead of sent.
+ notify = False
+ elif arg == '-V':
+ print("irkerhook.py: version", version)
+ sys.exit(0)
+ elif arg.startswith("--repository="):
+ repository = arg[13:]
+ elif not arg.startswith("--"):
+ # Anything that is not an option is taken as a commit ID; other
+ # "--" arguments are left for the extractor, which gets sys.argv[1:].
+ commits.append(arg)
+
+ # Figure out which extractor we should be using
+ for candidate in extractors:
+ if candidate.is_repository(repository):
+ cls = candidate
+ break
+ # NOTE(review): this else presumably pairs with the for loop (it runs
+ # when no candidate matched) - confirm against the indented source.
+ else:
+ sys.stderr.write("irkerhook: cannot identify a repository type.\n")
+ raise SystemExit(1)
+ extractor = cls(sys.argv[1:])
+
+ # And apply it.
+ # With no commit IDs given, fall back to the extractor's head().
+ if not commits:
+ commits = [extractor.head()]
+ for commit in commits:
+ ship(extractor, commit, not notify)
+
+# The following sets edit modes for GNU EMACS
+# Local Variables:
+# mode:python
+# End:
diff --git a/irkerhook.xml b/irkerhook.xml
new file mode 100644
index 0000000..d6f7b51
--- /dev/null
+++ b/irkerhook.xml
@@ -0,0 +1,417 @@
+<!DOCTYPE refentry PUBLIC
+ "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "docbook/docbookx.dtd">
+<refentry id='irkerhook.1'>
+<refmeta>
+<refentrytitle>irkerhook</refentrytitle>
+<manvolnum>1</manvolnum>
+<refmiscinfo class='date'>Aug 27 2012</refmiscinfo>
+<refmiscinfo class='source'>irker</refmiscinfo>
+<refmiscinfo class='product'>irker</refmiscinfo>
+<refmiscinfo class='manual'>Commands</refmiscinfo>
+</refmeta>
+<refnamediv id='name'>
+<refname>irkerhook</refname>
+<refpurpose>repository hook script issuing irker notifications</refpurpose>
+</refnamediv>
+<refsynopsisdiv id='synopsis'>
+
+<cmdsynopsis>
+ <command>irkerhook.py</command>
+ <arg>-n</arg>
+ <arg>-V</arg>
+ <group><arg rep='repeat'><replaceable>--variable=value</replaceable></arg></group>
+ <group><arg rep='repeat'><replaceable>commit-id</replaceable></arg></group>
+</cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1 id='description'><title>DESCRIPTION</title>
+
+<para><application>irkerhook.py</application> is a Python script intended
+to be called from the post-commit hook of a version-control repository. Its
+job is to collect information about the commit that fired the hook (and
+possibly preferences set by the repository owner) and ship that information
+to an instance of <application>irkerd</application> for forwarding to
+various announcement channels.</para>
+
+<para>The proper invocation and behavior of
+<application>irkerhook.py</application> varies depending on which
+VCS (version-control system) is calling it. There are four different places
+from which it may extract information:</para>
+
+<orderedlist>
+<listitem><para>Calls to VCS utilities.</para></listitem>
+<listitem><para>In VCSes like git that support user-settable configuration
+variables, variables with the prefix "irker.".</para></listitem>
+<listitem><para>In other VCSes, a configuration file, "irker.conf", in the
+repository's internals directory.</para></listitem>
+<listitem><para>Command-line arguments of the form
+--variable=value.</para></listitem>
+</orderedlist>
+
+<para>The following variables are general to all supported VCSes:</para>
+
+<variablelist>
+<varlistentry>
+<term>project</term>
+<listitem>
+<para>The name of the project. Should be a relatively short identifier;
+will usually appear at the very beginning of a notification.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>repo</term>
+<listitem>
+<para>The name of the repository top-level directory. If not
+specified, defaults to a lowercased copy of the project name.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>channels</term>
+<listitem>
+<para>An IRC channel URL, or comma-separated list of same, identifying
+channels to which notifications are to be sent. If not specified, the
+default is the freenode #commits channel.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>server</term>
+<listitem>
+<para>The host on which the notification-relaying irker daemon is expected
+to reside. Defaults to "localhost".</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>email</term>
+<listitem>
+<para>If set, use email for communication rather than TCP or UDP.
+The value is used as the target mail address.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>tcp</term>
+<listitem>
+<para>If "true", use TCP for communication; if "false", use UDP.
+Defaults to "false".</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>urlprefix</term>
+<listitem>
+<para>Changeset URL prefix for your repo. When the commit ID is appended
+to this, it should point at a CGI that will display the commit
+through cgit, gitweb or something similar. The defaults will probably
+work if you have a typical gitweb/cgit setup.</para>
+
+<para>If the value of this variable is "None", generation of the URL
+field in commit notifications will be suppressed. Other magic values
+are "cgit", "gitweb", and "viewcvs", which expand to URL templates
+that will usually work with those systems.</para>
+
+<para>The magic cookies "%(host)s" and "%(repo)s" may occur in this
+URL. The former is expanded to the FQDN of the host on which
+<application>irkerhook.py</application> is running; the latter is
+expanded to the value of the "repo" variable.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>tinyifier</term>
+<listitem>
+<para>URL template pointing to a service for compressing URLs so they
+will take up less space in the notification line. If the value of this
+variable is "None", no compression will be attempted.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>color</term>
+<listitem>
+<para>If "mIRC", highlight notification fields with mIRC color codes.
+If "ANSI", highlight notification fields with ANSI color escape
+sequences. Defaults to "none" (no colors). ANSI codes are supported
+in Chatzilla, irssi, ircle, and BitchX; mIRC codes only are recognized
+in mIRC, XChat, KVirc, Konversation, or weechat.</para>
+
+<para>Note: if you turn this on and notifications stop appearing on
+your channel, you need to turn off IRC's color filter on that channel.
+To do this you will need op privileges; issue the command "/mode
+&lt;channel&gt; -c" with &lt;channel&gt; replaced by your channel name.
+You may need to first issue the command "/msg chanserv set
+&lt;channel&gt; MLOCK +nt-slk".</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>maxchannels</term>
+<listitem>
+<para>Interpreted as an integer. If not zero, limits the number of
+channels the hook will interpret from the "channels" variable.</para>
+
+<para>This variable cannot be set through VCS configuration variables
+or <filename>irker.conf</filename>; it can only be set with a command-line
+argument. Thus, on a forge site in which repository owners are not
+allowed to modify their post-commit scripts, a site administrator can set it
+to prevent shotgun spamming by malicious project owners. Setting it to
+a value less than 2, however, would probably be unwise.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>cialike</term>
+<listitem>
+<para>If not empty and not "None" (the default), this emulates the old
+CIA behavior of dropping long lists of files in favor of a summary of
+the form (N files in M directories). The value must be numeric giving
+a threshold value for the length of the file list in
+characters.</para>
+</listitem>
+</varlistentry>
+<varlistentry>
+<term>template</term>
+<listitem>
+<para>Set the template used to generate notification messages. Only
+available in VCSes with config variables; presently this means git or
+hg. All basic commit and extractor fields, including color switches,
+are available as %() substitutions.</para>
+</listitem>
+</varlistentry>
+</variablelist>
+
+<para>irkerhook.py will run under both python 2 and python 3, but it does
+not support mercurial repositories under python 3 yet.</para>
+
+<refsect2 id="git"><title>git</title>
+
+<para>Under git, the normal way to invoke this hook (from within the
+update hook) passes it a refname followed by a list of commits. Because
+<command>git rev-list</command> normally lists from most recent to oldest,
+you'll want to use --reverse to make notifications be emitted in chronological
+order. In a normal update script, the invocation should look like this</para>
+
+<programlisting>
+refname=$1
+old=$2
+new=$3
+irkerhook.py --refname=${refname} $(git rev-list --reverse ${old}..${new})
+</programlisting>
+
+<para>except that you'll need an absolute path for irkerhook.py.</para>
+
+<para>For testing purposes and backward compatibility, if you invoke
+<application>irkerhook.py</application> with no arguments (as in a
+post-commit hook) it will behave as though it had been called like
+this:</para>
+
+<programlisting>
+irkerhook.py --refname=refs/heads/master HEAD
+</programlisting>
+
+<para>However, this will not give the right result when you push to
+a non-default branch of a bare repo.</para>
+
+<para>A typical way to install this hook is actually in the
+<filename>post-receive</filename> hook, because it gets all the
+necessary details and will not abort the push on failure. Use the
+following script:</para>
+
+<programlisting>
+#!/bin/sh
+
+echo "sending IRC notification"
+while read old new refname; do
+ irkerhook --refname=${refname} $(git rev-list --reverse ${old}..${new})
+done
+</programlisting>
+
+<para>Preferences may be set in the repo <filename>config</filename>
+file in an [irker] section. Here is an example of what that can look
+like:</para>
+
+<programlisting>
+[irker]
+ project = gpsd
+ color = ANSI
+ channels = irc://chat.freenode.net/gpsd,irc://chat.freenode.net/commits
+</programlisting>
+
+<para> You should not set the "repository" variable (an equivalent
+will be computed). No attempt is made to interpret an
+<filename>irker.conf</filename> file.</para>
+
+<para>The default value of the "project" variable is the basename
+of the repository directory. The default value of the "urlprefix"
+variable is "cgit".</para>
+
+<para>There is one git-specific variable, "revformat", controlling
+the format of the commit identifier in a notification. It
+may have the following values:</para>
+
+<variablelist>
+<varlistentry>
+<term>raw</term>
+<listitem><para>full hex ID of commit</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>short</term>
+<listitem><para>first 12 chars of hex ID</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>describe</term>
+<listitem><para>describe relative to last tag, falling back to short</para></listitem>
+</varlistentry>
+</variablelist>
+
+<para>The default is 'describe'.</para>
+</refsect2>
+
+<refsect2 id="svn"><title>Subversion</title>
+
+<para>Under Subversion, <application>irkerhook.py</application>
+accepts a --repository option with value (the absolute pathname of the
+Subversion repository) and a commit argument (the numeric revision level of
+the commit). The defaults are the current working directory and HEAD,
+respectively.</para>
+
+<para>Note, however, that you <emphasis>cannot</emphasis> default the
+repository argument inside a Subversion post-commit hook; this is
+because of a limitation of Subversion, which is that getting the
+current directory is not reliable inside these hooks. Instead, the
+values must be the two arguments that Subversion passes to that hook
+as arguments. Thus, a typical invocation in the post-commit script
+will look like this:</para>
+
+<programlisting>
+REPO=$1
+REV=$2
+irkerhook.py --repository=$REPO $REV
+</programlisting>
+
+<para>Other --variable=value settings may also be
+given on the command line, and will override any settings in an
+<filename>irker.conf</filename> file.</para>
+
+<para>The default for the project variable is the basename of the
+repository. The default value of the "urlprefix" variable is
+"viewcvs".</para>
+
+<para>If an <filename>irker.conf</filename> file exists in the repository
+root directory (not the checkout directory but where internals such as the
+"format" file live) the hook will interpret variable settings from it. Here
+is an example of what such a file might look like:</para>
+
+<programlisting>
+# irkerhook variable settings for the irker project
+project = irker
+channels = irc://chat.freenode.net/irker,irc://chat.freenode.net/commits
+tcp = false
+</programlisting>
+
+<para>Don't set the "repository" or "commit" variables in this file;
+that would have unhappy results.</para>
+
+<para>There are no Subversion-specific variables.</para>
+
+</refsect2>
+
+<refsect2 id="hg"><title>Mercurial</title>
+
+<para>Under Mercurial, <application>irkerhook.py</application> can be
+invoked in two ways: either as a Python hook (preferred) or as a
+script.</para>
+
+<para>To call it as a Python hook, add the following to the
+"commit" or "incoming" hook declaration in your Mercurial
+repository:</para>
+
+<programlisting>
+[hooks]
+ incoming.irker = python:/path/to/irkerhook.py:hg_hook
+</programlisting>
+
+<para>When called as a script, the hook accepts a --repository option
+with value (the absolute pathname of the Mercurial repository) and can
+take a commit argument (the Mercurial hash ID of the commit or a
+reference to it). The default for the repository argument is the
+current directory. The default commit argument is '-1', designating
+the current tip commit.</para>
+
+<para>As for git, in both cases all variables may be set in the repo
+<filename>hgrc</filename> file in an [irker] section. Command-line
+variable=value arguments are accepted but not required for script
+invocation. No attempt is made to interpret an
+<filename>irker.conf</filename> file.</para>
+
+<para>The default value of the "project" variable is the basename
+of the repository directory. The default value of the "urlprefix"
+variable is the value of the "web.baseurl" config value, if it
+exists.</para>
+
+</refsect2>
+
+<refsect2 id="filter"><title>Filtering</title>
+
+<para>It is possible to filter commits before sending them to
+<application>irkerd</application>.</para>
+
+<para>You have to specify the <option>filtercmd</option> option, which
+will be the command <application>irkerhook.py</application> will
+run. This command should accept one argument, which is a JSON
+representation of commit and extractor metadata (including the
+channels variable). The command should emit to standard output a JSON
+representation of (possibly altered) metadata.</para>
+
+<para>Below is an example filter:</para>
+
+<programlisting>
+#!/usr/bin/env python
+# This is a trivial example of a metadata filter.
+# All it does is change the name of the commit's author.
+#
+import sys, json
+metadata = json.loads(sys.argv[1])
+
+metadata['author'] = "The Great and Powerful Oz"
+
+print json.dumps(metadata)
+# end
+</programlisting>
+
+<para>Standard error is available to the hook for progress and
+error messages.</para>
+
+</refsect2>
+
+</refsect1>
+
+<refsect1 id='options'><title>OPTIONS</title>
+
+<para><application>irkerhook.py</application> takes the following
+options:</para>
+
+<variablelist>
+<varlistentry>
+<term>-n</term>
+<listitem><para>Suppress transmission to a daemon. Instead, dump the
+generated JSON request to standard output. Useful for
+debugging.</para></listitem>
+</varlistentry>
+<varlistentry>
+<term>-V</term>
+<listitem><para>Write the program version to stdout and
+terminate.</para></listitem>
+</varlistentry>
+</variablelist>
+
+</refsect1>
+
+<refsect1 id='see_also'><title>SEE ALSO</title>
+<para>
+<citerefentry><refentrytitle>irkerd</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+</para>
+</refsect1>
+
+<refsect1 id='authors'><title>AUTHOR</title>
+<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the
+project page at <ulink
+url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink>
+for updates and other resources.</para>
+</refsect1>
+</refentry>
+
diff --git a/org.catb.irkerd.plist b/org.catb.irkerd.plist
new file mode 100644
index 0000000..3b30f92
--- /dev/null
+++ b/org.catb.irkerd.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>KeepAlive</key>
+ <true/>
+ <key>Label</key>
+ <string>org.catb.irkerd</string>
+ <key>ProgramArguments</key>
+ <array>
+ <string>/usr/bin/irkerd</string>
+ </array>
+ <key>RunAtLoad</key>
+ <true/>
+ <key>UserName</key>
+ <string>nobody</string>
+ <key>GroupName</key>
+ <string>nobody</string>
+</dict>
+</plist>
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b1726fb
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+PySocks==1.5.6
diff --git a/security.adoc b/security.adoc
new file mode 100644
index 0000000..5a652d2
--- /dev/null
+++ b/security.adoc
@@ -0,0 +1,268 @@
+= Security analysis of irker =
+
+This is an analysis of security and DoS vulnerabilities associated
+with irker, exploring and explaining certain design choices. Much of
+it derives from a code audit and report by Daniel Franke.
+
+== Assumptions and Goals ==
+
+We begin by stating some assumptions about how irker will be deployed,
+and articulating a set of security goals.
+
+Communication flow in an irker deployment will look like this:
+
+-----------------------------------------------------------------------------
+ Committers
+ |
+ |
+ Version-control repositories
+ |
+ |
+ irkerhook.py
+ |
+ |
+ irkerd
+ |
+ |
+ IRC servers
+-----------------------------------------------------------------------------
+
+Here are our assumptions:
+
+1. The repositories are hosted on public forge sites such as
+SourceForge, GitHub, Gitorious, Savannah, or Gna and must be
+accessible to untrusted users.
+
+2. Repository project owners can set properties on their repositories
+(including but not limited to irker.*), and may be able to set custom
+post-commit hooks which can execute arbitrary code on the repository
+server. In particular, these people may be able to modify the local
+copy of irkerhook.py.
+
+3. The machine which hosts irkerd has the same owner as the machine which
+hosts the repo; these machines are possibly but not necessarily
+one and the same.
+
+4. The network is protected by a perimeter firewall, and only a
+trusted group is able to emit arbitrary packets from inside the
+perimeter; committers are not necessarily part of this group.
+
+5. irkerd communicates with IRC servers over the open internet,
+and an IRC server's administrator is assumed to hold no position of
+trust with any other party.
+
+We can, accordingly, identify the following groups of security
+principals:
+
+A. irker administrators.
+B. Project committers.
+C. Project owners.
+D. IRC server administrators.
+E. Other people on irker's internal network.
+F. irkerd-IRC men-in-the-middle (i.e. people who control the network path
+ between irkerd and the IRC server).
+G. Random people on the internet.
+
+Our security goals for irker can be enumerated as follows:
+
+* Control: We don't want anyone outside group A gaining control of
+ the machines which host irkerd or the git repos.
+
+* Availability: Only group A should be able to deny or degrade
+ irkerd's ability to receive commit messages and relay them to the
+ IRC server. We recognize and accept as inevitable that MITMs (groups
+ E and F) can do this too (by ARP spoofing, cable-cutting, etc.).
+ But, in particular, we would like irker-mediated services to be
+ resilient against DoS (denial of service) attacks.
+
+* Authentication/integrity: Notifications should be truthful, i.e.,
+ commit messages sent to IRC channels should actually reflect that a
+ corresponding commit has taken place. We accept that groups A, C,
+ D, and E can violate this property.
+
+* Secrecy: irker shouldn't aid spammers (group G) in harvesting
+ committers' email addresses.
+
+* Auditability: If people abuse irkerd, we want to be able to identify
+ the abusive account or IP address.
+
+== Control Issues ==
+
+We have audited the irker and irkerhook.py code for exploitable
+vulnerabilities. We have not found any in the code itself, and the
+use of Python gives us confidence in the absence of large classes of errors
+(such as buffer overruns) that afflict C programs.
+
+However, the fact that irkerhook.py relies on external binaries to
+mine data out of its repository opens up a well-known set of
+vulnerabilities if a malicious user is able to insert binaries in a
+carelessly-set execution path. Normal precautions against this should
+be taken.
+
+== Availability ==
+
+=== Solved problems ===
+
+When the original implementation of irkerd saw a nick collision it
+generated new nicks in a predictable sequence. A malicious IRC user
+could have continuously changed his own nick to the next one that
+irkerd is going to try. Some randomness has been added to nick
+generation to prevent this.
+
+=== Unsolved problems ===
+
+DoS attacks on any networked application can never be completely
+prevented, only mitigated by forcing attackers to invest more
+resources. Here we consider the easiest attack paths against irker,
+and possible countermeasures.
+
+irker handles each connection to a particular IRC server in a separate
+thread - actually, due to server limits on open channels per
+connection, there may be multiple sessions per server. This may not
+scale well, especially on 32-bit architectures.
+
+Thread instance overhead, combined with the lack of any restriction on
+how many URLs can appear in the 'to' list, is a DoS vulnerability. If
+a repository's properties specify that notifications should go to more
+than about 500 unique hostnames, then on 32-bit architectures we'll
+hit the 4GB cap on virtual memory (even while the resident set size
+remains small).
+
+Another ceiling to watch out for is the ulimit on file descriptors,
+which defaults to 1024 on many Linux systems but can safely be set
+much larger. Each connection instance costs a file descriptor.
+
+We consider some possible ways of addressing the problem:
+
+1. Limit the number of URLs in a request. Pretty painless - it will
+be very rare that anyone wants to specify a larger set than a project
+channel plus freenode #commits - but also ineffective. A malicious
+hook could achieve DoS simply by spamming lots of requests.
+
+2. Limit the total number of requests that can be queued. Completely
+ineffective - just sets a target for the DoS attack.
+
+3. Limit the number of requests that can be queued by source IP address.
+This might be worth doing; it would stymie a single-source DoS attack through
+a publicly-exposed irkerd, though not a DDoS by a botnet. But there isn't
+a lot of win here for a properly installed irker (e.g. behind a firewall),
+which is typically going to get all its requests from a single repo host
+anyway.
+
+4. Rate-limit requests by source IP address - that is, after any request
+discard additional ones during some timeout period. Again, good for
+stopping a single-source DoS against an exposed irker, won't stop a
+DDoS. The real problem, though, is that any such rate limit might interfere
+with legitimate high-volume use by a very active repo site.
+
+After this we appear to have run out of easy options, as source IP address
+is the only thing irkerd can see that an attacker can't spoof.
+
+We mitigate some availability risks by reaping old sessions when we're
+near resource limits. An ordinary DoS attack would then be prevented
+from completely blocking all message traffic; the cost would be a
+whole lot of join/leave spam due to connection churn.
+
+== Authentication/Integrity ==
+
+One way to help prevent DoS attacks would be in-band authentication -
+requiring irkerd submitters to present a credential along with each
+message submission. In principle this, if it existed, could also be used
+to verify that a submitter is authorized to issue notifications with
+respect to a given project.
+
+We rejected this approach. The design goal for irker was to make
+submissions fast, cheap, and stateless; baking an authentication
+system directly into the irkerd codebase would have conflicted with
+these objectives, not to mention probably becoming the camel's nose
+for a godawful amount of code bloat.
+
+The deployment advice in the installation instructions assumes that
+irkerd submitters are "authenticated" by being inside a firewall - that is,
+messages are issued from an intranet and it can be trusted that anyone
+issuing messages from within a given intranet is authorized to do so.
+This fits the assumption that irker instances will run on forge sites
+receiving requests from instances of irkerhook.py.
+
+One larger issue (not unique to irker) is that because of the
+insecure nature of IRC it is essentially impossible to secure
+#commits against commit notifications that are either garbled by
+software errors and misconfigurations or maliciously crafted to
+confuse anyone attempting to gather statistics from that channel. The
+lesson here is that IRC monitoring isn't a good method for that
+purpose; going direct to the repositories via a toolkit such as Ohloh
+is a far better idea.
+
+When this analysis was originally written, we recommended using spiped
+or stunnel to solve the problem of passing notifications from irkerd
+to IRC servers over a potentially hostile network that might interfere
+with them. Later, SSL/TLS support proved easy to add and is now in
+irkerd itself.
+
+== Secrecy ==
+
+irkerd has no inherent secrecy risks.
+
+The distributed version of irkerhook.py removes the host part of
+author addresses specifically in order to prevent address harvesting
+from the notifications.
+
+== Auditability ==
+
+We previously noted that source IP address is the only thing irker can
+see that an attacker can't spoof. This makes auditability difficult
+unless we impose conventions on the notifications passing through it.
+
+The irkerhook.py that we ship inherits an auditability property from
+the CIA service it was designed to replace: the first field of every
+notification (terminated by a colon) is the name of the issuing
+project. The only other competitor to replace CIA known to us
+(kgb_bot) shares this property.
+
+In the general case we cannot guarantee this property against
+groups A and F.
+
+== Risks relative to centralized services ==
+
+irker and irkerhook.py were written as a replacement for the
+now-defunct CIA notification service. The author has written
+a critique of that service: "CIA and the perils of overengineering"
+at <http://esr.ibiblio.org/?p=4540>. It is thus worth considering how
+a risk assessment of CIA compares to this one.
+
+The principal advantages of CIA from a security point of view were (a)
+it provided a single point at which spam filtering and source blocking
+could be done with benefit to all projects using the service, and (b)
+since it had to have a database anyway for routing messages to project
+channels, the incremental overhead for an authentication feature would
+have been relatively low.
+
+As a matter of fact rather than theory CIA never fully exploited
+either possibility. Anyone could create a CIA project entry with
+fanout to any desired set of IRC channels. Notifications were not
+authenticated, so anyone could masquerade as a member of any project.
+The only check on abuse was human intervention to source-block
+spammers, and this was by no means completely effective - spam shipped
+via CIA was occasionally seen on the freenode #commits channel.
+
+The principal security disadvantage of CIA was that it meant the
+entire notification system was subject to single-point failure due
+to software or hosting failures on cia.vc, or to DoS attacks
+against the server. While there is no evidence that the site
+was ever deliberately DoSed, failures were sufficiently common
+that a half-hearted DoS attack might not have been even noticed.
+
+Despite the absence of authentication, irker instances on
+properly firewalled intranets do not obviously pose additional
+spamming risks beyond those incurred by the CIA service. The
+overall robustness of the notification system as a whole should
+be greatly improved.
+
+== Conclusions ==
+
+The security and DoS issues irker has are not readily addressable by
+changing the irker codebase itself, short of a complete (much more
+complex and heavyweight) redesign. They are largely implicit risks of
+its operating environment and must be managed by properly controlling
+access to irker instances.
+