diff options
-rw-r--r-- | .gitignore | 12 | ||||
-rw-r--r-- | COPYING | 27 | ||||
-rw-r--r-- | Makefile | 118 | ||||
-rw-r--r-- | NEWS | 178 | ||||
-rw-r--r-- | README | 24 | ||||
-rw-r--r-- | control | 27 | ||||
-rwxr-xr-x | filter-example.py | 13 | ||||
-rwxr-xr-x | filter-test.py | 35 | ||||
-rw-r--r-- | hacking.txt | 78 | ||||
-rw-r--r-- | install.txt | 104 | ||||
-rwxr-xr-x | irk | 52 | ||||
-rw-r--r-- | irk.xml | 84 | ||||
-rwxr-xr-x | irkerd | 1063 | ||||
-rw-r--r-- | irkerd.service | 12 | ||||
-rw-r--r-- | irkerd.xml | 249 | ||||
-rwxr-xr-x | irkerhook.py | 547 | ||||
-rw-r--r-- | irkerhook.xml | 414 | ||||
-rw-r--r-- | org.catb.irkerd.plist | 20 | ||||
-rw-r--r-- | requirements.txt | 1 | ||||
-rw-r--r-- | security.txt | 268 |
20 files changed, 3326 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a001f85 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Git clutter +*.orig + +# Python bits +/*.pyc + +# Man Pages +/*.8 +/*.1 + +# HTML Docs +/*.html @@ -0,0 +1,27 @@ + BSD LICENSE + +Copyright (c) 2015, Eric S. Raymond +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ce8a1de --- /dev/null +++ b/Makefile @@ -0,0 +1,118 @@ +# Makefile for the irker relaying daemon + +VERS := $(shell sed -n 's/version = "\(.\+\)"/\1/p' irkerd) +SYSTEMDSYSTEMUNITDIR := $(shell pkg-config --variable=systemdsystemunitdir systemd) + +# `prefix`, `mandir` & `DESTDIR` can and should be set on the command +# line to control installation locations +prefix ?= /usr +mandir ?= /share/man +target = $(DESTDIR)$(prefix) + +docs: irkerd.html irkerd.8 irkerhook.html irkerhook.1 irk.html irk.1 + +irkerd.8: irkerd.xml + xmlto man irkerd.xml +irkerd.html: irkerd.xml + xmlto html-nochunks irkerd.xml + +irkerhook.1: irkerhook.xml + xmlto man irkerhook.xml +irkerhook.html: irkerhook.xml + xmlto html-nochunks irkerhook.xml + +irk.1: irk.xml + xmlto man irk.xml +irk.html: irk.xml + xmlto html-nochunks irk.xml + +install.html: install.txt + asciidoc -o install.html install.txt +security.html: security.txt + asciidoc -o security.html security.txt +hacking.html: hacking.txt + asciidoc -o hacking.html hacking.txt + +install: irk.1 irkerd.8 irkerhook.1 uninstall + install -m 755 -o 0 -g 0 -d "$(target)/bin" + install -m 755 -o 0 -g 0 irkerd "$(target)/bin/irkerd" +ifneq ($(strip $(SYSTEMDSYSTEMUNITDIR)),) + install -m 755 -o 0 -g 0 -d "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)" + install -m 644 -o 0 -g 0 irkerd.service "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)" +endif + install -m 755 -o 0 -g 0 -d "$(target)$(mandir)/man8" + install -m 644 -o 0 -g 0 irkerd.8 "$(target)$(mandir)/man8/irkerd.8" + install -m 755 -o 0 -g 0 -d "$(target)$(mandir)/man1" + install -m 644 -o 0 -g 0 irkerhook.1 "$(target)$(mandir)/man1/irkerhook.1" + install -m 644 -o 0 -g 0 irk.1 "$(target)$(mandir)/man1/irk.1" + +uninstall: + rm -f "$(target)/bin/irkerd" +ifneq ($(strip $(SYSTEMDSYSTEMUNITDIR)),) + rm -f "$(DESTDIR)$(SYSTEMDSYSTEMUNITDIR)/irkerd.service" +endif + rm -f "$(target)$(mandir)/man8/irkerd.8" + rm -f 
"$(target)$(mandir)/man1/irkerhook.1" + rm -f "$(target)$(mandir)/man1/irk.1" + +clean: + rm -f irkerd.8 irkerhook.1 irk.1 irker-*.tar.gz *~ *.html + +PYLINTOPTS = --rcfile=/dev/null --reports=n \ + --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \ + --dummy-variables-rgx='^_' +SUPPRESSIONS = "C0103,C0111,C0301,C0302,C0330,C1001,R0201,R0902,R0903,R0912,R0913,R0914,R0915,E1101,W0142,W0201,W0212,W0621,W0702,W0703,W1201,F0401,E0611" +pylint: + @pylint $(PYLINTOPTS) --disable=$(SUPPRESSIONS) irkerd + @pylint $(PYLINTOPTS) --disable=$(SUPPRESSIONS) irkerhook.py + +loc: + @echo "LOC:"; wc -l irkerd irkerhook.py + @echo -n "LLOC: "; grep -vE '(^ *#|^ *$$)' irkerd irkerhook.py | wc -l + +DOCS = \ + README \ + COPYING \ + NEWS \ + install.txt \ + security.txt \ + hacking.txt \ + irkerhook.xml \ + irkerd.xml \ + irk.xml \ + +SOURCES = \ + $(DOCS) \ + irkerd \ + irkerhook.py \ + filter-example.py \ + filter-test.py \ + irk \ + Makefile + +EXTRA_DIST = \ + org.catb.irkerd.plist \ + irkerd.service \ + irker-logo.png + +version: + @echo $(VERS) + +irker-$(VERS).tar.gz: $(SOURCES) irkerd.8 irkerhook.1 irk.1 + mkdir irker-$(VERS) + cp -pR $(SOURCES) $(EXTRA_DIST) irker-$(VERS)/ + @COPYFILE_DISABLE=1 tar -cvzf irker-$(VERS).tar.gz irker-$(VERS) + rm -fr irker-$(VERS) + +irker-$(VERS).md5: + @md5sum irker-$(VERS).tar.gz >irker-$(VERS).md5 + +dist: irker-$(VERS).tar.gz irker-$(VERS).md5 + +WEBDOCS = irkerd.html irk.html irkerhook.html install.html security.html hacking.html + +release: irker-$(VERS).tar.gz irker-$(VERS).md5 $(WEBDOCS) + shipper version=$(VERS) | sh -e -x + +refresh: $(WEBDOCS) + shipper -N -w version=$(VERS) | sh -e -x @@ -0,0 +1,178 @@ + irker history + +2.18: 2016-06-02 + Add the ability to set the notification-message template (Debian bug #824512) + +2.17: 2016-03-14 + Add a reconnect delay (Debian bug #749650). + Add proxy support (requres setting some variables in the source file). + Use git abbreviated hash to address Debian complaints. 
+ +2.16: 2016-02-18 + Code now runs under either Python 2 or Python 3 + +2.15: 2016-01-12 + Emergency backout of getaddrinfo, it randomly hangs. + +2.14: 2016-01-12 + Lookup with getaddrinfo allows use with IPv6. + Documentation improvements. + +2.13: 2015-06-14 + SSL validation fix. + Hardening against Unicode decode errors. + irk becomes a library so it can be re-used. + +2.12: 2014-10-22 + Catch erroneous UTF-8 or non-UTF-8 from servers. + Also autodetect the right logging device under FreeBSD: /var/run/syslog + +2.11: 2014-06-20 + With -i, message string argument now optional, stdin is read if it is absent. + Auto-adapt to BSD & OS X log device as well as Linux's. + +2.10: 2014-06-19 + irk no longer fails on ircs channel URLs. + +2.9: 2014-06-01 + If irkerd is running in background, log to /dev/syslog (facility daemon). + New -H option to set host listening address. + Add support for using CertFP to auth to the IRC server, and document it. + +2.8: 2014-05-30 + Various minor improvements to irk. + Cope better with branch names containing slashes. + +2.7: 2014-03-15 + Add support for ircs:// and SSL/TLS connections to IRC servers. + Add support for per-URL usernames and passwords. + +2.6: 2014-02-04 + Fix for an infinite loop on failing to connect to IRC + +2.5: 2013-12-24 + Bug fix - remove a deadlock we inherited from irclib. + +2.4: 2013-12-03 + Bug fix release - some users reported failure to connect with 2.3. + Also prevent a crash if Unicode shows up in the wrong place. + +2.3: 2013-11-30 + -i option enables immediate sending of one line in foreground. + +2.2: 2013-11-29 + Fixed Unicode processing - got busted in 2.0 when irclib was removed. + Show Python traceback on higher debug levels. + +2.1: 2013-11-26 + A performance improvement in the git repository hook. + Documentation polishing. + +2.0: 2013-11-16 + The dependency on irclib is gone. + An email delivery method, suitable for use on SourceForge. + irkerhook can now be used as a hg changegroup hook. 
+ Prevent misbehavior on UTF-8 in commit metadata. + Fix a crash bug on invalid hostnames. + +1.20: 2013-05-17 + Compatibility back to Python 2.4 (provided simplejson is present). + Increased anti-flood delay to avoid trouble with freenode. + +1.19: 2013-05-06 + Fixed a minor bug in argument processing + +1.18: 2013-04-16 + Added -l option; irker can now be used as a channel monitor. + Added -n and -p options: the nick can be forced and authenticated. + +1.17: 2013-02-03 + Various minor fixes and bulletproofing. + +1.16: 2013-01-24 + Deal gracefully with non-ASCII author names and '|' in the command line. + +1.15: 2012-12-08 + Don't append an extra newline in the Subversion hook. + +1.14: 2012-11-26 + irclib 5.0 and urlparse compatibility fixes. + +1.13: 2012-11-06 + Fix for a very rare thread race found by AI0867. + Work around a misdesign in the IRC library. + +1.12: 2012-10-11 + Emergency workaround for a Unicode-handling error buried deep in irclib. + The IRC library at version 3.2 or later is required for this version! + Only ship to freenode #commits by default. + +1.11: 2012-10-10 + Code is now fully Unicode-safe. + A 'cialike' option emulates the file-summary behavior on the old CIA service. + +1.10: 2012-10-09 + Expire disconnected connections if they aren't needed or can't reconnect. + Eventlet support removed - didn't play well with the library mutex. + +1.9: 2012-10-08 + Proper mutex locks prevent an occasional thread crash on session timeout. + There's now systemd installation support for irkerd. + +1.8: 2012-10-06 + It's now possible to send to nick URLs. + Cope gracefully if an IRC server dies or hangs during the nick handshake. + +1.7: 2012-10-05 + Optional metadata filtering with a user-specified command. + irkerd code is now armored against IRC library errors in the delivery threads. + +1.6: 2012-10-04 + In 1.5 trying to appease pylint broke the Mercurial hook. + Added credits for contributors in hacking.txt. 
+ Fix the aging out of connections when we hit a resource limit. + +1.5: 2012-10-03 + Mercurial support. + Shorten nick negotiation by choosing a random nick base from a large range. + Make irkerd exit cleanly on control-C. + +1.4: 2012-10-02 + Graceful handling of server disconnects and kicks. + Distribution now includes an installable irkerd plist for Mac OS/X. + The color variable is no longer boolean; may be mIRC or ANSI. + The installation instructions for irkerhook.py have changed! + +1.3: 2012-10-01 + Support for an irker.conf file to set irkerhook variables under Subversion. + Color highlighting of notification fields can be enabled. + irkerhook.py now has its own manual page. + Added channelmax variable for rate-limiting. + irkerd now uses green threads, with much lower overhead. + Fix a bug in handling of channel names with no prefix. + +1.2: 2012-09-30 + All segments of a message with embedded newlines are now transmitted. + Message reduction - irkerhook drops the filelist on excessively long ones. + Shell quote hardening in irkerhook.py and some anti-DoS logic. + +1.1: 2012-09-28 + Add a delay to avoid threads spinning on the empty-queue-check, eating CPU. + Fix a bug in reporting of multi-file commits. + +1.0: 2012-09-27 + First production version, somewhat rushed by the sudden death of cia.vc + on 24 September. + + + + + + + + + + + + + @@ -0,0 +1,24 @@ + irker - submission tools for IRC notifications + +irkerd is a specialized IRC client that runs as a daemon, allowing +other programs to ship IRC notifications by sending JSON objects to a +listening socket. + +It is meant to be used by hook scripts in version-control +repositories, allowing them to send commit notifications to project +IRC channels. A hook script, irkerhook.py, supporting git, hg, and +Subversion is included in the distribution; see the install.txt file +for installation instructions. 
+ +The advantage of using this daemon over individual scripted sends +is that it can maintain connection state for multiple channels, +avoiding obnoxious join/leave spam. + +The file install.txt describes how to install the software safely, so +it can't be used as a spam conduit. + +Please read the files security.txt and hacking.txt before modifying +this code. + + Eric S. Raymond + September 2012 @@ -0,0 +1,27 @@ +# This is not a real Debian control file, though the syntax is compatible. +# It's project metadata for the shipper tool + +Package: irker + +Description: An IRC client that runs as a daemon accepting notification requests. + You present them as JSON objects to a listening socket. It is + meant to be used by hook scripts in version-control repositories, + allowing them to send commit notifications to project IRC channels. + A hook script that works with git, hg, and svn is included in the + distribution. + +#XBS-Destinations: freshcode + +Homepage: http://www.catb.org/~esr/irker + +XBS-HTML-Target: index.html + +XBS-Repository-URL: https://gitlab.com/esr/irker + +XBS-OpenHub-URL: http://www.openhub.net/p/irker + +XBS-IRC-Channel: irc://chat.freenode.net/#irker + +XBS-Logo: irker-logo.png + +XBS-VC-Tag-Template: %(version)s diff --git a/filter-example.py b/filter-example.py new file mode 100755 index 0000000..12908b4 --- /dev/null +++ b/filter-example.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python +# This is a trivial example of a metadata filter. +# All it does is change the name of the commit's author. 
+# It could do other things, including modifying the +# channels list +# +import sys, json +metadata = json.loads(sys.argv[1]) + +metadata['author'] = "The Great and Powerful Oz" + +print json.dumps(metadata) +# end diff --git a/filter-test.py b/filter-test.py new file mode 100755 index 0000000..030d3d4 --- /dev/null +++ b/filter-test.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# +# Test hook to launch an irker instance (if it doesn't already exist) +# just before shipping the notification. We start it in another terminal +# so you can watch the debug messages. Intended to be used in the root +# directory of the irker repo. Probably of interest only to irker +# developers +# +# To use this, set up irkerhook.py to fire on each commit. Create a +# .git/hooks/post-commit file containing the line "irkerhook.py"; be +# sure to make the post-commit file executable. Then set the +# filtercmd variable in your repo config as follows: +# +# [irker] +# filtercmd = filter-test.py + +import os, sys, json, subprocess, time +metadata = json.loads(sys.argv[1]) + +ps = subprocess.Popen("ps -U %s uh" % os.getenv("LOGNAME"), + shell=True, + stdout=subprocess.PIPE) +data = ps.stdout.read() +irkerd_count = len([x for x in data.split("\n") if x.find("irkerd") != -1]) + +if irkerd_count: + sys.stderr.write("Using a running irker instance...\n") +else: + sys.stderr.write("Launching a new irker instance...\n") + os.system("gnome-terminal --title 'irkerd' -e 'irkerd -d 2' &") + +time.sleep(1.5) # Avoid a race condition + +print json.dumps(metadata) +# end diff --git a/hacking.txt b/hacking.txt new file mode 100644 index 0000000..8e121d4 --- /dev/null +++ b/hacking.txt @@ -0,0 +1,78 @@ += Hacker's Guide to irker = + +== Design philosophy == + +Points to you if some of this seems familiar from GPSD... + +=== Keep mechanism and policy separate === + +Mechanism goes in irkerd. 
Policy goes in irkerhook.py + +irkerd is intended to be super-simple and completely indifferent to +what content passes through it. It doesn't know, in any sense, that +the use-case it was designed for is broadcasting notifications from +version control systems. irkerhook.py is the part that knows about how +to mine data from repositories and sets the format of notifications. + +=== If you think the mechanism needs an option, think again === + +Because irkerhook.py does policy, it takes policy options. Because +irkerd is pure mechanism, it shouldn't need any. If you think it +does, you have almost certainly got a bug in your thinking. Fix +that before you modify code. + +=== Never configure what you can autoconfigure === + +Human attention is more expensive than machine time. Humans are +careless and failure-prone. Therefore, whenever you make a user tell +your code something the code can deduce for itself, you are +introducing unnecessary inefficiency and unnecessary failure modes. + +This, in particular, is why irkerhook.py doesn't have a repository +type switch. It can deduce the repo type by looking, so it should. + +== Release procedure == + +1. Check for merge requests at the repository. + +2. Do 'make pylint' to audit the code. + +3. Run irk with a sample message; look at #irker on freenode to verify. + +4. Bump the version numbers in irkerd and irkerhook.py + +5. Update the NEWS file + +6. git commit -a + +7. make release + +== Thanks where due == + +Alexander van Gessel (AI0867) <ai0867@gmail.com> contributed the +Subversion support in irkerhook.py. Since the 1.0 release he has +kept as close an eye on the code as the author and has fixed at least +as many bugs. + +//W. here causes asciidoc to see this as a list entry. +W Trevor King <wking@tremily.us> added SSL/TLS support and did +significant refactoring work. + +Daniel Franke <dfoxfranke@gmail.com> performed a security audit of irkerd. 
+ +Georg Brandl <georg@python.org> contributed the Mercurial support in +irkerhook.py and explained how to make Control-C work right. + +Laurent Bachelier <laurent@bachelier.name> fixed the Makefile so it +wouldn't break stuff and wrote the first version of the external +filtering option. + +dak180 (name withheld by request) wrote the OS X launchd plist. + +Wulf C. Krueger <philantrop@exherbo.org> wrote the systemd +installation support. + +Other people on the freenode #irker channel (Kingpin, fpcfan, +shadowm, Rick) smoked out bugs in irkerd before they could seriously +bug anybody. + diff --git a/install.txt b/install.txt new file mode 100644 index 0000000..c1a8f5a --- /dev/null +++ b/install.txt @@ -0,0 +1,104 @@ += Forge installation instructions = + +irker and irkerhook.py are intended to be installed on forge sites +such as SourceForge, GitHub, GitLab, Gna, and Savannah. This +file explains the theory of operation, how to install the code, +and how to test it. + +== Theory of operation == + +irkerhook.py creates JSON notification requests and ships them to +irkerd's listener socket. irkerd runs as a daemon in order to maintain +all the client state required to post multiple notifications while generating +a minimum of join/leave messages (which, from the point of view of +humans watching irkerd's output, are mere spam). + +See the security.txt document for a detailed discussion of security +and DoS vulnerabilities related to irker. The short version: as +long as your firewall blocks port 6659 and irkerd is running inside +it, you should be fine. + +== Prerequisites == + +You will need either + +1. Python at version 2.6 or later, which has JSON built in + +2. Python at version no older than 2.4, and a version of the + simplejson library installed that it can use. Some newer + versions of simplejson discard 2.4 compatibility; 2.0.9 + is known to work. + +== Installing irkerd == + +irker needs to run constantly, watching for TCP and UDP traffic on +port 6659. 
Install it accordingly. It has no config file; you can +just start it up with no arguments. If you want to see what it's +doing, give it command-line options -d info for sparse messages and +-d debug to show all traffic with IRC servers. + +You should *not* make irker visible from outside the site firewall, as +it can be used to spam IRC channels while masking the source address. +The firewall should block port 6659. + +The design of irker assumes the machine on which it is running is also +inside the firewall, so that repository hooks can reach port 6659. + +The file org.catb.irkerd.plist is a Mac OS/X plist that can be +installed to launch irkerd as a boot-time service on that system. + +== Installing irkerhook.py == + +Under git, a call to irkerhook.py should be installed in the update +hook script of your repo. Under Subversion, the call goes in your +repo's post-commit script. Under Mercurial there are two different +ways to install it. See the irkerhook manual page for details; the +source is irkerhook.xml in this distribution. + +Note that if you were using the CIA service and have ciabot.py in your +git update script, you can simply replace this + +/path/to/ciabot.py ${refname} $(git rev-list ${oldhead}..${newhead} | tac) + +with this: + +/path/to/irkerhook.py --refname=${refname} $(git rev-list ${oldhead}..${newhead} | tac) + +SourceForge is a special case: see + +https://github.com/AI0867/sf-git-irker-pipeline + +for tools and instructions on how to work around its limitations. + +== Testing == + +To verify that your repo produces well-formed JSON notifications, +you can run irkerhook.py in the repo directory using the -n switch, +which emits JSON to standard output rather than attempting to ship +to an irkerd instance. + +Then, start irkerd and call irkerhook.py while watching the freenode +#commits channel. + +The 'irk' script is a little test tool that takes two arguments, +a channel and a message, and does what you'd expect. 
+ +If you need help, there's a project chat channel at + + irc://chat.freenode.net/#irker + +== Read-only access == + +If, for whatever reason, you can't modify the hook scripts in your +repository, there is still hope. + +There's a proxy that takes CIA XML-RPC notifications +and passes them to a local irker instance. Find it here: + + https://github.com/nenolod/irker-cia-proxy + +There's also a poller daemon that can watch activity in a Subversion +repository and ship notifications via an irker instance. + + https://github.com/shikadilord/irker-svnpoller + @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# Illustrates how to test irkerd. +# +# First argument must be a channel URL. If it does not begin with "irc", +# the base URL for freenode is prepended. +# +# Second argument must be a payload string. Standard C-style escapes +# such as \n and \t are decoded. +# +# SPDX-License-Identifier: BSD-2-Clause +import json +import socket +import sys +import fileinput + +DEFAULT_SERVER = ("localhost", 6659) + +def connect(server = DEFAULT_SERVER): + return socket.create_connection(server) + +def send(s, target, message): + data = {"to": target, "privmsg" : message} + dump = json.dumps(data) + if not isinstance(dump, bytes): + dump = dump.encode('ascii') + s.sendall(dump) + +def irk(target, message, server = DEFAULT_SERVER): + s = connect(server) + if "irc:" not in target and "ircs:" not in target: + target = "irc://chat.freenode.net/{0}".format(target) + if message == '-': + for line in fileinput.input('-'): + send(s, target, line.rstrip('\n')) + else: + send(s, target, message) + s.close() + +def main(): + target = sys.argv[1] + message = " ".join(sys.argv[2:]) + # XXX: why is this necessary? 
+ #message = message.decode('string_escape') + + try: + irk(target, message) + except socket.error as e: + sys.stderr.write("irk: write to server failed: %r\n" % e) + sys.exit(1) + +if __name__ == '__main__': + main() @@ -0,0 +1,84 @@ +<!DOCTYPE refentry PUBLIC + "-//OASIS//DTD DocBook XML V4.1.2//EN" + "docbook/docbookx.dtd"> +<refentry id='irk.8'> +<refmeta> +<refentrytitle>irk</refentrytitle> +<manvolnum>1</manvolnum> +<refmiscinfo class='date'>Apr 30 2014</refmiscinfo> +<refmiscinfo class='source'>irker</refmiscinfo> +<refmiscinfo class='product'>irker</refmiscinfo> +<refmiscinfo class='manual'>Commands</refmiscinfo> +</refmeta> +<refnamediv id='name'> +<refname>irk</refname> +<refpurpose>test program for irkerd</refpurpose> +</refnamediv> +<refsynopsisdiv id='synopsis'> + +<cmdsynopsis> + <command>irk</command> + <arg><replaceable>target</replaceable></arg> + <arg choice='opt'><replaceable>message text</replaceable></arg> +</cmdsynopsis> +</refsynopsisdiv> + +<refsect1 id='description'><title>DESCRIPTION</title> + +<para><application>irk</application> is a simple test program for +<citerefentry><refentrytitle>irkerd</refentrytitle><manvolnum>8</manvolnum></citerefentry>. It +will construct a simple JSON object and pass it to the daemon running +on localhost.</para> +</refsect1> + +<refsect1 id='options'><title>OPTIONS</title> + +<para><application>irk</application> takes the following options:</para> + +<variablelist> +<varlistentry> +<term>target</term> +<listitem><para>Which server and channel to join to announce the +message. If not prefixed with "irc:", it will prefix +"irc://chat.freenode.net/" to the argument before passing it directly +to irkerd. This argument is passed as the "to" parameter in the JSON +object.</para></listitem> +</varlistentry> +<varlistentry> +<term>message</term> +<listitem><para>Which message to send to the target specified +above. 
If the string "-", the message will be read from standard +input, with newlines stripped.</para></listitem> +</varlistentry> +</variablelist> + +</refsect1> + +<refsect1 id='limitations'><title>LIMITATIONS</title> + +<para><application>irk</application> has no commandline usage and may +be riddled with bugs.</para> + +<para><application>irk</application> doesn't know how to talk to your +favorite VCS. You will generally want to use +<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry> +instead</para> + +<para><application>irk</application> has also all the limitations of +<application>irkerd</application>.</para> +</refsect1> + +<refsect1 id='see_also'><title>SEE ALSO</title> +<para> +<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>, +</para> +</refsect1> + +<refsect1 id='authors'><title>AUTHOR</title> +<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the +project page at <ulink +url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink> +for updates and other resources, including an installable repository +hook script.</para> +</refsect1> +</refentry> @@ -0,0 +1,1063 @@ +#!/usr/bin/env python +""" +irkerd - a simple IRC multiplexer daemon + +Listens for JSON objects of the form {'to':<irc-url>, 'privmsg':<text>} +and relays messages to IRC channels. Each request must be followed by +a newline. + +The <text> must be a string. The value of the 'to' attribute can be a +string containing an IRC URL (e.g. 'irc://chat.freenet.net/botwar') or +a list of such strings; in the latter case the message is broadcast to +all listed channels. Note that the channel portion of the URL need +*not* have a leading '#' unless the channel name itself does. + +Design and code by Eric S. Raymond <esr@thyrsus.com>. See the project +resource page at <http://www.catb.org/~esr/irker/>. + +Requires Python 2.7, or: +* 2.6 with the argparse package installed. 
+* Any 3.x + +""" +# SPDX-License-Identifier: BSD-2-Clause + +# These things might need tuning + +HOST = "localhost" +PORT = 6659 + +PROXY_TYPE = None # Use proxy if set 1: SOCKS4, 2: SOCKS5, 3: HTTP +PROXY_HOST = "" +PROXY_PORT = 1080 + +XMIT_TTL = (3 * 60 * 60) # Time to live, seconds from last transmit +PING_TTL = (15 * 60) # Time to live, seconds from last PING +HANDSHAKE_TTL = 60 # Time to live, seconds from nick transmit +CHANNEL_TTL = (3 * 60 * 60) # Time to live, seconds from last transmit +DISCONNECT_TTL = (24 * 60 * 60) # Time to live, seconds from last connect +UNSEEN_TTL = 60 # Time to live, seconds since first request +CHANNEL_MAX = 18 # Max channels open per socket (default) +ANTI_FLOOD_DELAY = 1.0 # Anti-flood delay after transmissions, seconds +ANTI_BUZZ_DELAY = 0.09 # Anti-buzz delay after queue-empty check +CONNECTION_MAX = 200 # To avoid hitting a thread limit +RECONNECT_DELAY = 3 # Don't spam servers with connection attempts + +# No user-serviceable parts below this line + +version = "2.18" + +import argparse +import logging +import logging.handlers +import json +import os +import os.path +try: # Python 3 + import queue +except ImportError: # Python 2 + import Queue as queue +import random +import re +import select +import signal +import socket +try: + import socks + socks_on = True +except ImportError: + socks_on = False +try: # Python 3 + import socketserver +except ImportError: # Python 2 + import SocketServer as socketserver +import ssl +import sys +import threading +import time +import traceback +try: # Python 3 + import urllib.parse as urllib_parse +except ImportError: # Python 2 + import urlparse as urllib_parse + + +LOG = logging.getLogger(__name__) +LOG.setLevel(logging.ERROR) +LOG_LEVELS = ['critical', 'error', 'warning', 'info', 'debug'] + +try: # Python 2 + UNICODE_TYPE = unicode +except NameError: # Python 3 + UNICODE_TYPE = str + + +# Sketch of implementation: +# +# One Irker object manages multiple IRC sessions. 
It holds a map of +# Dispatcher objects, one per (server, port) combination, which are +# responsible for routing messages to one of any number of Connection +# objects that do the actual socket conversations. The reason for the +# Dispatcher layer is that IRC daemons limit the number of channels a +# client (that is, from the daemon's point of view, a socket) can be +# joined to, so each session to a server needs a flock of Connection +# instances each with its own socket. +# +# Connections are timed out and removed when either they haven't seen a +# PING for a while (indicating that the server may be stalled or down) +# or there has been no message traffic to them for a while, or +# even if the queue is nonempty but efforts to connect have failed for +# a long time. +# +# There are multiple threads. One accepts incoming traffic from all +# servers. Each Connection also has a consumer thread and a +# thread-safe message queue. The program main appends messages to +# queues as JSON requests are received; the consumer threads try to +# ship them to servers. When a socket write stalls, it only blocks an +# individual consumer thread; if it stalls long enough, the session +# will be timed out. This solves the biggest problem with a +# single-threaded implementation, which is that you can't count on a +# single stalled write not hanging all other traffic - you're at the +# mercy of the length of the buffers in the TCP/IP layer. +# +# Message delivery is thus not reliable in the face of network stalls, +# but this was considered acceptable because IRC (notoriously) has the +# same problem - there is little point in reliable delivery to a relay +# that is down or unreliable. +# +# This code uses only NICK, JOIN, PART, MODE, PRIVMSG, USER, and QUIT. +# It is strictly compliant to RFC1459, except for the interpretation and +# use of the DEAF and CHANLIMIT and (obsolete) MAXCHANNELS features. 
#
# CHANLIMIT is as described in the Internet RFC draft
# draft-brocklesby-irc-isupport-03 at <http://www.mirc.com/isupport.html>.
# The ",isnick" feature is as described in
# <http://ftp.ics.uci.edu/pub/ietf/uri/draft-mirashi-url-irc-01.txt>.

# Historical note: the IRCClient and IRCServerConnection classes
# (~270LOC) replace the overweight, overcomplicated 3KLOC mass of
# irclib code that irker formerly used as a service library.  They
# still look similar to parts of irclib because I contributed to that
# code before giving up on it.

class IRCError(BaseException):
    "An IRC exception"
    pass

class InvalidRequest(ValueError):
    "An invalid JSON request"
    pass

class IRCClient():
    "An IRC client session to one or more servers."
    def __init__(self):
        # Re-entrant because event handlers run with the lock held and
        # may call back into methods that take it again.
        self.mutex = threading.RLock()
        self.server_connections = []
        self.event_handlers = {}
        self.add_event_handler("ping",
                               lambda c, e: c.ship("PONG %s" % e.target))

    def newserver(self):
        "Initialize a new server-connection object."
        conn = IRCServerConnection(self)
        with self.mutex:
            self.server_connections.append(conn)
        return conn

    def spin(self, timeout=0.2):
        "Spin processing data from connections forever."
        # Outer loop should specifically *not* be mutex-locked.
        # Otherwise no other thread would ever be able to change
        # the shared state of an IRC object running this function.
        while True:
            nextsleep = 0
            with self.mutex:
                connected = [x for x in self.server_connections
                             if x is not None and x.socket is not None]
                sockets = [x.socket for x in connected]
                if sockets:
                    connmap = dict([(c.socket.fileno(), c) for c in connected])
                    (insocks, _o, _e) = select.select(sockets, [], [], timeout)
                    for s in insocks:
                        try:
                            connmap[s.fileno()].consume()
                        except UnicodeDecodeError as e:
                            # Logger.warn() is a deprecated alias; the rest
                            # of this file already uses warning().
                            LOG.warning('{0}: invalid encoding ({1})'.format(
                                self, e))
                else:
                    nextsleep = timeout
            time.sleep(nextsleep)

    def add_event_handler(self, event, handler):
        "Set a handler to be called later."
        with self.mutex:
            event_handlers = self.event_handlers.setdefault(event, [])
            event_handlers.append(handler)

    def handle_event(self, connection, event):
        """Call every handler registered for "all_events" or event.type.

        Handlers run in registration order, "all_events" handlers first.
        """
        with self.mutex:
            h = self.event_handlers
            # Do not sort this list: callables are not orderable on
            # Python 3, so sorted() raises TypeError as soon as an event
            # has two handlers (which "ping" always does).
            th = h.get("all_events", []) + h.get(event.type, [])
            for handler in th:
                handler(connection, event)

    def drop_connection(self, connection):
        "Forget a server connection; raises ValueError if it is unknown."
        with self.mutex:
            self.server_connections.remove(connection)


class LineBufferedStream():
    "Line-buffer a read stream."
    _crlf_re = re.compile(b'\r?\n')

    def __init__(self):
        self.buffer = b''

    def append(self, newbytes):
        "Add freshly received bytes to the buffer."
        self.buffer += newbytes

    def lines(self):
        "Iterate over lines in the buffer."
        lines = self._crlf_re.split(self.buffer)
        # The last element is the (possibly empty) unterminated tail;
        # keep it for the next append().
        self.buffer = lines.pop()
        return iter(lines)

    def __iter__(self):
        return self.lines()

class IRCServerConnectionError(IRCError):
    pass

class IRCServerConnection():
    "One socket conversation with one IRC server."
    command_re = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?")
    # The full list of numeric-to-event mappings is in Perl's Net::IRC.
    # We only need to ensure that if some ancient server throws numerics
    # for the ones we actually want to catch, they're mapped.
    codemap = {
        "001": "welcome",
        "005": "featurelist",
        "432": "erroneusnickname",
        "433": "nicknameinuse",
        "436": "nickcollision",
        "437": "unavailresource",
    }

    def __init__(self, master):
        self.master = master
        self.socket = None

    def _wrap_socket(self, socket, target, certfile=None, cafile=None,
                     protocol=ssl.PROTOCOL_TLSv1):
        """Wrap a plain socket for SSL/TLS and store it in self.socket.

        certfile is an optional client certificate; cafile is an optional
        CA bundle (the OpenSSL default paths are used when it is unset on
        Pythons that have ssl.SSLContext).  Returns the wrapped socket.
        """
        # NOTE(review): the TLSv1-only default is kept for interface
        # compatibility; many servers now refuse it - consider passing a
        # negotiating protocol constant from the caller.
        try:  # Python 3.2 and greater
            ssl_context = ssl.SSLContext(protocol)
        except AttributeError:  # Python < 3.2
            self.socket = ssl.wrap_socket(
                socket, certfile=certfile, cert_reqs=ssl.CERT_REQUIRED,
                ssl_version=protocol, ca_certs=cafile)
        else:
            ssl_context.verify_mode = ssl.CERT_REQUIRED
            if certfile:
                ssl_context.load_cert_chain(certfile)
            if cafile:
                ssl_context.load_verify_locations(cafile=cafile)
            else:
                ssl_context.set_default_verify_paths()
            kwargs = {}
            if ssl.HAS_SNI:
                kwargs['server_hostname'] = target.servername
            self.socket = ssl_context.wrap_socket(socket, **kwargs)
        return self.socket

    def _check_hostname(self, target):
        "Raise IRCServerConnectionError if the peer certificate is for another host."
        # NOTE(review): ssl.match_hostname was removed in Python 3.12, so
        # on current interpreters this takes the warning branch and the
        # hostname goes unchecked - confirm and consider enabling
        # SSLContext.check_hostname in _wrap_socket.
        if hasattr(ssl, 'match_hostname'):  # Python >= 3.2
            cert = self.socket.getpeercert()
            try:
                ssl.match_hostname(cert, target.servername)
            except ssl.CertificateError as e:
                raise IRCServerConnectionError(
                    'Invalid SSL/TLS certificate: %s' % e)
        else:  # Python < 3.2
            LOG.warning(
                'cannot check SSL/TLS hostname with Python %s' % sys.version)

    def connect(self, target, nickname, username=None, realname=None,
                **kwargs):
        """Open the socket to target and start the IRC handshake.

        Extra keyword arguments are handed to _wrap_socket() for ircs
        targets.  Raises IRCServerConnectionError if the socket cannot
        be connected or the certificate does not match.  Returns self.
        """
        LOG.debug("connect(server=%r, port=%r, nickname=%r, ...)" % (
            target.servername, target.port, nickname))
        if self.socket is not None:
            self.disconnect("Changing servers")

        self.buffer = LineBufferedStream()
        self.event_handlers = {}
        self.real_server_name = ""
        self.target = target
        self.nickname = nickname
        try:
            if socks_on and PROXY_TYPE:
                self.socket = socks.socksocket(socket.AF_INET,socket.SOCK_STREAM)
                self.socket.set_proxy(PROXY_TYPE, PROXY_HOST, PROXY_PORT)
            else:
                self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            if target.ssl:
                self.socket = self._wrap_socket(
                    socket=self.socket, target=target, **kwargs)
            self.socket.bind(('', 0))
            self.socket.connect((target.servername, target.port))
        except socket.error as err:
            raise IRCServerConnectionError("Couldn't connect to socket: %s" % err)

        if target.ssl:
            self._check_hostname(target=target)
        if target.password:
            self.ship("PASS " + target.password)
        self.nick(self.nickname)
        self.user(
            username=target.username or username or 'irker',
            realname=realname or 'irker relaying client')
        return self

    def close(self):
        "Disconnect and unregister this connection from its IRCClient."
        # Without this thread lock, there is a window during which
        # select() can find a closed socket, leading to an EBADF error.
        with self.master.mutex:
            self.disconnect("Closing object")
            self.master.drop_connection(self)

    def consume(self):
        "Read pending data from the socket and dispatch one event per line."
        try:
            incoming = self.socket.recv(16384)
        except socket.error:
            # Server hung up on us.
            self.disconnect("Connection reset by peer")
            return
        if not incoming:
            # Dead air also indicates a connection reset.
            self.disconnect("Connection reset by peer")
            return

        self.buffer.append(incoming)

        for line in self.buffer:
            if not isinstance(line, UNICODE_TYPE):
                line = UNICODE_TYPE(line, 'utf-8')
            LOG.debug("FROM: %s" % line)

            if not line:
                continue

            prefix = None
            command = None
            # Start from an empty list so that argument-less commands
            # cannot crash the indexing below.
            arguments = []
            self.handle_event(Event("every_raw_message",
                                    self.real_server_name,
                                    None,
                                    [line]))

            m = IRCServerConnection.command_re.match(line)
            if m is None:
                # E.g. a line with leading whitespace; one odd line must
                # not take down the receive thread.
                LOG.debug("ignoring unparseable line: %r" % line)
                continue
            if m.group("prefix"):
                prefix = m.group("prefix")
                if not self.real_server_name:
                    self.real_server_name = prefix
            if m.group("command"):
                command = m.group("command").lower()
            if m.group("argument"):
                a = m.group("argument").split(" :", 1)
                arguments = a[0].split()
                if len(a) == 2:
                    arguments.append(a[1])

            command = IRCServerConnection.codemap.get(command, command)
            if command in ["privmsg", "notice"]:
                target = arguments.pop(0) if arguments else None
            else:
                target = None

                if command == "quit":
                    arguments = arguments[:1]
                elif command == "ping":
                    target = arguments[0] if arguments else None
                else:
                    target = arguments[0] if arguments else None
                    arguments = arguments[1:]

            LOG.debug("command: %s, source: %s, target: %s, arguments: %s" % (
                command, prefix, target, arguments))
            self.handle_event(Event(command, prefix, target, arguments))

    def handle_event(self, event):
        "Pass an event to the client-wide handlers, then to our own."
        self.master.handle_event(self, event)
        if event.type in self.event_handlers:
            for fn in self.event_handlers[event.type]:
                fn(self, event)

    def is_connected(self):
        return self.socket is not None

    def disconnect(self, message=""):
        "Close the socket (if any) and emit a 'disconnect' event."
        if self.socket is None:
            return
        # Don't send a QUIT here - causes infinite loop!
        try:
            self.socket.shutdown(socket.SHUT_WR)
        except socket.error:
            # Best effort: the peer may already be gone.
            pass
        try:
            # Close even when shutdown() failed, so that the descriptor
            # is not leaked.
            self.socket.close()
        except socket.error:
            pass
        self.socket = None
        # The event source is the server's name; the original passed the
        # bound method Target.server here by mistake.
        self.handle_event(
            Event("disconnect", self.target.servername, "", [message]))

    def join(self, channel, key=""):
        self.ship("JOIN %s%s" % (channel, (key and (" " + key))))

    def mode(self, target, command):
        self.ship("MODE %s %s" % (target, command))

    def nick(self, newnick):
        self.ship("NICK " + newnick)

    def part(self, channel, message=""):
        cmd_parts = ['PART', channel]
        if message:
            cmd_parts.append(message)
        self.ship(' '.join(cmd_parts))

    def privmsg(self, target, text):
        self.ship("PRIVMSG %s :%s" % (target, text))

    def quit(self, message=""):
        self.ship("QUIT" + (message and (" :" + message)))

    def user(self, username, realname):
        self.ship("USER %s 0 * :%s" % (username, realname))

    def ship(self, string):
        "Ship a command to the server, appending CR/LF"
        try:
            self.socket.send(string.encode('utf-8') + b'\r\n')
            LOG.debug("TO: %s" % string)
        except socket.error:
            self.disconnect("Connection reset by peer.")

class Event(object):
    "A parsed IRC message: type, source prefix, target and argument list."
    def __init__(self, evtype, source, target, arguments=None):
        self.type = evtype
        self.source = source
        self.target = target
        if arguments is None:
            arguments = []
        self.arguments = arguments

def is_channel(string):
    "Truthy if string starts with one of the IRC channel prefix characters."
    return string and string[0] in "#&+!"

class Connection:
    "One IRC session (socket plus consumer thread) on behalf of a Dispatcher."
    def __init__(self, irker, target, nick_template, nick_needs_number=False,
                 password=None, **kwargs):
        self.irker = irker
        self.target = target
        self.nick_template = nick_template
        self.nick_needs_number = nick_needs_number
        self.password = password
        # Remaining keyword arguments are passed to
        # IRCServerConnection.connect().
        self.kwargs = kwargs
        self.nick_trial = None
        self.connection = None
        self.status = None
        self.last_xmit = time.time()
        self.last_ping = time.time()
        # Maps channel name -> time of the last transmission to it.
        self.channels_joined = {}
        # Maps channel prefix character -> server-advertised join limit.
        self.channel_limits = {}
        # The consumer thread
        self.queue = queue.Queue()
        self.thread = None
    def nickname(self, n=None):
        "Return a name for the nth server connection."
        if n is None:
            n = self.nick_trial
        if self.nick_needs_number:
            return self.nick_template % n
        else:
            return self.nick_template
    def handle_ping(self):
        "Register the fact that the server has pinged this connection."
        self.last_ping = time.time()
    def handle_welcome(self):
        "The server says we're OK, with a non-conflicting nick."
        self.status = "ready"
        LOG.info("nick %s accepted" % self.nickname())
        if self.password:
            self.connection.privmsg("nickserv", "identify %s" % self.password)
    def handle_badnick(self):
        "The server says our nick is ill-formed or has a conflict."
        LOG.info("nick %s rejected" % self.nickname())
        if self.nick_needs_number:
            # Randomness prevents a malicious user or bot from
            # anticipating the next trial name in order to block us
            # from completing the handshake.
            self.nick_trial += random.randint(1, 3)
            self.last_xmit = time.time()
            self.connection.nick(self.nickname())
        # Otherwise fall through, it might be possible to
        # recover manually.
    def handle_disconnect(self):
        "Server disconnected us for flooding or some other reason."
        self.connection = None
        if self.status != "expired":
            self.status = "disconnected"
            # Avoid flooding the server if it disconnects
            # immediately on successful login.
            time.sleep(RECONNECT_DELAY)
    def handle_kick(self, outof):
        "We've been kicked."
        self.status = "handshaking"
        try:
            del self.channels_joined[outof]
        except KeyError:
            LOG.error("irkerd: kicked by %s from %s that's not joined" % (
                self.target, outof))
        # Drop every queued message addressed to the channel we were
        # kicked from and re-queue the rest in order.
        qcopy = []
        while not self.queue.empty():
            (channel, message, key) = self.queue.get()
            if channel != outof:
                qcopy.append((channel, message, key))
        for (channel, message, key) in qcopy:
            self.queue.put((channel, message, key))
        self.status = "ready"
    def enqueue(self, channel, message, key, quit_after=False):
        "Enqueue a message for transmission."
        if self.thread is None or not self.thread.is_alive():
            self.status = "unseen"
            self.thread = threading.Thread(target=self.dequeue)
            # Thread.setDaemon() is deprecated; the attribute works on
            # every Python this file supports.
            self.thread.daemon = True
            self.thread.start()
        self.queue.put((channel, message, key))
        if quit_after:
            # A None message is the consumer's cue to QUIT the server.
            self.queue.put((channel, None, key))
    def dequeue(self):
        "Try to ship pending messages from the queue."
        try:
            while True:
                # We want to be kind to the IRC servers and not hold unused
                # sockets open forever, so they have a time-to-live.  The
                # loop is coded this particular way so that we can drop
                # the actual server connection when its time-to-live
                # expires, then reconnect and resume transmission if the
                # queue fills up again.
                if self.queue.empty():
                    # Queue is empty, at some point we want to time out
                    # the connection rather than holding a socket open in
                    # the server forever.
                    now = time.time()
                    xmit_timeout = now > self.last_xmit + XMIT_TTL
                    ping_timeout = now > self.last_ping + PING_TTL
                    if self.status == "disconnected":
                        # If the queue is empty, we can drop this connection.
                        self.status = "expired"
                        break
                    elif xmit_timeout or ping_timeout:
                        LOG.info((
                            "timing out connection to %s at %s "
                            "(ping_timeout=%s, xmit_timeout=%s)") % (
                            self.target, time.asctime(), ping_timeout,
                            xmit_timeout))
                        with self.irker.irc.mutex:
                            # There may be no server connection yet (or
                            # any more) when the timeout fires.
                            if self.connection is not None:
                                self.connection.context = None
                                self.connection.quit("transmission timeout")
                                self.connection = None
                        self.status = "disconnected"
                    else:
                        # Prevent this thread from hogging the CPU by pausing
                        # for just a little bit after the queue-empty check.
                        # As long as this is less than the duration of a human
                        # reflex arc it is highly unlikely any human will ever
                        # notice.
                        time.sleep(ANTI_BUZZ_DELAY)
                elif self.status == "disconnected" \
                         and time.time() > self.last_xmit + DISCONNECT_TTL:
                    # Queue is nonempty, but the IRC server might be
                    # down. Letting failed connections retain queue
                    # space forever would be a memory leak.
                    self.status = "expired"
                    break
                elif not self.connection and self.status != "expired":
                    # Queue is nonempty but server isn't connected.
                    with self.irker.irc.mutex:
                        self.connection = self.irker.irc.newserver()
                        self.connection.context = self
                        # Try to avoid colliding with other instances
                        self.nick_trial = random.randint(1, 990)
                        self.channels_joined = {}
                        try:
                            # This will throw
                            # IRCServerConnectionError on failure
                            self.connection.connect(
                                target=self.target,
                                nickname=self.nickname(),
                                **self.kwargs)
                            self.status = "handshaking"
                            LOG.info("XMIT_TTL bump (%s connection) at %s" % (
                                self.target, time.asctime()))
                            self.last_xmit = time.time()
                            self.last_ping = time.time()
                        except IRCServerConnectionError as e:
                            LOG.error("irkerd: %s" % e)
                            self.status = "expired"
                            break
                elif self.status == "handshaking":
                    if time.time() > self.last_xmit + HANDSHAKE_TTL:
                        self.status = "expired"
                        break
                    else:
                        # Don't buzz on the empty-queue test while we're
                        # handshaking
                        time.sleep(ANTI_BUZZ_DELAY)
                elif self.status == "unseen" \
                         and time.time() > self.last_xmit + UNSEEN_TTL:
                    # Nasty people could attempt a denial-of-service
                    # attack by flooding us with requests with invalid
                    # servernames. We guard against this by rapidly
                    # expiring connections that have a nonempty queue but
                    # have never had a successful open.
                    self.status = "expired"
                    break
                elif self.status == "ready":
                    (channel, message, key) = self.queue.get()
                    if channel not in self.channels_joined:
                        self.connection.join(channel, key=key)
                        LOG.info("joining %s on %s." % (channel, self.target))
                    # None is magic - it's a request to quit the server
                    if message is None:
                        self.connection.quit()
                    # An empty message might be used as a keepalive or
                    # to join a channel for logging, so suppress the
                    # privmsg send unless there is actual traffic.
                    elif message:
                        for segment in message.split("\n"):
                            # Truncate the message if it's too long,
                            # but we're working with characters here,
                            # not bytes, so we could be off.
                            # 500 = 512 - CRLF - 'PRIVMSG ' - ' :'
                            maxlength = 500 - len(channel)
                            if len(segment) > maxlength:
                                segment = segment[:maxlength]
                            try:
                                self.connection.privmsg(channel, segment)
                            except ValueError as err:
                                LOG.warning((
                                    "rejected a message to %s on %s "
                                    "because: %s") % (
                                    channel, self.target, UNICODE_TYPE(err)))
                                LOG.debug(traceback.format_exc())
                            time.sleep(ANTI_FLOOD_DELAY)
                    self.last_xmit = self.channels_joined[channel] = time.time()
                    LOG.info("XMIT_TTL bump (%s transmission) at %s" % (
                        self.target, time.asctime()))
                    self.queue.task_done()
                elif self.status == "expired":
                    LOG.error(
                        "irkerd: we're expired but still running!  This is a bug.")
                    break
        except Exception as e:
            LOG.error("irkerd: exception %s in thread for %s" % (e, self.target))
            # Maybe this should have its own status?
            self.status = "expired"
            LOG.debug(traceback.format_exc())
        finally:
            # Make sure we don't leave any zombies behind.  The server
            # connection is already None after a timeout or a
            # server-side disconnect, so take a local reference first.
            conn = self.connection
            if conn is not None:
                conn.close()
    def live(self):
        "Should this connection not be scavenged?"
        return self.status != "expired"
    def joined_to(self, channel):
        "Is this connection joined to the specified channel?"
        return channel in self.channels_joined
    def accepting(self, channel):
        "Can this connection accept a join of this channel?"
        if self.channel_limits:
            match_count = 0
            for already in self.channels_joined:
                # This obscure code is because the RFCs allow separate limits
                # by channel type (indicated by the first character of the name)
                # a feature that is almost never actually used.
                if already[0] == channel[0]:
                    match_count += 1
            return match_count < self.channel_limits.get(channel[0], CHANNEL_MAX)
        else:
            return len(self.channels_joined) < CHANNEL_MAX

class Target():
    "Represent a transmission target."
    def __init__(self, url):
        self.url = url
        parsed = urllib_parse.urlparse(url)
        self.ssl = parsed.scheme == 'ircs'
        if self.ssl:
            default_ircport = 6697
        else:
            default_ircport = 6667
        self.username = parsed.username
        self.password = parsed.password
        self.servername = parsed.hostname
        self.port = parsed.port or default_ircport
        # IRC channel names are case-insensitive.  If we don't smash
        # case here we may run into problems later. There was a bug
        # observed on irc.rizon.net where an irkerd user specified #Channel,
        # got kicked, and irkerd crashed because the server returned
        # "#channel" in the notification that our kick handler saw.
        self.channel = parsed.path.lstrip('/').lower()
        # This deals with a tweak in recent versions of urlparse.
        if parsed.fragment:
            self.channel += "#" + parsed.fragment
        isnick = self.channel.endswith(",isnick")
        if isnick:
            self.channel = self.channel[:-7]
        if self.channel and not isnick and self.channel[0] not in "#&+":
            self.channel = "#" + self.channel
        # support both channel?secret and channel?key=secret
        self.key = ""
        if parsed.query:
            self.key = re.sub("^key=", "", parsed.query)

    def __str__(self):
        "Represent this instance as a string"
        return self.servername or self.url or repr(self)

    def validate(self):
        "Raise InvalidRequest if the URL is missing a critical component"
        if not self.servername:
            raise InvalidRequest(
                'target URL missing a servername: %r' % self.url)
        if not self.channel:
            raise InvalidRequest(
                'target URL missing a channel: %r' % self.url)
    def server(self):
        "Return a hashable tuple representing the destination server."
        return (self.servername, self.port)

class Dispatcher:
    "Manage connections to a particular server-port combination."
    def __init__(self, irker, **kwargs):
        self.irker = irker
        # Keyword arguments used to construct each new Connection.
        self.kwargs = kwargs
        self.connections = []
    def dispatch(self, channel, message, key, quit_after=False):
        "Dispatch messages for our server-port combination."
        # First, check if there is room for another channel
        # on any of our existing connections.
        connections = [x for x in self.connections if x.live()]
        eligibles = [x for x in connections if x.joined_to(channel)] \
                    or [x for x in connections if x.accepting(channel)]
        if eligibles:
            eligibles[0].enqueue(channel, message, key, quit_after)
            return
        # All connections are full up. Look for one old enough to be
        # scavenged.
        ancients = []
        for connection in connections:
            # Iterate over this connection's channels; the original
            # mistakenly read the attribute off the list.
            for (chan, age) in connection.channels_joined.items():
                if age < time.time() - CHANNEL_TTL:
                    ancients.append((connection, chan, age))
        if ancients:
            ancients.sort(key=lambda x: x[2])
            (found_connection, drop_channel, _drop_age) = ancients[0]
            # PART is a method of the underlying server connection, not
            # of Connection; it may be gone if the server hung up.
            if found_connection.connection is not None:
                found_connection.connection.part(
                    drop_channel, "scavenged by irkerd")
            del found_connection.channels_joined[drop_channel]
            #time.sleep(ANTI_FLOOD_DELAY)
            found_connection.enqueue(channel, message, key, quit_after)
            return
        # All existing channels had recent activity
        newconn = Connection(self.irker, **self.kwargs)
        self.connections.append(newconn)
        newconn.enqueue(channel, message, key, quit_after)
    def live(self):
        "Does this server-port combination have any live connections?"
        self.connections = [x for x in self.connections if x.live()]
        return len(self.connections) > 0
    def pending(self):
        "Return all connections with pending traffic."
        return [x for x in self.connections if not x.queue.empty()]
    def last_xmit(self):
        "Return the time of the most recent transmission."
        return max(x.last_xmit for x in self.connections)

class Irker:
    "Persistent IRC multiplexer."
    def __init__(self, logfile=None, **kwargs):
        self.logfile = logfile
        # Passed through to every Dispatcher (and from there to each
        # Connection).
        self.kwargs = kwargs
        self.irc = IRCClient()
        self.irc.add_event_handler("ping", self._handle_ping)
        self.irc.add_event_handler("welcome", self._handle_welcome)
        self.irc.add_event_handler("erroneusnickname", self._handle_badnick)
        self.irc.add_event_handler("nicknameinuse", self._handle_badnick)
        self.irc.add_event_handler("nickcollision", self._handle_badnick)
        self.irc.add_event_handler("unavailresource", self._handle_badnick)
        self.irc.add_event_handler("featurelist", self._handle_features)
        self.irc.add_event_handler("disconnect", self._handle_disconnect)
        self.irc.add_event_handler("kick", self._handle_kick)
        self.irc.add_event_handler("every_raw_message", self._handle_every_raw_message)
        # Maps (servername, port) -> Dispatcher.
        self.servers = {}
    def thread_launch(self):
        "Run the IRC receive loop in a daemon thread."
        thread = threading.Thread(target=self.irc.spin)
        thread.daemon = True
        self.irc._thread = thread
        thread.start()
    def _handle_ping(self, connection, _event):
        "PING arrived, bump the last-received time for the connection."
        if connection.context:
            connection.context.handle_ping()
    def _handle_welcome(self, connection, _event):
        "Welcome arrived, nick accepted for this connection."
        if connection.context:
            connection.context.handle_welcome()
    def _handle_badnick(self, connection, _event):
        "Nick not accepted for this connection."
        if connection.context:
            connection.context.handle_badnick()
    def _handle_features(self, connection, event):
        "Determine if and how we can set deaf mode."
        if connection.context:
            cxt = connection.context
            arguments = event.arguments
            for lump in arguments:
                if lump.startswith("DEAF="):
                    if not self.logfile:
                        connection.mode(cxt.nickname(), "+"+lump[5:])
                elif lump.startswith("MAXCHANNELS="):
                    try:
                        m = int(lump[12:])
                    except ValueError:
                        # A malformed value from the server must not
                        # kill the receive thread.
                        LOG.error("irkerd: ill-formed MAXCHANNELS property")
                        continue
                    for pref in "#&+":
                        cxt.channel_limits[pref] = m
                    LOG.info("%s maxchannels is %d" % (connection.target, m))
                elif lump.startswith("CHANLIMIT=#:"):
                    limits = lump[10:].split(",")
                    try:
                        for token in limits:
                            (prefixes, limit) = token.split(":")
                            limit = int(limit)
                            for c in prefixes:
                                cxt.channel_limits[c] = limit
                        LOG.info("%s channel limit map is %s" % (
                            connection.target, cxt.channel_limits))
                    except ValueError:
                        LOG.error("irkerd: ill-formed CHANLIMIT property")
    def _handle_disconnect(self, connection, _event):
        "Server hung up the connection."
        LOG.info("server %s disconnected" % connection.target)
        connection.close()
        if connection.context:
            connection.context.handle_disconnect()
    def _handle_kick(self, connection, event):
        "We were kicked from a channel; tell the owning Connection."
        target = event.target
        LOG.info("irker has been kicked from %s on %s" % (
            target, connection.target))
        if connection.context:
            connection.context.handle_kick(target)
    def _handle_every_raw_message(self, _connection, event):
        "Log all messages when in watcher mode."
        if self.logfile:
            with open(self.logfile, "ab") as logfp:
                message = u"%03f|%s|%s\n" % \
                     (time.time(), event.source, event.arguments[0])
                logfp.write(message.encode('utf-8'))

    def pending(self):
        "Do we have any pending message traffic?"
        return [k for (k, v) in self.servers.items() if v.pending()]

    def _parse_request(self, line):
        "Request-parsing helper for the handle() method"
        request = json.loads(line.strip())
        if not isinstance(request, dict):
            raise InvalidRequest(
                "request is not a JSON dictionary: %r" % request)
        if "to" not in request or "privmsg" not in request:
            raise InvalidRequest(
                "malformed request - 'to' or 'privmsg' missing: %r" % request)
        channels = request['to']
        message = request['privmsg']
        if not isinstance(channels, (list, UNICODE_TYPE)):
            raise InvalidRequest(
                "malformed request - unexpected channel type: %r" % channels)
        if not isinstance(message, UNICODE_TYPE):
            raise InvalidRequest(
                "malformed request - unexpected message type: %r" % message)
        if not isinstance(channels, list):
            channels = [channels]
        targets = []
        for url in channels:
            try:
                if not isinstance(url, UNICODE_TYPE):
                    raise InvalidRequest(
                        "malformed request - URL has unexpected type: %r" %
                        url)
                target = Target(url)
                target.validate()
            except InvalidRequest as e:
                # One bad URL only drops that target, not the request.
                LOG.error("irkerd: " + UNICODE_TYPE(e))
            else:
                targets.append(target)
        return (targets, message)

    def handle(self, line, quit_after=False):
        "Perform a JSON relay request."
        try:
            targets, message = self._parse_request(line=line)
            for target in targets:
                if target.server() not in self.servers:
                    self.servers[target.server()] = Dispatcher(
                        self, target=target, **self.kwargs)
                self.servers[target.server()].dispatch(
                    target.channel, message, target.key, quit_after=quit_after)
                # GC dispatchers with no active connections.  Iterate
                # over a snapshot of the keys: deleting from a dict while
                # iterating its live key view raises RuntimeError on
                # Python 3, which the handler below would misreport as
                # malformed JSON.
                servernames = list(self.servers.keys())
                for servername in servernames:
                    if not self.servers[servername].live():
                        del self.servers[servername]
                # If we might be pushing a resource limit even
                # after garbage collection, remove a session.  The
                # goal here is to head off DoS attacks that aim at
                # exhausting thread space or file descriptors.
                # The cost is that attempts to DoS this service
                # will cause lots of join/leave spam as we
                # scavenge old channels after connecting to new
                # ones. The particular method used for selecting a
                # session to be terminated doesn't matter much; we
                # choose the one longest idle on the assumption
                # that message activity is likely to be clumpy.
                if len(self.servers) >= CONNECTION_MAX:
                    oldest = min(
                        self.servers.keys(),
                        key=lambda name: self.servers[name].last_xmit())
                    del self.servers[oldest]
        except InvalidRequest as e:
            LOG.error("irkerd: " + UNICODE_TYPE(e))
        except ValueError:
            LOG.error("irkerd: " + "can't recognize JSON on input: %r" % line)
        except RuntimeError:
            LOG.error("irkerd: " + "wildly malformed JSON blew the parser stack.")

class IrkerTCPHandler(socketserver.StreamRequestHandler):
    "Relay each newline-terminated JSON request read from a TCP client."
    def handle(self):
        while True:
            line = self.rfile.readline()
            if not line:
                break
            if not isinstance(line, UNICODE_TYPE):
                line = UNICODE_TYPE(line, 'utf-8')
            irker.handle(line=line.strip())

class IrkerUDPHandler(socketserver.BaseRequestHandler):
    "Relay the single JSON request carried by a UDP datagram."
    def handle(self):
        line = self.request[0].strip()
        #socket = self.request[1]
        if not isinstance(line, UNICODE_TYPE):
            line = UNICODE_TYPE(line, 'utf-8')
        irker.handle(line=line.strip())

def in_background():
    "Is this process running in background?"
    try:
        return os.getpgrp() != os.tcgetpgrp(1)
    except OSError:
        return True

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__.strip().splitlines()[0])
    parser.add_argument(
        '-c', '--ca-file', metavar='PATH',
        help='file of trusted certificates for SSL/TLS')
    parser.add_argument(
        '-e', '--cert-file', metavar='PATH',
        help='pem file used to authenticate to the server')
    parser.add_argument(
        '-d', '--log-level', metavar='LEVEL', choices=LOG_LEVELS,
        help='how much to log to the log file (one of %(choices)s)')
    parser.add_argument(
        '-H', '--host', metavar='ADDRESS', default=HOST,
        help='IP address to listen on')
    parser.add_argument(
        '-l', '--log-file', metavar='PATH',
        help='file for saving captured message traffic')
    parser.add_argument(
        '-n', '--nick', metavar='NAME', default='irker%03d',
        help="nickname (optionally with a '%%.*d' server connection marker)")
    parser.add_argument(
        '-p', '--password', metavar='PASSWORD',
        help='NickServ password')
    parser.add_argument(
        '-i', '--immediate', metavar='IRC-URL',
        help=(
            'send a single message to IRC-URL and exit.  The message is the '
            'first positional argument.'))
    parser.add_argument(
        '-V', '--version', action='version',
        version='%(prog)s {0}'.format(version))
    parser.add_argument(
        'message', metavar='MESSAGE', nargs='?',
        help='message for --immediate mode')
    args = parser.parse_args()

    if not args.log_file and in_background():
        # The Linux, Mac, and FreeBSD values of the logging device.
        logdev = [x for x in ('/dev/log', '/var/run/syslog', '/var/run/log')
                  if os.path.exists(x) and not os.path.isdir(x)]
        if len(logdev) != 1:
            sys.stderr.write("can't initialize log device, bailing out!\n")
            raise SystemExit(1)
        # There's a case for falling back to address = ('localhost', 514)
        # But some systems (including OS X) disable this for security reasons.
        handler = logging.handlers.SysLogHandler(address=logdev[0],
                                                 facility='daemon')
    else:
        handler = logging.StreamHandler()

    LOG.addHandler(handler)
    if args.log_level:
        log_level = getattr(logging, args.log_level.upper())
        LOG.setLevel(log_level)

    irker = Irker(
        logfile=args.log_file,
        nick_template=args.nick,
        nick_needs_number=re.search('%.*d', args.nick),
        password=args.password,
        cafile=args.ca_file,
        certfile=args.cert_file,
        )
    LOG.info("irkerd version %s" % version)
    if args.immediate:
        if not args.message:
            # We want newline to become '\n' and tab to become '\t';
            # the JSON decoder will undo these transformations.
            # json.dumps() does this escaping (quotes and backslashes
            # included) on both Python 2 and 3; the "string_escape"
            # codec used before exists only on Python 2.  Strip the
            # surrounding quotes because the template below adds them.
            args.message = json.dumps(sys.stdin.read())[1:-1]
        irker.irc.add_event_handler("quit", lambda _c, _e: sys.exit(0))
        irker.handle('{"to":"%s","privmsg":"%s"}' % (
            args.immediate, args.message), quit_after=True)
        irker.irc.spin()
    else:
        if args.message:
            LOG.error(
                'irkerd: message argument given (%r), but --immediate not set' % (
                args.message))
            raise SystemExit(1)
        irker.thread_launch()
        try:
            tcpserver = socketserver.TCPServer((args.host, PORT), IrkerTCPHandler)
            udpserver = socketserver.UDPServer((args.host, PORT), IrkerUDPHandler)
            for server in [tcpserver, udpserver]:
                server_thread = threading.Thread(target=server.serve_forever)
                server_thread.daemon = True
                server_thread.start()
            try:
                signal.pause()
            except KeyboardInterrupt:
                raise SystemExit(1)
        except socket.error as e:
            LOG.error("irkerd: server launch failed: %r\n" % e)

# end diff --git a/irkerd.service b/irkerd.service new file mode 100644 index 0000000..d19378b --- /dev/null +++ b/irkerd.service @@ -0,0 +1,12 @@ +# Copyright 2012 Wulf C.
Krueger <philantrop@exherbo.org> +# Distributed under the terms of the BSD LICENSE + +[Unit] +Description=irker daemon +Requires=network.target + +[Service] +ExecStart=/usr/bin/irkerd + +[Install] +WantedBy=multi-user.target diff --git a/irkerd.xml b/irkerd.xml new file mode 100644 index 0000000..59b7dae --- /dev/null +++ b/irkerd.xml @@ -0,0 +1,249 @@ +<!DOCTYPE refentry PUBLIC + "-//OASIS//DTD DocBook XML V4.1.2//EN" + "docbook/docbookx.dtd"> +<refentry id='irkerd.8'> +<refmeta> +<refentrytitle>irkerd</refentrytitle> +<manvolnum>8</manvolnum> +<refmiscinfo class='date'>Aug 27 2012</refmiscinfo> +<refmiscinfo class='source'>irker</refmiscinfo> +<refmiscinfo class='product'>irker</refmiscinfo> +<refmiscinfo class='manual'>Commands</refmiscinfo> +</refmeta> +<refnamediv id='name'> +<refname>irkerd</refname> +<refpurpose>relay for shipping notifications to IRC servers</refpurpose> +</refnamediv> +<refsynopsisdiv id='synopsis'> + +<cmdsynopsis> + <command>irkerd</command> + <arg>-c <replaceable>ca-file</replaceable></arg> + <arg>-d <replaceable>debuglevel</replaceable></arg> + <arg>-e <replaceable>cert-file</replaceable></arg> + <arg>-l <replaceable>logfile</replaceable></arg> + <arg>-H <replaceable>host</replaceable></arg> + <arg>-n <replaceable>nick</replaceable></arg> + <arg>-p <replaceable>password</replaceable></arg> + <arg>-i <replaceable>IRC-URL</replaceable></arg> + <arg>-V</arg> + <arg>-h</arg> + <arg choice='opt'><replaceable>message text</replaceable></arg> +</cmdsynopsis> +</refsynopsisdiv> + +<refsect1 id='description'><title>DESCRIPTION</title> + +<para><application>irkerd</application> is a specialized write-only IRC +client intended to be used for shipping notification messages to IRC +channels. 
The use case in mind when it was designed was broadcasting +notifications from commit hooks in version-control systems.</para> + +<para>The main advantage of relaying through this daemon over +individual scripted sends from applications is that it can maintain +connection state for multiple channels, rather than producing obnoxious +join/leave channel spam on every message.</para> + +<para><application>irkerd</application> is a socket server that +listens for UDP or TCP packets on port 6659 carrying textual request +lines containing JSON objects and terminated by a newline. Each JSON +object must have two members: "to" specifying a destination or +destination list, and "privmsg" specifying the message text. +Examples: + +<programlisting> +{"to":"irc://chat.freenode.net/git-ciabot", "privmsg":"Hello, world!"} +{"to":["irc://chat.freenode.net/#git-ciabot","irc://chat.freenode.net/#gpsd"],"privmsg":"Multichannel test"} +{"to":"irc://chat.hypothetical.net:6668/git-ciabot", "privmsg":"Hello, world!"} +{"to":"ircs://chat.hypothetical.net/git-private?key=topsecret", "privmsg":"Keyed channel test"} +{"to":"ircs://:topsecret@chat.example.net/git-private", "privmsg":"Password-protected server test"} +</programlisting></para> + +<para>If the channel part of the URL does not have one of the prefix +characters <quote>#</quote>, <quote>&amp;</quote>, or +<quote>+</quote>, a <quote>#</quote> will be prepended to it before +shipping - <emphasis>unless</emphasis> the channel part has the suffix +",isnick" (which is unconditionally removed).</para> + +<para>The host part of the URL may have a port-number suffix separated by a +colon, as shown in the third example; otherwise +<application>irkerd</application> sends plaintext messages to the default +6667 IRC port of each server, and SSL/TLS messages to 6697.</para> + +<para>The password for password-protected servers can be set using the +usual <quote>[{username}:{password}@]{host}:{port}</quote> defined in +RFC 3986, as shown in the fifth 
example. Non-empty URL usernames +override the default <quote>irker</quote> username.</para> + +<para>When the <quote>to</quote> URL uses the <quote>ircs</quote> +scheme (as shown in the fourth and fifth examples), the connection to +the IRC server is made via SSL/TLS (vs. a plaintext connection with the +<quote>irc</quote> scheme). To connect via SSL/TLS with Python 2.x, +you need to explicitly declare the certificate authority file used to +verify server certificates. For example, <quote>-c +/etc/ssl/certs/ca-certificates.crt</quote>. In Python 3.2 and later, +you can still set this option to declare a custom CA file; +if you don't set it, +<application>irkerd</application> will use OpenSSL's default file +(using Python's +<quote>ssl.SSLContext.set_default_verify_paths</quote>). In Python +3.2 and later, <quote>ssl.match_hostname</quote> is used to ensure the +server certificate belongs to the intended host, as well as being +signed by a trusted CA.</para> + +<para>To join password-protected (mode +k) channels, the channel part of the +URL may be followed with a query-string indicating the channel key, of the +form <quote>?secret</quote> or <quote>?key=secret</quote>, where +<quote>secret</quote> is the channel key.</para> + +<para>An empty message is legal and will cause +<application>irkerd</application> to join or maintain a connection to +the target channels without actually emitting a message. This may be +useful for advertising that an instance is up and running, or for +joining a channel to log its traffic.</para> +</refsect1> + +<refsect1 id='options'><title>OPTIONS</title> + +<para><application>irkerd</application> takes the following options:</para> + +<variablelist> +<varlistentry> +<term>-d</term> +<listitem> + <para> + Takes a following value, setting the debugging level from it; + possible values are 'critical', 'error', 'warning', 'info', + 'debug'. 
This option will generally only be of interest to + developers, as the logs are designed to help trace + <application>irkerd</application>'s internal state. These tracing + logs are independent of the traffic logs controlled by + <quote>-l</quote>. + </para> + <para> + Logging will be to standard error (if + <application>irkerd</application> is running in the foreground) or + to <quote>/dev/syslog</quote> with facility "daemon" (if + <application>irkerd</application> is running in the background). + The background-ness of <application>irkerd</application> is + determined by comparing the process group id with the process + group associated with the terminal attached to stdout (with + non-matches for background processes). We assume you aren't + running <application>irkerd</application> in Windows or another OS + that doesn't support <quote>os.getpgrp</quote> or + <quote>tcgetpgrp</quote>. We assume that if stdout is attached to + a TTY associated with the same process group as + <application>irkerd</application>, you do intend to log to stderr + and not syslog. + </para> +</listitem> +</varlistentry> +<varlistentry> +<term>-e</term> +<listitem><para>Takes a following filename in pem format and uses it +to authenticate to the IRC server. You must be connecting to the IRC server +over SSL for this to function properly. This is commonly known as +<quote>CertFP.</quote> +</para></listitem> +</varlistentry> +<varlistentry> +<term>-e</term> +<listitem><para>Takes a following filename in pem format and uses it +to authenticate to the IRC server. You must be connecting to the IRC +server over SSL for this to function properly. This is commonly known +as <quote>CertFP.</quote></para> +</listitem> +</varlistentry> +<varlistentry> +<term>-l</term> +<listitem><para>Takes a following filename, logs traffic to that file. 
+Each log line consists of three |-separated fields: a numeric +timestamp in Unix time, the FQDN of the sending server, and the +message data.</para></listitem> +</varlistentry> +<varlistentry> +<term>-H</term> +<listitem><para>Takes a following hostname, and binds to that address +when listening for messages. <application>irkerd</application> binds +to localhost by default, but you may want to use your host's public +address to listen on a local network. Listening on a public interface +is not recommended, as it makes spamming IRC channels very +easy.</para></listitem> +</varlistentry> +<varlistentry> +<term>-n</term> +<listitem><para>Takes a following value, setting the nick +to be used. If the nick contains a numeric format element +(such as %03d) it is used to generate suffixed fallback names +in the event of a nick collision.</para></listitem> +</varlistentry> +<varlistentry> +<term>-p</term> +<listitem><para>Takes a following value, setting a nickserv +password to be used. If given, this password is shipped to +authenticate the nick on receipt of a welcome message.</para></listitem> +</varlistentry> +<varlistentry> +<term>-i</term> +<listitem><para>Immediate mode, to be run in foreground. Takes a +following value interpreted as a channel URL. May take a second +argument giving a message string; if the second argument is absent the +message is read from standard input (and may contain newlines). 
+Sends the message, then quits.</para></listitem> +</varlistentry> +<varlistentry> +<term>-V</term> +<listitem><para>Write the program version to stdout and +terminate.</para></listitem> +</varlistentry> +<varlistentry> +<term>-h</term> +<listitem><para>Print usage instructions and terminate.</para></listitem> +</varlistentry> +</variablelist> +</refsect1> + +<refsect1 id='limitations'><title>LIMITATIONS</title> +<para>Requests via UDP optimizes for lowest latency and network load +by avoiding TCP connection setup time; the cost is that delivery is +not reliable in the face of packet loss.</para> + +<para>An <application>irkerd</application> instance with a +publicly-accessible request socket could complicate blocking of IRC +spam by making it easy for spammers to submit while hiding their IP +addresses; the better way to deploy, then, is on places like +project-hosting sites where the <application>irkerd</application> +socket can be visible from commit-hook code but not exposed to the +outside world. Priming your firewall with blocklists of IP addresses +known to spew spam is always a good idea.</para> + +<para>The absence of any option to set the service port is deliberate. +If you think you need to do that, you have a problem better solved at +your firewall.</para> + +<para>IRC has a message length limit of 510 bytes; generate your +privmsg attribute values with appropriate care.</para> + +<para>IRC ignores any text after an embedded newline. Be aware that +<application>irkerd</application> will turn payload strings with +embedded newlines into multiple IRC sends to avoid having message data +discarded. </para> + +<para>Due to a bug in Python URL parsing, IRC urls with both a # and a +key part may fail unexpectedly. 
The workaround is to remove the #.</para> +</refsect1> + +<refsect1 id='see_also'><title>SEE ALSO</title> +<para> +<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>, +</para> +</refsect1> + +<refsect1 id='authors'><title>AUTHOR</title> +<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the +project page at <ulink +url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink> +for updates and other resources, including an installable repository +hook script.</para> +</refsect1> +</refentry> diff --git a/irkerhook.py b/irkerhook.py new file mode 100755 index 0000000..9768eac --- /dev/null +++ b/irkerhook.py @@ -0,0 +1,547 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Eric S. Raymond <esr@thyrsus.com> +# Distributed under BSD terms. +# +# This script contains git porcelain and porcelain byproducts. +# Requires Python 2.6, or 2.5 with the simplejson library installed. +# +# usage: irkerhook.py [-V] [-n] [--variable=value...] [commit_id...] +# +# This script is meant to be run in an update or post-commit hook. +# Try it with -n to see the notification dumped to stdout and verify +# that it looks sane. With -V this script dumps its version and exits. +# +# See the irkerhook manual page in the distribution for a detailed +# explanation of how to configure this hook. + +# The default location of the irker proxy, if the project configuration +# does not override it. +# +# SPDX-License-Identifier: BSD-2-Clause +default_server = "localhost" +IRKER_PORT = 6659 + +# The default service used to turn your web-view URL into a tinyurl so it +# will take up less space on the IRC notification line. +default_tinyifier = "http://tinyurl.com/api-create.php?url=" + +# Map magic urlprefix values to actual URL prefixes. 
+urlprefixmap = { + "viewcvs": "http://%(host)s/viewcvs/%(repo)s?view=revision&revision=", + "gitweb": "http://%(host)s/cgi-bin/gitweb.cgi?p=%(repo)s;a=commit;h=", + "cgit": "http://%(host)s/cgi-bin/cgit.cgi/%(repo)s/commit/?id=", + } + +# By default, ship to the freenode #commits list +default_channels = "irc://chat.freenode.net/#commits" + +# +# No user-serviceable parts below this line: +# + +version = "2.17" + +import os, sys, socket, urllib2, subprocess, locale, datetime, re +from pipes import quote as shellquote + +try: + import simplejson as json # Faster, also makes us Python-2.5-compatible +except ImportError: + import json + +try: + getstatusoutput = subprocess.getstatusoutput +except AttributeError: + import commands + getstatusoutput = commands.getstatusoutput + +def do(command): + return unicode(getstatusoutput(command)[1], locale.getlocale()[1] or 'UTF-8').encode(locale.getlocale()[1] or 'UTF-8') + +class Commit: + def __init__(self, extractor, commit): + "Per-commit data." + self.commit = commit + self.branch = None + self.rev = None + self.mail = None + self.author = None + self.files = None + self.logmsg = None + self.url = None + self.author_date = None + self.commit_date = None + self.__dict__.update(extractor.__dict__) + def __unicode__(self): + "Produce a notification string from this commit." + if self.urlprefix.lower() == "none": + self.url = "" + else: + urlprefix = urlprefixmap.get(self.urlprefix, self.urlprefix) + webview = (urlprefix % self.__dict__) + self.commit + try: + # See it the url is accessible + res = urllib2.urlopen(webview) + if self.tinyifier and self.tinyifier.lower() != "none": + try: + # Didn't get a retrieval error on the web + # view, so try to tinyify a reference to it. 
+ self.url = urllib2.urlopen(self.tinyifier + webview).read() + try: + self.url = self.url.decode('UTF-8') + except UnicodeError: + pass + except IOError: + self.url = webview + else: + self.url = webview + except IOError as e: + if e.code == 401: + # Authentication error, so we assume the view is valid + self.url = webview + else: + self.url = "" + res = self.template % self.__dict__ + return unicode(res, 'UTF-8') if not isinstance(res, unicode) else res + +class GenericExtractor: + "Generic class for encapsulating data from a VCS." + booleans = ["tcp"] + numerics = ["maxchannels"] + strings = ["email"] + def __init__(self, arguments): + self.arguments = arguments + self.project = None + self.repo = None + # These aren't really repo data but they belong here anyway... + self.email = None + self.tcp = True + self.tinyifier = default_tinyifier + self.server = None + self.channels = None + self.maxchannels = 0 + self.template = None + self.urlprefix = None + self.host = socket.getfqdn() + self.cialike = None + self.filtercmd = None + # Color highlighting is disabled by default. + self.color = None + self.bold = self.green = self.blue = self.yellow = "" + self.brown = self.magenta = self.cyan = self.reset = "" + def activate_color(self, style): + "IRC color codes." + if style == 'mIRC': + # mIRC colors are mapped as closely to the ANSI colors as + # possible. However, bright colors (green, blue, red, + # yellow) have been made their dark counterparts since + # ChatZilla does not properly darken mIRC colors in the + # Light Motif color scheme. 
+ self.bold = '\x02' + self.green = '\x0303' + self.blue = '\x0302' + self.red = '\x0305' + self.yellow = '\x0307' + self.brown = '\x0305' + self.magenta = '\x0306' + self.cyan = '\x0310' + self.reset = '\x0F' + if style == 'ANSI': + self.bold = '\x1b[1m' + self.green = '\x1b[1;32m' + self.blue = '\x1b[1;34m' + self.red = '\x1b[1;31m' + self.yellow = '\x1b[1;33m' + self.brown = '\x1b[33m' + self.magenta = '\x1b[35m' + self.cyan = '\x1b[36m' + self.reset = '\x1b[0m' + def load_preferences(self, conf): + "Load preferences from a file in the repository root." + if not os.path.exists(conf): + return + ln = 0 + for line in open(conf): + ln += 1 + if line.startswith("#") or not line.strip(): + continue + elif line.count('=') != 1: + sys.stderr.write('"%s", line %d: missing = in config line\n' \ + % (conf, ln)) + continue + fields = line.split('=') + if len(fields) != 2: + sys.stderr.write('"%s", line %d: too many fields in config line\n' \ + % (conf, ln)) + continue + variable = fields[0].strip() + value = fields[1].strip() + if value.lower() == "true": + value = True + elif value.lower() == "false": + value = False + # User cannot set maxchannels - only a command-line arg can do that. + if variable == "maxchannels": + return + setattr(self, variable, value) + def do_overrides(self): + "Make command-line overrides possible." 
+ for tok in self.arguments: + for key in self.__dict__: + if tok.startswith("--" + key + "="): + val = tok[len(key)+3:] + setattr(self, key, val) + for (key, val) in self.__dict__.items(): + if key in GenericExtractor.booleans: + if type(val) == type("") and val.lower() == "true": + setattr(self, key, True) + elif type(val) == type("") and val.lower() == "false": + setattr(self, key, False) + elif key in GenericExtractor.numerics: + setattr(self, key, int(val)) + elif key in GenericExtractor.strings: + setattr(self, key, val) + if not self.project: + sys.stderr.write("irkerhook.py: no project name set!\n") + raise SystemExit(1) + if not self.repo: + self.repo = self.project.lower() + if not self.channels: + self.channels = default_channels % self.__dict__ + if self.color and self.color.lower() != "none": + self.activate_color(self.color) + +def has(dirname, paths): + "Test for existence of a list of paths." + # all() is a python2.5 construct + for exists in [os.path.exists(os.path.join(dirname, x)) for x in paths]: + if not exists: + return False + return True + +# VCS-dependent code begins here + +class GitExtractor(GenericExtractor): + "Metadata extraction for the git version control system." 
+ @staticmethod + def is_repository(dirname): + # Must detect both ordinary and bare repositories + return has(dirname, [".git"]) or \ + has(dirname, ["HEAD", "refs", "objects"]) + def __init__(self, arguments): + GenericExtractor.__init__(self, arguments) + # Get all global config variables + self.project = do("git config --get irker.project") + self.repo = do("git config --get irker.repo") + self.server = do("git config --get irker.server") + self.channels = do("git config --get irker.channels") + self.email = do("git config --get irker.email") + self.tcp = do("git config --bool --get irker.tcp") + self.template = do("git config --get irker.template") or '%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' + self.tinyifier = do("git config --get irker.tinyifier") or default_tinyifier + self.color = do("git config --get irker.color") + self.urlprefix = do("git config --get irker.urlprefix") or "gitweb" + self.cialike = do("git config --get irker.cialike") + self.filtercmd = do("git config --get irker.filtercmd") + # These are git-specific + self.refname = do("git symbolic-ref HEAD 2>/dev/null") + self.revformat = do("git config --get irker.revformat") + # The project variable defaults to the name of the repository toplevel. 
+ if not self.project: + bare = do("git config --bool --get core.bare") + if bare.lower() == "true": + keyfile = "HEAD" + else: + keyfile = ".git/HEAD" + here = os.getcwd() + while True: + if os.path.exists(os.path.join(here, keyfile)): + self.project = os.path.basename(here) + if self.project.endswith('.git'): + self.project = self.project[0:-4] + break + elif here == '/': + sys.stderr.write("irkerhook.py: no git repo below root!\n") + sys.exit(1) + here = os.path.dirname(here) + # Get overrides + self.do_overrides() + def head(self): + "Return a symbolic reference to the tip commit of the current branch." + return "HEAD" + def commit_factory(self, commit_id): + "Make a Commit object holding data for a specified commit ID." + commit = Commit(self, commit_id) + commit.branch = re.sub(r"^refs/[^/]*/", "", self.refname) + # Compute a description for the revision + if self.revformat == 'raw': + commit.rev = commit.commit + elif self.revformat == 'short': + commit.rev = '' + else: # self.revformat == 'describe' + commit.rev = do("git describe %s 2>/dev/null" % shellquote(commit.commit)) + if not commit.rev: + # Query git for the abbreviated hash + commit.rev = do("git log -1 '--pretty=format:%h' " + shellquote(commit.commit)) + if self.urlprefix in ('gitweb', 'cgit'): + # Also truncate the commit used for the announced urls + commit.commit = commit.rev + # Extract the meta-information for the commit + commit.files = do("git diff-tree -r --name-only " + shellquote(commit.commit)) + commit.files = " ".join(commit.files.strip().split("\n")[1:]) + # Design choice: for git we ship only the first message line, which is + # conventionally supposed to be a summary of the commit. Under + # other VCSes a different choice may be appropriate. + commit.author_name, commit.mail, commit.logmsg = \ + do("git log -1 '--pretty=format:%an%n%ae%n%s' " + shellquote(commit.commit)).split("\n") + # This discards the part of the author's address after @. 
+ # Might be be nice to ship the full email address, if not + # for spammers' address harvesters - getting this wrong + # would make the freenode #commits channel into harvester heaven. + commit.author = commit.mail.split("@")[0] + commit.author_date, commit.commit_date = \ + do("git log -1 '--pretty=format:%ai|%ci' " + shellquote(commit.commit)).split("|") + return commit + +class SvnExtractor(GenericExtractor): + "Metadata extraction for the svn version control system." + @staticmethod + def is_repository(dirname): + return has(dirname, ["format", "hooks", "locks"]) + def __init__(self, arguments): + GenericExtractor.__init__(self, arguments) + # Some things we need to have before metadata queries will work + self.repository = '.' + for tok in arguments: + if tok.startswith("--repository="): + self.repository = tok[13:] + self.project = os.path.basename(self.repository) + self.template = '%(bold)s%(project)s%(reset)s: %(green)s%(author)s%(reset)s %(repo)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' + self.urlprefix = "viewcvs" + self.load_preferences(os.path.join(self.repository, "irker.conf")) + self.do_overrides() + def head(self): + sys.stderr.write("irker: under svn, hook requires a commit argument.\n") + raise SystemExit(1) + def commit_factory(self, commit_id): + self.id = commit_id + commit = Commit(self, commit_id) + commit.branch = "" + commit.rev = "r%s" % self.id + commit.author = self.svnlook("author") + commit.commit_date = self.svnlook("date").partition('(')[0] + commit.files = self.svnlook("dirs-changed").strip().replace("\n", " ") + commit.logmsg = self.svnlook("log").strip() + return commit + def svnlook(self, info): + return do("svnlook %s %s --revision %s" % (shellquote(info), shellquote(self.repository), shellquote(self.id))) + +class HgExtractor(GenericExtractor): + "Metadata extraction for the Mercurial version control system." 
+ @staticmethod + def is_repository(directory): + return has(directory, [".hg"]) + def __init__(self, arguments): + # This fiddling with arguments is necessary since the Mercurial hook can + # be run in two different ways: either directly via Python (in which + # case hg should be pointed to the hg_hook function below) or as a + # script (in which case the normal __main__ block at the end of this + # file is exercised). In the first case, we already get repository and + # ui objects from Mercurial, in the second case, we have to create them + # from the root path. + self.repository = None + if arguments and type(arguments[0]) == type(()): + # Called from hg_hook function + ui, self.repository = arguments[0] + arguments = [] # Should not be processed further by do_overrides + else: + # Called from command line: create repo/ui objects + from mercurial import hg, ui as uimod + + repopath = '.' + for tok in arguments: + if tok.startswith('--repository='): + repopath = tok[13:] + ui = uimod.ui() + ui.readconfig(os.path.join(repopath, '.hg', 'hgrc'), repopath) + self.repository = hg.repository(ui, repopath) + + GenericExtractor.__init__(self, arguments) + # Extract global values from the hg configuration file(s) + self.project = ui.config('irker', 'project') + self.repo = ui.config('irker', 'repo') + self.server = ui.config('irker', 'server') + self.channels = ui.config('irker', 'channels') + self.email = ui.config('irker', 'email') + self.tcp = str(ui.configbool('irker', 'tcp')) # converted to bool again in do_overrides + self.template = ui.config('irker', 'template') or '%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' + self.tinyifier = ui.config('irker', 'tinyifier') or default_tinyifier + self.color = ui.config('irker', 'color') + self.urlprefix = (ui.config('irker', 'urlprefix') or + ui.config('web', 'baseurl') or '') + if 
self.urlprefix: + # self.commit is appended to this by do_overrides + self.urlprefix = self.urlprefix.rstrip('/') + '/rev/' + self.cialike = ui.config('irker', 'cialike') + self.filtercmd = ui.config('irker', 'filtercmd') + if not self.project: + self.project = os.path.basename(self.repository.root.rstrip('/')) + self.do_overrides() + def head(self): + "Return a symbolic reference to the tip commit of the current branch." + return "-1" + def commit_factory(self, commit_id): + "Make a Commit object holding data for a specified commit ID." + from mercurial.node import short + from mercurial.templatefilters import person + node = self.repository.lookup(commit_id) + commit = Commit(self, short(node)) + # Extract commit-specific values from a "context" object + ctx = self.repository.changectx(node) + commit.rev = '%d:%s' % (ctx.rev(), commit.commit) + commit.branch = ctx.branch() + commit.author = person(ctx.user()) + commit.author_date = \ + datetime.datetime.fromtimestamp(ctx.date()[0]).strftime('%Y-%m-%d %H:%M:%S') + commit.logmsg = ctx.description() + # Extract changed files from status against first parent + st = self.repository.status(ctx.p1().node(), ctx.node()) + commit.files = ' '.join(st[0] + st[1] + st[2]) + return commit + +def hg_hook(ui, repo, **kwds): + # To be called from a Mercurial "commit", "incoming" or "changegroup" hook. + # Example configuration: + # [hooks] + # incoming.irker = python:/path/to/irkerhook.py:hg_hook + extractor = HgExtractor([(ui, repo)]) + start = repo[kwds['node']].rev() + end = len(repo) + if start != end: + # changegroup with multiple commits, so we generate a notification + # for each one + for rev in range(start, end): + ship(extractor, rev, False) + else: + ship(extractor, kwds['node'], False) + +# The files we use to identify a Subversion repo might occur as content +# in a git or hg repo, but the special subdirectories for those are more +# reliable indicators. So test for Subversion last. 
+extractors = [GitExtractor, HgExtractor, SvnExtractor] + +# VCS-dependent code ends here + +def ship(extractor, commit, debug): + "Ship a notification for the specified commit." + metadata = extractor.commit_factory(commit) + + # This is where we apply filtering + if extractor.filtercmd: + cmd = '%s %s' % (shellquote(extractor.filtercmd), + shellquote(json.dumps(metadata.__dict__))) + data = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout.read() + try: + metadata.__dict__.update(json.loads(data)) + except ValueError: + sys.stderr.write("irkerhook.py: could not decode JSON: %s\n" % data) + raise SystemExit(1) + + # Rewrite the file list if too long. The objective here is only + # to be easier on the eyes. + if extractor.cialike \ + and extractor.cialike.lower() != "none" \ + and len(metadata.files) > int(extractor.cialike): + files = metadata.files.split() + dirs = set([d.rpartition('/')[0] for d in files]) + if len(dirs) == 1: + metadata.files = "(%s files)" % (len(files),) + else: + metadata.files = "(%s files in %s dirs)" % (len(files), len(dirs)) + # Message reduction. The assumption here is that IRC can't handle + # lines more than 510 characters long. If we exceed that length, we + # try knocking out the file list, on the theory that for notification + # purposes the commit text is more important. If it's still too long + # there's nothing much can be done other than ship it expecting the IRC + # server to truncate. + privmsg = unicode(metadata) + if len(privmsg) > 510: + metadata.files = "" + privmsg = unicode(metadata) + + # Anti-spamming guard. It's deliberate that we get maxchannels not from + # the user-filtered metadata but from the extractor data - means repo + # administrators can lock in that setting. + channels = metadata.channels.split(",") + if extractor.maxchannels != 0: + channels = channels[:extractor.maxchannels] + + # Ready to ship. 
+ message = json.dumps({"to": channels, "privmsg": privmsg}) + if debug: + print message + elif channels: + try: + if extractor.email: + # We can't really figure out what our SF username is without + # exploring our environment. The mail pipeline doesn't care + # about who sent the mail, other than being from sourceforge. + # A better way might be to simply call mail(1) + sender = "irker@users.sourceforge.net" + msg = """From: %(sender)s +Subject: irker json + +%(message)s""" % {"sender":sender, "message":message} + import smtplib + smtp = smtplib.SMTP() + smtp.connect() + smtp.sendmail(sender, extractor.email, msg) + smtp.quit() + elif extractor.tcp: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((extractor.server or default_server, IRKER_PORT)) + sock.sendall(message + "\n") + finally: + sock.close() + else: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + sock.sendto(message + "\n", (extractor.server or default_server, IRKER_PORT)) + finally: + sock.close() + except socket.error, e: + sys.stderr.write("%s\n" % e) + +if __name__ == "__main__": + notify = True + repository = os.getcwd() + commits = [] + for arg in sys.argv[1:]: + if arg == '-n': + notify = False + elif arg == '-V': + print "irkerhook.py: version", version + sys.exit(0) + elif arg.startswith("--repository="): + repository = arg[13:] + elif not arg.startswith("--"): + commits.append(arg) + + # Figure out which extractor we should be using + for candidate in extractors: + if candidate.is_repository(repository): + cls = candidate + break + else: + sys.stderr.write("irkerhook: cannot identify a repository type.\n") + raise SystemExit(1) + extractor = cls(sys.argv[1:]) + + # And apply it. 
+ if not commits: + commits = [extractor.head()] + for commit in commits: + ship(extractor, commit, not notify) + +#End diff --git a/irkerhook.xml b/irkerhook.xml new file mode 100644 index 0000000..2f068d2 --- /dev/null +++ b/irkerhook.xml @@ -0,0 +1,414 @@ +<!DOCTYPE refentry PUBLIC + "-//OASIS//DTD DocBook XML V4.1.2//EN" + "docbook/docbookx.dtd"> +<refentry id='irkerhook.1'> +<refmeta> +<refentrytitle>irkerhook</refentrytitle> +<manvolnum>1</manvolnum> +<refmiscinfo class='date'>Aug 27 2012</refmiscinfo> +<refmiscinfo class='source'>irker</refmiscinfo> +<refmiscinfo class='product'>irker</refmiscinfo> +<refmiscinfo class='manual'>Commands</refmiscinfo> +</refmeta> +<refnamediv id='name'> +<refname>irkerhook</refname> +<refpurpose>repository hook script issuing irker notifications</refpurpose> +</refnamediv> +<refsynopsisdiv id='synopsis'> + +<cmdsynopsis> + <command>irkerhook.py</command> + <arg>-n</arg> + <arg>-V</arg> + <group><arg rep='repeat'><replaceable>--variable=value</replaceable></arg></group> + <group><arg rep='repeat'><replaceable>commit-id</replaceable></arg></group> +</cmdsynopsis> +</refsynopsisdiv> + +<refsect1 id='description'><title>DESCRIPTION</title> + +<para><application>irkerhook.py</application> is a Python script intended +to be called from the post-commit hook of a version-control repository. Its +job is to collect information about the commit that fired the hook (and +possibly preferences set by the repository owner) and ship that information +to an instance of <application>irkerd</application> for forwarding to +various announcement channels.</para> + +<para>The proper invocation and behavior of +<application>irkerhook.py</application> varies depending on which +VCS (version-control system) is calling it. 
There are four different places +from which it may extract information:</para> + +<orderedlist> +<listitem><para>Calls to VCS utilities.</para></listitem> +<listitem><para>In VCSes like git that support user-settable configuration +variables, variables with the prefix "irker.".</para></listitem> +<listitem><para>In other VCSes, a configuration file, "irker.conf", in the +repository's internals directory.</para></listitem> +<listitem><para>Command-line arguments of the form +--variable=value.</para></listitem> +</orderedlist> + +<para>The following variables are general to all supported VCSes:</para> + +<variablelist> +<varlistentry> +<term>project</term> +<listitem> +<para>The name of the project. Should be a relatively short identifier; +will usually appear at the very beginning of a notification.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>repo</term> +<listitem> +<para>The name of the repository top-level directory. If not +specified, defaults to a lowercased copy of the project name.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>channels</term> +<listitem> +<para>An IRC channel URL, or comma-separated list of same, identifying +channels to which notifications are to be sent. If not specified, the +default is the freenode #commits channel.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>server</term> +<listitem> +<para>The host on which the notification-relaying irker daemon is expected +to reside. Defaults to "localhost".</para> +</listitem> +</varlistentry> +<varlistentry> +<term>email</term> +<listitem> +<para>If set, use email for communication rather than TCP or UDP. +The value is used as the target mail address.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>tcp</term> +<listitem> +<para>If "true", use TCP for communication; if "false", use UDP. +Defaults to "false".</para> +</listitem> +</varlistentry> +<varlistentry> +<term>urlprefix</term> +<listitem> +<para>Changeset URL prefix for your repo. 
When the commit ID is appended +to this, it should point at a CGI that will display the commit +through cgit, gitweb or something similar. The defaults will probably +work if you have a typical gitweb/cgit setup.</para> + +<para>If the value of this variable is "None", generation of the URL +field in commit notifications will be suppressed. Other magic values +are "cgit", "gitweb", and "viewcvs", which expand to URL templates +that will usually work with those systems.</para> + +<para>The magic cookies "%(host)s" and "%(repo)s" may occur in this +URL. The former is expanded to the FQDN of the host on which +<application>irkerhook.py</application> is running; the latter is +expanded to the value of the "repo" variable.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>tinyifier</term> +<listitem> +<para>URL template pointing to a service for compressing URLs so they +will take up less space in the notification line. If the value of this +variable is "None", no compression will be attempted.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>color</term> +<listitem> +<para>If "mIRC", highlight notification fields with mIRC color codes. +If "ANSI", highlight notification fields with ANSI color escape +sequences. Defaults to "none" (no colors). ANSI codes are supported +in Chatzilla, irssi, ircle, and BitchX; mIRC codes only are recognized +in mIRC, XChat, KVirc, Konversation, or weechat.</para> + +<para>Note: if you turn this on and notifications stop appearing on +your channel, you need to turn off IRC's color filter on that channel. +To do this you will need op privileges; issue the command "/mode +<channel> -c" with <channel> replaced by your channel name. +You may need to first issue the command "/msg chanserv set +<channel> MLOCK +nt-slk".</para> +</listitem> +</varlistentry> +<varlistentry> +<term>maxchannels</term> +<listitem> +<para>Interpreted as an integer. 
If not zero, limits the number of +channels the hook will interpret from the "channels" variable.</para> + +<para>This variable cannot be set through VCS configuration variables +or <filename>irker.conf</filename>; it can only be set with a command-line +argument. Thus, on a forge site in which repository owners are not +allowed to modify their post-commit scripts, a site administrator can set it +to prevent shotgun spamming by malicious project owners. Setting it to +a value less than 2, however, would probably be unwise.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>cialike</term> +<listitem> +<para>If not empty and not "None" (the default), this emulates the old +CIA behavior of dropping long lists of files in favor of a summary of +the form (N files in M directories). The value must be numeric, giving +a threshold value for the length of the file list in +characters.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>template</term> +<listitem> +<para>Set the template used to generate notification messages. Only +available in VCSes with config variables; presently this means git or +hg. All basic commit and extractor fields, including color switches, +are available as %() substitutions.</para> +</listitem> +</varlistentry> +</variablelist> + +<refsect2 id="git"><title>git</title> + +<para>Under git, the normal way to invoke this hook (from within the +update hook) passes it a refname followed by a list of commits. Because +<command>git rev-list</command> normally lists from most recent to oldest, +you'll want to use --reverse to make notifications be emitted in chronological +order. 
In a normal update script, the invocation should look like this</para> + +<programlisting> +refname=$1 +old=$2 +new=$3 +irkerhook.py --refname=${refname} $(git rev-list --reverse ${old}..${new}) +</programlisting> + +<para>except that you'll need an absolute path for irkerhook.py.</para> + +<para>For testing purposes and backward compatibility, if you invoke +<application>irkerhook.py</application> with no arguments (as in a +post-commit hook) it will behave as though it had been called like +this:</para> + +<programlisting> +irkerhook.py --refname=refs/heads/master HEAD +</programlisting> + +<para>However, this will not give the right result when you push to +a non-default branch of a bare repo.</para> + +<para>A typical way to install this hook is actually in the +<filename>post-receive</filename> hook, because it gets all the +necessary details and will not abort the push on failure. Use the +following script:</para> + +<programlisting> +#!/bin/sh + +echo "sending IRC notification" +while read old new refname; do + irkerhook --refname=${refname} $(git rev-list --reverse ${old}..${new}) +done +</programlisting> + +<para>Preferences may be set in the repo <filename>config</filename> +file in an [irker] section. Here is an example of what that can look +like:</para> + +<programlisting> +[irker] + project = gpsd + color = ANSI + channels = irc://chat.freenode.net/gpsd,irc://chat.freenode.net/commits +</programlisting> + +<para> You should not set the "repository" variable (an equivalent +will be computed). No attempt is made to interpret an +<filename>irker.conf</filename> file.</para> + +<para>The default value of the "project" variable is the basename +of the repository directory. The default value of the "urlprefix" +variable is "cgit".</para> + +<para>There is one git-specific variable, "revformat", controlling +the format of the commit identifier in a notification. 
It +may have the following values:</para> + +<variablelist> +<varlistentry> +<term>raw</term> +<listitem><para>full hex ID of commit</para></listitem> +</varlistentry> +<varlistentry> +<term>short</term> +<listitem><para>first 12 chars of hex ID</para></listitem> +</varlistentry> +<varlistentry> +<term>describe</term> +<listitem><para>describe relative to last tag, falling back to short</para></listitem> +</varlistentry> +</variablelist> + +<para>The default is 'describe'.</para> +</refsect2> + +<refsect2 id="svn"><title>Subversion</title> + +<para>Under Subversion, <application>irkerhook.py</application> +accepts a --repository option with value (the absolute pathname of the +Subversion repository) and a commit argument (the numeric revision level of +the commit). The defaults are the current working directory and HEAD, +respectively.</para> + +<para>Note, however, that you <emphasis>cannot</emphasis> default the +repository argument inside a Subversion post-commit hook; this is +because of a limitation of Subversion, which is that getting the +current directory is not reliable inside these hooks. Instead, the +values must be the two arguments that Subversion passes to that hook +as arguments. Thus, a typical invocation in the post-commit script +will look like this:</para> + +<programlisting> +REPO=$1 +REV=$2 +irkerhook.py --repository=$REPO $REV +</programlisting> + +<para>Other --variable=value settings may also be +given on the command line, and will override any settings in an +<filename>irker.conf</filename> file.</para> + +<para>The default for the project variable is the basename of the +repository. The default value of the "urlprefix" variable is +"viewcvs".</para> + +<para>If an <filename>irker.conf</filename> file exists in the repository +root directory (not the checkout directory but where internals such as the +"format" file live) the hook will interpret variable settings from it. 
Here +is an example of what such a file might look like:</para> + +<programlisting> +# irkerhook variable settings for the irker project +project = irker +channels = irc://chat.freenode.net/irker,irc://chat.freenode.net/commits +tcp = false +</programlisting> + +<para>Don't set the "repository" or "commit" variables in this file; +that would have unhappy results.</para> + +<para>There are no Subversion-specific variables.</para> + +</refsect2> + +<refsect2 id="hg"><title>Mercurial</title> + +<para>Under Mercurial, <application>irkerhook.py</application> can be +invoked in two ways: either as a Python hook (preferred) or as a +script.</para> + +<para>To call it as a Python hook, add the following to the +"commit" or "incoming" hook declaration in your Mercurial +repository:</para> + +<programlisting> +[hooks] + incoming.irker = python:/path/to/irkerhook.py:hg_hook +</programlisting> + +<para>When called as a script, the hook accepts a --repository option +with value (the absolute pathname of the Mercurial repository) and can +take a commit argument (the Mercurial hash ID of the commit or a +reference to it). The default for the repository argument is the +current directory. The default commit argument is '-1', designating +the current tip commit.</para> + +<para>As for git, in both cases all variables may be set in the repo +<filename>hgrc</filename> file in an [irker] section. Command-line +variable=value arguments are accepted but not required for script +invocation. No attempt is made to interpret an +<filename>irker.conf</filename> file.</para> + +<para>The default value of the "project" variable is the basename +of the repository directory. 
The default value of the "urlprefix" +variable is the value of the "web.baseurl" config value, if it +exists.</para> + +</refsect2> + +<refsect2 id="filter"><title>Filtering</title> + +<para>It is possible to filter commits before sending them to +<application>irkerd</application>.</para> + +<para>You have to specify the <option>filtercmd</option> option, which +will be the command <application>irkerhook.py</application> will +run. This command should accept one argument, which is a JSON +representation of commit and extractor metadata (including the +channels variable). The command should emit to standard output a JSON +representation of (possibly altered) metadata.</para> + +<para>Below is an example filter:</para> + +<programlisting> +#!/usr/bin/env python +# This is a trivial example of a metadata filter. +# All it does is change the name of the commit's author. +# +import sys, json +metadata = json.loads(sys.argv[1]) + +metadata['author'] = "The Great and Powerful Oz" + +print json.dumps(metadata) +# end +</programlisting> + +<para>Standard error is available to the hook for progress and +error messages.</para> + +</refsect2> + +</refsect1> + +<refsect1 id='options'><title>OPTIONS</title> + +<para><application>irkerhook.py</application> takes the following +options:</para> + +<variablelist> +<varlistentry> +<term>-n</term> +<listitem><para>Suppress transmission to a daemon. Instead, dump the +generated JSON request to standard output. Useful for +debugging.</para></listitem> +</varlistentry> +<varlistentry> +<term>-V</term> +<listitem><para>Write the program version to stdout and +terminate.</para></listitem> +</varlistentry> +</variablelist> + +</refsect1> + +<refsect1 id='see_also'><title>SEE ALSO</title> +<para> +<citerefentry><refentrytitle>irkerd</refentrytitle><manvolnum>8</manvolnum></citerefentry>, +</para> +</refsect1> + +<refsect1 id='authors'><title>AUTHOR</title> +<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. 
See the +project page at <ulink +url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink> +for updates and other resources.</para> +</refsect1> +</refentry> + diff --git a/org.catb.irkerd.plist b/org.catb.irkerd.plist new file mode 100644 index 0000000..3b30f92 --- /dev/null +++ b/org.catb.irkerd.plist @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"> +<dict> + <key>KeepAlive</key> + <true/> + <key>Label</key> + <string>org.catb.irkerd</string> + <key>ProgramArguments</key> + <array> + <string>/usr/bin/irkerd</string> + </array> + <key>RunAtLoad</key> + <true/> + <key>UserName</key> + <string>nobody</string> + <key>GroupName</key> + <string>nobody</string> +</dict> +</plist> diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b1726fb --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +PySocks==1.5.6 diff --git a/security.txt b/security.txt new file mode 100644 index 0000000..5a652d2 --- /dev/null +++ b/security.txt @@ -0,0 +1,268 @@ += Security analysis of irker = + +This is an analysis of security and DoS vulnerabilities associated +with irker, exploring and explaining certain design choices. Much of +it derives from a code audit and report by Daniel Franke. + +== Assumptions and Goals == + +We begin by stating some assumptions about how irker will be deployed, +and articulating a set of security goals. + +Communication flow in an irker deployment will look like this: + +----------------------------------------------------------------------------- + Committers + | + | + Version-control repositories + | + | + irkerhook.py + | + | + irkerd + | + | + IRC servers +----------------------------------------------------------------------------- + +Here are our assumptions: + +1. 
The repositories are hosted on public forge sites such as +SourceForge, GitHub, Gitorious, Savannah, or Gna and must be +accessible to untrusted users. + +2. Repository project owners can set properties on their repositories +(including but not limited to irker.*), and may be able to set custom +post-commit hooks which can execute arbitrary code on the repository +server. In particular, these people may be able to modify the local +copy of irkerhook.py. + +3. The machine which hosts irkerd has the same owner as the machine which +hosts the repo; these machines are possibly but not necessarily +one and the same. + +4. The network is protected by a perimeter firewall, and only a +trusted group is able to emit arbitrary packets from inside the +perimeter; committers are not necessarily part of this group. + +5. irkerd communicates with IRC servers over the open internet, +and an IRC server's administrator is assumed to hold no position of +trust with any other party. + +We can, accordingly, identify the following groups of security +principals: + +A. irker administrators. +B. Project committers. +C. Project owners. +D. IRC server administrators. +E. Other people on irker's internal network. +F. irkerd-IRC men-in-the-middle (i.e. people who control the network path + between irkerd and the IRC server). +G. Random people on the internet. + +Our security goals for irker can be enumerated as follows: + +* Control: We don't want anyone outside group A gaining control of + the machines which host irkerd or the git repos. + +* Availability: Only group A should be able to deny or degrade + irkerd's ability to receive commit messages and relay them to the + IRC server. We recognize and accept as inevitable that MITMs (groups + E and F) can do this too (by ARP spoofing, cable-cutting, etc.). + But, in particular, we would like irker-mediated services to be + resilient against DoS (denial of service) attacks. 
+ +* Authentication/integrity: Notifications should be truthful, i.e., + commit messages sent to IRC channels should actually reflect that a + corresponding commit has taken place. We accept that groups A, C, + D, and E can violate this property. + +* Secrecy: irker shouldn't aid spammers (group G) in harvesting + committers' email addresses. + +* Auditability: If people abuse irkerd, we want to be able to identify + the abusive account or IP address. + +== Control Issues == + +We have audited the irker and irkerhook.py code for exploitable +vulnerabilities. We have not found any in the code itself, and the +use of Python gives us confidence in the absence of large classes of errors +(such as buffer overruns) that afflict C programs. + +However, the fact that irkerhook.py relies on external binaries to +mine data out of its repository opens up a well-known set of +vulnerabilities if a malicious user is able to insert binaries in a +carelessly-set execution path. Normal precautions against this should +be taken. + +== Availability == + +=== Solved problems === + +When the original implementation of irkerd saw a nick collision it +generated new nicks in a predictable sequence. A malicious IRC user +could have continuously changed his own nick to the next one that +irkerd is going to try. Some randomness has been added to nick +generation to prevent this. + +=== Unsolved problems === + +DoS attacks on any networked application can never be completely +prevented, only mitigated by forcing attackers to invest more +resources. Here we consider the easiest attack paths against irker, +and possible countermeasures. + +irker handles each connection to a particular IRC server in a separate +thread - actually, due to server limits on open channels per +connection, there may be multiple sessions per server. This may not +scale well, especially on 32-bit architectures. 
+ +Thread instance overhead, combined with the lack of any restriction on +how many URLs can appear in the 'to' list, is a DoS vulnerability. If +a repository's properties specify that notifications should go to more +than about 500 unique hostnames, then on 32-bit architectures we'll +hit the 4GB cap on virtual memory (even while the resident set size +remains small). + +Another ceiling to watch out for is the ulimit on file descriptors, +which defaults to 1024 on many Linux systems but can safely be set +much larger. Each connection instance costs a file descriptor. + +We consider some possible ways of addressing the problem: + +1. Limit the number of URLs in a request. Pretty painless - it will +be very rare that anyone wants to specify a larger set than a project +channel plus freenode #commits - but also ineffective. A malicious +hook could achieve DoS simply by spamming lots of requests. + +2. Limit the total number of requests that can be queued. Completely +ineffective - just sets a target for the DoS attack. + +3. Limit the number of requests that can be queued by source IP address. +This might be worth doing; it would stymie a single-source DoS attack through +a publicly-exposed irkerd, though not a DDoS by a botnet. But there isn't +a lot of win here for a properly installed irker (e.g. behind a firewall), +which is typically going to get all its requests from a single repo host +anyway. + +4. Rate-limit requests by source IP address - that is, after any request +discard additional ones during some timeout period. Again, good for +stopping a single-source DoS against an exposed irker, won't stop a +DDoS. The real problem, though, is that any such rate limit might interfere +with legitimate high-volume use by a very active repo site. + +After this we appear to have run out of easy options, as source IP address +is the only thing irkerd can see that an attacker can't spoof. 
+ +We mitigate some availability risks by reaping old sessions when we're +near resource limits. An ordinary DoS attack would then be prevented +from completely blocking all message traffic; the cost would be a +whole lot of join/leave spam due to connection churn. + +== Authentication/Integrity == + +One way to help prevent DoS attacks would be in-band authentication - +requiring irkerd submitters to present a credential along with each +message submission. In principle this, if it existed, could also be used +to verify that a submitter is authorized to issue notifications with +respect to a given project. + +We rejected this approach. The design goal for irker was to make +submissions fast, cheap, and stateless; baking an authentication +system directly into the irkerd codebase would have conflicted with +these objectives, not to mention probably becoming the camel's nose +for a godawful amount of code bloat. + +The deployment advice in the installation instructions assumes that +irkerd submitters are "authenticated" by being inside a firewall - that is, +messages are issued from an intranet and it can be trusted that anyone +issuing messages from within a given intranet is authorized to do so. +This fits the assumption that irker instances will run on forge sites +receiving requests from instances of irkerhook.py. + +One larger issue (not unique to irker) is that because of the +insecure nature of IRC it is essentially impossible to secure +#commits against commit notifications that are either garbled by +software errors and misconfigurations or maliciously crafted to +confuse anyone attempting to gather statistics from that channel. The +lesson here is that IRC monitoring isn't a good method for that +purpose; going direct to the repositories via a toolkit such as Ohloh +is a far better idea. 
+ +When this analysis was originally written, we recommended using spiped +or stunnel to solve the problem of passing notifications from irkerd +to IRC servers over a potentially hostile network that might interfere +with them. Later, SSL/TLS support proved easy to add and is now in +irkerd itself. + +== Secrecy == + +irkerd has no inherent secrecy risks. + +The distributed version of irkerhook.py removes the host part of +author addresses specifically in order to prevent address harvesting +from the notifications. + +== Auditability == + +We previously noted that source IP address is the only thing irker can +see that an attacker can't spoof. This makes auditability difficult +unless we impose conventions on the notifications passing through it. + +The irkerhook.py that we ship inherits an auditability property from +the CIA service it was designed to replace: the first field of every +notification (terminated by a colon) is the name of the issuing +project. The only other competitor to replace CIA known to us +(kgb_bot) shares this property. + +In the general case we cannot guarantee this property against +groups A and F. + +== Risks relative to centralized services == + +irker and irkerhook.py were written as a replacement for the +now-defunct CIA notification service. The author has written +a critique of that service: "CIA and the perils of overengineering" +at <http://esr.ibiblio.org/?p=4540>. It is thus worth considering how +a risk assessment of CIA compares to this one. + +The principal advantages of CIA from a security point of view were (a) +it provided a single point at which spam filtering and source blocking +could be done with benefit to all projects using the service, and (b) +since it had to have a database anyway for routing messages to project +channels, the incremental overhead for an authentication feature would +have been relatively low. + +As a matter of fact rather than theory CIA never fully exploited +either possibility. 
Anyone could create a CIA project entry with +fanout to any desired set of IRC channels. Notifications were not +authenticated, so anyone could masquerade as a member of any project. +The only check on abuse was human intervention to source-block +spammers, and this was by no means completely effective - spam shipped +via CIA was occasionally seen on the freenode #commits channel. + +The principal security disadvantage of CIA was that it meant the +entire notification system was subject to single-point failure due +to software or hosting failures on cia.vc, or to DoS attacks +against the server. While there is no evidence that the site +was ever deliberately DoSed, failures were sufficiently common +that a half-hearted DoS attack might not have been even noticed. + +Despite the absence of authentication, irker instances on +properly firewalled intranets do not obviously pose additional +spamming risks beyond those incurred by the CIA service. The +overall robustness of the notification system as a whole should +be greatly improved. + +== Conclusions == + +The security and DoS issues irker has are not readily addressable by +changing the irker codebase itself, short of a complete (much more +complex and heavyweight) redesign. They are largely implicit risks of +its operating environment and must be managed by properly controlling +access to irker instances. + |