diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 14:27:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 14:27:58 +0000 |
commit | 3aecc6d0a8d757d6159dfa5083f1b254296c5b43 (patch) | |
tree | 3a305eb312ed47a1ac8967170ea1d45bb3c208be /irk | |
parent | Initial commit. (diff) | |
download | irker-3aecc6d0a8d757d6159dfa5083f1b254296c5b43.tar.xz irker-3aecc6d0a8d757d6159dfa5083f1b254296c5b43.zip |
Adding upstream version 2.23+dfsg.upstream/2.23+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rwxr-xr-x | irk | 65 | ||||
-rw-r--r-- | irk.xml | 84 | ||||
-rwxr-xr-x | irkerd | 1147 | ||||
-rw-r--r-- | irkerd.service | 16 | ||||
-rw-r--r-- | irkerd.xml | 261 | ||||
-rwxr-xr-x | irkerhook.py | 609 | ||||
-rw-r--r-- | irkerhook.xml | 417 |
7 files changed, 2599 insertions, 0 deletions
@@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# Illustrates how to test irkerd. +# +# First argument must be a channel URL. If it does not begin with "irc", +# the base URL for freenode is prepended. +# +# Second argument must be a payload string. Standard C-style escapes +# such as \n and \t are decoded. +# +# SPDX-License-Identifier: BSD-2-Clause +import json +import socket +import sys +import fileinput + +DEFAULT_SERVER = ("localhost", 6659) + +def connect(server = DEFAULT_SERVER): + return socket.create_connection(server) + +def send(s, target, message): + data = {"to": target, "privmsg" : message} + dump = json.dumps(data) + if not isinstance(dump, bytes): + dump = dump.encode('ascii') + s.sendall(dump) + +def irk(target, message, server = DEFAULT_SERVER): + s = connect(server) + if "irc:" not in target and "ircs:" not in target: + target = "irc://chat.freenode.net/{0}".format(target) + if message == '-': + for line in fileinput.input('-'): + send(s, target, line.rstrip('\n')) + else: + send(s, target, message) + s.close() + +def main(): + if len(sys.argv) < 2: + sys.stderr.write("irk: a URL argument is required\n") + sys.exit(1) + target = sys.argv[1] + message = " ".join(sys.argv[2:]) + # Allows pretty formatting of irker messages + if str == bytes: + message = message.decode('string_escape') + + # The actual IRC limit is 512. Avoid any off-by-ones + chunksize = 511 + try: + while message[:chunksize]: + irk(target, message[:chunksize]) + message = message[chunksize:] + except socket.error as e: + sys.stderr.write("irk: write to server failed: %r\n" % e) + sys.exit(1) + +if __name__ == '__main__': + main() + +# The following sets edit modes for GNU EMACS +# Local Variables: +# mode:python +# End: @@ -0,0 +1,84 @@ +<!DOCTYPE refentry PUBLIC + "-//OASIS//DTD DocBook XML V4.1.2//EN" + "docbook/docbookx.dtd"> +<refentry id='irk.8'> +<refmeta> +<refentrytitle>irk</refentrytitle> +<manvolnum>1</manvolnum> +<refmiscinfo class='date'>Apr 30 2014</refmiscinfo> +<refmiscinfo class='source'>irker</refmiscinfo> +<refmiscinfo class='product'>irker</refmiscinfo> +<refmiscinfo class='manual'>Commands</refmiscinfo> +</refmeta> +<refnamediv id='name'> +<refname>irk</refname> +<refpurpose>test program for irkerd</refpurpose> +</refnamediv> +<refsynopsisdiv id='synopsis'> + +<cmdsynopsis> + <command>irk</command> + <arg><replaceable>target</replaceable></arg> + <arg choice='opt'><replaceable>message text</replaceable></arg> +</cmdsynopsis> +</refsynopsisdiv> + +<refsect1 id='description'><title>DESCRIPTION</title> + +<para><application>irk</application> is a simple test program for +<citerefentry><refentrytitle>irkerd</refentrytitle><manvolnum>8</manvolnum></citerefentry>. It +will construct a simple JSON object and pass it to the daemon running +on localhost.</para> +</refsect1> + +<refsect1 id='options'><title>OPTIONS</title> + +<para><application>irk</application> takes the following options:</para> + +<variablelist> +<varlistentry> +<term>target</term> +<listitem><para>Which server and channel to join to announced the +message. If not prefixed with "irc:", it will prefix +"irc://chat.freenode.net/" to the argument before passing it directly +to irkerd. This argument is passed as the "to" parameter in the JSON +object.</para></listitem> +</varlistentry> +<varlistentry> +<term>message</term> +<listitem><para>Which message to send to the target specified +above. If the string "-", the message will be read from standard +input, with newlines stripped.</para></listitem> +</varlistentry> +</variablelist> + +</refsect1> + +<refsect1 id='limitations'><title>LIMITATIONS</title> + +<para><application>irk</application> has no commandline usage and may +be riddled with bugs.</para> + +<para><application>irk</application> doesn't know how to talk to your +favorite VCS. You will generally want to use +<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry> +instead</para> + +<para><application>irk</application> has also all the limitations of +<application>irkerd</application>.</para> +</refsect1> + +<refsect1 id='see_also'><title>SEE ALSO</title> +<para> +<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>, +</para> +</refsect1> + +<refsect1 id='authors'><title>AUTHOR</title> +<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the +project page at <ulink +url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink> +for updates and other resources, including an installable repository +hook script.</para> +</refsect1> +</refentry> @@ -0,0 +1,1147 @@ +#!/usr/bin/env python3 +""" +irkerd - a simple IRC multiplexer daemon + +Listens for JSON objects of the form {'to':<irc-url>, 'privmsg':<text>} +and relays messages to IRC channels. Each request must be followed by +a newline. + +The <text> must be a string. The value of the 'to' attribute can be a +string containing an IRC URL (e.g. 'irc://chat.freenet.net/botwar') or +a list of such strings; in the latter case the message is broadcast to +all listed channels. Note that the channel portion of the URL need +*not* have a leading '#' unless the channel name itself does. + +Design and code by Eric S. Raymond <esr@thyrsus.com>. See the project +resource page at <http://www.catb.org/~esr/irker/>. + +Requires Python 2.7, or: +* 2.6 with the argparse package installed. +* Any 3.x + +""" +# SPDX-License-Identifier: BSD-2-Clause + +# These things might need tuning + +HOST = "localhost" +HOST6 = "localhost" +PORT = 6659 + +PROXY_TYPE = None # Use proxy if set 1: SOCKS4, 2: SOCKS5, 3: HTTP +PROXY_HOST = "" +PROXY_PORT = 1080 + +XMIT_TTL = (3 * 60 * 60) # Time to live, seconds from last transmit +PING_TTL = (15 * 60) # Time to live, seconds from last PING +HANDSHAKE_TTL = 60 # Time to live, seconds from nick transmit +CHANNEL_TTL = (3 * 60 * 60) # Time to live, seconds from last transmit +DISCONNECT_TTL = (24 * 60 * 60) # Time to live, seconds from last connect +UNSEEN_TTL = 60 # Time to live, seconds since first request +CHANNEL_MAX = 18 # Max channels open per socket (default) +ANTI_FLOOD_DELAY = 1.0 # Anti-flood delay after transmissions, seconds +ANTI_BUZZ_DELAY = 0.09 # Anti-buzz delay after queue-empty check +CONNECTION_MAX = 200 # To avoid hitting a thread limit +RECONNECT_DELAY = 3 # Don't spam servers with connection attempts + +# No user-serviceable parts below this line + +# pylint: disable=too-many-lines,invalid-name,missing-function-docstring,missing-class-docstring,redefined-outer-name,logging-not-lazy,too-many-arguments,too-many-branches,too-many-instance-attributes,attribute-defined-outside-init,raise-missing-from,no-else-return,no-else-break,too-many-statements,too-many-nested-blocks,no-self-use + +version = "2.23" + +# pylint: disable=wrong-import-position +import argparse +import logging +import logging.handlers +import json +import os +import os.path +try: # Python 3 + import queue +except ImportError: # Python 2 + import Queue as queue +import random +import re +import select +import signal +import socket +try: + import socks + socks_on = True +except ImportError: + socks_on = False +try: # Python 3 + import socketserver +except ImportError: # Python 2 + import SocketServer as socketserver +import ssl +import sys +import threading +import time +import traceback +try: # Python 3 + import urllib.parse as urllib_parse +except ImportError: # Python 2 + import urlparse as urllib_parse + + +LOG = logging.getLogger(__name__) +LOG.setLevel(logging.ERROR) +LOG_LEVELS = ['critical', 'error', 'warning', 'info', 'debug'] + +try: # Python 2 + UNICODE_TYPE = unicode +except NameError: # Python 3 + UNICODE_TYPE = str + + +# Sketch of implementation: +# +# One Irker object manages multiple IRC sessions. It holds a map of +# Dispatcher objects, one per (server, port) combination, which are +# responsible for routing messages to one of any number of Connection +# objects that do the actual socket conversations. The reason for the +# Dispatcher layer is that IRC daemons limit the number of channels a +# client (that is, from the daemon's point of view, a socket) can be +# joined to, so each session to a server needs a flock of Connection +# instances each with its own socket. +# +# Connections are timed out and removed when either they haven't seen a +# PING for a while (indicating that the server may be stalled or down) +# or there has been no message traffic to them for a while, or +# even if the queue is nonempty but efforts to connect have failed for +# a long time. +# +# There are multiple threads. One accepts incoming traffic from all +# servers. Each Connection also has a consumer thread and a +# thread-safe message queue. The program main appends messages to +# queues as JSON requests are received; the consumer threads try to +# ship them to servers. When a socket write stalls, it only blocks an +# individual consumer thread; if it stalls long enough, the session +# will be timed out. This solves the biggest problem with a +# single-threaded implementation, which is that you can't count on a +# single stalled write not hanging all other traffic - you're at the +# mercy of the length of the buffers in the TCP/IP layer. +# +# Message delivery is thus not reliable in the face of network stalls, +# but this was considered acceptable because IRC (notoriously) has the +# same problem - there is little point in reliable delivery to a relay +# that is down or unreliable. +# +# This code uses only NICK, JOIN, PART, MODE, PRIVMSG, USER, and QUIT. +# It is strictly compliant to RFC1459, except for the interpretation and +# use of the DEAF and CHANLIMIT and (obsolete) MAXCHANNELS features. +# +# CHANLIMIT is as described in the Internet RFC draft +# draft-brocklesby-irc-isupport-03 at <http://www.mirc.com/isupport.html>. +# The ",isnick" feature is as described in +# <http://ftp.ics.uci.edu/pub/ietf/uri/draft-mirashi-url-irc-01.txt>. + +# Historical note: the IRCClient and IRCServerConnection classes +# (~270LOC) replace the overweight, overcomplicated 3KLOC mass of +# irclib code that irker formerly used as a service library. They +# still look similar to parts of irclib because I contributed to that +# code before giving up on it. + +class IRCError(BaseException): + "An IRC exception" + # pylint: disable=unnecessary-pass + pass + +class InvalidRequest(ValueError): + "An invalid JSON request" + # pylint: disable=unnecessary-pass + pass + +class TCP6Server(socketserver.TCPServer): + "TCP server that supports IPv6" + address_family = socket.AF_INET6 + +class UDP6Server(socketserver.UDPServer): + "UDP server that supports IPv6" + address_family = socket.AF_INET6 + +class IRCClient(): + "An IRC client session to one or more servers." + def __init__(self): + self.mutex = threading.RLock() + self.server_connections = [] + self.event_handlers = {} + self.add_event_handler("ping", + lambda c, e: c.ship("PONG %s" % e.target)) + self.drops = 0 + + def newserver(self): + "Initialize a new server-connection object." + conn = IRCServerConnection(self) + with self.mutex: + self.server_connections.append(conn) + return conn + + def spin(self, immediate=False, timeout=0.2): + "Spin processing data from connections forever." + # Outer loop should specifically *not* be mutex-locked. + # Otherwise no other thread would ever be able to change + # the shared state of an IRC object running this function. + while True: + nextsleep = 0 + with self.mutex: + connected = [x for x in self.server_connections + if x is not None and x.socket is not None] + sockets = [x.socket for x in connected] + if sockets: + connmap = {c.socket.fileno(): c for c in connected} + (insocks, _o, _e) = select.select(sockets, [], [], timeout) + for s in insocks: + try: + connmap[s.fileno()].consume() + except UnicodeDecodeError as e: + LOG.warning('%s: invalid encoding (%s)', self, e) + else: + nextsleep = timeout + if immediate and self.drops > 0: + break + time.sleep(nextsleep) + + def add_event_handler(self, event, handler): + "Set a handler to be called later." + with self.mutex: + event_handlers = self.event_handlers.setdefault(event, []) + event_handlers.append(handler) + + def handle_event(self, connection, event): + with self.mutex: + h = self.event_handlers + th = h.get("all_events", []) + h.get(event.type, []) + for handler in th: + handler(connection, event) + + def drop_connection(self, connection): + with self.mutex: + self.server_connections.remove(connection) + self.drops += 1 + +class LineBufferedStream(): + "Line-buffer a read stream." + _crlf_re = re.compile(b'\r?\n') + + def __init__(self): + self.buffer = b'' + + def append(self, newbytes): + self.buffer += newbytes + + def lines(self): + "Iterate over lines in the buffer." + lines = self._crlf_re.split(self.buffer) + self.buffer = lines.pop() + return iter(lines) + + def __iter__(self): + return self.lines() + +class IRCServerConnectionError(IRCError): + pass + +class IRCServerConnection(): + command_re = re.compile("^(:(?P<prefix>[^ ]+) +)?(?P<command>[^ ]+)( *(?P<argument> .+))?") + # The full list of numeric-to-event mappings is in Perl's Net::IRC. + # We only need to ensure that if some ancient server throws numerics + # for the ones we actually want to catch, they're mapped. + codemap = { + "001": "welcome", + "005": "featurelist", + "324": "mode", + #"404": "cannotsendtochan", + "432": "erroneusnickname", + "433": "nicknameinuse", + "436": "nickcollision", + "437": "unavailresource", + } + + def __init__(self, master): + self.master = master + self.socket = None + + # PROTOCOL_SSLv23 selects the highest version that both client and server support + def _wrap_socket(self, socket, target, certfile=None, cafile=None, + protocol=ssl.PROTOCOL_SSLv23): + try: # Python 3.2 and greater + ssl_context = ssl.SSLContext(protocol) + except AttributeError: # Python < 3.2 + # pylint: disable=deprecated-method + self.socket = ssl.wrap_socket( + socket, certfile=certfile, cert_reqs=ssl.CERT_REQUIRED, + ssl_version=protocol, ca_certs=cafile) + else: + ssl_context.verify_mode = ssl.CERT_REQUIRED + if certfile: + ssl_context.load_cert_chain(certfile) + if cafile: + ssl_context.load_verify_locations(cafile=cafile) + else: + ssl_context.set_default_verify_paths() + kwargs = {} + if ssl.HAS_SNI: + kwargs['server_hostname'] = target.servername + self.socket = ssl_context.wrap_socket(socket, **kwargs) + return self.socket + + def _check_hostname(self, target): + if hasattr(ssl, 'match_hostname'): # Python >= 3.2 + cert = self.socket.getpeercert() + try: + ssl.match_hostname(cert, target.servername) + except ssl.CertificateError as e: + raise IRCServerConnectionError( + 'Invalid SSL/TLS certificate: %s' % e) + else: # Python < 3.2 + LOG.warning( + 'cannot check SSL/TLS hostname with Python %s' % sys.version) + + def connect(self, target, nickname, username=None, realname=None, timeout=5.0, + **kwargs): + LOG.debug("connect(server=%r, port=%r, nickname=%r, ...)" % ( + target.servername, target.port, nickname)) + if self.socket is not None: + self.disconnect("Changing servers") + + self.buffer = LineBufferedStream() + self.event_handlers = {} + self.real_server_name = "" + self.target = target + self.nickname = nickname + + err = None + for res in socket.getaddrinfo(target.servername, target.port, 0, socket.SOCK_STREAM): + af, socktype, proto, _, sa = res + try: + if socks_on and PROXY_TYPE: + self.socket = socks.socksocket(af, socktype, proto) + self.socket.set_proxy(PROXY_TYPE, PROXY_HOST, PROXY_PORT) + else: + self.socket = socket.socket(af, socktype, proto) + if target.ssl: + self.socket = self._wrap_socket( + socket=self.socket, target=target, **kwargs) + self.socket.bind(('', 0)) + self.socket.settimeout(timeout) + self.socket.connect(sa) + self.socket.settimeout(None) + + # Break explicitly a reference cycle + err = None + break + + except socket.error as _: + err = _ + if self.socket is not None: + self.socket.close() + + if self.socket is None: + err = socket.error("getaddrinfo returns an empty list") + + if err is not None: + try: + raise IRCServerConnectionError("Couldn't connect to socket: %s" % err) + finally: + # Break explicitly a reference cycle + err = None + if target.ssl: + self._check_hostname(target=target) + if target.password: + self.ship("PASS " + target.password) + self.nick(self.nickname) + self.user( + username=target.username or username or 'irker', + realname=realname or 'irker relaying client') + return self + + def close(self): + # Without this thread lock, there is a window during which + # select() can find a closed socket, leading to an EBADF error. + with self.master.mutex: + self.disconnect("Closing object") + self.master.drop_connection(self) + + def consume(self): + try: + incoming = self.socket.recv(16384) + except socket.error: + # Server hung up on us. + self.disconnect("Connection reset by peer") + return + if not incoming: + # Dead air also indicates a connection reset. + self.disconnect("Connection reset by peer") + return + + self.buffer.append(incoming) + + for line in self.buffer: + if not isinstance(line, UNICODE_TYPE): + line = UNICODE_TYPE(line, 'utf-8') + LOG.debug("FROM: %s" % line) + + if not line: + continue + + prefix = None + command = None + arguments = None + self.handle_event(Event("every_raw_message", + self.real_server_name, + None, + [line])) + + m = IRCServerConnection.command_re.match(line) + if m.group("prefix"): + prefix = m.group("prefix") + if not self.real_server_name: + self.real_server_name = prefix + if m.group("command"): + command = m.group("command").lower() + if m.group("argument"): + a = m.group("argument").split(" :", 1) + arguments = a[0].split() + if len(a) == 2: + arguments.append(a[1]) + + command = IRCServerConnection.codemap.get(command, command) + if command in ["privmsg", "notice"]: + target = arguments.pop(0) + else: + target = None + + if command == "quit": + arguments = [arguments[0]] + elif command == "ping": + target = arguments[0] + else: + target = arguments[0] + arguments = arguments[1:] + + LOG.debug("command: %s, source: %s, target: %s, arguments: %s" % ( + command, prefix, target, arguments)) + self.handle_event(Event(command, prefix, target, arguments)) + + def handle_event(self, event): + self.master.handle_event(self, event) + if event.type in self.event_handlers: + for fn in self.event_handlers[event.type]: + fn(self, event) + + def is_connected(self): + return self.socket is not None + + def disconnect(self, message=""): + if self.socket is None: + return + # Don't send a QUIT here - causes infinite loop! + try: + self.socket.shutdown(socket.SHUT_WR) + self.socket.close() + except socket.error: + pass + del self.socket + self.socket = None + self.handle_event( + Event("disconnect", self.target.server, "", [message])) + + def join(self, channel, key=""): + self.ship("JOIN %s%s" % (channel, (key and (" " + key)))) + + def mode(self, target, command): + self.ship("MODE %s %s" % (target, command)) + + def nick(self, newnick): + self.ship("NICK " + newnick) + + def part(self, channel, message=""): + cmd_parts = ['PART', channel] + if message: + cmd_parts.append(message) + self.ship(' '.join(cmd_parts)) + + def privmsg(self, target, text): + self.ship("PRIVMSG %s :%s" % (target, text)) + + def quit(self, message=""): + self.ship("QUIT" + (message and (" :" + message))) + + def user(self, username, realname): + self.ship("USER %s 0 * :%s" % (username, realname)) + + def ship(self, string): + "Ship a command to the server, appending CR/LF" + try: + self.socket.send(string.encode('utf-8') + b'\r\n') + LOG.debug("TO: %s" % string) + except socket.error: + self.disconnect("Connection reset by peer.") + +# pylint: disable=useless-object-inheritance,too-few-public-methods +class Event(object): + def __init__(self, evtype, source, target, arguments=None): + self.type = evtype + self.source = source + self.target = target + if arguments is None: + arguments = [] + self.arguments = arguments + +def is_channel(string): + return string and string[0] in "#&+!" + +class Connection: + def __init__(self, irker, target, nick_template, nick_needs_number=False, + password=None, **kwargs): + self.irker = irker + self.target = target + self.nick_template = nick_template + self.nick_needs_number = nick_needs_number + self.password = password + self.kwargs = kwargs + self.nick_trial = None + self.connection = None + self.status = None + self.last_xmit = time.time() + self.last_ping = time.time() + self.channels_joined = {} + self.channel_limits = {} + #self.channel_needs_join = {} # Set on receipt of cannotsendtochan, not yet used + self.channel_mode_queried = set() + # The consumer thread + self.queue = queue.Queue() + self.thread = None + def nickname(self, n=None): + "Return a name for the nth server connection." + if n is None: + n = self.nick_trial + if self.nick_needs_number: + return self.nick_template % n + else: + return self.nick_template + def handle_ping(self): + "Register the fact that the server has pinged this connection." + self.last_ping = time.time() + def handle_welcome(self): + "The server says we're OK, with a non-conflicting nick." + self.status = "ready" + LOG.info("nick %s accepted" % self.nickname()) + if self.password: + self.connection.privmsg("nickserv", "identify %s" % self.password) + def handle_badnick(self): + "The server says our nick is ill-formed or has a conflict." + LOG.info("nick %s rejected" % self.nickname()) + if self.nick_needs_number: + # Randomness prevents a malicious user or bot from + # anticipating the next trial name in order to block us + # from completing the handshake. + self.nick_trial += random.randint(1, 3) + self.last_xmit = time.time() + self.connection.nick(self.nickname()) + # Otherwise fall through, it might be possible to + # recover manually. + def handle_disconnect(self): + "Server disconnected us for flooding or some other reason." + self.connection = None + if self.status != "expired": + self.status = "disconnected" + # Avoid flooding the server if it disconnects + # immediately on sucessful login. + time.sleep(RECONNECT_DELAY) + def handle_kick(self, outof): + "We've been kicked." + self.status = "handshaking" + try: + del self.channels_joined[outof] + except KeyError: + LOG.error("irkerd: kicked by %s from %s that's not joined" % ( + self.target, outof)) + qcopy = [] + while not self.queue.empty(): + (channel, message, key) = self.queue.get() + if channel != outof: + qcopy.append((channel, message, key)) + for (channel, message, key) in qcopy: + self.queue.put((channel, message, key)) + self.status = "ready" + #def handle_cannotsendtochan(self, outof): + # "Joinless message send refused." + # self.channel_needs_join.add(outof) + def handle_mode(self, outof, arg): + "Mode reply." + # Stub - not yet used + LOG.info("MODE source %s has mode %s" % (outof, arg)) + def send(self, channel, message): + for segment in message.split("\n"): + # Truncate the message if it's too long, + # but we're working with characters here, + # not bytes, so we could be off. + # 500 = 512 - CRLF - 'PRIVMSG ' - ' :' + maxlength = 500 - len(channel) + if len(segment) > maxlength: + segment = segment[:maxlength] + try: + self.connection.privmsg(channel, segment) + except ValueError as err: + LOG.warning(( + "rejected a message to %s on %s " + "because: %s") % ( + channel, self.target, UNICODE_TYPE(err))) + LOG.debug(traceback.format_exc()) + time.sleep(ANTI_FLOOD_DELAY) + self.last_xmit = time.time() + LOG.info("XMIT_TTL bump (%s transmission) at %s" % ( + self.target, time.asctime())) + def enqueue(self, channel, message, key, quit_after=False): + "Enque a message for transmission." + if self.thread is None or not self.thread.is_alive(): + self.status = "unseen" + self.thread = threading.Thread(target=self.dequeue) + self.thread.setDaemon(True) + self.thread.start() + self.queue.put((channel, message, key)) + if quit_after: + self.queue.put((channel, None, key)) + def dequeue(self): + "Try to ship pending messages from the queue." + # pylint:disable=broad-except + try: + while True: + # We want to be kind to the IRC servers and not hold unused + # sockets open forever, so they have a time-to-live. The + # loop is coded this particular way so that we can drop + # the actual server connection when its time-to-live + # expires, then reconnect and resume transmission if the + # queue fills up again. + if self.queue.empty(): + # Queue is empty, at some point we want to time out + # the connection rather than holding a socket open in + # the server forever. + now = time.time() + xmit_timeout = now > self.last_xmit + XMIT_TTL + ping_timeout = now > self.last_ping + PING_TTL + if self.status == "disconnected": + # If the queue is empty, we can drop this connection. + self.status = "expired" + break + elif xmit_timeout or ping_timeout: + LOG.info(( + "timing out connection to %s at %s " + "(ping_timeout=%s, xmit_timeout=%s)") % ( + self.target, time.asctime(), ping_timeout, + xmit_timeout)) + with self.irker.irc.mutex: + self.connection.context = None + self.connection.quit("transmission timeout") + self.connection = None + self.status = "disconnected" + else: + # Prevent this thread from hogging the CPU by pausing + # for just a little bit after the queue-empty check. + # As long as this is less that the duration of a human + # reflex arc it is highly unlikely any human will ever + # notice. + time.sleep(ANTI_BUZZ_DELAY) + elif self.status == "disconnected" \ + and time.time() > self.last_xmit + DISCONNECT_TTL: + # Queue is nonempty, but the IRC server might be + # down. Letting failed connections retain queue + # space forever would be a memory leak. + self.status = "expired" + break + elif not self.connection and self.status != "expired": + # Queue is nonempty but server isn't connected. + with self.irker.irc.mutex: + self.connection = self.irker.irc.newserver() + self.connection.context = self + # Try to avoid colliding with other instances + self.nick_trial = random.randint(1, 990) + self.channels_joined = {} + try: + # This will throw + # IRCServerConnectionError on failure + self.connection.connect( + target=self.target, + nickname=self.nickname(), + **self.kwargs) + self.status = "handshaking" + LOG.info("XMIT_TTL bump (%s connection) at %s" % ( + self.target, time.asctime())) + self.last_xmit = time.time() + self.last_ping = time.time() + except IRCServerConnectionError as e: + LOG.error("irkerd: %s" % e) + self.status = "expired" + break + elif self.status == "handshaking": + if time.time() > self.last_xmit + HANDSHAKE_TTL: + self.status = "expired" + break + else: + # Don't buzz on the empty-queue test while we're + # handshaking + time.sleep(ANTI_BUZZ_DELAY) + elif self.status == "unseen" \ + and time.time() > self.last_xmit + UNSEEN_TTL: + # Nasty people could attempt a denial-of-service + # attack by flooding us with requests with invalid + # servernames. We guard against this by rapidly + # expiring connections that have a nonempty queue but + # have never had a successful open. + self.status = "expired" + break + elif self.status == "ready": + (channel, message, key) = self.queue.get() + if channel not in self.channels_joined: + self.connection.join(channel, key=key) + LOG.info("joining %s on %s." % (channel, self.target)) + self.channels_joined[channel] = time.time() + # None is magic - it's a request to quit the server + if message is None: + self.connection.quit() + # An empty message might be used as a keepalive or + # to join a channel for logging, so suppress the + # privmsg send unless there is actual traffic. + elif message: + self.send(channel, message) + self.queue.task_done() + elif self.status == "expired": + LOG.error( + "irkerd: we're expired but still running! This is a bug.") + break + except Exception as e: + LOG.error("irkerd: exception %s in thread for %s" % (e, self.target)) + # Maybe this should have its own status? + self.status = "expired" + LOG.debug(traceback.format_exc()) + finally: + # Make sure we don't leave any zombies behind + if self.connection: + self.connection.close() + def live(self): + "Should this connection not be scavenged?" + return self.status != "expired" + def joined_to(self, channel): + "Is this connection joined to the specified channel?" + return channel in self.channels_joined + def accepting(self, channel): + "Can this connection accept a join of this channel?" + if self.channel_limits: + match_count = 0 + for already in self.channels_joined: + # This obscure code is because the RFCs allow separate limits + # by channel type (indicated by the first character of the name) + # a feature that is almost never actually used. + if already[0] == channel[0]: + match_count += 1 + return match_count < self.channel_limits.get(channel[0], CHANNEL_MAX) + else: + return len(self.channels_joined) < CHANNEL_MAX + +class Target(): + "Represent a transmission target." + def __init__(self, url): + self.url = url + parsed = urllib_parse.urlparse(url) + self.ssl = parsed.scheme == 'ircs' + if self.ssl: + default_ircport = 6697 + else: + default_ircport = 6667 + self.username = parsed.username + self.password = parsed.password + self.servername = parsed.hostname + self.port = parsed.port or default_ircport + # IRC channel names are case-insensitive. If we don't smash + # case here we may run into problems later. There was a bug + # observed on irc.rizon.net where an irkerd user specified #Channel, + # got kicked, and irkerd crashed because the server returned + # "#channel" in the notification that our kick handler saw. + self.channel = parsed.path.lstrip('/').lower() + # This deals with a tweak in recent versions of urlparse. + if parsed.fragment: + self.channel += "#" + parsed.fragment + isnick = self.channel.endswith(",isnick") + if isnick: + self.channel = self.channel[:-7] + if self.channel and not isnick and self.channel[0] not in "#&+": + self.channel = "#" + self.channel + # support both channel?secret and channel?key=secret + self.key = "" + if parsed.query: + self.key = re.sub("^key=", "", parsed.query) + + def __str__(self): + "Represent this instance as a string" + return self.servername or self.url or repr(self) + + def validate(self): + "Raise InvalidRequest if the URL is missing a critical component" + if not self.servername: + raise InvalidRequest( + 'target URL missing a servername: %r' % self.url) + if not self.channel: + raise InvalidRequest( + 'target URL missing a channel: %r' % self.url) + def server(self): + "Return a hashable tuple representing the destination server." + return (self.servername, self.port) + +class Dispatcher: + "Manage connections to a particular server-port combination." + def __init__(self, irker, **kwargs): + self.irker = irker + self.kwargs = kwargs + self.connections = [] + def dispatch(self, channel, message, key, quit_after=False): + "Dispatch messages for our server-port combination." + # First, check if there is room for another channel + # on any of our existing connections. + connections = [x for x in self.connections if x.live()] + eligibles = [x for x in connections if x.joined_to(channel)] \ + or [x for x in connections if x.accepting(channel)] + if eligibles: + eligibles[0].enqueue(channel, message, key, quit_after) + return + # All connections are full up. Look for one old enough to be + # scavenged. + ancients = [] + for connection in connections: + for (chan, age) in connections.channels_joined.items(): + if age < time.time() - CHANNEL_TTL: + ancients.append((connection, chan, age)) + if ancients: + ancients.sort(key=lambda x: x[2]) + (found_connection, drop_channel, _drop_age) = ancients[0] + found_connection.part(drop_channel, "scavenged by irkerd") + del found_connection.channels_joined[drop_channel] + #time.sleep(ANTI_FLOOD_DELAY) + found_connection.enqueue(channel, message, key, quit_after) + return + # All existing channels had recent activity + newconn = Connection(self.irker, **self.kwargs) + self.connections.append(newconn) + newconn.enqueue(channel, message, key, quit_after) + def live(self): + "Does this server-port combination have any live connections?" + self.connections = [x for x in self.connections if x.live()] + return len(self.connections) > 0 + def pending(self): + "Return all connections with pending traffic." + return [x for x in self.connections if not x.queue.empty()] + def last_xmit(self): + "Return the time of the most recent transmission." + return max(x.last_xmit for x in self.connections) + +class Irker: + "Persistent IRC multiplexer." + def __init__(self, logfile=None, **kwargs): + self.logfile = logfile + self.kwargs = kwargs + self.irc = IRCClient() + self.irc.add_event_handler("ping", self._handle_ping) + self.irc.add_event_handler("welcome", self._handle_welcome) + self.irc.add_event_handler("erroneusnickname", self._handle_badnick) + self.irc.add_event_handler("nicknameinuse", self._handle_badnick) + self.irc.add_event_handler("nickcollision", self._handle_badnick) + self.irc.add_event_handler("unavailresource", self._handle_badnick) + self.irc.add_event_handler("featurelist", self._handle_features) + self.irc.add_event_handler("disconnect", self._handle_disconnect) + self.irc.add_event_handler("kick", self._handle_kick) + #self.irc.add_event_handler("cannotsendtochan", self._handle_cannotsendtochan) + self.irc.add_event_handler("mode", self._handle_mode) + self.irc.add_event_handler("every_raw_message", self._handle_every_raw_message) + self.servers = {} + def thread_launch(self): + thread = threading.Thread(target=self.irc.spin) + thread.setDaemon(True) + #self.irc._thread = thread + thread.start() + def _handle_ping(self, connection, _event): + "PING arrived, bump the last-received time for the connection." + if connection.context: + connection.context.handle_ping() + def _handle_welcome(self, connection, _event): + "Welcome arrived, nick accepted for this connection." + if connection.context: + connection.context.handle_welcome() + def _handle_badnick(self, connection, _event): + "Nick not accepted for this connection." + if connection.context: + connection.context.handle_badnick() + def _handle_features(self, connection, event): + "Determine if and how we can set deaf mode." + if connection.context: + cxt = connection.context + arguments = event.arguments + for lump in arguments: + if lump.startswith("DEAF="): + if not self.logfile: + connection.mode(cxt.nickname(), "+"+lump[5:]) + elif lump.startswith("MAXCHANNELS="): + m = int(lump[12:]) + for pref in "#&+": + cxt.channel_limits[pref] = m + LOG.info("%s maxchannels is %d" % (connection.target, m)) + elif lump.startswith("CHANLIMIT=#:"): + limits = lump[10:].split(",") + try: + for token in limits: + (prefixes, limit) = token.split(":") + limit = int(limit) + for c in prefixes: + cxt.channel_limits[c] = limit + LOG.info("%s channel limit map is %s" % ( + connection.target, cxt.channel_limits)) + except ValueError: + LOG.error("irkerd: ill-formed CHANLIMIT property") + def _handle_disconnect(self, connection, _event): + "Server hung up the connection." + LOG.info("server %s disconnected" % connection.target) + connection.close() + if connection.context: + connection.context.handle_disconnect() + def _handle_kick(self, connection, event): + "Server hung up the connection." + target = event.target + LOG.info("irker has been kicked from %s on %s" % ( + target, connection.target)) + if connection.context: + connection.context.handle_kick(target) + #def _handle_cannotsendtochan(self, connection, event): + # "Server refused message send without channel join" + # target = event.target + # LOG.info("joinless message refusal from %s on %s" % ( + # target, connection.target)) + # if connection.context: + # connection.context.handle_cannotsendtochan(target) + def _handle_mode(self, connection, event): + "Process mode reply." + LOG.info("mode reply %s on %s with %s" % ( + event.target, connection.target, event.arguments)) + if connection.context: + connection.context.handle_mode(event.target, event.arguments[0]) + def _handle_every_raw_message(self, _connection, event): + "Log all messages when in watcher mode." + if self.logfile: + with open(self.logfile, "ab") as logfp: + message = u"%03f|%s|%s\n" % \ + (time.time(), event.source, event.arguments[0]) + logfp.write(message.encode('utf-8')) + + def pending(self): + "Do we have any pending message traffic?" + return [k for (k, v) in self.servers.items() if v.pending()] + + def _parse_request(self, line): + "Request-parsing helper for the handle() method" + request = json.loads(line.strip()) + if not isinstance(request, dict): + raise InvalidRequest( + "request is not a JSON dictionary: %r" % request) + if "to" not in request or "privmsg" not in request: + raise InvalidRequest( + "malformed request - 'to' or 'privmsg' missing: %r" % request) + channels = request['to'] + message = request['privmsg'] + if not isinstance(channels, (list, UNICODE_TYPE)): + raise InvalidRequest( + "malformed request - unexpected channel type: %r" % channels) + if not isinstance(message, UNICODE_TYPE): + raise InvalidRequest( + "malformed request - unexpected message type: %r" % message) + if not isinstance(channels, list): + channels = [channels] + targets = [] + for url in channels: + try: + if not isinstance(url, UNICODE_TYPE): + raise InvalidRequest( + "malformed request - URL has unexpected type: %r" % + url) + target = Target(url) + target.validate() + except InvalidRequest as e: + LOG.error("irkerd: " + UNICODE_TYPE(e)) + else: + targets.append(target) + return (targets, message) + + def handle(self, line, quit_after=False): + "Perform a JSON relay request." + try: + targets, message = self._parse_request(line=line) + for target in targets: + if target.server() not in self.servers: + self.servers[target.server()] = Dispatcher( + self, target=target, **self.kwargs) + self.servers[target.server()].dispatch( + target.channel, message, target.key, quit_after=quit_after) + # GC dispatchers with no active connections + servernames = self.servers.keys() + for servername in servernames: + if not self.servers[servername].live(): + del self.servers[servername] + # If we might be pushing a resource limit even + # after garbage collection, remove a session. The + # goal here is to head off DoS attacks that aim at + # exhausting thread space or file descriptors. + # The cost is that attempts to DoS this service + # will cause lots of join/leave spam as we + # scavenge old channels after connecting to new + # ones. The particular method used for selecting a + # session to be terminated doesn't matter much; we + # choose the one longest idle on the assumption + # that message activity is likely to be clumpy. + if len(self.servers) >= CONNECTION_MAX: + oldest = min( + self.servers.keys(), + key=lambda name: self.servers[name].last_xmit()) + del self.servers[oldest] + except InvalidRequest as e: + LOG.error("irkerd: " + UNICODE_TYPE(e)) + except ValueError: + LOG.error("irkerd: " + "can't recognize JSON on input: %r" % line) + except RuntimeError: + LOG.error("irkerd: " + "wildly malformed JSON blew the parser stack.") + +class IrkerTCPHandler(socketserver.StreamRequestHandler): + def handle(self): + while True: + line = self.rfile.readline() + if not line: + break + if not isinstance(line, UNICODE_TYPE): + line = UNICODE_TYPE(line, 'utf-8') + irker.handle(line=line.strip()) + +class IrkerUDPHandler(socketserver.BaseRequestHandler): + def handle(self): + line = self.request[0].strip() + #socket = self.request[1] + if not isinstance(line, UNICODE_TYPE): + line = UNICODE_TYPE(line, 'utf-8') + irker.handle(line=line.strip()) + +def in_background(): + "Is this process running in background?" + try: + return os.getpgrp() != os.tcgetpgrp(1) + except OSError: + return True + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__.strip().splitlines()[0]) + password_group = parser.add_mutually_exclusive_group() + parser.add_argument( + '-c', '--ca-file', metavar='PATH', + help='file of trusted certificates for SSL/TLS') + parser.add_argument( + '-e', '--cert-file', metavar='PATH', + help='pem file used to authenticate to the server') + parser.add_argument( + '-d', '--log-level', metavar='LEVEL', choices=LOG_LEVELS, + help='how much to log to the log file (one of %(choices)s)') + parser.add_argument( + '-H', '--host', metavar='ADDRESS', default=HOST, + help='IP address to listen on') + parser.add_argument( + '-H6', '--host6', metavar='ADDRESS', default=HOST, + help='IPv6 address to listen on') + parser.add_argument( + '-l', '--log-file', metavar='PATH', + help='file for saving captured message traffic') + parser.add_argument( + '-n', '--nick', metavar='NAME', default='irker%03d', + help="nickname (optionally with a '%%.*d' server connection marker)") + password_group.add_argument( + '-p', '--password', metavar='PASSWORD', + help='NickServ password') + password_group.add_argument( + '-P', '--password-file', metavar='PATH', type=argparse.FileType('r'), + help='NickServ password from file') + parser.add_argument( + '-t', '--timeout', metavar='TIMEOUT', type=float, default=5.0, + help="connection timeout in seconds (default: 5.0)") + parser.add_argument( + '-i', '--immediate', metavar='IRC-URL', + help=( + 'send a single message to IRC-URL and exit. The message is the ' + 'first positional argument.')) + parser.add_argument( + '-V', '--version', action='version', + version='%(prog)s {0}'.format(version)) + parser.add_argument( + 'message', metavar='MESSAGE', nargs='?', + help='message for --immediate mode') + args = parser.parse_args() + + if not args.log_file and in_background(): + # There's a case for falling back to address = ('localhost', 514) + # But some systems (including OS X) disable this for security reasons. + handler = logging.handlers.SysLogHandler(facility='daemon') + else: + handler = logging.StreamHandler() + + LOG.addHandler(handler) + if args.log_level: + log_level = getattr(logging, args.log_level.upper()) + LOG.setLevel(log_level) + + if args.password_file: + with args.file as f: + # IRC passwords must be at most 128 bytes, and cannot contain a \n + args.password = f.read(128).split("\n")[0].strip() + + irker = Irker( + logfile=args.log_file, + nick_template=args.nick, + nick_needs_number=re.search('%.*d', args.nick), + password=args.password, + cafile=args.ca_file, + certfile=args.cert_file, + timeout=args.timeout, + ) + LOG.info("irkerd version %s" % version) + if args.immediate: + if not args.message: + # We want newline to become '\n' and tab to become '\t'; + # the JSON decoder will undo these transformations. + # This will also encode backslash, backspace, formfeed, + # and high-half characters, which might produce unexpected + # results on output. + args.message = sys.stdin.read().encode("string_escape") + irker.irc.add_event_handler("quit", lambda _c, _e: sys.exit(0)) + irker.handle('{"to":"%s","privmsg":"%s"}' % ( + args.immediate, args.message), quit_after=True) + irker.irc.spin(immediate=True) + else: + if args.message: + LOG.error( + 'irkerd: message argument given (%r), but --immediate not set' % ( + args.message)) + raise SystemExit(1) + irker.thread_launch() + try: + tcpserver = socketserver.TCPServer((args.host, PORT), IrkerTCPHandler) + udpserver = socketserver.UDPServer((args.host, PORT), IrkerUDPHandler) + # pylint: disable=undefined-variable + tcp6server = TCP6Server((args.host6, PORT), IrkerTCPHandler) + udp6server = UDP6Server((args.host6, PORT), IrkerUDPHandler) + for server in [tcpserver, udpserver, tcp6server, udp6server]: + server = threading.Thread(target=server.serve_forever) + server.setDaemon(True) + server.start() + try: + signal.pause() + except KeyboardInterrupt: + raise SystemExit(1) + except socket.error as e: + LOG.error("irkerd: server launch failed: %r\n" % e) + +# end diff --git a/irkerd.service b/irkerd.service new file mode 100644 index 0000000..4e75ae2 --- /dev/null +++ b/irkerd.service @@ -0,0 +1,16 @@ +# Copyright 2012 Wulf C. Krueger <philantrop@exherbo.org> +# Distributed under the terms of the BSD LICENSE + +[Unit] +Description=Internet Relay Chat (IRC) notification daemon +Requires=network.target +Documentation=man:irkerd(8) man:irkerhook(1) man:irk(1) + +[Service] +User=irker +ExecStart=/usr/bin/irkerd +User=irker + +[Install] +WantedBy=multi-user.target +Alias=irker.service diff --git a/irkerd.xml b/irkerd.xml new file mode 100644 index 0000000..93a9df4 --- /dev/null +++ b/irkerd.xml @@ -0,0 +1,261 @@ +<!DOCTYPE refentry PUBLIC + "-//OASIS//DTD DocBook XML V4.1.2//EN" + "docbook/docbookx.dtd"> +<refentry id='irkerd.8'> +<refmeta> +<refentrytitle>irkerd</refentrytitle> +<manvolnum>8</manvolnum> +<refmiscinfo class='date'>Aug 27 2012</refmiscinfo> +<refmiscinfo class='source'>irker</refmiscinfo> +<refmiscinfo class='product'>irker</refmiscinfo> +<refmiscinfo class='manual'>Commands</refmiscinfo> +</refmeta> +<refnamediv id='name'> +<refname>irkerd</refname> +<refpurpose>relay for shipping notifications to IRC servers</refpurpose> +</refnamediv> +<refsynopsisdiv id='synopsis'> + +<cmdsynopsis> + <command>irkerd</command> + <arg>-c <replaceable>ca-file</replaceable></arg> + <arg>-d <replaceable>debuglevel</replaceable></arg> + <arg>-e <replaceable>cert-file</replaceable></arg> + <arg>-l <replaceable>logfile</replaceable></arg> + <arg>-H <replaceable>host</replaceable></arg> + <arg>-n <replaceable>nick</replaceable></arg> + <arg>-p <replaceable>password</replaceable></arg> + <arg>-P <replaceable>password-file</replaceable></arg> + <arg>-i <replaceable>IRC-URL</replaceable></arg> + <arg>-t <replaceable>timeout</replaceable></arg> + <arg>-V</arg> + <arg>-h</arg> + <arg choice='opt'><replaceable>message text</replaceable></arg> +</cmdsynopsis> +</refsynopsisdiv> + +<refsect1 id='description'><title>DESCRIPTION</title> + +<para><application>irkerd</application> is a specialized write-only IRC +client intended to be used for shipping notification messages to IRC +channels. The use case in mind when it was designed was broadcasting +notifications from commit hooks in version-control systems.</para> + +<para>The main advantage of relaying through this daemon over +individual scripted sends from applications is that it can maintain +connection state for multiple channels, rather than producing obnoxious +join/leave channel spam on every message.</para> + +<para><application>irkerd</application> is a socket server that +listens on for UDP or TCP packets on port 6659 for textual request +lines containing JSON objects and terminated by a newline. Each JSON +object must have two members: "to" specifying a destination or +destination list, and "privmsg" specifying the message text. +Examples: + +<programlisting> +{"to":"irc://chat.freenode.net/git-ciabot", "privmsg":"Hello, world!"} +{"to":["irc://chat.freenode.net/#git-ciabot","irc://chat.freenode.net/#gpsd"],"privmsg":"Multichannel test"} +{"to":"irc://chat.hypothetical.net:6668/git-ciabot", "privmsg":"Hello, world!"} +{"to":"ircs://chat.hypothetical.net/git-private?key=topsecret", "privmsg":"Keyed channel test"} +{"to":"ircs://:topsecret@chat.example.net/git-private", "privmsg":"Password-protected server test"} +</programlisting></para> + +<para>If the channel part of the URL does not have one of the prefix +characters <quote>#</quote>, <quote>&</quote>, or +<quote>+</quote>, a <quote>#</quote> will be prepended to it before +shipping - <emphasis>unless</emphasis> the channel part has the suffix +",isnick" (which is unconditionally removed).</para> + +<para>The host part of the URL may have a port-number suffix separated by a +colon, as shown in the third example; otherwise +<application>irkerd</application> sends plaintext messages to the default +6667 IRC port of each server, and SSL/TLS messages to 6697.</para> + +<para>The password for password-protected servers can be set using the +usual <quote>[{username}:{password}@]{host}:{port}</quote> defined in +RFC 3986, as shown in the fifth example. Non-empty URL usernames +override the default <quote>irker</quote> username.</para> + +<para>When the <quote>to</quote> URL uses the <quote>ircs</quote> +scheme (as shown in the fourth and fifth examples), the connection to +the IRC server is made via SSL/TLS (vs. a plaintext connection with the +<quote>irc</quote> scheme). To connect via SSL/TLS with Python 2.x, +you need to explicitly declare the certificate authority file used to +verify server certificates. For example, <quote>-c +/etc/ssl/certs/ca-certificates.crt</quote>. In Python 3.2 and later, +you can still set this option to declare a custom CA file, but +<application>irkerd</application>; if you don't set it +<application>irkerd</application> will use OpenSSL's default file +(using Python's +<quote>ssl.SSLContext.set_default_verify_paths</quote>). In Python +3.2 and later, <quote>ssl.match_hostname</quote> is used to ensure the +server certificate belongs to the intended host, as well as being +signed by a trusted CA.</para> + +<para>To join password-protected (mode +k) channels, the channel part of the +URL may be followed with a query-string indicating the channel key, of the +form <quote>?secret</quote> or <quote>?key=secret</quote>, where +<quote>secret</quote> is the channel key.</para> + +<para>An empty message is legal and will cause +<application>irkerd</application> to join or maintain a connection to +the target channels without actually emitting a message. This may be +useful for advertising that an instance is up and running, or for +joining a channel to log its traffic.</para> +</refsect1> + +<refsect1 id='options'><title>OPTIONS</title> + +<para><application>irkerd</application> takes the following options:</para> + +<variablelist> +<varlistentry> +<term>-d</term> +<listitem> + <para> + Takes a following value, setting the debugging level from it; + possible values are 'critical', 'error', 'warning', 'info', + 'debug'. This option will generally only be of interest to + developers, as the logs are designed to help trace + <application>irkerd</application>'s internal state. These tracing + logs are independent of the traffic logs controlled by + <quote>-l</quote>. + </para> + <para> + Logging will be to standard error (if + <application>irkerd</application> is running in the foreground) or + to <quote>/dev/syslog</quote> with facility "daemon" (if + <application>irkerd</application> is running in the background). + The background-ness of <application>irkerd</application> is + determined by comparing the process group id with the process + group associated with the terminal attached to stdout (with + non-matches for background processes). We assume you aren't + running <application>irkerd</application> in Windows or another OS + that doesn't support <quote>os.getpgrp</quote> or + <quote>tcgetpgrp</quote>. We assume that if stdout is attached to + a TTY associated with the same process group as + <application>irkerd</application>, you do intend to log to stderr + and not syslog. + </para> +</listitem> +</varlistentry> +<varlistentry> +<term>-e</term> +<listitem><para>Takes a following filename in pem format and uses it +to authenticate to the IRC server. You must be connecting to the IRC server +over SSL for this to function properly. This is commonly known as +<quote>CertFP.</quote> +</para></listitem> +</varlistentry> +<varlistentry> +<term>-e</term> +<listitem><para>Takes a following filename in pem format and uses it +to authenticate to the IRC server. You must be connecting to the IRC +server over SSL for this to function properly. This is commonly known +as <quote>CertFP.</quote></para> +</listitem> +</varlistentry> +<varlistentry> +<term>-l</term> +<listitem><para>Takes a following filename, logs traffic to that file. +Each log line consists of three |-separated fields; a numeric +timestamp in Unix time, the FQDN of the sending server, and the +message data.</para></listitem> +</varlistentry> +<varlistentry> +<term>-H</term> +<listitem><para>Takes a following hostname, and binds to that address +when listening for messages. <application>irkerd</application> binds +to localhost by default, but you may want to use your host's public +address to listen on a local network. Listening on a public interface +is not recommended, as it makes spamming IRC channels very +easy.</para></listitem> +</varlistentry> +<varlistentry> +<term>-n</term> +<listitem><para>Takes a following value, setting the nick +to be used. If the nick contains a numeric format element +(such as %03d) it is used to generate suffixed fallback names +in the event of a nick collision.</para></listitem> +</varlistentry> +<varlistentry> +<term>-p</term> +<listitem><para>Takes a following value, setting a nickserv +password to be used. If given, this password is shipped to +authenticate the nick on receipt of a welcome message.</para></listitem> +</varlistentry> +<varlistentry> +<term>-P</term> +<listitem><para>Liuke p, but the argument is interpreted as a filename +from which to read the password</para></listitem> +</varlistentry> +<varlistentry> +<term>-t</term> +<listitem><para>Takes a following value, setting the connection +timeout for server-socket opens.</para></listitem> +</varlistentry> +<varlistentry> +<term>-i</term> +<listitem><para>Immediate mode, to be run in foreground. Takes a following +following value interpreted as a channel URL. May take a second +argument giving a message string; if the second argument is absent the +message is read from standard input (and may contain newlines). +Sends the message, then quits.</para></listitem> +</varlistentry> +<varlistentry> +<term>-V</term> +<listitem><para>Write the program version to stdout and +terminate.</para></listitem> +</varlistentry> +<varlistentry> +<term>-h</term> +<listitem><para>Print usage instructions and terminate.</para></listitem> +</varlistentry> +</variablelist> +</refsect1> + +<refsect1 id='limitations'><title>LIMITATIONS</title> +<para>Requests via UDP optimizes for lowest latency and network load +by avoiding TCP connection setup time; the cost is that delivery is +not reliable in the face of packet loss.</para> + +<para>An <application>irkerd</application> instance with a +publicly-accessible request socket could complicate blocking of IRC +spam by making it easy for spammers to submit while hiding their IP +addresses; the better way to deploy, then, is on places like +project-hosting sites where the <application>irkerd</application> +socket can be visible from commit-hook code but not exposed to the +outside world. Priming your firewall with blocklists of IP addresses +known to spew spam is always a good idea.</para> + +<para>The absence of any option to set the service port is deliberate. +If you think you need to do that, you have a problem better solved at +your firewall.</para> + +<para>IRC has a message length limit of 510 bytes; generate your +privmsg attribute values with appropriate care.</para> + +<para>IRC ignores any text after an embedded newline. Be aware that +<application>irkerd</application> will turn payload strings with +embedded newlines into multiple IRC sends to avoid having message data +discarded. </para> + +<para>Due to a bug in Python URL parsing, IRC urls with both a # and a +key part may fail unexpectedly. The workaround is to remove the #.</para> +</refsect1> + +<refsect1 id='see_also'><title>SEE ALSO</title> +<para> +<citerefentry><refentrytitle>irkerhook</refentrytitle><manvolnum>1</manvolnum></citerefentry>, +</para> +</refsect1> + +<refsect1 id='authors'><title>AUTHOR</title> +<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the +project page at <ulink +url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink> +for updates and other resources, including an installable repository +hook script.</para> +</refsect1> +</refentry> diff --git a/irkerhook.py b/irkerhook.py new file mode 100755 index 0000000..cf787ed --- /dev/null +++ b/irkerhook.py @@ -0,0 +1,609 @@ +#!/usr/bin/env python3 +# Copyright (c) 2012 Eric S. Raymond <esr@thyrsus.com> +# SPDX-License-Identifier: BSD-2-Clause +''' +This script contains git porcelain and porcelain byproducts. +Requires either Python 2.6, or 2.5 with the simplejson library installed +or Python 3.x. + +usage: irkerhook.py [-V] [-n] [--variable=value...] [commit_id...] + +This script is meant to be run in an update or post-commit hook. +Try it with -n to see the notification dumped to stdout and verify +that it looks sane. With -V this script dumps its version and exits. + +See the irkerhook manual page in the distribution for a detailed +explanation of how to configure this hook. + +The default location of the irker proxy, if the project configuration +does not override it. +''' +# SPDX-License-Identifier: BSD-2-Clause +from __future__ import print_function, absolute_import + +# pylint: disable=line-too-long,invalid-name,missing-function-docstring,missing-class-docstring,no-else-break,no-else-return,too-many-instance-attributes,too-many-locals,too-many-branches,too-many-statements,redefined-outer-name,import-outside-toplevel,raise-missing-from + +default_server = "localhost" +IRKER_PORT = 6659 + +# The default service used to turn your web-view URL into a tinyurl so it +# will take up less space on the IRC notification line. +default_tinyifier = u"http://tinyurl.com/api-create.php?url=" + +# Map magic urlprefix values to actual URL prefixes. +urlprefixmap = { + "viewcvs": "http://%(host)s/viewcvs/%(repo)s?view=revision&revision=", + "gitweb": "http://%(host)s/cgi-bin/gitweb.cgi?p=%(repo)s;a=commit;h=", + "cgit": "http://%(host)s/cgi-bin/cgit.cgi/%(repo)s/commit/?id=", + } + +# By default, ship to the freenode #commits list +default_channels = u"irc://chat.freenode.net/#commits" + +# +# No user-serviceable parts below this line: +# + +version = "2.21" + +# pylint: disable=multiple-imports,wrong-import-position +import os, sys, socket, subprocess, locale, datetime, re + +try + from shlex import quote as shellquote +except ImportError: + from pipes import quote as shellquote + +try: + from urllib2 import urlopen, HTTPError +except ImportError: + from urllib.error import HTTPError + from urllib.request import urlopen + +try: + import simplejson as json # Faster, also makes us Python-2.5-compatible +except ImportError: + import json + +if sys.version_info.major == 2: + # pylint: disable=undefined-variable + string_type = unicode +else: + string_type = str + +try: + getstatusoutput = subprocess.getstatusoutput +except AttributeError: + # pylint: disable=import-error + import commands + getstatusoutput = commands.getstatusoutput + +def do(command): + if sys.version_info.major == 2: + return string_type(getstatusoutput(command)[1], locale.getlocale()[1] or 'UTF-8') + else: + return getstatusoutput(command)[1] + +# pylint: disable=too-few-public-methods +class Commit: + def __init__(self, extractor, commit): + "Per-commit data." + self.commit = commit + self.branch = None + self.rev = None + self.mail = None + self.author = None + self.files = None + self.logmsg = None + self.url = None + self.author_name = None + self.author_date = None + self.commit_date = None + self.id = None + self.__dict__.update(extractor.__dict__) + + if sys.version_info.major == 2: + # Convert __str__ to __unicode__ for python 2 + self.__unicode__ = self.__str__ + # Not really needed, but maybe useful for debugging + self.__str__ = lambda x: x.__unicode__().encode('utf-8') + + def __str__(self): + "Produce a notification string from this commit." + # pylint: disable=no-member + if not self.urlprefix: + self.url = "" + else: + # pylint: disable=no-member + urlprefix = urlprefixmap.get(self.urlprefix, self.urlprefix) + webview = (urlprefix % self.__dict__) + self.commit + try: + # See it the url is accessible + res = urlopen(webview) + if self.tinyifier and self.tinyifier.lower() != "none": + try: + # Didn't get a retrieval error on the web + # view, so try to tinyify a reference to it. + self.url = urlopen(self.tinyifier + webview).read() + try: + self.url = self.url.decode('UTF-8') + except UnicodeError: + pass + except IOError: + self.url = webview + else: + self.url = webview + except HTTPError as e: + if e.code == 401: + # Authentication error, so we assume the view is valid + self.url = webview + else: + self.url = "" + except IOError: + self.url = "" + # pylint: disable=no-member + res = self.template % self.__dict__ + return string_type(res, 'UTF-8') if not isinstance(res, string_type) else res + +class GenericExtractor: + "Generic class for encapsulating data from a VCS." + booleans = ["tcp"] + numerics = ["maxchannels"] + strings = ["email"] + def __init__(self, arguments): + self.arguments = arguments + self.project = None + self.repo = None + # These aren't really repo data but they belong here anyway... + self.email = None + self.tcp = True + self.tinyifier = default_tinyifier + self.server = None + self.channels = None + self.maxchannels = 0 + self.template = None + self.urlprefix = None + self.host = socket.getfqdn() + self.cialike = None + self.filtercmd = None + # Color highlighting is disabled by default. + self.color = None + self.bold = self.green = self.blue = self.yellow = self.red = "" + self.brown = self.magenta = self.cyan = self.reset = "" + def activate_color(self, style): + "IRC color codes." + if style == 'mIRC': + # mIRC colors are mapped as closely to the ANSI colors as + # possible. However, bright colors (green, blue, red, + # yellow) have been made their dark counterparts since + # ChatZilla does not properly darken mIRC colors in the + # Light Motif color scheme. + self.bold = '\x02' + self.green = '\x0303' + self.blue = '\x0302' + self.red = '\x0304' + self.red = '\x0305' + self.yellow = '\x0307' + self.brown = '\x0305' + self.magenta = '\x0306' + self.cyan = '\x0310' + self.reset = '\x0F' + if style == 'ANSI': + self.bold = '\x1b[1m' + self.green = '\x1b[1;32m' + self.blue = '\x1b[1;34m' + self.red = '\x1b[1;31m' + self.yellow = '\x1b[1;33m' + self.brown = '\x1b[33m' + self.magenta = '\x1b[35m' + self.cyan = '\x1b[36m' + self.reset = '\x1b[0m' + def load_preferences(self, conf): + "Load preferences from a file in the repository root." + if not os.path.exists(conf): + return + ln = 0 + for line in open(conf): + ln += 1 + if line.startswith("#") or not line.strip(): + continue + if line.count('=') != 1: + sys.stderr.write('%s:%d: missing = in config line\n' \ + % (conf, ln)) + continue + fields = line.split('=') + if len(fields) != 2: + sys.stderr.write('%s:%d: too many fields in config line\n' \ + % (conf, ln)) + continue + variable = fields[0].strip() + value = fields[1].strip() + if value.lower() == "true": + value = True + elif value.lower() == "false": + value = False + # User cannot set maxchannels - only a command-line arg can do that. + if variable == "maxchannels": + return + setattr(self, variable, value) + def do_overrides(self): + "Make command-line overrides possible." + for tok in self.arguments: + for key in self.__dict__: + if tok.startswith("--" + key + "="): + val = tok[len(key)+3:] + setattr(self, key, val) + for (key, val) in self.__dict__.items(): + if key in GenericExtractor.booleans: + if isinstance(val, str) and val.lower() == "true": + setattr(self, key, True) + elif isinstance(val, str) and val.lower() == "false": + setattr(self, key, False) + elif key in GenericExtractor.numerics: + setattr(self, key, int(val)) + elif key in GenericExtractor.strings: + setattr(self, key, val) + if not self.project: + sys.stderr.write("irkerhook.py: no project name set!\n") + raise SystemExit(1) + if not self.repo: + self.repo = self.project.lower() + if not self.channels: + self.channels = default_channels % self.__dict__ + if self.color and self.color.lower() != "none": + self.activate_color(self.color) + +def has(dirname, paths): + "Test for existence of a list of paths." + # all() is a python2.5 construct + for exists in [os.path.exists(os.path.join(dirname, x)) for x in paths]: + if not exists: + return False + return True + +# VCS-dependent code begins here + +class GitExtractor(GenericExtractor): + "Metadata extraction for the git version control system." + @staticmethod + def is_repository(dirname): + # Must detect both ordinary and bare repositories + return has(dirname, [".git"]) or \ + has(dirname, ["HEAD", "refs", "objects"]) + def __init__(self, arguments): + GenericExtractor.__init__(self, arguments) + # Get all global config variables + self.project = do("git config --get irker.project") + self.repo = do("git config --get irker.repo") + self.server = do("git config --get irker.server") + self.channels = do("git config --get irker.channels") + self.email = do("git config --get irker.email") + self.tcp = do("git config --bool --get irker.tcp") + self.template = do("git config --get irker.template") or u'%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' + self.tinyifier = do("git config --get irker.tinyifier") or default_tinyifier + self.color = do("git config --get irker.color") + self.urlprefix = do("git config --get irker.urlprefix") or u"gitweb" + self.cialike = do("git config --get irker.cialike") + self.filtercmd = do("git config --get irker.filtercmd") + # These are git-specific + self.refname = do("git symbolic-ref HEAD 2>/dev/null") + self.revformat = do("git config --get irker.revformat") + # The project variable defaults to the name of the repository toplevel. + if not self.project: + bare = do("git config --bool --get core.bare") + if bare.lower() == "true": + keyfile = "HEAD" + else: + keyfile = ".git/HEAD" + here = os.getcwd() + while True: + if os.path.exists(os.path.join(here, keyfile)): + self.project = os.path.basename(here) + if self.project.endswith('.git'): + self.project = self.project[0:-4] + break + elif here == '/': + sys.stderr.write("irkerhook.py: no git repo below root!\n") + sys.exit(1) + here = os.path.dirname(here) + # Get overrides + self.do_overrides() + # pylint: disable=no-self-use + def head(self): + "Return a symbolic reference to the tip commit of the current branch." + return "HEAD" + def commit_factory(self, commit_id): + "Make a Commit object holding data for a specified commit ID." + commit = Commit(self, commit_id) + commit.branch = re.sub(r"^refs/[^/]*/", "", self.refname) + # Compute a description for the revision + if self.revformat == 'raw': + commit.rev = commit.commit + elif self.revformat == 'short': + commit.rev = '' + else: # self.revformat == 'describe' + commit.rev = do("git describe %s 2>/dev/null" % shellquote(commit.commit)) + if not commit.rev: + # Query git for the abbreviated hash + commit.rev = do("git log -1 '--pretty=format:%h' " + shellquote(commit.commit)) + if self.urlprefix in ('gitweb', 'cgit'): + # Also truncate the commit used for the announced urls + commit.commit = commit.rev + # Extract the meta-information for the commit + commit.files = do("git diff-tree -r --name-only " + shellquote(commit.commit)) + commit.files = " ".join(commit.files.strip().split("\n")[1:]) + # Design choice: for git we ship only the first message line, which is + # conventionally supposed to be a summary of the commit. Under + # other VCSes a different choice may be appropriate. + commit.author_name, commit.mail, commit.logmsg = \ + do("git log -1 '--pretty=format:%an%n%ae%n%s' " + shellquote(commit.commit)).split("\n") + # This discards the part of the author's address after @. + # Might be be nice to ship the full email address, if not + # for spammers' address harvesters - getting this wrong + # would make the freenode #commits channel into harvester heaven. + commit.author = commit.mail.split("@")[0] + commit.author_date, commit.commit_date = \ + do("git log -1 '--pretty=format:%ai|%ci' " + shellquote(commit.commit)).split("|") + return commit + +class SvnExtractor(GenericExtractor): + "Metadata extraction for the svn version control system." + @staticmethod + def is_repository(dirname): + return has(dirname, ["format", "hooks", "locks"]) + def __init__(self, arguments): + GenericExtractor.__init__(self, arguments) + # Some things we need to have before metadata queries will work + self.repository = '.' + for tok in arguments: + if tok.startswith("--repository="): + self.repository = tok[13:] + self.project = os.path.basename(self.repository) + self.template = '%(bold)s%(project)s%(reset)s: %(green)s%(author)s%(reset)s %(repo)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' + self.urlprefix = "viewcvs" + self.id = None + self.load_preferences(os.path.join(self.repository, "irker.conf")) + self.do_overrides() + # pylint: disable=no-self-use + def head(self): + sys.stderr.write("irker: under svn, hook requires a commit argument.\n") + raise SystemExit(1) + def commit_factory(self, commit_id): + self.id = commit_id + commit = Commit(self, commit_id) + commit.branch = "" + commit.rev = "r%s" % self.id + commit.author = self.svnlook("author") + commit.commit_date = self.svnlook("date").partition('(')[0] + commit.files = self.svnlook("dirs-changed").strip().replace("\n", " ") + commit.logmsg = self.svnlook("log").strip() + return commit + def svnlook(self, info): + return do("svnlook %s %s --revision %s" % (shellquote(info), shellquote(self.repository), shellquote(self.id))) + +class HgExtractor(GenericExtractor): + "Metadata extraction for the Mercurial version control system." + @staticmethod + def is_repository(directory): + return has(directory, [".hg"]) + def __init__(self, arguments): + from mercurial.encoding import unifromlocal, unitolocal + # This fiddling with arguments is necessary since the Mercurial hook can + # be run in two different ways: either directly via Python (in which + # case hg should be pointed to the hg_hook function below) or as a + # script (in which case the normal __main__ block at the end of this + # file is exercised). In the first case, we already get repository and + # ui objects from Mercurial, in the second case, we have to create them + # from the root path. + self.repository = None + if arguments and isinstance(arguments[0], tuple): + # Called from hg_hook function + ui, self.repository = arguments[0] + arguments = [] # Should not be processed further by do_overrides + else: + # Called from command line: create repo/ui objects + from mercurial import hg, ui as uimod + + repopath = b'.' + for tok in arguments: + if tok.startswith('--repository='): + repopath = unitolocal(tok[13:]) + ui = uimod.ui() + ui.readconfig(os.path.join(repopath, b'.hg', b'hgrc'), repopath) + self.repository = hg.repository(ui, repopath) + + GenericExtractor.__init__(self, arguments) + # Extract global values from the hg configuration file(s) + self.project = unifromlocal(ui.config(b'irker', b'project') or b'') + self.repo = unifromlocal(ui.config(b'irker', b'repo') or b'') + self.server = unifromlocal(ui.config(b'irker', b'server') or b'') + self.channels = unifromlocal(ui.config(b'irker', b'channels') or b'') + self.email = unifromlocal(ui.config(b'irker', b'email') or b'') + self.tcp = str(ui.configbool(b'irker', b'tcp')) # converted to bool again in do_overrides + self.template = unifromlocal(ui.config(b'irker', b'template') or b'') + if not self.template: + self.template = '%(bold)s%(project)s:%(reset)s %(green)s%(author)s%(reset)s %(repo)s:%(yellow)s%(branch)s%(reset)s * %(bold)s%(rev)s%(reset)s / %(bold)s%(files)s%(reset)s: %(logmsg)s %(brown)s%(url)s%(reset)s' + self.tinyifier = unifromlocal(ui.config( + b'irker', b'tinyifier', + default=default_tinyifier.encode('utf-8'))) + self.color = unifromlocal(ui.config(b'irker', b'color') or b'') + self.urlprefix = unifromlocal(ui.config( + b'irker', b'urlprefix', default=ui.config(b'web', b'baseurl'))) + if self.urlprefix: + # self.commit is appended to this by do_overrides + self.urlprefix = ( + self.urlprefix.rstrip('/') + + '/%s/rev/' % unifromlocal(self.repository.root).rstrip('/')) + self.cialike = unifromlocal(ui.config(b'irker', b'cialike') or b'') + self.filtercmd = unifromlocal(ui.config(b'irker', b'filtercmd') or b'') + if not self.project: + self.project = os.path.basename(unifromlocal(self.repository.root).rstrip('/')) + self.do_overrides() + # pylint: disable=no-self-use + def head(self): + "Return a symbolic reference to the tip commit of the current branch." + return "-1" + def commit_factory(self, commit_id): + "Make a Commit object holding data for a specified commit ID." + from mercurial.node import short + from mercurial.templatefilters import person + from mercurial.encoding import unifromlocal, unitolocal + if isinstance(commit_id, str) and not isinstance(commit_id, bytes): + commit_id = unitolocal(commit_id) + ctx = self.repository[commit_id] + commit = Commit(self, unifromlocal(short(ctx.hex()))) + # Extract commit-specific values from a "context" object + commit.rev = '%d:%s' % (ctx.rev(), commit.commit) + commit.branch = unifromlocal(ctx.branch()) + commit.author = unifromlocal(person(ctx.user())) + commit.author_date = \ + datetime.datetime.fromtimestamp(ctx.date()[0]).strftime('%Y-%m-%d %H:%M:%S') + commit.logmsg = unifromlocal(ctx.description()) + # Extract changed files from status against first parent + st = self.repository.status(ctx.p1().node(), ctx.node()) + commit.files = unifromlocal(b' '.join(st.modified + st.added + st.removed)) + return commit + +def hg_hook(ui, repo, **kwds): + # To be called from a Mercurial "commit", "incoming" or "changegroup" hook. + # Example configuration: + # [hooks] + # incoming.irker = python:/path/to/irkerhook.py:hg_hook + extractor = HgExtractor([(ui, repo)]) + start = repo[kwds['node']].rev() + end = len(repo) + if start != end: + # changegroup with multiple commits, so we generate a notification + # for each one + for rev in range(start, end): + ship(extractor, rev, False) + else: + ship(extractor, kwds['node'], False) + +# The files we use to identify a Subversion repo might occur as content +# in a git or hg repo, but the special subdirectories for those are more +# reliable indicators. So test for Subversion last. +extractors = [GitExtractor, HgExtractor, SvnExtractor] + +# VCS-dependent code ends here + +def convert_message(message): + """Convert the message to bytes to send to the socket""" + return message.encode(locale.getlocale()[1] or 'UTF-8') + b'\n' + +def ship(extractor, commit, debug): + "Ship a notification for the specified commit." + metadata = extractor.commit_factory(commit) + + # This is where we apply filtering + if extractor.filtercmd: + cmd = '%s %s' % (shellquote(extractor.filtercmd), + shellquote(json.dumps(metadata.__dict__))) + data = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).stdout.read() + try: + metadata.__dict__.update(json.loads(data)) + except ValueError: + sys.stderr.write("irkerhook.py: could not decode JSON: %s\n" % data) + raise SystemExit(1) + + # Rewrite the file list if too long. The objective here is only + # to be easier on the eyes. + if extractor.cialike \ + and extractor.cialike.lower() != "none" \ + and len(metadata.files) > int(extractor.cialike): + files = metadata.files.split() + dirs = {d.rpartition('/')[0] for d in files} + if len(dirs) == 1: + metadata.files = "(%s files)" % (len(files),) + else: + metadata.files = "(%s files in %s dirs)" % (len(files), len(dirs)) + # Message reduction. The assumption here is that IRC can't handle + # lines more than 510 characters long. If we exceed that length, we + # try knocking out the file list, on the theory that for notification + # purposes the commit text is more important. If it's still too long + # there's nothing much can be done other than ship it expecting the IRC + # server to truncate. + privmsg = string_type(metadata) + if len(privmsg) > 510: + metadata.files = "" + privmsg = string_type(metadata) + + # Anti-spamming guard. It's deliberate that we get maxchannels not from + # the user-filtered metadata but from the extractor data - means repo + # administrators can lock in that setting. + channels = metadata.channels.split(",") + if extractor.maxchannels != 0: + channels = channels[:extractor.maxchannels] + + # Ready to ship. + message = json.dumps({"to": channels, "privmsg": privmsg}) + if debug: + print(message) + elif channels: + try: + if extractor.email: + # We can't really figure out what our SF username is without + # exploring our environment. The mail pipeline doesn't care + # about who sent the mail, other than being from sourceforge. + # A better way might be to simply call mail(1) + sender = "irker@users.sourceforge.net" + msg = """From: %(sender)s +Subject: irker json + +%(message)s""" % {"sender":sender, "message":message} + import smtplib + smtp = smtplib.SMTP() + smtp.connect() + smtp.sendmail(sender, extractor.email, msg) + smtp.quit() + elif extractor.tcp: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((extractor.server or default_server, IRKER_PORT)) + sock.sendall(convert_message(message)) + finally: + sock.close() + else: + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + sock.sendto(convert_message(message), (extractor.server or default_server, IRKER_PORT)) + finally: + sock.close() + except socket.error as e: + sys.stderr.write("%s\n" % e) + +if __name__ == "__main__": + notify = True + repository = os.getcwd() + commits = [] + for arg in sys.argv[1:]: + if arg == '-n': + notify = False + elif arg == '-V': + print("irkerhook.py: version", version) + sys.exit(0) + elif arg.startswith("--repository="): + repository = arg[13:] + elif not arg.startswith("--"): + commits.append(arg) + + # Figure out which extractor we should be using + for candidate in extractors: + if candidate.is_repository(repository): + cls = candidate + break + else: + sys.stderr.write("irkerhook: cannot identify a repository type.\n") + raise SystemExit(1) + extractor = cls(sys.argv[1:]) + + # And apply it. + if not commits: + commits = [extractor.head()] + for commit in commits: + ship(extractor, commit, not notify) + +# The following sets edit modes for GNU EMACS +# Local Variables: +# mode:python +# End: diff --git a/irkerhook.xml b/irkerhook.xml new file mode 100644 index 0000000..54b7425 --- /dev/null +++ b/irkerhook.xml @@ -0,0 +1,417 @@ +<!DOCTYPE refentry PUBLIC + "-//OASIS//DTD DocBook XML V4.1.2//EN" + "docbook/docbookx.dtd"> +<refentry id='irkerhook.1'> +<refmeta> +<refentrytitle>irkerhook</refentrytitle> +<manvolnum>1</manvolnum> +<refmiscinfo class='date'>Aug 27 2012</refmiscinfo> +<refmiscinfo class='source'>irker</refmiscinfo> +<refmiscinfo class='product'>irker</refmiscinfo> +<refmiscinfo class='manual'>Commands</refmiscinfo> +</refmeta> +<refnamediv id='name'> +<refname>irkerhook</refname> +<refpurpose>repository hook script issuing irker notifications</refpurpose> +</refnamediv> +<refsynopsisdiv id='synopsis'> + +<cmdsynopsis> + <command>irkerhook.py</command> + <arg>-n</arg> + <arg>-V</arg> + <group><arg rep='repeat'><replaceable>--variable=value</replaceable></arg></group> + <group><arg rep='repeat'><replaceable>commit-id</replaceable></arg></group> +</cmdsynopsis> +</refsynopsisdiv> + +<refsect1 id='description'><title>DESCRIPTION</title> + +<para><application>irkerhook.py</application> is a Python script intended +to be called from the post-commit hook of a version-control repository. Its +job is to collect information about the commit that fired the hook (and +possibly preferences set by the repository owner) and ship that information +to an instance of <application>irkerd</application> for forwarding to +various announcement channels.</para> + +<para>The proper invocation and behavior of +<application>irkerhook.py</application> varies depending on which +VCS (version-control system) is calling it. There are four different places +from which it may extract information:</para> + +<orderedlist> +<listitem><para>Calls to VCS utilities.</para></listitem> +<listitem><para>In VCSes like git that support user-settable configuration +variables, variables with the prefix "irker.".</para></listitem> +<listitem><para>In other VCSes, a configuration file, "irker.conf", in the +repository's internals directory.</para></listitem> +<listitem><para>Command-line arguments of the form +--variable=value.</para></listitem> +</orderedlist> + +<para>The following variables are general to all supported VCSes:</para> + +<variablelist> +<varlistentry> +<term>project</term> +<listitem> +<para>The name of the project. Should be a relatively short identifier; +will usually appear at the very beginning of a notification.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>repo</term> +<listitem> +<para>The name of the repository top-level directory. If not +specified, defaults to a lowercased copy of the project name.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>channels</term> +<listitem> +<para>An IRC channel URL, or comma-separated list of same, identifying +channels to which notifications are to be sent. If not specified, the +default is the freenode #commits channel.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>server</term> +<listitem> +<para>The host on which the notification-relaying irker daemon is expected +to reside. Defaults to "localhost".</para> +</listitem> +</varlistentry> +<varlistentry> +<term>email</term> +<listitem> +<para>If set, use email for communication rather than TCP or UDP. +The value is used as the target mail address.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>tcp</term> +<listitem> +<para>If "true", use TCP for communication; if "false", use UDP. +Defaults to "false".</para> +</listitem> +</varlistentry> +<varlistentry> +<term>urlprefix</term> +<listitem> +<para>Changeset URL prefix for your repo. When the commit ID is appended +to this, it should point at a CGI that will display the commit +through cgit, gitweb or something similar. The defaults will probably +work if you have a typical gitweb/cgit setup.</para> + +<para>If the value of this variable is "None", generation of the URL +field in commit notifications will be suppressed. Other magic values +are "cgit", "gitweb", and "viewcvs", which expand to URL templates +that will usually work with those systems.</para> + +<para>The magic cookies "%(host)s" and %(repo)s" may occur in this +URL. The former is expanded to the FQDN of the host on which +<application>irkerhook.py</application> is running; the latter is +expanded to the value of the "repo" variable.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>tinyifier</term> +<listitem> +<para>URL template pointing to a service for compressing URLs so they +will take up less space in the notification line. If the value of this +variable is "None", no compression will be attempted.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>color</term> +<listitem> +<para>If "mIRC", highlight notification fields with mIRC color codes. +If "ANSI", highlight notification fields with ANSI color escape +sequences. Defaults to "none" (no colors). ANSI codes are supported +in Chatzilla, irssi, ircle, and BitchX; mIRC codes only are recognized +in mIRC, XChat, KVirc, Konversation, or weechat.</para> + +<para>Note: if you turn this on and notifications stop appearing on +your channel, you need to turn off IRC's color filter on that channel. +To do this you will need op privileges; issue the command "/mode +<channel> -c" with <channel> replaced by your channel name. +You may need to first issue the command "/msg chanserv set +<channel> MLOCK +nt-slk".</para> +</listitem> +</varlistentry> +<varlistentry> +<term>maxchannels</term> +<listitem> +<para>Interpreted as an integer. If not zero, limits the number of +channels the hook will interpret from the "channels" variable.</para> + +<para>This variable cannot be set through VCS configuration variables +or <filename>irker.conf</filename>; it can only be set with a command-line +argument. Thus, on a forge site in which repository owners are not +allowed to modify their post-commit scripts, a site administrator can set it +to prevent shotgun spamming by malicious project owners. Setting it to +a value less than 2, however, would probably be unwise.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>cialike</term> +<listitem> +<para>If not empty and not "None" (the default), this emulates the old +CIA behavior of dropping long lists of files in favor of a summary of +the form (N files in M directories). The value must be numeric giving +a threshold value for the length of the file list in +characters.</para> +</listitem> +</varlistentry> +<varlistentry> +<term>template</term> +<listitem> +<para>Set the template used to generate notification messages. Only +available in VCses with config variables; presently this means git or +hg. All basic commit and extractor fields, including color switches, +are available as %() substitutions.</para> +</listitem> +</varlistentry> +</variablelist> + +<para>irkerhook.py will run under both python 2 and python 3, but it does +not support mercurial repositories under python 3 yet.</para> + +<refsect2 id="git"><title>git</title> + +<para>Under git, the normal way to invoke this hook (from within the +update hook) passes it a refname followed by a list of commits. Because +<command>git rev-list</command> normally lists from most recent to oldest, +you'll want to use --reverse to make notifications be omitted in chronological +order. In a normal update script, the invocation should look like this</para> + +<programlisting> +refname=$1 +old=$2 +new=$3 +irkerhook.py --refname=${refname} $(git rev-list --reverse ${old}..${new}) +</programlisting> + +<para>except that you'll need an absolute path for irkerhook.py.</para> + +<para>For testing purposes and backward compatibility, if you invoke +<application>irkerhook.py</application> with no arguments (as in a +post-commit hook) it will behave as though it had been called like +this:</para> + +<programlisting> +irkerhook.py --refname=refs/heads/master HEAD +</programlisting> + +<para>However, this will not give the right result when you push to +a non-default branch of a bare repo.</para> + +<para>A typical way to install this hook is actually in the +<filename>post-receive</filename> hook, because it gets all the +necessary details and will not abort the push on failure. Use the +following script:</para> + +<programlisting> +#!/bin/sh + +echo "sending IRC notification" +while read old new refname; do + irkerhook --refname=${refname} $(git rev-list --reverse ${old}..${new}) +done +</programlisting> + +<para>Preferences may be set in the repo <filename>config</filename> +file in an [irker] section. Here is an example of what that can look +like:</para> + +<programlisting> +[irker] + project = gpsd + color = ANSI + channels = irc://chat.freenode.net/gpsd,irc://chat.freenode.net/commits +</programlisting> + +<para> You should not set the "repository" variable (an equivalent +will be computed). No attempt is made to interpret an +<filename>irker.conf</filename> file.</para> + +<para>The default value of the "project" variable is the basename +of the repository directory. The default value of the "urlprefix" +variable is "cgit".</para> + +<para>There is one git-specific variable, "revformat", controlling +the format of the commit identifier in a notification. It +may have the following values:</para> + +<variablelist> +<varlistentry> +<term>raw</term> +<listitem><para>full hex ID of commit</para></listitem> +</varlistentry> +<varlistentry> +<term>short</term> +<listitem><para>first 12 chars of hex ID</para></listitem> +</varlistentry> +<varlistentry> +<term>describe</term> +<listitem><para>describe relative to last tag, falling back to short</para></listitem> +</varlistentry> +</variablelist> + +<para>The default is 'describe'.</para> +</refsect2> + +<refsect2 id="svn"><title>Subversion</title> + +<para>Under Subversion, <application>irkerhook.py</application> +accepts a --repository option with value (the absolute pathname of the +Subversion repository) and a commit argument (the numeric revision level of +the commit). The defaults are the current working directory and HEAD, +respectively.</para> + +<para>Note, however, that you <emphasis>cannot</emphasis> default the +repository argument inside a Subversion post-commit hook; this is +because of a limitation of Subversion, which is that getting the +current directory is not reliable inside these hooks. Instead, the +values must be the two arguments that Subversion passes to that hook +as arguments. Thus, a typical invocation in the post-commit script +will look like this:</para> + +<programlisting> +REPO=$1 +REV=$2 +irkerhook.py --repository=$REPO $REV +</programlisting> + +<para>Other --variable=value settings may also be +given on the command line, and will override any settings in an +<filename>irker.conf</filename> file.</para> + +<para>The default for the project variable is the basename of the +repository. The default value of the "urlprefix" variable is +"viewcvs".</para> + +<para>If an <filename>irker.conf</filename> file exists in the repository +root directory (not the checkout directory but where internals such as the +"format" file live) the hook will interpret variable settings from it. Here +is an example of what such a file might look like:</para> + +<programlisting> +# irkerhook variable settings for the irker project +project = irker +channels = irc://chat.freenode/irker,irc://chat.freenode/commits +tcp = false +</programlisting> + +<para>Don't set the "repository" or "commit" variables in this file; +that would have unhappy results.</para> + +<para>There are no Subversion-specific variables.</para> + +</refsect2> + +<refsect2 id="hg"><title>Mercurial</title> + +<para>Under Mercurial, <application>irkerhook.py</application> can be +invoked in two ways: either as a Python hook (preferred) or as a +script.</para> + +<para>To call it as a Python hook, add the collowing to the +"commit" or "incoming" hook declaration in your Mercurial +repository:</para> + +<programlisting> +[hooks] + incoming.irker = python:/path/to/irkerhook.py:hg_hook +</programlisting> + +<para>When called as a script, the hook accepts a --repository option +with value (the absolute pathname of the Mercurial repository) and can +take a commit argument (the Mercurial hash ID of the commit or a +reference to it). The default for the repository argument is the +current directory. The default commit argument is '-1', designating +the current tip commit.</para> + +<para>As for git, in both cases all variables may be set in the repo +<filename>hgrc</filename> file in an [irker] section. Command-line +variable=value arguments are accepted but not required for script +invocation. No attempt is made to interpret an +<filename>irker.conf</filename> file.</para> + +<para>The default value of the "project" variable is the basename +of the repository directory. The default value of the "urlprefix" +variable is the value of the "web.baseurl" config value, if it +exists.</para> + +</refsect2> + +<refsect2 id="filter"><title>Filtering</title> + +<para>It is possible to filter commits before sending them to +<application>irkerd</application>.</para> + +<para>You have to specify the <option>filtercmd</option> option, which +will be the command <application>irkerhook.py</application> will +run. This command should accept one arguments, which is a JSON +representation of commit and extractor metadata (including the +channels variable). The command should emit to standard output a JSON +representation of (possibly altered) metadata.</para> + +<para>Below is an example filter:</para> + +<programlisting> +#!/usr/bin/env python3 +# This is a trivial example of a metadata filter. +# All it does is change the name of the commit's author. +# +import sys, json +metadata = json.loads(sys.argv[1]) + +metadata['author'] = "The Great and Powerful Oz" + +print json.dumps(metadata) +# end +</programlisting> + +<para>Standard error is available to the hook for progress and +error messages.</para> + +</refsect2> + +</refsect1> + +<refsect1 id='options'><title>OPTIONS</title> + +<para><application>irkerhook.py</application> takes the following +options:</para> + +<variablelist> +<varlistentry> +<term>-n</term> +<listitem><para>Suppress transmission to a daemon. Instead, dump the +generated JSON request to standard output. Useful for +debugging.</para></listitem> +</varlistentry> +<varlistentry> +<term>-V</term> +<listitem><para>Write the program version to stdout and +terminate.</para></listitem> +</varlistentry> +</variablelist> + +</refsect1> + +<refsect1 id='see_also'><title>SEE ALSO</title> +<para> +<citerefentry><refentrytitle>irkerd</refentrytitle><manvolnum>8</manvolnum></citerefentry>, +</para> +</refsect1> + +<refsect1 id='authors'><title>AUTHOR</title> +<para>Eric S. Raymond <email>esr@snark.thyrsus.com</email>. See the +project page at <ulink +url='http://www.catb.org/~esr/irker'>http://www.catb.org/~esr/irker</ulink> +for updates and other resources.</para> +</refsect1> +</refentry> + |