1 files changed, 256 insertions, 0 deletions
diff --git a/doc/schemas/logstash/logstash-radius.conf b/doc/schemas/logstash/logstash-radius.conf
new file mode 100644
index 0000000..f473179
--- /dev/null
+++ b/doc/schemas/logstash/logstash-radius.conf
@@ -0,0 +1,256 @@
+# logstash configuration to process RADIUS detail files
+#
+# Matthew Newton
+# April 2019
+#
+# This config has been tested with logstash version 6.7.0
+# 
+# RADIUS "detail" files are textual representations of the RADIUS
+# packets, and are written to disk by e.g. FreeRADIUS. They look
+# something like the following, with the timestamp on the first
+# line then all attributes/values tab-indented.
+#
+#	Tue Mar 10 15:32:24 2015
+#		Packet-Type = Access-Request
+#		User-Name = "test@example.com"
+#		Calling-Station-Id = "01-02-03-04-05-06"
+#		Called-Station-Id = "aa-bb-cc-dd-ee-ff:myssid"
+#		NAS-Port = 10
+#		NAS-IP-Address = 10.9.0.4
+#		NAS-Identifier = "Wireless-Controller-1"
+#		Service-Type = Framed-User
+#		NAS-Port-Type = Wireless-802.11
+#
+
+
+
+# Example input - read data from a file. For example, to read in a
+# detail file with this input you could use:
+#
+#  # /usr/share/logstash/bin/logstash --path.settings=/etc/logstash -f logstash-radius.conf --log.level=debug
+#
+
+input {
+	file {
+		# Read every detail file below the radacct directory,
+		# skipping rotated files that have been compressed.
+		path => "/var/log/radius/radacct/*/detail-*"
+		exclude => "*.gz"
+
+		# Note when testing that logstash will remember where
+		# it got to and continue from there.
+		start_position => "beginning"
+
+		# Set the type, for below. The filter and output
+		# stages compare against this exact string.
+		type => "radiusdetail"
+
+		# It is preferable to use a log feeder that can join
+		# multiple lines together, rather than using multiline
+		# here. For an example, see the log-courier
+		# configuration in this directory.
+
+		# If you didn't read the above, go back and read it again.
+
+		# If that is not possible you may be able to use the
+		# following section. Note that if you are using the
+		# "stdin" input, the file is chunked into 16k blobs,
+		# so every 16k a detail record is likely to be chopped
+		# in half. If you are using the "file" input (as in this
+		# example), the blank lines between records are not
+		# passed through so the regex here has to be aware of
+		# that. Basically, do multiline as early as possible
+		# in your log feeder client not here and you'll avoid
+		# most issues that are likely to come up.
+
+		codec => multiline {
+			# Any line starting with a tab is an attribute
+			# line, so append it to the record started by
+			# the preceding timestamp line.
+			pattern => "^\t"
+			negate => false
+			what => "previous"
+
+			# A record is normally only emitted when the
+			# first line of the NEXT record is seen, so the
+			# last record in a file would otherwise sit in
+			# the buffer until more data is written. Flush
+			# it once no new lines have arrived for this
+			# many seconds.
+			auto_flush_interval => 5
+		}
+
+		# If you really want to use the "stdin" input, this
+		# will work better, but be aware of the comments
+		# above.
+
+		#codec => multiline {
+		#	pattern => "^[A-Z\t]"
+		#	negate => false
+		#	what => "next"
+		#}
+	}
+}
+
+# Moving into production will likely need something more reliable.
+# There are many input methods; the example here uses log-courier
+# (which supports client-side multiline processing and does not
+# lose log events if logstash is restarted). You could also
+# investigate e.g. filebeat from Elastic.
+
+# input {
+# 	courier {
+# 		port => 5140
+# 		transport => "tcp"
+#
+#		# Don't set the type here, as it's set in the
+#		# log-courier config instead.
+#		#type => radiusdetail
+# 	}
+# }
+
+
+
+# Filter stage. Here we take the raw logs and process them into
+# something structured ready to index. Each attribute is stored as
+# a separate field in the output document.
+
+filter {
+
+	# Everything below only applies to events tagged with the
+	# "radiusdetail" type by the input stage. The stages run in
+	# order: timestamp extraction, date parsing, attribute
+	# splitting, derived sub-fields, 64-bit octet counters, and
+	# finally removal of the raw message.
+
+	if [type] == "radiusdetail" {
+
+		# Pull off the timestamp at the start of the
+		# detail record. Note there may be additional data
+		# after it that has been added by the local admin,
+		# so stop at a newline OR a tab.
+		#
+		# The captured text is stored in the "timestamp"
+		# field, which the date filter below reads.
+
+		grok {
+			match => [ "message", "^(?<timestamp>[^\n\t]+)[\n\t]" ]
+		}
+
+		# Create the @timestamp field.
+		#
+		# Two patterns are tried: the first for a two-digit
+		# day of month, the second for a single-digit day
+		# that is preceded by an extra padding space.
+
+		date {
+			match => [ "timestamp", "EEE MMM dd HH:mm:ss yyyy",
+						"EEE MMM  d HH:mm:ss yyyy" ]
+		}
+
+		# Split the attributes and values into fields.
+		# This is the bulk of processing that adds all of
+		# the RADIUS attributes as elasticsearch fields.
+
+		# Note issue https://github.com/logstash-plugins/logstash-filter-kv/issues/10
+		# currently means that all spaces will be stripped
+		# from all fields. If this is a problem, adjust the
+		# trim setting.
+		#
+		# NOTE(review): the warning above appears to date from
+		# the older "trim" option. The kv plugin documentation
+		# describes trim_value/trim_key as stripping only
+		# leading and trailing characters - confirm against
+		# the installed plugin version.
+		#
+		# Each line of the message is one pair. Quotes and
+		# spaces are trimmed from values; tabs and spaces are
+		# trimmed from keys.
+
+		kv {
+			field_split => "\n"
+			source => "message"
+			trim_value => "\" "
+			trim_key => "\t "
+		}
+
+		# Now we try and add some useful additional
+		# information. If certain fields can be broken
+		# down into components then do that here and add
+		# the data as sub-fields. For example,
+		# Called-Station-Id might be able to be broken
+		# down to Called-Station-Id_mac and Called-Station-Id_ssid
+		# on some wireless systems, or to _ip and _port
+		# with a VPN.
+
+		# Multiple calls to grok otherwise it can stop
+		# processing once it has matched one field, but
+		# e.g. you want to pull both IP and port out of
+		# the same field in two different regexes.
+		#
+		# Each grok below sets tag_on_failure to an empty
+		# list, so a field that does not match simply
+		# produces no sub-field rather than a failure tag.
+
+		# Pull out some IP addresses as field_ip:
+		#
+		# Framed-IP-Address and NAS-IP-Address must be a
+		# dotted quad and nothing else. The two station ID
+		# patterns only require the value to START with a
+		# dotted quad, so trailing data is permitted.
+
+		grok {
+			break_on_match => false
+			tag_on_failure => []
+			match => [
+				"Framed-IP-Address", "^(?<Framed-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
+				"NAS-IP-Address", "^(?<NAS-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
+				"Calling-Station-Id", "^(?<Calling-Station-Id_ip>\d+\.\d+\.\d+\.\d+)",
+				"Called-Station-Id", "^(?<Called-Station-Id_ip>\d+\.\d+\.\d+\.\d+)"
+			]
+		}
+
+		# Split User-Name, Operator-Name, and pull out
+		# some IP ports if they are there:
+		#
+		# - User-Name only matches when it ends in a single
+		#   "@realm"; the part before the "@" is optional.
+		#   A User-Name with no "@" yields no sub-fields.
+		# - Operator-Name is split into its first character
+		#   (_id) and everything after it (_value).
+		# - A station ID ending in "[digits]" has those
+		#   digits stored as _port.
+
+		grok {
+			break_on_match => false
+			tag_on_failure => []
+			match => [
+				"User-Name", "^(?<User-Name_username>[^@]+)?(?:@(?<User-Name_realm>[^@]+))$",
+				"Operator-Name", "^(?<Operator-Name_id>.)(?<Operator-Name_value>.+)$",
+
+				"Calling-Station-Id", "\[(?<Calling-Station-Id_port>\d+)\]$",
+				"Called-Station-Id", "\[(?<Called-Station-Id_port>\d+)\]$"
+			]
+		}
+
+		# Extract MAC addresses (and SSIDs if there).
+		# MAC address matching here is lazy, but should be
+		# good enough.
+		#
+		# Three lengths are accepted: 17 characters of hex
+		# digits with ":" or "-", 14 characters of hex digits
+		# with ".", and 12 bare hex digits. For
+		# Called-Station-Id, anything after a following ":"
+		# is stored as the SSID.
+
+		grok {
+			break_on_match => false
+			tag_on_failure => []
+			match => [
+				"Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9:-]{17})$",
+				"Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9\.]{14})$",
+				"Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9]{12})$",
+
+				"Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id_ssid>.*))?$",
+				"Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id_ssid>.*))?$",
+				"Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id_ssid>.*))?$"
+			]
+		}
+
+		# With the optional sanitize_mac plugin, it's
+		# possible to make sure all MAC addresses look the
+		# same, which has obvious benefits.
+		#
+		# https://github.com/mcnewton/logstash-filter-sanitize_mac
+
+		# sanitize_mac {
+		# 	match => {
+		# 		"Called-Station-Id_mac" => "Called-Station-Id_mac"
+		# 		"Calling-Station-Id_mac" => "Calling-Station-Id_mac"
+		# 		}
+		# 	separator => "-"
+		# 	fixcase => "lower"
+		# }
+
+
+		# Gigawords presents an issue because the 64-bit
+		# value is split across two attributes. Combine
+		# them both back into a single attribute so that
+		# the full value is available to use.
+		#
+		# The combined value is Octets + Gigawords * 2**32,
+		# stored in a new "_long" field. A missing Gigawords
+		# attribute is treated as zero. Nothing is added when
+		# the Octets attribute itself is absent.
+
+		if ([Acct-Input-Octets]) {
+			ruby {
+				code => "event.set('Acct-Input-Octets_long', event.get('Acct-Input-Octets').to_i +
+						(event.get('Acct-Input-Gigawords') ? (event.get('Acct-Input-Gigawords').to_i * (2**32)) : 0))"
+			}
+		}
+
+		if ([Acct-Output-Octets]) {
+			ruby {
+				code => "event.set('Acct-Output-Octets_long', event.get('Acct-Output-Octets').to_i +
+					 	(event.get('Acct-Output-Gigawords') ? (event.get('Acct-Output-Gigawords').to_i * (2**32)) : 0))"
+			}
+		}
+
+
+		# Remove the original "message" field.
+		#
+		# This runs unconditionally, so the raw record text
+		# is dropped even if an earlier stage failed to
+		# parse it.
+
+		mutate {
+			remove_field => ["message"]
+		}
+
+	}
+}
+
+
+
+# Output data to the local elasticsearch cluster, writing to a
+# daily index named "radius-DATE" (for example radius-2019.04.30).
+# No document type is set in this section.
+
+output {
+	# Only ship events that were tagged by the RADIUS detail
+	# input; anything else passing through is left alone.
+	if ([type] == "radiusdetail") {
+		elasticsearch {
+			# No hosts are listed, so the plugin default
+			# is used. The index name carries a date
+			# suffix in YYYY.MM.dd form.
+			index => "radius-%{+YYYY.MM.dd}"
+		}
+	}
+}