Diffstat
 -rw-r--r--  doc/schemas/logstash/logstash-radius.conf  | 256
 1 file changed, 256 insertions(+), 0 deletions(-)
diff --git a/doc/schemas/logstash/logstash-radius.conf b/doc/schemas/logstash/logstash-radius.conf
new file mode 100644
index 0000000..f473179
--- /dev/null
+++ b/doc/schemas/logstash/logstash-radius.conf
@@ -0,0 +1,256 @@
# logstash configuration to process RADIUS detail files
#
# Matthew Newton
# April 2019
#
# This config has been tested with logstash version 6.7.0
#
# RADIUS "detail" files are textual representations of RADIUS
# packets, and are written to disk by e.g. FreeRADIUS. They look
# something like the following, with the timestamp on the first
# line and all attributes/values tab-indented.
#
#	Tue Mar 10 15:32:24 2015
#		Packet-Type = Access-Request
#		User-Name = "test@example.com"
#		Calling-Station-Id = "01-02-03-04-05-06"
#		Called-Station-Id = "aa-bb-cc-dd-ee-ff:myssid"
#		NAS-Port = 10
#		NAS-IP-Address = 10.9.0.4
#		NAS-Identifier = "Wireless-Controller-1"
#		Service-Type = Framed-User
#		NAS-Port-Type = Wireless-802.11
#



# Example input - read data from a file. For example, to read in a
# detail file with this input you could use:
#
#	# /usr/share/logstash/bin/logstash --path.settings=/etc/logstash -f logstash-radius.conf --log.level=debug
#

input {
	file {
		path => "/var/log/radius/radacct/*/detail-*"
		exclude => "*.gz"

		# Note when testing that logstash will remember where
		# it got to and continue from there.
		start_position => "beginning"

		# Set the type, for use in the filter and output
		# sections below.
		type => radiusdetail

		# It is preferable to use a log feeder that can join
		# multiple lines together, rather than using multiline
		# here. For an example, see the log-courier
		# configuration in this directory.

		# If you didn't read the above, go back and read it again.

		# If that is not possible you may be able to use the
		# following section. Note that if you are using the
		# "stdin" input, the file is chunked into 16k blobs,
		# so every 16k a detail record is likely to be chopped
		# in half. If you are using the "file" input (as in this
		# example), the blank lines between records are not
		# passed through, so the regex here has to be aware of
		# that. Basically, do multiline as early as possible in
		# your log feeder client, not here, and you will avoid
		# most of the issues that are likely to come up.

		codec => multiline {
			pattern => "^\t"
			negate => false
			what => "previous"
		}

		# If you really want to use the "stdin" input, this
		# will work better, but be aware of the comments
		# above.

		#codec => multiline {
		#	pattern => "^[A-Z\t]"
		#	negate => false
		#	what => "next"
		#}
	}
}

# For production use you will likely need something more reliable.
# There are many input methods; this example uses log-courier
# (which supports client-side multiline processing and does not
# lose log events if logstash is restarted). You could also
# investigate e.g. filebeat from Elastic.

# input {
#	courier {
#		port => 5140
#		transport => "tcp"
#
#		# Don't set the type here, as it's set in the
#		# log-courier config instead.
#		#type => radiusdetail
#	}
# }



# Filter stage. Here we take the raw logs and process them into
# something structured ready to index. Each attribute is stored as
# a separate field in the output document.

filter {

	if [type] == "radiusdetail" {

		# Pull off the timestamp at the start of the
		# detail record. Note there may be additional data
		# after it that has been added by the local admin,
		# so stop at a newline OR a tab.
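		# As a (hypothetical) example, a first line to
		# which the local admin has appended extra data
		# might look like:
		#
		#	Tue Mar 10 15:32:24 2015	site=campus-west
		#
		# and only "Tue Mar 10 15:32:24 2015" should be
		# captured as the timestamp.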
		grok {
			match => [ "message", "^(?<timestamp>[^\n\t]+)[\n\t]" ]
		}

		# Create the @timestamp field.

		date {
			match => [ "timestamp", "EEE MMM dd HH:mm:ss yyyy",
				"EEE MMM d HH:mm:ss yyyy" ]
		}

		# Split the attributes and values into fields.
		# This is the bulk of the processing that adds all
		# of the RADIUS attributes as elasticsearch fields.

		# Note that issue
		# https://github.com/logstash-plugins/logstash-filter-kv/issues/10
		# currently means that all spaces will be stripped
		# from all fields. If this is a problem, adjust the
		# trim_key/trim_value settings.

		kv {
			field_split => "\n"
			source => "message"
			trim_value => "\" "
			trim_key => "\t "
		}

		# Now we try to add some useful additional
		# information. If certain fields can be broken
		# down into components then do that here and add
		# the data as sub-fields. For example, on some
		# wireless systems Called-Station-Id can be broken
		# down into Called-Station-Id_mac and
		# Called-Station-Id_ssid, or with a VPN into _ip
		# and _port.

		# Multiple calls to grok are used because grok can
		# otherwise stop processing once it has matched one
		# field, but we may want to pull several values,
		# e.g. both an IP and a port, out of the same field
		# with different regexes.

		# Pull out some IP addresses as field_ip:

		grok {
			break_on_match => false
			tag_on_failure => []
			match => [
				"Framed-IP-Address", "^(?<Framed-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
				"NAS-IP-Address", "^(?<NAS-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
				"Calling-Station-Id", "^(?<Calling-Station-Id_ip>\d+\.\d+\.\d+\.\d+)",
				"Called-Station-Id", "^(?<Called-Station-Id_ip>\d+\.\d+\.\d+\.\d+)"
			]
		}

		# Split up User-Name and Operator-Name, and pull
		# out IP ports if they are present:

		grok {
			break_on_match => false
			tag_on_failure => []
			match => [
				"User-Name", "^(?<User-Name_username>[^@]+)?(?:@(?<User-Name_realm>[^@]+))$",
				"Operator-Name", "^(?<Operator-Name_id>.)(?<Operator-Name_value>.+)$",

				"Calling-Station-Id", "\[(?<Calling-Station-Id_port>\d+)\]$",
				"Called-Station-Id", "\[(?<Called-Station-Id_port>\d+)\]$"
			]
		}

		# Extract MAC addresses (and SSIDs where present).
		# The MAC address matching here is lazy, but should
		# be good enough.

		grok {
			break_on_match => false
			tag_on_failure => []
			match => [
				"Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9:-]{17})$",
				"Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9\.]{14})$",
				"Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9]{12})$",

				"Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id_ssid>.*))?$",
				"Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id_ssid>.*))?$",
				"Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id_ssid>.*))?$"
			]
		}

		# With the optional sanitize_mac plugin it is
		# possible to normalise all MAC addresses to the
		# same format, which makes searching and
		# aggregating on them much easier.
		#
		# https://github.com/mcnewton/logstash-filter-sanitize_mac

		# sanitize_mac {
		#	match => {
		#		"Called-Station-Id_mac" => "Called-Station-Id_mac"
		#		"Calling-Station-Id_mac" => "Calling-Station-Id_mac"
		#	}
		#	separator => "-"
		#	fixcase => "lower"
		# }


		# The Gigawords attributes present an issue because
		# each 64-bit counter is split across two 32-bit
		# attributes. Combine them back into a single
		# attribute so that the full value is available
		# to use.
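		# For example, Acct-Input-Gigawords = 1 with
		# Acct-Input-Octets = 500 means a total of
		# (1 * 2^32) + 500 = 4294967796 octets were
		# received.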
		if ([Acct-Input-Octets]) {
			ruby {
				code => "event.set('Acct-Input-Octets_long', event.get('Acct-Input-Octets').to_i +
					(event.get('Acct-Input-Gigawords') ? (event.get('Acct-Input-Gigawords').to_i * (2**32)) : 0))"
			}
		}

		if ([Acct-Output-Octets]) {
			ruby {
				code => "event.set('Acct-Output-Octets_long', event.get('Acct-Output-Octets').to_i +
					(event.get('Acct-Output-Gigawords') ? (event.get('Acct-Output-Gigawords').to_i * (2**32)) : 0))"
			}
		}


		# Remove the original "message" field, as all of
		# its data is now stored in individual fields.

		mutate {
			remove_field => ["message"]
		}

	}
}



# Output events of type "radiusdetail" to the local elasticsearch
# cluster, using a separate "radius-DATE" index for each day.

output {
	if [type] == "radiusdetail" {
		elasticsearch {
			index => "radius-%{+YYYY.MM.dd}"
		}
	}
}
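

# When testing changes to the filters above, it can be useful to
# see the parsed events on the console instead of (or as well as)
# sending them to elasticsearch. A minimal sketch using the
# standard "stdout" output plugin with the "rubydebug" codec:

# output {
#	if [type] == "radiusdetail" {
#		stdout {
#			codec => rubydebug
#		}
#	}
# }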