summaryrefslogtreecommitdiffstats
path: root/utils/asn.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--utils/asn.pl331
1 files changed, 331 insertions, 0 deletions
diff --git a/utils/asn.pl b/utils/asn.pl
new file mode 100644
index 0000000..4d54bad
--- /dev/null
+++ b/utils/asn.pl
@@ -0,0 +1,331 @@
+#!/usr/bin/env perl
+#
+
+use warnings;
+use strict;
+use autodie;
+
+use File::Basename;
+use File::Fetch;
+use Getopt::Long;
+use Pod::Usage;
+
+use FindBin;
+use lib "$FindBin::Bin/extlib/lib/perl5";
+
+use URI;
+
+# Remote data sources used by this script.
+#   asn_sources - per-RIR delegation statistics files (parsed for country/RIR)
+#   bgp_sources - BGP table dump(s) that are fed to bgpdump
+my %config = (
+    asn_sources => [
+        qw(
+            ftp://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest
+            ftp://ftp.ripe.net/ripe/stats/delegated-ripencc-latest
+            http://ftp.afrinic.net/pub/stats/afrinic/delegated-afrinic-latest
+            ftp://ftp.apnic.net/pub/stats/apnic/delegated-apnic-latest
+            ftp://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-latest
+        )
+    ],
+    bgp_sources => [ qw(http://data.ris.ripe.net/rrc00/latest-bview.gz) ],
+);
+
+# Command-line settings and their defaults.
+my $download_asn        = 0;       # fetch the RIR delegation files first
+my $download_bgp        = 0;       # fetch the BGP dump first
+my $download_target     = "./";    # directory files are downloaded to / read from
+my $help                = 0;
+my $man                 = 0;
+my $v4                  = 1;       # write the IPv4 zone file (--no-4 disables)
+my $v6                  = 1;       # write the IPv6 zone file (--no-6 disables)
+my $parse               = 1;       # --no-parse: stop after the download step
+my $v4_zone             = "asn.rspamd.com";
+my $v6_zone             = "asn6.rspamd.com";
+my $v4_file             = "asn.zone";
+my $v6_file             = "asn6.zone";
+my $ns_servers          = [ "asn-ns.rspamd.com", "asn-ns2.rspamd.com" ];
+my $unknown_placeholder = "--";    # printed when country/RIR of an AS is unknown
+
+GetOptions(
+    "download-asn" => \$download_asn,
+    "download-bgp" => \$download_bgp,
+    "4!"           => \$v4,
+    "6!"           => \$v6,
+    "parse!"       => \$parse,
+    "target=s"     => \$download_target,
+    "zone-v4=s"    => \$v4_zone,
+    "zone-v6=s"    => \$v6_zone,
+    "file-v4=s"    => \$v4_file,
+    "file-v6=s"    => \$v6_file,
+    "ns-server=s@" => \$ns_servers,
+    "help|?"       => \$help,
+    "man"          => \$man,
+
+    # Takes a string value. Without "=s" the option is a plain flag and
+    # would overwrite the placeholder with 1.
+    "unknown-placeholder=s" => \$unknown_placeholder,
+) or
+  pod2usage(2);
+
+pod2usage(1) if $help;
+pod2usage(-exitval => 0, -verbose => 2) if $man;
+
+# Collect the URLs that were requested on the command line (RIR files
+# first, then the BGP dump) and fetch them in that order.
+my @wanted_urls;
+push @wanted_urls, @{ $config{'asn_sources'} } if $download_asn;
+push @wanted_urls, @{ $config{'bgp_sources'} } if $download_bgp;
+
+for my $url (@wanted_urls) {
+    download_file($url);
+}
+
+# --no-parse: downloading was all that was asked for
+exit 0 unless $parse;
+
+# Prefix to ASN map, keyed by IP version:
+#   { 4 => { prefix => origin_asn, ... }, 6 => { ... } }
+my $networks = { 4 => {}, 6 => {} };
+
+# Field indexes of one '|'-separated line printed by "bgpdump -M".
+# "use constant" takes effect at compile time, so it is declared once here
+# instead of inside the loop body.
+use constant {
+    F_MARKER    => 0,
+    F_TIMESTAMP => 1,
+    F_PEER_IP   => 3,
+    F_PEER_AS   => 4,
+    F_PREFIX    => 5,
+    F_AS_PATH   => 6,
+    F_ORIGIN    => 7,
+};
+
+foreach my $u (@{ $config{'bgp_sources'} }) {
+    my $parsed = URI->new($u);
+    my $fname  = $download_target . '/' . basename($parsed->path);
+
+    # List-form pipe open: bgpdump is executed directly, without a shell,
+    # so spaces or shell metacharacters in the --target directory cannot
+    # split or alter the command line.
+    open(my $bgpd, '-|', 'bgpdump', '-v', '-M', $fname)
+      or die "can't start bgpdump: $!";
+
+    while (my $line = <$bgpd>) {
+        chomp $line;
+        my @e = split /\|/, $line;
+
+        # Skip anything that is not a complete TABLE_DUMP2 record; this
+        # also covers empty or truncated lines that have no prefix field.
+        if (   !defined $e[F_MARKER]
+            || $e[F_MARKER] ne 'TABLE_DUMP2'
+            || !defined $e[F_PREFIX])
+        {
+            warn "bad line: $line\n";
+            next;
+        }
+
+        my $origin_as;
+        my $prefix = $e[F_PREFIX];
+        my $ip_ver = 6;
+
+        if ($prefix =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\/\d{1,2}$/) {
+            $ip_ver = 4;
+        }
+
+        if ($e[F_AS_PATH]) {
+
+            # not empty AS_PATH
+            my @as_path = split /\s/, $e[F_AS_PATH];
+            $origin_as = pop @as_path;
+
+            if (substr($origin_as, 0, 1) eq '{') {
+
+                # route is aggregated
+                if ($origin_as =~ /^{(\d+)}$/) {
+
+                    # single AS aggregated, just remove { } around
+                    $origin_as = $1;
+                }
+                else {
+
+                    # use previous AS from AS_PATH (undef if there is none)
+                    $origin_as = pop @as_path;
+                }
+            }
+
+            # Strip bogus ASes from the end of the path. Every popped
+            # candidate is checked, including the last one; once the path
+            # is exhausted pop returns undef and the loop stops.
+            while (defined $origin_as && is_bougus_asn($origin_as)) {
+                $origin_as = pop @as_path;
+            }
+        }
+
+        # empty AS_PATH or all AS_PATH elements were stripped as bogus - use
+        # PEER_AS as origin AS
+        $origin_as //= $e[F_PEER_AS];
+
+        $networks->{$ip_ver}{$prefix} = int($origin_as);
+    }
+}
+
+# Remove default routes
+delete $networks->{4}{'0.0.0.0/0'};
+delete $networks->{6}{'::/0'};
+
+# Now roughly detect countries: AS number => { country => ..., rir => ... }
+my $as_info = {};
+
+# RIR statistics exchange format
+# https://www.apnic.net/publications/media-library/documents/resource-guidelines/rir-statistics-exchange-format
+# https://www.arin.net/knowledge/statistics/nro_extended_stats_format.pdf
+# The first 7 fields are the same in both formats.
+use constant {
+    F_REGISTRY => 0,    # {afrinic,apnic,arin,iana,lacnic,ripencc}
+    F_CC       => 1,    # ISO 3166 2-letter country code
+    F_TYPE     => 2,    # {asn,ipv4,ipv6}
+    F_START    => 3,
+    F_VALUE    => 4,
+    F_DATE     => 5,
+    F_STATUS   => 6,
+};
+
+foreach my $source_url (@{ $config{'asn_sources'} }) {
+    my $stats_path =
+      $download_target . '/' . basename(URI->new($source_url)->path);
+    open(my $stats_fh, "<", $stats_path) or die "Cannot open $stats_path: $!";
+
+    while (my $record = <$stats_fh>) {
+        next if $record =~ /^\#/;
+        chomp $record;
+        my @elts = split /\|/, $record;
+
+        # Only ASN records with a concrete start value are of interest;
+        # records whose start field is '*' are skipped.
+        next unless $elts[F_TYPE] eq 'asn' && $elts[F_START] ne '*';
+
+        # A record covers F_VALUE consecutive AS numbers from F_START on.
+        my $first_as = int($elts[F_START]);
+        my $last_as  = $first_as + int($elts[F_VALUE]) - 1;
+
+        foreach my $as_number ($first_as .. $last_as) {
+            @{ $as_info->{$as_number} }{ 'country', 'rir' } =
+              @elts[ F_CC, F_REGISTRY ];
+        }
+    }
+}
+
+# Write zone files
+my $ns_list     = join ' ', @{$ns_servers};
+my $zone_header = << "EOH";
+\$SOA 43200 $ns_servers->[0] support.rspamd.com 0 600 300 86400 300
+\$NS 43200 $ns_list
+EOH
+
+# One entry per address family: whether it is enabled, where the zone goes
+# and which prefix => ASN map it is built from.
+my @zone_jobs = (
+    { enabled => $v4, file => $v4_file, nets => $networks->{4} },
+    { enabled => $v6, file => $v6_file, nets => $networks->{6} },
+);
+
+foreach my $job (@zone_jobs) {
+    next unless $job->{enabled};
+
+    # create temp file in the same dir so we can be sure that mv is atomic
+    my $final_path = $job->{file};
+    my $tmp_path =
+      dirname($final_path) . '/.' . basename($final_path) . '.tmp';
+
+    # open/close/rename failures are fatal through "use autodie"
+    open my $zone_fh, '>', $tmp_path;
+    print {$zone_fh} $zone_header;
+
+    while (my ($prefix, $asn) = each %{ $job->{nets} }) {
+        my $country = $as_info->{$asn}{'country'} || $unknown_placeholder;
+        my $rir     = $as_info->{$asn}{'rir'}     || $unknown_placeholder;
+
+        # Record layout: "<prefix> <asn>|<prefix>|<country>|<rir>|", e.g.
+        #   "8.8.8.0/24 15169|8.8.8.0/24|US|arin|" for 8.8.8.8
+        #   "2606:4700:4700::/48 13335|2606:4700:4700::/48|US|arin|"
+        #   for 2606:4700:4700::1111
+        printf {$zone_fh} "%s %s|%s|%s|%s|\n",
+          $prefix, $asn, $prefix, $country, $rir;
+    }
+
+    close $zone_fh;
+    rename $tmp_path, $final_path;
+}
+
+exit 0;
+
+########################################################################
+
+# Downloads $url into the directory given by $download_target.
+# Returns the value of File::Fetch's fetch() (the local file location).
+# Dies with the URL and an error description when the URL cannot be
+# handled or the transfer fails.
+sub download_file {
+    my ($url) = @_;
+
+    local $File::Fetch::WARN    = 0;
+    local $File::Fetch::TIMEOUT = 180;    # connectivity to ftp.lacnic.net is bad
+
+    # new() returns false when it cannot build a fetch object for the URI;
+    # report that with the URL instead of failing on an undefined object.
+    my $ff = File::Fetch->new(uri => $url)
+      or die "$url: cannot create File::Fetch object (malformed or unsupported URI?)";
+
+    my $where = $ff->fetch(to => $download_target)
+      or die "$url: ", $ff->error;
+
+    return $where;
+}
+
+# Tells whether an AS number must not be used as a route origin,
+# e. g. because it is a private or reserved AS.
+# Returns 1 for such a number and 0 otherwise.
+# List of allocated and reserved AS:
+# https://www.iana.org/assignments/as-numbers/as-numbers.txt
+sub is_bougus_asn {
+    my ($asn) = @_;
+
+    # First range, 64496-131071, is made of:
+    #   64496-64511  Reserved for use in documentation and sample code
+    #   64512-65534  Designated for private use
+    #   65535        Reserved
+    #   65536-65551  Reserved for use in documentation and sample code
+    #   65552-131071 Reserved
+    # AS 0 and everything from 4200000000 up are reserved as well
+    # (RFC6996, RFC7300, RFC7607).
+    if (   ($asn >= 64496 && $asn <= 131071)
+        || $asn == 0
+        || $asn >= 4200000000)
+    {
+        return 1;
+    }
+
+    return 0;
+}
+
+__END__
+
+=head1 NAME
+
+asn.pl - download and parse ASN data for Rspamd
+
+=head1 SYNOPSIS
+
+asn.pl [options]
+
+ Options:
+ --download-asn Download ASN data from RIRs
+ --download-bgp Download BGP full view dump from RIPE RIS
+ --target Where to download files (default: current dir)
+ --zone-v4 IPv4 zone (default: asn.rspamd.com)
+ --zone-v6 IPv6 zone (default: asn6.rspamd.com)
+ --file-v4 IPv4 zone file (default: ./asn.zone)
+ --file-v6 IPv6 zone file (default: ./asn6.zone)
+ --unknown-placeholder Placeholder for unknown elements (default: --)
+ --help Brief help message
+ --man Full documentation
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--download-asn>
+
+Download ASN data from RIR.
+
+=item B<--download-bgp>
+
+Download BGP full view dump from RIPE RIS.
+
+=item B<--target>
+
+Specifies where to download files.
+
+=item B<--help>
+
+Prints a brief help message and exits.
+
+=item B<--man>
+
+Prints the manual page and exits.
+
+=back
+
+=head1 DESCRIPTION
+
+B<asn.pl> is intended to download ASN data from the RIRs and a BGP full view dump, and to create rbldnsd zone files from them.
+
+=cut
+
+# vim: et:ts=4:sw=4