summaryrefslogtreecommitdiffstats
path: root/samples/bpf/xdp_fwd_kern.c
diff options
context:
space:
mode:
Diffstat (limited to 'samples/bpf/xdp_fwd_kern.c')
-rw-r--r--samples/bpf/xdp_fwd_kern.c158
1 files changed, 158 insertions, 0 deletions
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
new file mode 100644
index 000000000..54c099cbd
--- /dev/null
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+
+#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 64);
+} xdp_tx_ports SEC(".maps");
+
+/* from include/net/ip.h */
+static __always_inline int ip_decrease_ttl(struct iphdr *iph)
+{
+ u32 check = (__force u32)iph->check;
+
+ check += (__force u32)htons(0x0100);
+ iph->check = (__force __sum16)(check + (check >= 0xFFFF));
+ return --iph->ttl;
+}
+
+static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct bpf_fib_lookup fib_params;
+ struct ethhdr *eth = data;
+ struct ipv6hdr *ip6h;
+ struct iphdr *iph;
+ u16 h_proto;
+ u64 nh_off;
+ int rc;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ __builtin_memset(&fib_params, 0, sizeof(fib_params));
+
+ h_proto = eth->h_proto;
+ if (h_proto == htons(ETH_P_IP)) {
+ iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ if (iph->ttl <= 1)
+ return XDP_PASS;
+
+ fib_params.family = AF_INET;
+ fib_params.tos = iph->tos;
+ fib_params.l4_protocol = iph->protocol;
+ fib_params.sport = 0;
+ fib_params.dport = 0;
+ fib_params.tot_len = ntohs(iph->tot_len);
+ fib_params.ipv4_src = iph->saddr;
+ fib_params.ipv4_dst = iph->daddr;
+ } else if (h_proto == htons(ETH_P_IPV6)) {
+ struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
+
+ ip6h = data + nh_off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ if (ip6h->hop_limit <= 1)
+ return XDP_PASS;
+
+ fib_params.family = AF_INET6;
+ fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
+ fib_params.l4_protocol = ip6h->nexthdr;
+ fib_params.sport = 0;
+ fib_params.dport = 0;
+ fib_params.tot_len = ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+ } else {
+ return XDP_PASS;
+ }
+
+ fib_params.ifindex = ctx->ingress_ifindex;
+
+ rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+ /*
+ * Some rc (return codes) from bpf_fib_lookup() are important,
+ * to understand how this XDP-prog interacts with network stack.
+ *
+ * BPF_FIB_LKUP_RET_NO_NEIGH:
+ * Even if route lookup was a success, then the MAC-addresses are also
+ * needed. This is obtained from arp/neighbour table, but if table is
+ * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid
+ * doing ARP lookup directly from XDP, then send packet to normal
+ * network stack via XDP_PASS and expect it will do ARP resolution.
+ *
+ * BPF_FIB_LKUP_RET_FWD_DISABLED:
+ * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
+ * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
+ * enabled this on ingress device.
+ */
+ if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
+ /* Verify egress index has been configured as TX-port.
+ * (Note: User can still have inserted an egress ifindex that
+ * doesn't support XDP xmit, which will result in packet drops).
+ *
+ * Note: lookup in devmap supported since 0cdbb4b09a0.
+ * If not supported will fail with:
+ * cannot pass map_type 14 into func bpf_map_lookup_elem#1:
+ */
+ if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
+ return XDP_PASS;
+
+ if (h_proto == htons(ETH_P_IP))
+ ip_decrease_ttl(iph);
+ else if (h_proto == htons(ETH_P_IPV6))
+ ip6h->hop_limit--;
+
+ memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+ memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+ return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp_fwd")
+int xdp_fwd_prog(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, 0);
+}
+
+SEC("xdp_fwd_direct")
+int xdp_fwd_direct_prog(struct xdp_md *ctx)
+{
+ return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
+}
+
+char _license[] SEC("license") = "GPL";