summary refs log tree commit diff stats
path: root/net/xfrm
diff options
context:
space:
mode:
Diffstat (limited to 'net/xfrm')
-rw-r--r-- net/xfrm/espintcp.c 4
-rw-r--r-- net/xfrm/xfrm_input.c 3
-rw-r--r-- net/xfrm/xfrm_interface_bpf.c 4
-rw-r--r-- net/xfrm/xfrm_interface_core.c 26
-rw-r--r-- net/xfrm/xfrm_policy.c 163
-rw-r--r-- net/xfrm/xfrm_proc.c 1
-rw-r--r-- net/xfrm/xfrm_state.c 17
-rw-r--r-- net/xfrm/xfrm_state_bpf.c 4
-rw-r--r-- net/xfrm/xfrm_user.c 2
9 files changed, 182 insertions, 42 deletions
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index d3b3f9e720..fe82e2d073 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -10,6 +10,7 @@
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
+#include <net/hotdata.h>
static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
struct sock *sk)
@@ -169,7 +170,8 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&ctx->out_queue) >=
+ READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b2f7af63b7..3a2982a72a 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,7 @@
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>
#include <net/dst_metadata.h>
+#include <net/hotdata.h>
#include "xfrm_inout.h"
@@ -772,7 +773,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(net_hotdata.max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index 7d5e920141..5ea15037eb 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -93,10 +93,10 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp
__bpf_kfunc_end_defs();
-BTF_SET8_START(xfrm_ifc_kfunc_set)
+BTF_KFUNCS_START(xfrm_ifc_kfunc_set)
BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
-BTF_SET8_END(xfrm_ifc_kfunc_set)
+BTF_KFUNCS_END(xfrm_ifc_kfunc_set)
static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 21d50d75c2..4df5c06e3e 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -240,7 +240,6 @@ static void xfrmi_dev_free(struct net_device *dev)
struct xfrm_if *xi = netdev_priv(dev);
gro_cells_destroy(&xi->gro_cells);
- free_percpu(dev->tstats);
}
static int xfrmi_create(struct net_device *dev)
@@ -727,7 +726,7 @@ static int xfrmi_get_iflink(const struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- return xi->p.link;
+ return READ_ONCE(xi->p.link);
}
static const struct net_device_ops xfrmi_netdev_ops = {
@@ -749,6 +748,7 @@ static void xfrmi_dev_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->needs_free_netdev = true;
dev->priv_destructor = xfrmi_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
eth_broadcast_addr(dev->broadcast);
@@ -765,15 +765,9 @@ static int xfrmi_dev_init(struct net_device *dev)
struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&xi->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
dev->features |= NETIF_F_LLTX;
dev->features |= XFRMI_FEATURES;
@@ -957,12 +951,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
-static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
+static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_exit_list, exit_list) {
struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
struct xfrm_if __rcu **xip;
@@ -973,18 +967,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
for (xip = &xfrmn->xfrmi[i];
(xi = rtnl_dereference(*xip)) != NULL;
xip = &xi->next)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
if (xi)
- unregister_netdevice_queue(xi->dev, &list);
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations xfrmi_net_ops = {
- .exit_batch = xfrmi_exit_batch_net,
+ .exit_batch_rtnl = xfrmi_exit_batch_rtnl,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index da6ecc6b3e..d154597728 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -29,6 +29,7 @@
#include <linux/audit.h>
#include <linux/rhashtable.h>
#include <linux/if_tunnel.h>
+#include <linux/icmp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/inet_ecn.h>
@@ -2597,8 +2598,7 @@ static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
if (dst->ops->family == AF_INET6) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- path->path_cookie = rt6_get_cookie(rt);
+ path->path_cookie = rt6_get_cookie(dst_rt6_info(dst));
path->u.rt6.rt6i_nfheader_len = nfheader_len;
}
}
@@ -3505,6 +3505,130 @@ static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int
return 0;
}
+static bool icmp_err_packet(const struct flowi *fl, unsigned short family)
+{ /*
+ * Tell whether the flow @fl, interpreted for address @family, is one of
+ * the ICMP error message types handled by the helpers that call this:
+ *   AF_INET:  IPPROTO_ICMP with type ICMP_DEST_UNREACH or
+ *             ICMP_TIME_EXCEEDED
+ *   AF_INET6: IPPROTO_ICMPV6 with type ICMPV6_DEST_UNREACH,
+ *             ICMPV6_PKT_TOOBIG or ICMPV6_TIME_EXCEED
+ *             (only compiled in when CONFIG_IPV6 is enabled)
+ * Returns true on a match and false for everything else, including any
+ * other address family.
+ */
+ const struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (family == AF_INET &&
+ fl4->flowi4_proto == IPPROTO_ICMP &&
+ (fl4->fl4_icmp_type == ICMP_DEST_UNREACH ||
+ fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED))
+ return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6) {
+ const struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (fl6->flowi6_proto == IPPROTO_ICMPV6 &&
+ (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH ||
+ fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG ||
+ fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED))
+ return true;
+ }
+#endif
+ return false;
+}
+
+static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family,
+ const struct flowi *fl, struct flowi *fl1)
+{ /*
+ * Fill @fl1 with a flow decoded from the data that follows the outer
+ * IP header and ICMP header of @skb (presumably the offending packet
+ * quoted by an ICMP error - the callers only get here after
+ * icmp_err_packet() matched).
+ *
+ * Works on a clone of @skb, so @skb itself is not pulled or re-pointed.
+ * After the reverse session decode, flowi_oif, flowi_mark and flowi_tos
+ * are copied from the outer flow @fl into @fl1, and
+ * nf_nat_decode_session() is run on the clone.
+ *
+ * Return convention is inverted: false means @fl1 is valid, true means
+ * failure (clone allocation, header pull or session decode failed) and
+ * @fl1 must not be used.
+ *
+ * NOTE(review): hl is built from sizeof() of the fixed headers only, so
+ * IPv4 options or IPv6 extension headers in the outer packet do not
+ * appear to be accounted for - confirm this is intended.
+ */
+ bool ret = true;
+ struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+ int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) :
+ (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr));
+
+ if (!newskb)
+ return true; /* nothing to release yet, so skip the out: path */
+
+ if (!pskb_pull(newskb, hl))
+ goto out;
+
+ skb_reset_network_header(newskb); /* network header now sits right after the pulled hl bytes */
+
+ if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0)
+ goto out;
+
+ fl1->flowi_oif = fl->flowi_oif;
+ fl1->flowi_mark = fl->flowi_mark;
+ fl1->flowi_tos = fl->flowi_tos;
+ nf_nat_decode_session(newskb, fl1, family);
+ ret = false;
+
+out:
+ consume_skb(newskb); /* the clone is dropped on success and failure alike */
+ return ret;
+}
+
+static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family,
+ const struct xfrm_selector *sel,
+ const struct flowi *fl)
+{ /*
+ * Match selector @sel against the flow decoded from inside an ICMP
+ * error instead of against the outer flow @fl.
+ *
+ * Returns the result of xfrm_selector_match() on the decoded flow.
+ * Returns false when @fl is not an ICMP error type accepted by
+ * icmp_err_packet(), or when xfrm_icmp_flow_decode() fails.
+ */
+ bool ret = false;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return ret; /* decode failed (true == failure): report no match */
+
+ ret = xfrm_selector_match(sel, &fl1, family);
+ }
+
+ return ret;
+}
+
+static inline struct
+xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb,
+ const struct flowi *fl, unsigned short family,
+ u32 if_id)
+{ /*
+ * Look up a forward policy for an ICMP error using the flow decoded
+ * from inside the error rather than the outer flow @fl.
+ *
+ * The lookup is done with XFRM_POLICY_FWD in the namespace of
+ * skb->dev, passing @if_id through unchanged.
+ *
+ * Returns the policy found, or NULL when @fl is not an ICMP error
+ * accepted by icmp_err_packet(), when the inner flow cannot be decoded,
+ * or when xfrm_policy_lookup() returns an error pointer.
+ */
+ struct xfrm_policy *pol = NULL;
+
+ if (icmp_err_packet(fl, family)) {
+ struct flowi fl1;
+ struct net *net = dev_net(skb->dev);
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return pol; /* still NULL here */
+
+ pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id);
+ if (IS_ERR(pol))
+ pol = NULL; /* lookup errors are folded into "no policy" */
+ }
+
+ return pol;
+}
+
+static inline struct
+dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl,
+ unsigned short family, struct dst_entry *dst)
+{ /*
+ * For an ICMP error, retry the outbound xfrm lookup with the flow
+ * decoded from inside the error and, if that yields a dst carrying an
+ * xfrm transform, return it in place of @dst.
+ *
+ * Returns @dst unchanged when @fl is not an ICMP error accepted by
+ * icmp_err_packet(), when the inner flow cannot be decoded, when
+ * xfrm_lookup() returns an error pointer, or when the dst it returns
+ * has no ->xfrm set.
+ *
+ * The lookup is issued with XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP and a
+ * NULL socket.
+ */
+ if (icmp_err_packet(fl, family)) {
+ struct net *net = dev_net(skb->dev);
+ struct dst_entry *dst2;
+ struct flowi fl1;
+
+ if (xfrm_icmp_flow_decode(skb, family, fl, &fl1))
+ return dst;
+
+ dst_hold(dst); /* extra reference taken before dst is handed to xfrm_lookup() */
+
+ dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP));
+
+ if (IS_ERR(dst2))
+ return dst; /* NOTE(review): presumes xfrm_lookup() dropped the extra reference on failure - confirm */
+
+ if (dst2->xfrm) {
+ dst_release(dst); /* drop one reference on the dst being replaced */
+ dst = dst2;
+ } else {
+ dst_release(dst2); /* result not used: give its reference back */
+ }
+ }
+
+ return dst;
+}
+
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
unsigned short family)
{
@@ -3551,9 +3675,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
for (i = sp->len - 1; i >= 0; i--) {
struct xfrm_state *x = sp->xvec[i];
+ int ret = 0;
+
if (!xfrm_selector_match(&x->sel, &fl, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
- return 0;
+ ret = 1;
+ if (x->props.flags & XFRM_STATE_ICMP &&
+ xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl))
+ ret = 0;
+ if (ret) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
+ return 0;
+ }
}
}
}
@@ -3576,6 +3708,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 0;
}
+ if (!pol && dir == XFRM_POLICY_FWD)
+ pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id);
+
if (!pol) {
if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3709,6 +3844,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
res = 0;
dst = NULL;
}
+
+ if (dst && !dst->xfrm)
+ dst = xfrm_out_fwd_icmp(skb, &fl, family, dst);
+
skb_dst_set(skb, dst);
return res;
}
@@ -3765,15 +3904,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
/* Impossible. Such dst must be popped before reaches point of failure. */
}
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
{
- if (dst) {
- if (dst->obsolete) {
- dst_release(dst);
- dst = NULL;
- }
- }
- return dst;
+ if (dst->obsolete)
+ sk_dst_reset(sk);
}
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
@@ -4027,10 +4161,7 @@ static int __net_init xfrm_policy_init(struct net *net)
int dir, err;
if (net_eq(net, &init_net)) {
- xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
- sizeof(struct xfrm_dst),
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
- NULL);
+ xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
err = rhashtable_init(&xfrm_policy_inexact_table,
&xfrm_pol_inexact_params);
BUG_ON(err);
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index fee9b5cf37..5f9bf8e5c9 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -52,6 +52,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+ xfrm_state_update_stats(net);
snmp_get_cpu_field_batch(buff, xfrm_mib_list,
net->mib.xfrm_statistics);
for (i = 0; xfrm_mib_list[i].name; i++)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index bda5327bf3..0c306473a7 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -570,7 +570,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
int err = 0;
spin_lock(&x->lock);
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (x->km.state == XFRM_STATE_DEAD)
goto out;
@@ -1935,7 +1935,7 @@ EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
if (!READ_ONCE(x->curlft.use_time))
WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
@@ -1957,6 +1957,19 @@ int xfrm_state_check_expire(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_check_expire);
+void xfrm_state_update_stats(struct net *net)
+{ /*
+ * Call xfrm_dev_state_update_stats() on every xfrm state linked into
+ * the state_bydst hash table of @net.
+ *
+ * All buckets 0..state_hmask are walked while holding
+ * net->xfrm.xfrm_state_lock, taken and released with the _bh spinlock
+ * variants.
+ */
+ struct xfrm_state *x;
+ int i;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ for (i = 0; i <= net->xfrm.state_hmask; i++) { /* state_hmask is the last valid bucket index, hence <= */
+ hlist_for_each_entry(x, net->xfrm.state_bydst + i, bydst)
+ xfrm_dev_state_update_stats(x);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+}
+
struct xfrm_state *
xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family)
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
index 9e20d4a377..2248eda741 100644
--- a/net/xfrm/xfrm_state_bpf.c
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -117,10 +117,10 @@ __bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
__bpf_kfunc_end_defs();
-BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_KFUNCS_START(xfrm_state_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
-BTF_SET8_END(xfrm_state_kfunc_set)
+BTF_KFUNCS_END(xfrm_state_kfunc_set)
static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
.owner = THIS_MODULE,
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 912c1189ba..810b520493 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -902,7 +902,7 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
if (x->xso.dev)
- xfrm_dev_state_update_curlft(x);
+ xfrm_dev_state_update_stats(x);
memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
put_unaligned(x->stats.replay_window, &p->stats.replay_window);
put_unaligned(x->stats.replay, &p->stats.replay);