summaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:35:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:39:31 +0000
commit85c675d0d09a45a135bddd15d7b385f8758c32fb (patch)
tree76267dbc9b9a130337be3640948fe397b04ac629 /include/net
parentAdding upstream version 6.6.15. (diff)
downloadlinux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz
linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip
Adding upstream version 6.7.7.upstream/6.7.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--include/net/Space.h1
-rw-r--r--include/net/af_rxrpc.h15
-rw-r--r--include/net/af_unix.h20
-rw-r--r--include/net/af_vsock.h7
-rw-r--r--include/net/bluetooth/bluetooth.h2
-rw-r--r--include/net/bluetooth/hci_core.h43
-rw-r--r--include/net/bluetooth/hci_sync.h2
-rw-r--r--include/net/cfg80211.h288
-rw-r--r--include/net/devlink.h69
-rw-r--r--include/net/dropreason-core.h33
-rw-r--r--include/net/dsa.h69
-rw-r--r--include/net/dsa_stubs.h22
-rw-r--r--include/net/dst.h11
-rw-r--r--include/net/flow_offload.h2
-rw-r--r--include/net/gro.h2
-rw-r--r--include/net/ieee80211_radiotap.h6
-rw-r--r--include/net/if_inet6.h2
-rw-r--r--include/net/inet_connection_sock.h22
-rw-r--r--include/net/inet_sock.h11
-rw-r--r--include/net/inet_timewait_sock.h3
-rw-r--r--include/net/ip.h17
-rw-r--r--include/net/ip6_route.h19
-rw-r--r--include/net/ip_fib.h2
-rw-r--r--include/net/ipv6.h42
-rw-r--r--include/net/ipv6_stubs.h8
-rw-r--r--include/net/mac80211.h134
-rw-r--r--include/net/mana/hw_channel.h2
-rw-r--r--include/net/mana/mana.h2
-rw-r--r--include/net/net_namespace.h15
-rw-r--r--include/net/netfilter/nf_conntrack.h14
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h2
-rw-r--r--include/net/netfilter/nf_tables.h85
-rw-r--r--include/net/netkit.h44
-rw-r--r--include/net/netlink.h73
-rw-r--r--include/net/netns/conntrack.h2
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/nexthop.h8
-rw-r--r--include/net/page_pool/helpers.h230
-rw-r--r--include/net/page_pool/types.h6
-rw-r--r--include/net/pkt_cls.h6
-rw-r--r--include/net/pkt_sched.h8
-rw-r--r--include/net/regulatory.h1
-rw-r--r--include/net/route.h10
-rw-r--r--include/net/sch_generic.h4
-rw-r--r--include/net/sock.h17
-rw-r--r--include/net/switchdev.h3
-rw-r--r--include/net/tc_act/tc_ct.h1
-rw-r--r--include/net/tcp.h373
-rw-r--r--include/net/tcp_ao.h389
-rw-r--r--include/net/tcx.h7
-rw-r--r--include/net/tls.h26
-rw-r--r--include/net/udp_tunnel.h21
-rw-r--r--include/net/xdp.h19
-rw-r--r--include/net/xdp_sock.h16
-rw-r--r--include/net/xfrm.h20
56 files changed, 1745 insertions, 516 deletions
diff --git a/include/net/Space.h b/include/net/Space.h
index c29f3d51c0..ef42629f42 100644
--- a/include/net/Space.h
+++ b/include/net/Space.h
@@ -10,4 +10,3 @@ struct net_device *smc_init(int unit);
struct net_device *cs89x0_probe(int unit);
struct net_device *tc515_probe(int unit);
struct net_device *lance_probe(int unit);
-struct net_device *cops_probe(int unit);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 5531dd0806..0754c46322 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
struct sock;
struct socket;
struct rxrpc_call;
+struct rxrpc_peer;
enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
@@ -41,13 +42,14 @@ void rxrpc_kernel_new_call_notification(struct socket *,
rxrpc_notify_new_call_t,
rxrpc_discard_new_call_t);
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id);
@@ -60,9 +62,14 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, enum rxrpc_abort_reason);
void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call);
void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call);
-void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
- struct sockaddr_rxrpc *);
-bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call);
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer);
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027..afd40dce40 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -46,12 +46,6 @@ struct scm_stat {
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
-#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
-#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
- spin_lock_nested(&unix_sk(s)->lock, \
- SINGLE_DEPTH_NESTING)
-
/* The AF_UNIX socket */
struct unix_sock {
/* WARNING: sk has to be the first member */
@@ -77,6 +71,20 @@ struct unix_sock {
#define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
#define unix_peer(sk) (unix_sk(sk)->peer)
+#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
+#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
+enum unix_socket_lock_class {
+ U_LOCK_NORMAL,
+ U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */
+ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
+};
+
+static inline void unix_state_lock_nested(struct sock *sk,
+ enum unix_socket_lock_class subclass)
+{
+ spin_lock_nested(&unix_sk(sk)->lock, subclass);
+}
+
#define peer_wait peer_wq.wait
long unix_inq_len(struct sock *sk);
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index dc3cb16835..535701efc1 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -177,6 +177,9 @@ struct vsock_transport {
/* Read a single skb */
int (*read_skb)(struct vsock_sock *, skb_read_actor_t);
+
+ /* Zero-copy. */
+ bool (*msgzerocopy_allow)(void);
};
/**** CORE ****/
@@ -241,4 +244,8 @@ static inline void __init vsock_bpf_build_proto(void)
{}
#endif
+static inline bool vsock_msgzerocopy_allow(const struct vsock_transport *t)
+{
+ return t->msgzerocopy_allow && t->msgzerocopy_allow();
+}
#endif /* __AF_VSOCK_H__ */
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index aa90adc3b2..7ffa8c192c 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -541,7 +541,7 @@ static inline struct sk_buff *bt_skb_sendmsg(struct sock *sk,
return ERR_PTR(-EFAULT);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
return skb;
}
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index b83cfcf666..65dd286693 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1296,29 +1296,6 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
return NULL;
}
-static inline struct hci_conn *hci_conn_hash_lookup_big_any_dst(struct hci_dev *hdev,
- __u8 handle)
-{
- struct hci_conn_hash *h = &hdev->conn_hash;
- struct hci_conn *c;
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(c, &h->list, list) {
- if (c->type != ISO_LINK)
- continue;
-
- if (handle != BT_ISO_QOS_BIG_UNSET && handle == c->iso_qos.bcast.big) {
- rcu_read_unlock();
- return c;
- }
- }
-
- rcu_read_unlock();
-
- return NULL;
-}
-
static inline struct hci_conn *
hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big)
{
@@ -1406,6 +1383,26 @@ static inline void hci_conn_hash_list_state(struct hci_dev *hdev,
rcu_read_unlock();
}
+static inline void hci_conn_hash_list_flag(struct hci_dev *hdev,
+ hci_conn_func_t func, __u8 type,
+ __u8 flag, void *data)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ if (!func)
+ return;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (c->type == type && test_bit(flag, &c->flags))
+ func(c, data);
+ }
+
+ rcu_read_unlock();
+}
+
static inline struct hci_conn *hci_lookup_le_connect(struct hci_dev *hdev)
{
struct hci_conn_hash *h = &hdev->conn_hash;
diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h
index 57eeb07aeb..6efbc21521 100644
--- a/include/net/bluetooth/hci_sync.h
+++ b/include/net/bluetooth/hci_sync.h
@@ -80,6 +80,8 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
u8 *data, u32 flags, u16 min_interval,
u16 max_interval, u16 sync_interval);
+int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance);
+
int hci_remove_advertising_sync(struct hci_dev *hdev, struct sock *sk,
u8 instance, bool force);
int hci_disable_advertising_sync(struct hci_dev *hdev);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 153a8c3e72..8f2c487618 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -76,6 +76,8 @@ struct wiphy;
* @IEEE80211_CHAN_DISABLED: This channel is disabled.
* @IEEE80211_CHAN_NO_IR: do not initiate radiation, this includes
* sending probe requests or beaconing.
+ * @IEEE80211_CHAN_PSD: Power spectral density (in dBm) is set for this
+ * channel.
* @IEEE80211_CHAN_RADAR: Radar detection is required on this channel.
* @IEEE80211_CHAN_NO_HT40PLUS: extension channel above this channel
* is not permitted.
@@ -119,7 +121,7 @@ struct wiphy;
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = 1<<0,
IEEE80211_CHAN_NO_IR = 1<<1,
- /* hole at 1<<2 */
+ IEEE80211_CHAN_PSD = 1<<2,
IEEE80211_CHAN_RADAR = 1<<3,
IEEE80211_CHAN_NO_HT40PLUS = 1<<4,
IEEE80211_CHAN_NO_HT40MINUS = 1<<5,
@@ -171,6 +173,7 @@ enum ieee80211_channel_flags {
* on this channel.
* @dfs_state_entered: timestamp (jiffies) when the dfs state was entered.
* @dfs_cac_ms: DFS CAC time in milliseconds, this is valid for DFS channels.
+ * @psd: power spectral density (in dBm)
*/
struct ieee80211_channel {
enum nl80211_band band;
@@ -187,6 +190,7 @@ struct ieee80211_channel {
enum nl80211_dfs_state dfs_state;
unsigned long dfs_state_entered;
unsigned int dfs_cac_ms;
+ s8 psd;
};
/**
@@ -410,6 +414,19 @@ struct ieee80211_sta_eht_cap {
u8 eht_ppe_thres[IEEE80211_EHT_PPE_THRES_MAX_LEN];
};
+/* sparse defines __CHECKER__; see Documentation/dev-tools/sparse.rst */
+#ifdef __CHECKER__
+/*
+ * This is used to mark the sband->iftype_data pointer which is supposed
+ * to be an array with special access semantics (per iftype), but a lot
+ * of code got it wrong in the past, so with this marking sparse will be
+ * noisy when the pointer is used directly.
+ */
+# define __iftd __attribute__((noderef, address_space(__iftype_data)))
+#else
+# define __iftd
+#endif /* __CHECKER__ */
+
/**
* struct ieee80211_sband_iftype_data - sband data per interface type
*
@@ -543,10 +560,48 @@ struct ieee80211_supported_band {
struct ieee80211_sta_s1g_cap s1g_cap;
struct ieee80211_edmg edmg_cap;
u16 n_iftype_data;
- const struct ieee80211_sband_iftype_data *iftype_data;
+ const struct ieee80211_sband_iftype_data __iftd *iftype_data;
};
/**
+ * _ieee80211_set_sband_iftype_data - set sband iftype data array
+ * @sband: the sband to initialize
+ * @iftd: the iftype data array pointer
+ * @n_iftd: the length of the iftype data array
+ *
+ * Set the sband iftype data array; use this where the length cannot
+ * be derived from the ARRAY_SIZE() of the argument, but prefer
+ * ieee80211_set_sband_iftype_data() where it can be used.
+ */
+static inline void
+_ieee80211_set_sband_iftype_data(struct ieee80211_supported_band *sband,
+ const struct ieee80211_sband_iftype_data *iftd,
+ u16 n_iftd)
+{
+ sband->iftype_data = (const void __iftd __force *)iftd;
+ sband->n_iftype_data = n_iftd;
+}
+
+/**
+ * ieee80211_set_sband_iftype_data - set sband iftype data array
+ * @sband: the sband to initialize
+ * @iftd: the iftype data array
+ */
+#define ieee80211_set_sband_iftype_data(sband, iftd) \
+ _ieee80211_set_sband_iftype_data(sband, iftd, ARRAY_SIZE(iftd))
+
+/**
+ * for_each_sband_iftype_data - iterate sband iftype data entries
+ * @sband: the sband whose iftype_data array to iterate
+ * @i: iterator counter
+ * @iftd: iftype data pointer to set
+ */
+#define for_each_sband_iftype_data(sband, i, iftd) \
+ for (i = 0, iftd = (const void __force *)&(sband)->iftype_data[i]; \
+ i < (sband)->n_iftype_data; \
+ i++, iftd = (const void __force *)&(sband)->iftype_data[i])
+
+/**
* ieee80211_get_sband_iftype_data - return sband data for a given iftype
* @sband: the sband to search for the STA on
* @iftype: enum nl80211_iftype
@@ -557,6 +612,7 @@ static inline const struct ieee80211_sband_iftype_data *
ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
u8 iftype)
{
+ const struct ieee80211_sband_iftype_data *data;
int i;
if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
@@ -565,10 +621,7 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
if (iftype == NL80211_IFTYPE_AP_VLAN)
iftype = NL80211_IFTYPE_AP;
- for (i = 0; i < sband->n_iftype_data; i++) {
- const struct ieee80211_sband_iftype_data *data =
- &sband->iftype_data[i];
-
+ for_each_sband_iftype_data(sband, i, data) {
if (data->types_mask & BIT(iftype))
return data;
}
@@ -954,6 +1007,30 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
enum nl80211_iftype iftype);
/**
+ * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable and we
+ * can/need start CAC on such channel
+ * @wiphy: the wiphy to validate against
+ * @chandef: the channel definition to check
+ *
+ * Return: true if all channels available and at least
+ * one channel requires CAC (NL80211_DFS_USABLE)
+ */
+bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef);
+
+/**
+ * cfg80211_chandef_dfs_cac_time - get the DFS CAC time (in ms) for given
+ * channel definition
+ * @wiphy: the wiphy to validate against
+ * @chandef: the channel definition to check
+ *
+ * Returns: DFS CAC time (in ms) which applies for this channel definition
+ */
+unsigned int
+cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
+ const struct cfg80211_chan_def *chandef);
+
+/**
* nl80211_send_chandef - sends the channel definition.
* @msg: the msg to send channel definition
* @chandef: the channel definition to check
@@ -1289,6 +1366,7 @@ struct cfg80211_acl_data {
* struct cfg80211_fils_discovery - FILS discovery parameters from
* IEEE Std 802.11ai-2016, Annex C.3 MIB detail.
*
+ * @update: Set to true if the feature configuration should be updated.
* @min_interval: Minimum packet interval in TUs (0 - 10000)
* @max_interval: Maximum packet interval in TUs (0 - 10000)
* @tmpl_len: Template length
@@ -1296,6 +1374,7 @@ struct cfg80211_acl_data {
* frame headers.
*/
struct cfg80211_fils_discovery {
+ bool update;
u32 min_interval;
u32 max_interval;
size_t tmpl_len;
@@ -1306,6 +1385,7 @@ struct cfg80211_fils_discovery {
* struct cfg80211_unsol_bcast_probe_resp - Unsolicited broadcast probe
* response parameters in 6GHz.
*
+ * @update: Set to true if the feature configuration should be updated.
* @interval: Packet interval in TUs. Maximum allowed is 20 TU, as mentioned
* in IEEE P802.11ax/D6.0 26.17.2.3.2 - AP behavior for fast passive
* scanning
@@ -1313,6 +1393,7 @@ struct cfg80211_fils_discovery {
* @tmpl: Template data for probe response
*/
struct cfg80211_unsol_bcast_probe_resp {
+ bool update;
u32 interval;
size_t tmpl_len;
const u8 *tmpl;
@@ -1399,6 +1480,22 @@ struct cfg80211_ap_settings {
u16 punct_bitmap;
};
+
+/**
+ * struct cfg80211_ap_update - AP configuration update
+ *
+ * Subset of &struct cfg80211_ap_settings, for updating a running AP.
+ *
+ * @beacon: beacon data
+ * @fils_discovery: FILS discovery transmission parameters
+ * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
+ */
+struct cfg80211_ap_update {
+ struct cfg80211_beacon_data beacon;
+ struct cfg80211_fils_discovery fils_discovery;
+ struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
+};
+
/**
* struct cfg80211_csa_settings - channel switch settings
*
@@ -2346,7 +2443,7 @@ struct mesh_config {
* @user_mpm: userspace handles all MPM functions
* @dtim_period: DTIM period to use
* @beacon_interval: beacon interval to use
- * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a]
+ * @mcast_rate: multicast rate for Mesh Node [6Mbps is the default for 802.11a]
* @basic_rates: basic rates to use when creating the mesh
* @beacon_rate: bitrate to be used for beacons
* @userspace_handles_dfs: whether user space controls DFS operation, i.e.
@@ -2487,7 +2584,6 @@ struct cfg80211_scan_6ghz_params {
* @n_ssids: number of SSIDs
* @channels: channels to scan on.
* @n_channels: total number of channels to scan
- * @scan_width: channel width for scanning
* @ie: optional information element(s) to add into Probe Request or %NULL
* @ie_len: length of ie in octets
* @duration: how long to listen on each channel, in TUs. If
@@ -2517,7 +2613,6 @@ struct cfg80211_scan_request {
struct cfg80211_ssid *ssids;
int n_ssids;
u32 n_channels;
- enum nl80211_bss_scan_width scan_width;
const u8 *ie;
size_t ie_len;
u16 duration;
@@ -2566,7 +2661,7 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
* or no match (RSSI only)
* @rssi_thold: don't report scan results below this threshold (in s32 dBm)
* @per_band_rssi_thold: Minimum rssi threshold for each band to be applied
- * for filtering out scan results received. Drivers advertize this support
+ * for filtering out scan results received. Drivers advertise this support
* of band specific rssi based filtering through the feature capability
* %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band
* specific rssi thresholds take precedence over rssi_thold, if specified.
@@ -2612,14 +2707,13 @@ struct cfg80211_bss_select_adjust {
* @ssids: SSIDs to scan for (passed in the probe_reqs in active scans)
* @n_ssids: number of SSIDs
* @n_channels: total number of channels to scan
- * @scan_width: channel width for scanning
* @ie: optional information element(s) to add into Probe Request or %NULL
* @ie_len: length of ie in octets
* @flags: control flags from &enum nl80211_scan_flags
* @match_sets: sets of parameters to be matched for a scan result
* entry to be considered valid and to be passed to the host
* (others are filtered out).
- * If ommited, all results are passed.
+ * If omitted, all results are passed.
* @n_match_sets: number of match sets
* @report_results: indicates that results were reported for this request
* @wiphy: the wiphy this was for
@@ -2653,14 +2747,13 @@ struct cfg80211_bss_select_adjust {
* to the specified band while deciding whether a better BSS is reported
* using @relative_rssi. If delta is a negative number, the BSSs that
* belong to the specified band will be penalized by delta dB in relative
- * comparisions.
+ * comparisons.
*/
struct cfg80211_sched_scan_request {
u64 reqid;
struct cfg80211_ssid *ssids;
int n_ssids;
u32 n_channels;
- enum nl80211_bss_scan_width scan_width;
const u8 *ie;
size_t ie_len;
u32 flags;
@@ -2708,7 +2801,6 @@ enum cfg80211_signal_type {
/**
* struct cfg80211_inform_bss - BSS inform data
* @chan: channel the frame was received on
- * @scan_width: scan width that was used
* @signal: signal strength value, according to the wiphy's
* signal type
* @boottime_ns: timestamp (CLOCK_BOOTTIME) when the information was
@@ -2728,7 +2820,6 @@ enum cfg80211_signal_type {
*/
struct cfg80211_inform_bss {
struct ieee80211_channel *chan;
- enum nl80211_bss_scan_width scan_width;
s32 signal;
u64 boottime_ns;
u64 parent_tsf;
@@ -2762,7 +2853,6 @@ struct cfg80211_bss_ies {
* for use in scan results and similar.
*
* @channel: channel this BSS is on
- * @scan_width: width of the control channel
* @bssid: BSSID of the BSS
* @beacon_interval: the beacon interval as from the frame
* @capability: the capability field in host byte order
@@ -2775,6 +2865,8 @@ struct cfg80211_bss_ies {
* own the beacon_ies, but they're just pointers to the ones from the
* @hidden_beacon_bss struct)
* @proberesp_ies: the information elements from the last Probe Response frame
+ * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame,
+ * cannot rely on it having valid data
* @hidden_beacon_bss: in case this BSS struct represents a probe response from
* a BSS that hides the SSID in its beacon, this points to the BSS struct
* that holds the beacon data. @beacon_ies is still valid, of course, and
@@ -2792,7 +2884,6 @@ struct cfg80211_bss_ies {
*/
struct cfg80211_bss {
struct ieee80211_channel *channel;
- enum nl80211_bss_scan_width scan_width;
const struct cfg80211_bss_ies __rcu *ies;
const struct cfg80211_bss_ies __rcu *beacon_ies;
@@ -2811,6 +2902,8 @@ struct cfg80211_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 proberesp_ecsa_stuck:1;
+
u8 bssid_index;
u8 max_bssid_indicator;
@@ -2891,12 +2984,15 @@ struct cfg80211_auth_request {
* @elems_len: length of the elements
* @disabled: If set this link should be included during association etc. but it
* should not be used until enabled by the AP MLD.
+ * @error: per-link error code, must be <= 0. If there is an error, then the
+ * operation as a whole must fail.
*/
struct cfg80211_assoc_link {
struct cfg80211_bss *bss;
const u8 *elems;
size_t elems_len;
bool disabled;
+ int error;
};
/**
@@ -3495,7 +3591,7 @@ struct cfg80211_update_ft_ies_params {
* This structure provides information needed to transmit a mgmt frame
*
* @chan: channel to use
- * @offchan: indicates wether off channel operation is required
+ * @offchan: indicates whether off channel operation is required
* @wait: duration for ROC
* @buf: buffer to transmit
* @len: buffer length
@@ -3613,7 +3709,7 @@ struct cfg80211_nan_func_filter {
* @publish_bcast: if true, the solicited publish should be broadcasted
* @subscribe_active: if true, the subscribe is active
* @followup_id: the instance ID for follow up
- * @followup_reqid: the requestor instance ID for follow up
+ * @followup_reqid: the requester instance ID for follow up
* @followup_dest: MAC address of the recipient of the follow up
* @ttl: time to live counter in DW.
* @serv_spec_info: Service Specific Info
@@ -4450,7 +4546,7 @@ struct cfg80211_ops {
int (*start_ap)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *settings);
int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_beacon_data *info);
+ struct cfg80211_ap_update *info);
int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev,
unsigned int link_id);
@@ -4816,6 +4912,8 @@ struct cfg80211_ops {
* @WIPHY_FLAG_SUPPORTS_EXT_KCK_32: The device supports 32-byte KCK keys.
* @WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER: The device could handle reg notify for
* NL80211_REGDOM_SET_BY_DRIVER.
+ * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver
+ * set this flag to update channels on beacon hints.
*/
enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
@@ -4842,6 +4940,7 @@ enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22),
WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23),
WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER = BIT(24),
+ WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON = BIT(25),
};
/**
@@ -5882,7 +5981,7 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy,
/**
* wiphy_delayed_work_flush - flush previously queued delayed work
* @wiphy: the wiphy, for debug purposes
- * @work: the work to flush
+ * @dwork: the delayed work to flush
*
* Flush the work (i.e. run it if pending). This must be called
* under the wiphy mutex acquired by wiphy_lock().
@@ -5938,8 +6037,6 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
* @mgmt_registrations: list of registrations for management frames
* @mgmt_registrations_need_update: mgmt registrations were updated,
* need to propagate the update to the driver
- * @mtx: mutex used to lock data in this struct, may be used by drivers
- * and some API functions require it held
* @beacon_interval: beacon interval used on this device for transmitting
* beacons, 0 when not valid
* @address: The address for this device, valid only if @netdev is %NULL
@@ -5986,8 +6083,6 @@ struct wireless_dev {
struct list_head mgmt_registrations;
u8 mgmt_registrations_need_update:1;
- struct mutex mtx;
-
bool use_4addr, is_running, registered, registering;
u8 address[ETH_ALEN] __aligned(sizeof(u16));
@@ -6278,13 +6373,11 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
/**
* ieee80211_mandatory_rates - get mandatory rates for a given band
* @sband: the band to look for rates in
- * @scan_width: width of the control channel
*
* This function returns a bitmap of the mandatory rates for the given
* band, bits are set according to the rate position in the bitrates array.
*/
-u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
- enum nl80211_bss_scan_width scan_width);
+u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband);
/*
* Radiotap parsing functions -- for controlled injection support
@@ -6625,7 +6718,7 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
* @ies: data consisting of IEs
* @len: length of data
*
- * Return: %NULL if the etended element could not be found or if
+ * Return: %NULL if the extended element could not be found or if
* the element is invalid (claims to be longer than the given
* data) or if the byte array doesn't match; otherwise return the
* requested element struct.
@@ -6772,7 +6865,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2);
/**
* regulatory_set_wiphy_regd - set regdom info for self managed drivers
* @wiphy: the wireless device we want to process the regulatory domain on
- * @rd: the regulatory domain informatoin to use for this wiphy
+ * @rd: the regulatory domain information to use for this wiphy
*
* Set the regulatory domain information for self-managed wiphys, only they
* may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more
@@ -6863,7 +6956,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
* Regulatory self-managed driver can use it to proactively
*
* @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried.
- * @freq: the freqency(in MHz) to be queried.
+ * @freq: the frequency (in MHz) to be queried.
* @rule: pointer to store the wmm rule from the regulatory db.
*
* Self-managed wireless drivers can use this function to query
@@ -6946,22 +7039,6 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
gfp_t gfp);
static inline struct cfg80211_bss * __must_check
-cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
- struct ieee80211_channel *rx_channel,
- enum nl80211_bss_scan_width scan_width,
- struct ieee80211_mgmt *mgmt, size_t len,
- s32 signal, gfp_t gfp)
-{
- struct cfg80211_inform_bss data = {
- .chan = rx_channel,
- .scan_width = scan_width,
- .signal = signal,
- };
-
- return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp);
-}
-
-static inline struct cfg80211_bss * __must_check
cfg80211_inform_bss_frame(struct wiphy *wiphy,
struct ieee80211_channel *rx_channel,
struct ieee80211_mgmt *mgmt, size_t len,
@@ -6969,7 +7046,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
{
struct cfg80211_inform_bss data = {
.chan = rx_channel,
- .scan_width = NL80211_BSS_CHAN_WIDTH_20,
.signal = signal,
};
@@ -7072,26 +7148,6 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
gfp_t gfp);
static inline struct cfg80211_bss * __must_check
-cfg80211_inform_bss_width(struct wiphy *wiphy,
- struct ieee80211_channel *rx_channel,
- enum nl80211_bss_scan_width scan_width,
- enum cfg80211_bss_frame_type ftype,
- const u8 *bssid, u64 tsf, u16 capability,
- u16 beacon_interval, const u8 *ie, size_t ielen,
- s32 signal, gfp_t gfp)
-{
- struct cfg80211_inform_bss data = {
- .chan = rx_channel,
- .scan_width = scan_width,
- .signal = signal,
- };
-
- return cfg80211_inform_bss_data(wiphy, &data, ftype, bssid, tsf,
- capability, beacon_interval, ie, ielen,
- gfp);
-}
-
-static inline struct cfg80211_bss * __must_check
cfg80211_inform_bss(struct wiphy *wiphy,
struct ieee80211_channel *rx_channel,
enum cfg80211_bss_frame_type ftype,
@@ -7101,7 +7157,6 @@ cfg80211_inform_bss(struct wiphy *wiphy,
{
struct cfg80211_inform_bss data = {
.chan = rx_channel,
- .scan_width = NL80211_BSS_CHAN_WIDTH_20,
.signal = signal,
};
@@ -7186,19 +7241,6 @@ void cfg80211_bss_iter(struct wiphy *wiphy,
void *data),
void *iter_data);
-static inline enum nl80211_bss_scan_width
-cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef)
-{
- switch (chandef->width) {
- case NL80211_CHAN_WIDTH_5:
- return NL80211_BSS_CHAN_WIDTH_5;
- case NL80211_CHAN_WIDTH_10:
- return NL80211_BSS_CHAN_WIDTH_10;
- default:
- return NL80211_BSS_CHAN_WIDTH_20;
- }
-}
-
/**
* cfg80211_rx_mlme_mgmt - notification of processed MLME management frame
* @dev: network device
@@ -7231,7 +7273,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len);
void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
/**
- * struct cfg80211_rx_assoc_resp - association response data
+ * struct cfg80211_rx_assoc_resp_data - association response data
* @bss: the BSS that association was requested with, ownership of the pointer
* moves to cfg80211 in the call to cfg80211_rx_assoc_resp()
* @buf: (Re)Association Response frame (header + body)
@@ -7246,7 +7288,7 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
* @links.status: Set this (along with a BSS pointer) for links that
* were rejected by the AP.
*/
-struct cfg80211_rx_assoc_resp {
+struct cfg80211_rx_assoc_resp_data {
const u8 *buf;
size_t len;
const u8 *req_ies;
@@ -7263,7 +7305,7 @@ struct cfg80211_rx_assoc_resp {
/**
* cfg80211_rx_assoc_resp - notification of processed association response
* @dev: network device
- * @data: association response data, &struct cfg80211_rx_assoc_resp
+ * @data: association response data, &struct cfg80211_rx_assoc_resp_data
*
* After being asked to associate via cfg80211_ops::assoc() the driver must
* call either this function or cfg80211_auth_timeout().
@@ -7271,7 +7313,7 @@ struct cfg80211_rx_assoc_resp {
* This function may sleep. The caller must hold the corresponding wdev's mutex.
*/
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_rx_assoc_resp *data);
+ struct cfg80211_rx_assoc_resp_data *data);
/**
* struct cfg80211_assoc_failure - association failure data
@@ -7990,7 +8032,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
* cfg80211_port_authorized - notify cfg80211 of successful security association
*
* @dev: network device
- * @bssid: the BSSID of the AP
+ * @peer_addr: BSSID of the AP/P2P GO in case of STA/GC or STA/GC MAC address
+ * in case of AP/P2P GO
* @td_bitmap: transition disable policy
* @td_bitmap_len: Length of transition disable policy
* @gfp: allocation flags
@@ -8001,8 +8044,11 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
* should be preceded with a call to cfg80211_connect_result(),
* cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to
* indicate the 802.11 association.
+ * This function can also be called by AP/P2P GO driver that supports
+ * authentication offload. In this case the peer_mac passed is that of
+ * associated STA/GC.
*/
-void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr,
const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp);
/**
@@ -8591,7 +8637,7 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
* @link_id: the link ID for MLO, must be 0 for non-MLO
* @punct_bitmap: the new puncturing bitmap
*
- * Caller must acquire wdev_lock, therefore must only be called from sleepable
+ * Caller must hold wiphy mutex, therefore must only be called from sleepable
* driver context!
*/
void cfg80211_ch_switch_notify(struct net_device *dev,
@@ -8831,6 +8877,18 @@ static inline size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
}
/**
+ * ieee80211_fragment_element - fragment the last element in skb
+ * @skb: The skbuf that the element was added to
+ * @len_pos: Pointer to length of the element to fragment
+ * @frag_id: The element ID to use for fragments
+ *
+ * This function fragments all data after @len_pos, adding fragmentation
+ * elements with the given ID as appropriate. The SKB will grow in size
+ * accordingly.
+ */
+void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id);
+
+/**
* cfg80211_report_wowlan_wakeup - report wakeup from WoWLAN
* @wdev: the wireless device reporting the wakeup
* @wakeup: the wakeup report
@@ -9079,9 +9137,9 @@ bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
/**
* cfg80211_assoc_comeback - notification of association that was
- * temporarly rejected with a comeback
+ * temporarily rejected with a comeback
* @netdev: network device
- * @ap_addr: AP (MLD) address that rejected the assocation
+ * @ap_addr: AP (MLD) address that rejected the association
* @timeout: timeout interval value TUs.
*
* this function may sleep. the caller must hold the corresponding wdev's mutex.
@@ -9245,4 +9303,50 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
*/
void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
+#ifdef CONFIG_CFG80211_DEBUGFS
+/**
+ * wiphy_locked_debugfs_read - do a locked read in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being read
+ * @buf: the buffer to fill and then read from
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy to
+ * @count: read count
+ * @ppos: read position
+ * @handler: the read handler to call (under wiphy lock)
+ * @data: additional data to pass to the read handler
+ */
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ char __user *userbuf, size_t count,
+ loff_t *ppos,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t bufsize,
+ void *data),
+ void *data);
+
+/**
+ * wiphy_locked_debugfs_write - do a locked write in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being written to
+ * @buf: the buffer to copy the user data to
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy from
+ * @count: read count
+ * @handler: the write handler to call (under wiphy lock)
+ * @data: additional data to pass to the write handler
+ */
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ const char __user *userbuf, size_t count,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data),
+ void *data);
+#endif
+
#endif /* __NET_CFG80211_H */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 29fd1b4ee6..9ac394bdfb 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -150,6 +150,7 @@ struct devlink_port {
struct devlink_rate *devlink_rate;
struct devlink_linecard *linecard;
+ u32 rel_index;
};
struct devlink_port_new_attrs {
@@ -1697,6 +1698,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
+int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
+ struct devlink *fn_devlink);
struct devlink_rate *
devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
struct devlink_rate *parent);
@@ -1717,8 +1720,8 @@ void devlink_linecard_provision_clear(struct devlink_linecard *linecard);
void devlink_linecard_provision_fail(struct devlink_linecard *linecard);
void devlink_linecard_activate(struct devlink_linecard *linecard);
void devlink_linecard_deactivate(struct devlink_linecard *linecard);
-void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
- struct devlink *nested_devlink);
+int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard,
+ struct devlink *nested_devlink);
int devl_sb_register(struct devlink *devlink, unsigned int sb_index,
u32 size, u16 ingress_pools_count,
u16 egress_pools_count, u16 ingress_tc_count,
@@ -1851,36 +1854,36 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req,
const char *version_value,
enum devlink_info_version_type version_type);
-int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
-int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
-
-int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name);
-int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
-
-int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name);
-int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
-int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name);
-int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg);
-
-int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
-int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
-int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
- u16 value_len);
-
-int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
- bool value);
-int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u8 value);
-int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u32 value);
-int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u64 value);
-int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const char *value);
-int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const void *value, u32 value_len);
+void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
+void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
+
+void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name);
+void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
+
+void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name);
+void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
+void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name);
+void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg);
+
+void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
+void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
+void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len);
+
+void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value);
+void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value);
+void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value);
+void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value);
+void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value);
+void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u32 value_len);
struct devlink_health_reporter *
devl_port_health_reporter_create(struct devlink_port *port,
@@ -1918,6 +1921,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
void
devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
+int devl_nested_devlink_set(struct devlink *devlink,
+ struct devlink *nested_devlink);
bool devlink_is_reload_failed(const struct devlink *devlink);
void devlink_remote_reload_actions_performed(struct devlink *devlink,
enum devlink_reload_limit limit,
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a587e83fc1..3c70ad53a4 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -20,9 +20,14 @@
FN(IP_NOPROTO) \
FN(SOCKET_RCVBUFF) \
FN(PROTO_MEM) \
+ FN(TCP_AUTH_HDR) \
FN(TCP_MD5NOTFOUND) \
FN(TCP_MD5UNEXPECTED) \
FN(TCP_MD5FAILURE) \
+ FN(TCP_AONOTFOUND) \
+ FN(TCP_AOUNEXPECTED) \
+ FN(TCP_AOKEYNOTFOUND) \
+ FN(TCP_AOFAILURE) \
FN(SOCKET_BACKLOG) \
FN(TCP_FLAGS) \
FN(TCP_ZEROWINDOW) \
@@ -80,6 +85,7 @@
FN(IPV6_NDISC_BAD_OPTIONS) \
FN(IPV6_NDISC_NS_OTHERHOST) \
FN(QUEUE_PURGE) \
+ FN(TC_ERROR) \
FNe(MAX)
/**
@@ -142,6 +148,11 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_PROTO_MEM,
/**
+ * @SKB_DROP_REASON_TCP_AUTH_HDR: TCP-MD5 or TCP-AO hashes are met
+ * twice or set incorrectly.
+ */
+ SKB_DROP_REASON_TCP_AUTH_HDR,
+ /**
* @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected,
* corresponding to LINUX_MIB_TCPMD5NOTFOUND
*/
@@ -157,6 +168,26 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_TCP_MD5FAILURE,
/**
+ * @SKB_DROP_REASON_TCP_AONOTFOUND: no TCP-AO hash and one was expected,
+ * corresponding to LINUX_MIB_TCPAOREQUIRED
+ */
+ SKB_DROP_REASON_TCP_AONOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_AOUNEXPECTED: TCP-AO hash is present and it
+ * was not expected, corresponding to LINUX_MIB_TCPAOKEYNOTFOUND
+ */
+ SKB_DROP_REASON_TCP_AOUNEXPECTED,
+ /**
+ * @SKB_DROP_REASON_TCP_AOKEYNOTFOUND: TCP-AO key is unknown,
+ * corresponding to LINUX_MIB_TCPAOKEYNOTFOUND
+ */
+ SKB_DROP_REASON_TCP_AOKEYNOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TCP_AOFAILURE: TCP-AO hash is wrong,
+ * corresponding to LINUX_MIB_TCPAOBAD
+ */
+ SKB_DROP_REASON_TCP_AOFAILURE,
+ /**
* @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog (
* see LINUX_MIB_TCPBACKLOGDROP)
*/
@@ -345,6 +376,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
/** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
SKB_DROP_REASON_QUEUE_PURGE,
+ /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */
+ SKB_DROP_REASON_TC_ERROR,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0b9c6aa270..82135fbdb1 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -102,11 +102,11 @@ struct dsa_device_ops {
const char *name;
enum dsa_tag_protocol proto;
/* Some tagging protocols either mangle or shift the destination MAC
- * address, in which case the DSA master would drop packets on ingress
+ * address, in which case the DSA conduit would drop packets on ingress
* if what it understands out of the destination MAC address is not in
* its RX filter.
*/
- bool promisc_on_master;
+ bool promisc_on_conduit;
};
struct dsa_lag {
@@ -236,12 +236,12 @@ struct dsa_bridge {
};
struct dsa_port {
- /* A CPU port is physically connected to a master device.
- * A user port exposed to userspace has a slave device.
+ /* A CPU port is physically connected to a conduit device. A user port
+ * exposes a network device to user-space, called 'user' here.
*/
union {
- struct net_device *master;
- struct net_device *slave;
+ struct net_device *conduit;
+ struct net_device *user;
};
/* Copy of the tagging protocol operations, for quicker access
@@ -249,7 +249,7 @@ struct dsa_port {
*/
const struct dsa_device_ops *tag_ops;
- /* Copies for faster access in master receive hot path */
+ /* Copies for faster access in conduit receive hot path */
struct dsa_switch_tree *dst;
struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
@@ -281,9 +281,9 @@ struct dsa_port {
u8 lag_tx_enabled:1;
- /* Master state bits, valid only on CPU ports */
- u8 master_admin_up:1;
- u8 master_oper_up:1;
+ /* conduit state bits, valid only on CPU ports */
+ u8 conduit_admin_up:1;
+ u8 conduit_oper_up:1;
/* Valid only on user ports */
u8 cpu_port_in_lag:1;
@@ -303,7 +303,7 @@ struct dsa_port {
struct list_head list;
/*
- * Original copy of the master netdev ethtool_ops
+ * Original copy of the conduit netdev ethtool_ops
*/
const struct ethtool_ops *orig_ethtool_ops;
@@ -452,10 +452,10 @@ struct dsa_switch {
const struct dsa_switch_ops *ops;
/*
- * Slave mii_bus and devices for the individual ports.
+ * User mii_bus and devices for the individual ports.
*/
u32 phys_mii_mask;
- struct mii_bus *slave_mii_bus;
+ struct mii_bus *user_mii_bus;
/* Ageing Time limits in msecs */
unsigned int ageing_time_min;
@@ -520,10 +520,10 @@ static inline bool dsa_port_is_unused(struct dsa_port *dp)
return dp->type == DSA_PORT_TYPE_UNUSED;
}
-static inline bool dsa_port_master_is_operational(struct dsa_port *dp)
+static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp)
{
- return dsa_port_is_cpu(dp) && dp->master_admin_up &&
- dp->master_oper_up;
+ return dsa_port_is_cpu(dp) && dp->conduit_admin_up &&
+ dp->conduit_oper_up;
}
static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p)
@@ -713,12 +713,12 @@ static inline bool dsa_port_offloads_lag(struct dsa_port *dp,
return dsa_port_lag_dev_get(dp) == lag->dev;
}
-static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp)
+static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp)
{
if (dp->cpu_port_in_lag)
return dsa_port_lag_dev_get(dp->cpu_dp);
- return dp->cpu_dp->master;
+ return dp->cpu_dp->conduit;
}
static inline
@@ -732,7 +732,7 @@ struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp)
else if (dp->hsr_dev)
return dp->hsr_dev;
- return dp->slave;
+ return dp->user;
}
static inline struct net_device *
@@ -834,9 +834,9 @@ struct dsa_switch_ops {
int (*connect_tag_protocol)(struct dsa_switch *ds,
enum dsa_tag_protocol proto);
- int (*port_change_master)(struct dsa_switch *ds, int port,
- struct net_device *master,
- struct netlink_ext_ack *extack);
+ int (*port_change_conduit)(struct dsa_switch *ds, int port,
+ struct net_device *conduit,
+ struct netlink_ext_ack *extack);
/* Optional switch-wide initialization and destruction methods */
int (*setup)(struct dsa_switch *ds);
@@ -969,6 +969,16 @@ struct dsa_switch_ops {
struct phy_device *phy);
void (*port_disable)(struct dsa_switch *ds, int port);
+
+ /*
+ * Notification for MAC address changes on user ports. Drivers can
+ * currently only veto operations. They should not use the method to
+ * program the hardware, since the operation is not rolled back in case
+ * of other errors.
+ */
+ int (*port_set_mac_address)(struct dsa_switch *ds, int port,
+ const unsigned char *addr);
+
/*
* Compatibility between device trees defining multiple CPU ports and
* drivers which are not OK to use by default the numerically smallest
@@ -1198,7 +1208,8 @@ struct dsa_switch_ops {
* HSR integration
*/
int (*port_hsr_join)(struct dsa_switch *ds, int port,
- struct net_device *hsr);
+ struct net_device *hsr,
+ struct netlink_ext_ack *extack);
int (*port_hsr_leave)(struct dsa_switch *ds, int port,
struct net_device *hsr);
@@ -1222,11 +1233,11 @@ struct dsa_switch_ops {
int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid);
/*
- * DSA master tracking operations
+ * DSA conduit tracking operations
*/
- void (*master_state_change)(struct dsa_switch *ds,
- const struct net_device *master,
- bool operational);
+ void (*conduit_state_change)(struct dsa_switch *ds,
+ const struct net_device *conduit,
+ bool operational);
};
#define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \
@@ -1363,9 +1374,9 @@ static inline int dsa_switch_resume(struct dsa_switch *ds)
#endif /* CONFIG_PM_SLEEP */
#if IS_ENABLED(CONFIG_NET_DSA)
-bool dsa_slave_dev_check(const struct net_device *dev);
+bool dsa_user_dev_check(const struct net_device *dev);
#else
-static inline bool dsa_slave_dev_check(const struct net_device *dev)
+static inline bool dsa_user_dev_check(const struct net_device *dev)
{
return false;
}
diff --git a/include/net/dsa_stubs.h b/include/net/dsa_stubs.h
index 361811750a..6f384897f2 100644
--- a/include/net/dsa_stubs.h
+++ b/include/net/dsa_stubs.h
@@ -13,14 +13,14 @@
extern const struct dsa_stubs *dsa_stubs;
struct dsa_stubs {
- int (*master_hwtstamp_validate)(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack);
+ int (*conduit_hwtstamp_validate)(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack);
};
-static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack)
+static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
if (!netdev_uses_dsa(dev))
return 0;
@@ -29,18 +29,18 @@ static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
* netdev_uses_dsa() returns true, the dsa_core module is still
* registered, and so, dsa_unregister_stubs() couldn't have run.
* For netdev_uses_dsa() to start returning false, it would imply that
- * dsa_master_teardown() has executed, which requires rtnl_lock().
+ * dsa_conduit_teardown() has executed, which requires rtnl_lock().
*/
ASSERT_RTNL();
- return dsa_stubs->master_hwtstamp_validate(dev, config, extack);
+ return dsa_stubs->conduit_hwtstamp_validate(dev, config, extack);
}
#else
-static inline int dsa_master_hwtstamp_validate(struct net_device *dev,
- const struct kernel_hwtstamp_config *config,
- struct netlink_ext_ack *extack)
+static inline int dsa_conduit_hwtstamp_validate(struct net_device *dev,
+ const struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
return 0;
}
diff --git a/include/net/dst.h b/include/net/dst.h
index 78884429de..f5dfc8fb7b 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -222,13 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr
return msecs_to_jiffies(dst_metric(dst, metric));
}
-static inline u32
-dst_allfrag(const struct dst_entry *dst)
-{
- int ret = dst_feature(dst, RTAX_FEATURE_ALLFRAG);
- return ret;
-}
-
static inline int
dst_metric_locked(const struct dst_entry *dst, int metric)
{
@@ -392,10 +385,10 @@ static inline int dst_discard(struct sk_buff *skb)
{
return dst_discard_out(&init_net, skb->sk, skb);
}
-void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_obsolete, unsigned short flags);
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
- struct net_device *dev, int initial_ref, int initial_obsolete,
+ struct net_device *dev, int initial_obsolete,
unsigned short flags);
struct dst_entry *dst_destroy(struct dst_entry *dst);
void dst_dev_put(struct dst_entry *dst);
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 9efa9a59e8..314087a5e1 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -333,7 +333,7 @@ struct flow_action_entry {
struct flow_action {
unsigned int num_entries;
- struct flow_action_entry entries[];
+ struct flow_action_entry entries[] __counted_by(num_entries);
};
static inline bool flow_action_has_entries(const struct flow_action *action)
diff --git a/include/net/gro.h b/include/net/gro.h
index 88644b3ca6..b435f0ddbf 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -41,7 +41,7 @@ struct napi_gro_cb {
/* Number of segments aggregated. */
u16 count;
- /* Used in ipv6_gro_receive() and foo-over-udp */
+ /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
u16 proto;
/* Used in napi_gro_cb::free */
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 2338f8d2a8..925bac726a 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -539,6 +539,12 @@ enum ieee80211_radiotap_eht_usig_common {
IEEE80211_RADIOTAP_EHT_USIG_COMMON_VALIDATE_BITS_OK = 0x00000080,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_PHY_VER = 0x00007000,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW = 0x00038000,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_20MHZ = 0,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_40MHZ = 1,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_80MHZ = 2,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_160MHZ = 3,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_1 = 4,
+ IEEE80211_RADIOTAP_EHT_USIG_COMMON_BW_320MHZ_2 = 5,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_UL_DL = 0x00040000,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_BSS_COLOR = 0x01f80000,
IEEE80211_RADIOTAP_EHT_USIG_COMMON_TXOP = 0xfe000000,
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 31bf475eca..f07642264c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -85,7 +85,7 @@ struct ip6_sf_socklist {
unsigned int sl_max;
unsigned int sl_count;
struct rcu_head rcu;
- struct in6_addr sl_addr[];
+ struct in6_addr sl_addr[] __counted_by(sl_max);
};
#define IP6_SFBLOCK 10 /* allocate this many at once */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 01a73bf74f..9ab4bf704e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -44,7 +44,6 @@ struct inet_connection_sock_af_ops {
struct request_sock *req_unhash,
bool *own_req);
u16 net_header_len;
- u16 net_frag_header_len;
u16 sockaddr_len;
int (*setsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
@@ -114,7 +113,10 @@ struct inet_connection_sock {
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
__u8 retry; /* Number of attempts */
- __u32 ato; /* Predicted tick of soft clock */
+ #define ATO_BITS 8
+ __u32 ato:ATO_BITS, /* Predicted tick of soft clock */
+ lrcv_flowlabel:20, /* last received ipv6 flowlabel */
+ unused:4;
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
@@ -325,11 +327,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 1
-
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
- inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
+ inet_csk(sk)->icsk_ack.pingpong =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
}
static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
@@ -339,7 +340,16 @@ static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
{
- return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ return inet_csk(sk)->icsk_ack.pingpong >=
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
+}
+
+static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ack.pingpong < U8_MAX)
+ icsk->icsk_ack.pingpong++;
}
static inline bool inet_csk_has_ulp(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 2790ba58ff..8d5fe15b0f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -244,7 +244,6 @@ struct inet_sock {
};
#define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */
-#define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */
enum {
INET_FLAGS_PKTINFO = 0,
@@ -268,6 +267,16 @@ enum {
INET_FLAGS_NODEFRAG = 17,
INET_FLAGS_BIND_ADDRESS_NO_PORT = 18,
INET_FLAGS_DEFER_CONNECT = 19,
+ INET_FLAGS_MC6_LOOP = 20,
+ INET_FLAGS_RECVERR6_RFC4884 = 21,
+ INET_FLAGS_MC6_ALL = 22,
+ INET_FLAGS_AUTOFLOWLABEL_SET = 23,
+ INET_FLAGS_AUTOFLOWLABEL = 24,
+ INET_FLAGS_DONTFRAG = 25,
+ INET_FLAGS_RECVERR6 = 26,
+ INET_FLAGS_REPFLOW = 27,
+ INET_FLAGS_RTALERT_ISOLATE = 28,
+ INET_FLAGS_SNDFLOW = 29,
};
/* cmsg flags for inet */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 4a8e578405..b14999ff55 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -67,7 +67,8 @@ struct inet_timewait_sock {
/* And these are ours. */
unsigned int tw_transparent : 1,
tw_flowlabel : 20,
- tw_pad : 3, /* 3 bits hole */
+ tw_usec_ts : 1,
+ tw_pad : 2, /* 2 bits hole */
tw_tos : 8;
u32 tw_txhash;
u32 tw_priority;
diff --git a/include/net/ip.h b/include/net/ip.h
index 3489a1cca5..6390dd08da 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -258,7 +258,7 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
{
- return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
+ return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos));
}
/* datagram.c */
@@ -434,19 +434,22 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
static inline bool ip_sk_accept_pmtu(const struct sock *sk)
{
- return inet_sk(sk)->pmtudisc != IP_PMTUDISC_INTERFACE &&
- inet_sk(sk)->pmtudisc != IP_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);
+
+ return pmtudisc != IP_PMTUDISC_INTERFACE &&
+ pmtudisc != IP_PMTUDISC_OMIT;
}
static inline bool ip_sk_use_pmtu(const struct sock *sk)
{
- return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
+ return READ_ONCE(inet_sk(sk)->pmtudisc) < IP_PMTUDISC_PROBE;
}
static inline bool ip_sk_ignore_df(const struct sock *sk)
{
- return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
- inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet_sk(sk)->pmtudisc);
+
+ return pmtudisc < IP_PMTUDISC_DO || pmtudisc == IP_PMTUDISC_OMIT;
}
static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
@@ -758,7 +761,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
* Functions provided by ip_sockglue.c
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst);
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b32539bb0f..28b0657902 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -53,13 +53,12 @@ struct route_info {
*/
static inline int rt6_srcprefs2flags(unsigned int srcprefs)
{
- /* No need to bitmask because srcprefs have only 3 bits. */
- return srcprefs << 3;
+ return (srcprefs & IPV6_PREFER_SRC_MASK) << 3;
}
static inline unsigned int rt6_flags2srcprefs(int flags)
{
- return (flags >> 3) & 7;
+ return (flags >> 3) & IPV6_PREFER_SRC_MASK;
}
static inline bool rt6_need_strict(const struct in6_addr *daddr)
@@ -266,7 +265,7 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
const struct dst_entry *dst = skb_dst(skb);
unsigned int mtu;
- if (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) {
+ if (np && READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE) {
mtu = READ_ONCE(dst->dev->mtu);
mtu -= lwtunnel_headroom(dst->lwtstate, mtu);
} else {
@@ -277,14 +276,18 @@ static inline unsigned int ip6_skb_dst_mtu(const struct sk_buff *skb)
static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
{
- return inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_INTERFACE &&
- inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+ return pmtudisc != IPV6_PMTUDISC_INTERFACE &&
+ pmtudisc != IPV6_PMTUDISC_OMIT;
}
static inline bool ip6_sk_ignore_df(const struct sock *sk)
{
- return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
- inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
+ u8 pmtudisc = READ_ONCE(inet6_sk(sk)->pmtudisc);
+
+ return pmtudisc < IPV6_PMTUDISC_DO ||
+ pmtudisc == IPV6_PMTUDISC_OMIT;
}
static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 15de07d365..d4667b7797 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -157,7 +157,7 @@ struct fib_info {
bool pfsrc_removed;
struct nexthop *nh;
struct rcu_head rcu;
- struct fib_nh fib_nh[];
+ struct fib_nh fib_nh[] __counted_by(fib_nhs);
};
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c6932d1a3f..78d38dd88a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -373,12 +373,12 @@ static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
}
static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
- const struct ipv6_pinfo *np)
+ const struct sock *sk)
{
*ipc6 = (struct ipcm6_cookie) {
.hlimit = -1,
- .tclass = np->tclass,
- .dontfrag = np->dontfrag,
+ .tclass = inet6_sk(sk)->tclass,
+ .dontfrag = inet6_test_bit(DONTFRAG, sk),
};
}
@@ -428,7 +428,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
@@ -914,9 +914,9 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
int hlimit;
if (ipv6_addr_is_multicast(&fl6->daddr))
- hlimit = np->mcast_hops;
+ hlimit = READ_ONCE(np->mcast_hops);
else
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
return hlimit;
@@ -1133,12 +1133,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
bool connected);
-struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net, struct socket *sock,
- struct in6_addr *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol, bool use_cache);
struct dst_entry *ip6_blackhole_route(struct net *net,
struct dst_entry *orig_dst);
@@ -1303,15 +1297,16 @@ static inline int ip6_sock_set_v6only(struct sock *sk)
static inline void ip6_sock_set_recverr(struct sock *sk)
{
- lock_sock(sk);
- inet6_sk(sk)->recverr = true;
- release_sock(sk);
+ inet6_set_bit(RECVERR6, sk);
}
-static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+#define IPV6_PREFER_SRC_MASK (IPV6_PREFER_SRC_TMP | IPV6_PREFER_SRC_PUBLIC | \
+ IPV6_PREFER_SRC_COA)
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
{
+ unsigned int prefmask = ~IPV6_PREFER_SRC_MASK;
unsigned int pref = 0;
- unsigned int prefmask = ~0;
/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
switch (val & (IPV6_PREFER_SRC_PUBLIC |
@@ -1361,20 +1356,11 @@ static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
return -EINVAL;
}
- inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+ WRITE_ONCE(inet6_sk(sk)->srcprefs,
+ (READ_ONCE(inet6_sk(sk)->srcprefs) & prefmask) | pref);
return 0;
}
-static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
-{
- int ret;
-
- lock_sock(sk);
- ret = __ip6_sock_set_addr_preferences(sk, val);
- release_sock(sk);
- return ret;
-}
-
static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
{
lock_sock(sk);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index c48186bf47..485c39a898 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
#if IS_ENABLED(CONFIG_XFRM)
void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+ struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb);
int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
#endif
@@ -85,6 +88,11 @@ struct ipv6_bpf_stub {
sockptr_t optval, unsigned int optlen);
int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
sockptr_t optval, sockptr_t optlen);
+ int (*ipv6_dev_get_saddr)(struct net *net,
+ const struct net_device *dst_dev,
+ const struct in6_addr *daddr,
+ unsigned int prefs,
+ struct in6_addr *saddr);
};
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7c707358d1..580781ff9d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -79,7 +79,7 @@
* helpers for sanity checking. Drivers must ensure all work added onto the
* mac80211 workqueue should be cancelled on the driver stop() callback.
*
- * mac80211 will flushed the workqueue upon interface removal and during
+ * mac80211 will flush the workqueue upon interface removal and during
* suspend.
*
* All work performed on the mac80211 workqueue must not acquire the RTNL lock.
@@ -138,7 +138,7 @@
* field to the frame RX timestamp and report the ack TX timestamp in the
* ieee80211_rx_status struct.
*
- * Similarly, To report hardware timestamps for Timing Measurement or Fine
+ * Similarly, to report hardware timestamps for Timing Measurement or Fine
* Timing Measurement frame TX, the driver should set the SKB's hwtstamp field
* to the frame TX timestamp and report the ack RX timestamp in the
* ieee80211_tx_status struct.
@@ -341,6 +341,7 @@ struct ieee80211_vif_chanctx_switch {
* @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
* status changed.
* @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed.
+ * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed.
*/
enum ieee80211_bss_change {
BSS_CHANGED_ASSOC = 1<<0,
@@ -376,6 +377,7 @@ enum ieee80211_bss_change {
BSS_CHANGED_FILS_DISCOVERY = 1<<30,
BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32),
+ BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
/* when adding here, make sure to change ieee80211_reconfig */
};
@@ -643,9 +645,7 @@ struct ieee80211_fils_discovery {
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
* @eht_puncturing: bitmap to indicate which channels are punctured in this BSS
- * @csa_active: marks whether a channel switch is going on. Internally it is
- * write-protected by sdata_lock and local->mtx so holding either is fine
- * for read access.
+ * @csa_active: marks whether a channel switch is going on.
* @csa_punct_bitmap: new puncturing bitmap for channel switch
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
@@ -653,9 +653,7 @@ struct ieee80211_fils_discovery {
* path needing to access it; even though the netdev carrier will always
* be off when it is %NULL there can still be races and packets could be
* processed after it switches back to %NULL.
- * @color_change_active: marks whether a color change is ongoing. Internally it is
- * write-protected by sdata_lock and local->mtx so holding either is fine
- * for read access.
+ * @color_change_active: marks whether a color change is ongoing.
* @color_change_color: the bss color that will be used after the change.
* @ht_ldpc: in AP mode, indicates interface has HT LDPC capability.
* @vht_ldpc: in AP mode, indicates interface has VHT LDPC capability.
@@ -1082,6 +1080,11 @@ struct ieee80211_tx_rate {
#define IEEE80211_MAX_TX_RETRY 31
+static inline bool ieee80211_rate_valid(struct ieee80211_tx_rate *rate)
+{
+ return rate->idx >= 0 && rate->count > 0;
+}
+
static inline void ieee80211_rate_set_vht(struct ieee80211_tx_rate *rate,
u8 mcs, u8 nss)
{
@@ -1115,7 +1118,9 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
* not valid if the interface is an MLD since we won't know which
* link the frame will be transmitted on
* @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
- * @ack_frame_id: internal frame ID for TX status, used internally
+ * @status_data: internal data for TX status handling, assigned privately,
+ * see also &enum ieee80211_status_data for the internal documentation
+ * @status_data_idr: indicates status data is IDR allocated ID for ack frame
* @tx_time_est: TX time estimate in units of 4us, used internally
* @control: union part for control data
* @control.rates: TX rates array to try
@@ -1155,10 +1160,11 @@ struct ieee80211_tx_info {
/* common information */
u32 flags;
u32 band:3,
- ack_frame_id:13,
+ status_data_idr:1,
+ status_data:13,
hw_queue:4,
tx_time_est:10;
- /* 2 free bits */
+ /* 1 free bit */
union {
struct {
@@ -1172,7 +1178,11 @@ struct ieee80211_tx_info {
u8 use_cts_prot:1;
u8 short_preamble:1;
u8 skip_table:1;
- /* 2 bytes free */
+
+ /* for injection only (bitmap) */
+ u8 antennas:2;
+
+ /* 14 bits free */
};
/* only needed before rate control */
unsigned long jiffies;
@@ -1757,15 +1767,15 @@ struct ieee80211_channel_switch {
* @IEEE80211_VIF_GET_NOA_UPDATE: request to handle NOA attributes
* and send P2P_PS notification to the driver if NOA changed, even
* this is not pure P2P vif.
- * @IEEE80211_VIF_DISABLE_SMPS_OVERRIDE: disable user configuration of
- * SMPS mode via debugfs.
+ * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is
+ * enabled for the interface.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1),
IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2),
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
- IEEE80211_VIF_DISABLE_SMPS_OVERRIDE = BIT(4),
+ IEEE80211_VIF_EML_ACTIVE = BIT(4),
};
@@ -1938,7 +1948,7 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif)
for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \
if ((!(vif)->active_links || \
(vif)->active_links & BIT(link_id)) && \
- (link = rcu_dereference((vif)->link_conf[link_id])))
+ (link = link_conf_dereference_check(vif, link_id)))
static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
{
@@ -1971,22 +1981,18 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev);
*/
struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
-/**
- * lockdep_vif_mutex_held - for lockdep checks on link poiners
- * @vif: the interface to check
- */
-static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif)
+static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif)
{
- return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->mtx);
+ return lockdep_is_held(&ieee80211_vif_to_wdev(vif)->wiphy->mtx);
}
#define link_conf_dereference_protected(vif, link_id) \
rcu_dereference_protected((vif)->link_conf[link_id], \
- lockdep_vif_mutex_held(vif))
+ lockdep_vif_wiphy_mutex_held(vif))
#define link_conf_dereference_check(vif, link_id) \
rcu_dereference_check((vif)->link_conf[link_id], \
- lockdep_vif_mutex_held(vif))
+ lockdep_vif_wiphy_mutex_held(vif))
/**
* enum ieee80211_key_flags - key flags
@@ -2393,7 +2399,7 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \
if ((!(vif)->active_links || \
(vif)->active_links & BIT(link_id)) && \
- ((link_sta) = link_sta_dereference_protected(sta, link_id)))
+ ((link_sta) = link_sta_dereference_check(sta, link_id)))
/**
* enum sta_notify_cmd - sta notify command
@@ -3056,7 +3062,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* The set_key() call for the %SET_KEY command should return 0 if
* the key is now in use, -%EOPNOTSUPP or -%ENOSPC if it couldn't be
* added; if you return 0 then hw_key_idx must be assigned to the
- * hardware key index, you are free to use the full u8 range.
+ * hardware key index. You are free to use the full u8 range.
*
* Note that in the case that the @IEEE80211_HW_SW_CRYPTO_CONTROL flag is
* set, mac80211 will not automatically fall back to software crypto if
@@ -3066,7 +3072,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* When the cmd is %DISABLE_KEY then it must succeed.
*
* Note that it is permissible to not decrypt a frame even if a key
- * for it has been uploaded to hardware, the stack will not make any
+ * for it has been uploaded to hardware. The stack will not make any
* decision based on whether a key has been uploaded or not but rather
* based on the receive flags.
*
@@ -3081,7 +3087,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* The update_tkip_key() call updates the driver with the new phase 1 key.
* This happens every time the iv16 wraps around (every 65536 packets). The
* set_key() call will happen only once for each key (unless the AP did
- * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is
+ * rekeying); it will not include a valid phase 1 key. The valid phase 1 key is
* provided by update_tkip_key only. The trigger that makes mac80211 call this
* handler is software decryption with wrap around of iv16.
*
@@ -3108,7 +3114,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
*
* mac80211 has support for various powersave implementations.
*
- * First, it can support hardware that handles all powersaving by itself,
+ * First, it can support hardware that handles all powersaving by itself;
* such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS hardware
* flag. In that case, it will be told about the desired powersave mode
* with the %IEEE80211_CONF_PS flag depending on the association status.
@@ -3133,12 +3139,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still
* required to pass up beacons. The hardware is still required to handle
* waking up for multicast traffic; if it cannot the driver must handle that
- * as best as it can, mac80211 is too slow to do that.
+ * as best as it can; mac80211 is too slow to do that.
*
* Dynamic powersave is an extension to normal powersave in which the
* hardware stays awake for a user-specified period of time after sending a
* frame so that reply frames need not be buffered and therefore delayed to
- * the next wakeup. It's compromise of getting good enough latency when
+ * the next wakeup. It's a compromise of getting good enough latency when
* there's data traffic and still saving significantly power in idle
* periods.
*
@@ -3203,7 +3209,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* Note that change, for the sake of simplification, also includes information
* elements appearing or disappearing from the beacon.
*
- * Some hardware supports an "ignore list" instead, just make sure nothing
+ * Some hardware supports an "ignore list" instead. Just make sure nothing
* that was requested is on the ignore list, and include commonly changing
* information element IDs in the ignore list, for example 11 (BSS load) and
* the various vendor-assigned IEs with unknown contents (128, 129, 133-136,
@@ -3214,7 +3220,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* In addition to these capabilities, hardware should support notifying the
* host of changes in the beacon RSSI. This is relevant to implement roaming
* when no traffic is flowing (when traffic is flowing we see the RSSI of
- * the received data packets). This can consist in notifying the host when
+ * the received data packets). This can consist of notifying the host when
* the RSSI changes significantly or when it drops below or rises above
* configurable thresholds. In the future these thresholds will also be
* configured by mac80211 (which gets them from userspace) to implement
@@ -3361,8 +3367,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* period starts for any reason, @release_buffered_frames is called
* with the number of frames to be released and which TIDs they are
* to come from. In this case, the driver is responsible for setting
- * the EOSP (for uAPSD) and MORE_DATA bits in the released frames,
- * to help the @more_data parameter is passed to tell the driver if
+ * the EOSP (for uAPSD) and MORE_DATA bits in the released frames.
+ * To help the @more_data parameter is passed to tell the driver if
* there is more data on other TIDs -- the TIDs to release frames
* from are ignored since mac80211 doesn't know how many frames the
* buffers for those TIDs contain.
@@ -3411,7 +3417,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
* Additionally, the driver has to then use these HW queue IDs for the queue
* management functions (ieee80211_stop_queue() et al.)
*
- * The driver is free to set up the queue mappings as needed, multiple virtual
+ * The driver is free to set up the queue mappings as needed; multiple virtual
* interfaces may map to the same hardware queues if needed. The setup has to
* happen during add_interface or change_interface callbacks. For example, a
* driver supporting station+station and station+AP modes might decide to have
@@ -3635,11 +3641,14 @@ enum ieee80211_reconfig_type {
* @success: whether the frame exchange was successful, only
* used with the mgd_complete_tx() method, and then only
* valid for auth and (re)assoc.
+ * @link_id: the link id on which the frame will be TX'ed.
+ * Only used with the mgd_prepare_tx() method.
*/
struct ieee80211_prep_tx_info {
u16 duration;
u16 subtype;
u8 success:1;
+ int link_id;
};
/**
@@ -3863,6 +3872,10 @@ struct ieee80211_prep_tx_info {
* the station. See @sta_pre_rcu_remove if needed.
* This callback can sleep.
*
+ * @vif_add_debugfs: Drivers can use this callback to add a debugfs vif
+ * directory with its files. This callback should be within a
+ * CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep.
+ *
* @link_add_debugfs: Drivers can use this callback to add debugfs files
* when a link is added to a mac80211 vif. This callback should be within
* a CONFIG_MAC80211_DEBUGFS conditional. This callback can sleep.
@@ -4067,11 +4080,15 @@ struct ieee80211_prep_tx_info {
* This callback must be atomic.
*
* @get_et_sset_count: Ethtool API to get string-set count.
+ * Note that the wiphy mutex is not held for this callback since it's
+ * expected to return a static value.
*
* @get_et_stats: Ethtool API to get a set of u64 stats.
*
* @get_et_strings: Ethtool API to get a set of strings to describe stats
* and perhaps other supported types of ethtool data-sets.
+ * Note that the wiphy mutex is not held for this callback since it's
+ * expected to return a static value.
*
* @mgd_prepare_tx: Prepare for transmitting a management frame for association
* before associated. In multi-channel scenarios, a virtual interface is
@@ -4358,6 +4375,8 @@ struct ieee80211_ops {
int (*sta_remove)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
struct ieee80211_sta *sta);
#ifdef CONFIG_MAC80211_DEBUGFS
+ void (*vif_add_debugfs)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif);
void (*link_add_debugfs)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf,
@@ -4506,7 +4525,8 @@ struct ieee80211_ops {
struct ieee80211_prep_tx_info *info);
void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ unsigned int link_id);
int (*add_chanctx)(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx);
@@ -4544,7 +4564,8 @@ struct ieee80211_ops {
struct ieee80211_channel_switch *ch_switch);
int (*post_channel_switch)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
void (*abort_channel_switch)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif);
void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw,
@@ -4890,7 +4911,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw);
* for a single hardware must be synchronized against each other. Calls to
* this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
* mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* This function must be called with BHs disabled and RCU read lock
*
@@ -4915,7 +4936,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
* for a single hardware must be synchronized against each other. Calls to
* this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
* mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* This function must be called with BHs disabled.
*
@@ -4940,7 +4961,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
* for a single hardware must be synchronized against each other. Calls to
* this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
* mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* In process context use instead ieee80211_rx_ni().
*
@@ -4960,7 +4981,7 @@ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
*
* Calls to this function, ieee80211_rx() or ieee80211_rx_ni() may not
* be mixed for a single hardware.Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* @hw: the hardware this frame came in on
* @skb: the buffer to receive, owned by mac80211 after this call
@@ -4975,7 +4996,7 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb);
*
* Calls to this function, ieee80211_rx() and ieee80211_rx_irqsafe() may
* not be mixed for a single hardware. Must not run concurrently with
- * ieee80211_tx_status() or ieee80211_tx_status_ni().
+ * ieee80211_tx_status_skb() or ieee80211_tx_status_ni().
*
* @hw: the hardware this frame came in on
* @skb: the buffer to receive, owned by mac80211 after this call
@@ -5151,7 +5172,7 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info);
/**
- * ieee80211_tx_status - transmit status callback
+ * ieee80211_tx_status_skb - transmit status callback
*
* Call this function for all transmitted frames after they have been
* transmitted. It is permissible to not call this function for
@@ -5166,13 +5187,13 @@ void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
* @hw: the hardware the frame was transmitted by
* @skb: the frame that was transmitted, owned by mac80211 after this call
*/
-void ieee80211_tx_status(struct ieee80211_hw *hw,
- struct sk_buff *skb);
+void ieee80211_tx_status_skb(struct ieee80211_hw *hw,
+ struct sk_buff *skb);
/**
* ieee80211_tx_status_ext - extended transmit status callback
*
- * This function can be used as a replacement for ieee80211_tx_status
+ * This function can be used as a replacement for ieee80211_tx_status_skb()
* in drivers that may want to provide extra information that does not
* fit into &struct ieee80211_tx_info.
*
@@ -5189,7 +5210,7 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
/**
* ieee80211_tx_status_noskb - transmit status callback without skb
*
- * This function can be used as a replacement for ieee80211_tx_status
+ * This function can be used as a replacement for ieee80211_tx_status_skb()
* in drivers that cannot reliably map tx status information back to
* specific skbs.
*
@@ -5217,9 +5238,9 @@ static inline void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
/**
* ieee80211_tx_status_ni - transmit status callback (in process context)
*
- * Like ieee80211_tx_status() but can be called in process context.
+ * Like ieee80211_tx_status_skb() but can be called in process context.
*
- * Calls to this function, ieee80211_tx_status() and
+ * Calls to this function, ieee80211_tx_status_skb() and
* ieee80211_tx_status_irqsafe() may not be mixed
* for a single hardware.
*
@@ -5230,17 +5251,17 @@ static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw,
struct sk_buff *skb)
{
local_bh_disable();
- ieee80211_tx_status(hw, skb);
+ ieee80211_tx_status_skb(hw, skb);
local_bh_enable();
}
/**
* ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback
*
- * Like ieee80211_tx_status() but can be called in IRQ context
+ * Like ieee80211_tx_status_skb() but can be called in IRQ context
* (internally defers to a tasklet.)
*
- * Calls to this function, ieee80211_tx_status() and
+ * Calls to this function, ieee80211_tx_status_skb() and
* ieee80211_tx_status_ni() may not be mixed for a single hardware.
*
* @hw: the hardware the frame was transmitted by
@@ -6542,11 +6563,14 @@ void ieee80211_radar_detected(struct ieee80211_hw *hw);
* ieee80211_chswitch_done - Complete channel switch process
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @success: make the channel switch successful or not
+ * @link_id: the link_id on which the switch was done. Ignored if success is
+ * false.
*
* Complete the channel switch post-process: set the new operational channel
* and wake up the suspended queues.
*/
-void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success);
+void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success,
+ unsigned int link_id);
/**
* ieee80211_channel_switch_disconnect - disconnect due to channel switch error
@@ -7242,7 +7266,7 @@ ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
*
* This function is used to check whether given txq is allowed to transmit by
* the airtime scheduler, and can be used by drivers to access the airtime
- * fairness accounting without going using the scheduling order enfored by
+ * fairness accounting without using the scheduling order enforced by
* next_txq().
*
* Returns %true if the airtime scheduler thinks the TXQ should be allowed to
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
index 3d3b5c881b..158b125692 100644
--- a/include/net/mana/hw_channel.h
+++ b/include/net/mana/hw_channel.h
@@ -121,7 +121,7 @@ struct hwc_dma_buf {
u32 gpa_mkey;
u32 num_reqs;
- struct hwc_work_request reqs[];
+ struct hwc_work_request reqs[] __counted_by(num_reqs);
};
typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id,
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 4d43adf186..6e3e9c1363 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -339,7 +339,7 @@ struct mana_rxq {
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
*/
- struct mana_recv_buf_oob rx_oobs[];
+ struct mana_recv_buf_oob rx_oobs[] __counted_by(num_rx_buf);
};
struct mana_tx_qp {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index eb6cd43b17..13b3a4e29f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -368,21 +368,30 @@ static inline void put_net_track(struct net *net, netns_tracker *tracker)
typedef struct {
#ifdef CONFIG_NET_NS
- struct net *net;
+ struct net __rcu *net;
#endif
} possible_net_t;
static inline void write_pnet(possible_net_t *pnet, struct net *net)
{
#ifdef CONFIG_NET_NS
- pnet->net = net;
+ rcu_assign_pointer(pnet->net, net);
#endif
}
static inline struct net *read_pnet(const possible_net_t *pnet)
{
#ifdef CONFIG_NET_NS
- return pnet->net;
+ return rcu_dereference_protected(pnet->net, true);
+#else
+ return &init_net;
+#endif
+}
+
+static inline struct net *read_pnet_rcu(possible_net_t *pnet)
+{
+#ifdef CONFIG_NET_NS
+ return rcu_dereference(pnet->net);
#else
return &init_net;
#endif
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 4085765c33..cba3ccf03f 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -160,10 +160,6 @@ static inline struct net *nf_ct_net(const struct nf_conn *ct)
return read_pnet(&ct->ct_net);
}
-/* Alter reply tuple (maybe alter helper). */
-void nf_conntrack_alter_reply(struct nf_conn *ct,
- const struct nf_conntrack_tuple *newreply);
-
/* Is this tuple taken? (ignoring any belonging to the given
conntrack). */
int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
@@ -284,6 +280,16 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
}
+static inline void nf_conntrack_alter_reply(struct nf_conn *ct,
+ const struct nf_conntrack_tuple *newreply)
+{
+ /* Must be unconfirmed, so not in hash table yet */
+ if (WARN_ON(nf_ct_is_confirmed(ct)))
+ return;
+
+ ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
+}
+
#define nfct_time_stamp ((u32)(jiffies))
/* jiffies until ct expires, 0 if already expired */
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index fcb19a4e8f..6903f72bcc 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -39,7 +39,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
#ifdef CONFIG_NF_CONNTRACK_LABELS
struct net *net = nf_ct_net(ct);
- if (net->ct.labels_used == 0)
+ if (atomic_read(&net->ct.labels_used) == 0)
return NULL;
return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 692d595591..4a767b3d20 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -275,7 +275,7 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route);
+ struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 75972e211b..62013d0184 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -274,6 +274,9 @@ struct nft_userdata {
unsigned char data[];
};
+/* placeholder structure for opaque set element backend representation. */
+struct nft_elem_priv { };
+
/**
* struct nft_set_elem - generic representation of set elements
*
@@ -294,9 +297,14 @@ struct nft_set_elem {
u32 buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
struct nft_data val;
} data;
- void *priv;
+ struct nft_elem_priv *priv;
};
+static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
+{
+ return (void *)priv;
+}
+
struct nft_set;
struct nft_set_iter {
u8 genmask;
@@ -306,7 +314,7 @@ struct nft_set_iter {
int (*fn)(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
};
/**
@@ -430,7 +438,8 @@ struct nft_set_ops {
const struct nft_set_ext **ext);
bool (*update)(struct nft_set *set,
const u32 *key,
- void *(*new)(struct nft_set *,
+ struct nft_elem_priv *
+ (*new)(struct nft_set *,
const struct nft_expr *,
struct nft_regs *),
const struct nft_expr *expr,
@@ -442,27 +451,27 @@ struct nft_set_ops {
int (*insert)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **ext);
+ struct nft_elem_priv **priv);
void (*activate)(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem);
- void * (*deactivate)(const struct net *net,
+ struct nft_elem_priv *elem_priv);
+ struct nft_elem_priv * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
- bool (*flush)(const struct net *net,
+ void (*flush)(const struct net *net,
const struct nft_set *set,
- void *priv);
+ struct nft_elem_priv *priv);
void (*remove)(const struct net *net,
const struct nft_set *set,
- const struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
void (*walk)(const struct nft_ctx *ctx,
struct nft_set *set,
struct nft_set_iter *iter);
- void * (*get)(const struct net *net,
+ struct nft_elem_priv * (*get)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem,
unsigned int flags);
- void (*commit)(const struct nft_set *set);
+ void (*commit)(struct nft_set *set);
void (*abort)(const struct nft_set *set);
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
@@ -789,16 +798,22 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex
return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
}
-static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext,
+ u64 tstamp)
{
return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
- time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
+ time_after_eq64(tstamp, *nft_set_ext_expiration(ext));
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+ return __nft_set_elem_expired(ext, get_jiffies_64());
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
- void *elem)
+ const struct nft_elem_priv *elem_priv)
{
- return elem + set->ops->elemsize;
+ return (void *)elem_priv + set->ops->elemsize;
}
static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
@@ -810,16 +825,19 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nlattr *attr);
-void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const u32 *key, const u32 *key_end, const u32 *data,
- u64 timeout, u64 expiration, gfp_t gfp);
+struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *key_end,
+ const u32 *data,
+ u64 timeout, u64 expiration, gfp_t gfp);
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[]);
-void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
bool destroy_expr);
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem);
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv);
struct nft_expr_ops;
/**
@@ -1061,7 +1079,7 @@ struct nft_chain {
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain);
int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
const struct nft_set_iter *iter,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
@@ -1198,10 +1216,13 @@ static inline void nft_use_inc_restore(u32 *use)
* @hgenerator: handle generator state
* @handle: table handle
* @use: number of chain references to this table
+ * @family:address family
* @flags: table flag (see enum nft_table_flags)
* @genmask: generation mask
- * @afinfo: address family info
+ * @nlpid: netlink port ID
* @name: name of the table
+ * @udlen: length of the user data
+ * @udata: user data
* @validate_state: internal, set when transaction adds jumps
*/
struct nft_table {
@@ -1307,6 +1328,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
* @type: stateful object numeric type
* @owner: module owner
* @maxattr: maximum netlink attribute
+ * @family: address family for AF-specific object types
* @policy: netlink attribute policy
*/
struct nft_object_type {
@@ -1316,6 +1338,7 @@ struct nft_object_type {
struct list_head list;
u32 type;
unsigned int maxattr;
+ u8 family;
struct module *owner;
const struct nla_policy *policy;
};
@@ -1635,14 +1658,14 @@ struct nft_trans_table {
struct nft_trans_elem {
struct nft_set *set;
- struct nft_set_elem elem;
+ struct nft_elem_priv *elem_priv;
bool bound;
};
#define nft_trans_elem_set(trans) \
(((struct nft_trans_elem *)trans->data)->set)
-#define nft_trans_elem(trans) \
- (((struct nft_trans_elem *)trans->data)->elem)
+#define nft_trans_elem_priv(trans) \
+ (((struct nft_trans_elem *)trans->data)->elem_priv)
#define nft_trans_elem_set_bound(trans) \
(((struct nft_trans_elem *)trans->data)->bound)
@@ -1683,7 +1706,7 @@ struct nft_trans_gc {
struct nft_set *set;
u32 seq;
u16 count;
- void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct nft_elem_priv *priv[NFT_TRANS_GC_BATCHCOUNT];
struct rcu_head rcu;
};
@@ -1706,7 +1729,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc);
void nft_setelem_data_deactivate(const struct net *net,
const struct nft_set *set,
- struct nft_set_elem *elem);
+ struct nft_elem_priv *elem_priv);
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1733,6 +1756,7 @@ struct nftables_pernet {
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
+ u64 tstamp;
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
@@ -1745,6 +1769,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net)
return net_generic(net, nf_tables_net_id);
}
+static inline u64 nft_net_tstamp(const struct net *net)
+{
+ return nft_pernet(net)->tstamp;
+}
+
#define __NFT_REDUCE_READONLY 1UL
#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
diff --git a/include/net/netkit.h b/include/net/netkit.h
new file mode 100644
index 0000000000..9ec0163739
--- /dev/null
+++ b/include/net/netkit.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef __NET_NETKIT_H
+#define __NET_NETKIT_H
+
+#include <linux/bpf.h>
+
+#ifdef CONFIG_NETKIT
+int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
+int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
+INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
+#else
+static inline int netkit_prog_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int netkit_link_attach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int netkit_prog_detach(const union bpf_attr *attr,
+ struct bpf_prog *prog)
+{
+ return -EINVAL;
+}
+
+static inline int netkit_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
+
+static inline struct net_device *netkit_peer_dev(struct net_device *dev)
+{
+ return NULL;
+}
+#endif /* CONFIG_NETKIT */
+#endif /* __NET_NETKIT_H */
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 8a7cd1170e..83bdf787ae 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -128,6 +128,8 @@
* nla_len(nla) length of attribute payload
*
* Attribute Payload Access for Basic Types:
+ * nla_get_uint(nla) get payload for a uint attribute
+ * nla_get_sint(nla) get payload for a sint attribute
* nla_get_u8(nla) get payload for a u8 attribute
* nla_get_u16(nla) get payload for a u16 attribute
* nla_get_u32(nla) get payload for a u32 attribute
@@ -183,6 +185,8 @@ enum {
NLA_REJECT,
NLA_BE16,
NLA_BE32,
+ NLA_SINT,
+ NLA_UINT,
__NLA_TYPE_MAX,
};
@@ -229,6 +233,7 @@ enum nla_policy_validation {
* nested header (or empty); len field is used if
* nested_policy is also used, for the max attr
* number in the nested policy.
+ * NLA_SINT, NLA_UINT,
* NLA_U8, NLA_U16,
* NLA_U32, NLA_U64,
* NLA_S8, NLA_S16,
@@ -260,12 +265,14 @@ enum nla_policy_validation {
* while an array has the nested attributes at another
* level down and the attribute types directly in the
* nesting don't matter.
+ * NLA_UINT,
* NLA_U8,
* NLA_U16,
* NLA_U32,
* NLA_U64,
* NLA_BE16,
* NLA_BE32,
+ * NLA_SINT,
* NLA_S8,
* NLA_S16,
* NLA_S32,
@@ -280,6 +287,7 @@ enum nla_policy_validation {
* or NLA_POLICY_FULL_RANGE_SIGNED() macros instead.
* Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and
* NLA_POLICY_RANGE() macros.
+ * NLA_UINT,
* NLA_U8,
* NLA_U16,
* NLA_U32,
@@ -288,6 +296,7 @@ enum nla_policy_validation {
* to a struct netlink_range_validation that indicates
* the min/max values.
* Use NLA_POLICY_FULL_RANGE().
+ * NLA_SINT,
* NLA_S8,
* NLA_S16,
* NLA_S32,
@@ -351,8 +360,8 @@ struct nla_policy {
const u32 mask;
const char *reject_message;
const struct nla_policy *nested_policy;
- struct netlink_range_validation *range;
- struct netlink_range_validation_signed *range_signed;
+ const struct netlink_range_validation *range;
+ const struct netlink_range_validation_signed *range_signed;
struct {
s16 min, max;
};
@@ -377,9 +386,11 @@ struct nla_policy {
#define __NLA_IS_UINT_TYPE(tp) \
(tp == NLA_U8 || tp == NLA_U16 || tp == NLA_U32 || \
- tp == NLA_U64 || tp == NLA_BE16 || tp == NLA_BE32)
+ tp == NLA_U64 || tp == NLA_UINT || \
+ tp == NLA_BE16 || tp == NLA_BE32)
#define __NLA_IS_SINT_TYPE(tp) \
- (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
+ (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64 || \
+ tp == NLA_SINT)
#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
#define NLA_ENSURE_UINT_TYPE(tp) \
@@ -1358,6 +1369,22 @@ static inline int nla_put_u32(struct sk_buff *skb, int attrtype, u32 value)
}
/**
+ * nla_put_uint - Add a variable-size unsigned int to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_uint(struct sk_buff *skb, int attrtype, u64 value)
+{
+ u64 tmp64 = value;
+ u32 tmp32 = value;
+
+ if (tmp64 == tmp32)
+ return nla_put_u32(skb, attrtype, tmp32);
+ return nla_put(skb, attrtype, sizeof(u64), &tmp64);
+}
+
+/**
* nla_put_be32 - Add a __be32 netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
@@ -1512,6 +1539,22 @@ static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value,
}
/**
+ * nla_put_sint - Add a variable-size signed int to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_sint(struct sk_buff *skb, int attrtype, s64 value)
+{
+ s64 tmp64 = value;
+ s32 tmp32 = value;
+
+ if (tmp64 == tmp32)
+ return nla_put_s32(skb, attrtype, tmp32);
+ return nla_put(skb, attrtype, sizeof(s64), &tmp64);
+}
+
+/**
* nla_put_string - Add a string netlink attribute to a socket buffer
* @skb: socket buffer to add attribute to
* @attrtype: attribute type
@@ -1668,6 +1711,17 @@ static inline u64 nla_get_u64(const struct nlattr *nla)
}
/**
+ * nla_get_uint - return payload of uint attribute
+ * @nla: uint netlink attribute
+ */
+static inline u64 nla_get_uint(const struct nlattr *nla)
+{
+ if (nla_len(nla) == sizeof(u32))
+ return nla_get_u32(nla);
+ return nla_get_u64(nla);
+}
+
+/**
* nla_get_be64 - return payload of __be64 attribute
* @nla: __be64 netlink attribute
*/
@@ -1730,6 +1784,17 @@ static inline s64 nla_get_s64(const struct nlattr *nla)
}
/**
+ * nla_get_sint - return payload of uint attribute
+ * @nla: uint netlink attribute
+ */
+static inline s64 nla_get_sint(const struct nlattr *nla)
+{
+ if (nla_len(nla) == sizeof(s32))
+ return nla_get_s32(nla);
+ return nla_get_s64(nla);
+}
+
+/**
* nla_get_flag - return payload of flag attribute
* @nla: flag netlink attribute
*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 1f463b3957..bae914815a 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -107,7 +107,7 @@ struct netns_ct {
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
- unsigned int labels_used;
+ atomic_t labels_used;
#endif
};
#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7a41c47915..73f43f6991 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,9 @@ struct netns_ipv4 {
u8 sysctl_tcp_syncookies;
u8 sysctl_tcp_migrate_req;
u8 sysctl_tcp_comp_sack_nr;
+ u8 sysctl_tcp_backlog_ack_defer;
+ u8 sysctl_tcp_pingpong_thresh;
+
int sysctl_tcp_reordering;
u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2;
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 2b12725de9..d92046a4a0 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -92,7 +92,7 @@ struct nh_res_table {
u32 unbalanced_timer;
u16 num_nh_buckets;
- struct nh_res_bucket nh_buckets[];
+ struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
};
struct nh_grp_entry {
@@ -126,7 +126,7 @@ struct nh_group {
bool has_v4;
struct nh_res_table __rcu *res_table;
- struct nh_grp_entry nh_entries[];
+ struct nh_grp_entry nh_entries[] __counted_by(num_nh);
};
struct nexthop {
@@ -187,7 +187,7 @@ struct nh_notifier_grp_entry_info {
struct nh_notifier_grp_info {
u16 num_nh;
bool is_fdb;
- struct nh_notifier_grp_entry_info nh_entries[];
+ struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
struct nh_notifier_res_bucket_info {
@@ -200,7 +200,7 @@ struct nh_notifier_res_bucket_info {
struct nh_notifier_res_table_info {
u16 num_nh_buckets;
- struct nh_notifier_single_info nhs[];
+ struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};
struct nh_notifier_info {
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 8e7751464f..4ebd544ae9 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -8,23 +8,46 @@
/**
* DOC: page_pool allocator
*
- * The page_pool allocator is optimized for the XDP mode that
- * uses one frame per-page, but it can fallback on the
- * regular page allocator APIs.
- *
- * Basic use involves replacing alloc_pages() calls with the
- * page_pool_alloc_pages() call. Drivers should use
- * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
- *
- * The API keeps track of in-flight pages, in order to let API users know
- * when it is safe to free a page_pool object. Thus, API users
- * must call page_pool_put_page() to free the page, or attach
- * the page to a page_pool-aware object like skbs marked with
+ * The page_pool allocator is optimized for recycling page or page fragment used
+ * by skb packet and xdp frame.
+ *
+ * Basic use involves replacing and alloc_pages() calls with page_pool_alloc(),
+ * which allocate memory with or without page splitting depending on the
+ * requested memory size.
+ *
+ * If the driver knows that it always requires full pages or its allocations are
+ * always smaller than half a page, it can use one of the more specific API
+ * calls:
+ *
+ * 1. page_pool_alloc_pages(): allocate memory without page splitting when
+ * driver knows that the memory it need is always bigger than half of the page
+ * allocated from page pool. There is no cache line dirtying for 'struct page'
+ * when a page is recycled back to the page pool.
+ *
+ * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver
+ * knows that the memory it need is always smaller than or equal to half of the
+ * page allocated from page pool. Page splitting enables memory saving and thus
+ * avoids TLB/cache miss for data access, but there also is some cost to
+ * implement page splitting, mainly some cache line dirtying/bouncing for
+ * 'struct page' and atomic operation for page->pp_frag_count.
+ *
+ * The API keeps track of in-flight pages, in order to let API users know when
+ * it is safe to free a page_pool object, the API users must call
+ * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or
+ * attach the page_pool object to a page_pool-aware object like skbs marked with
* skb_mark_for_recycle().
*
- * API users must call page_pool_put_page() once on a page, as it
- * will either recycle the page, or in case of refcnt > 1, it will
- * release the DMA mapping and in-flight state accounting.
+ * page_pool_put_page() may be called multi times on the same page if a page is
+ * split into multi fragments. For the last fragment, it will either recycle the
+ * page, or in case of page->_refcount > 1, it will release the DMA mapping and
+ * in-flight state accounting.
+ *
+ * dma_sync_single_range_for_device() is only called for the last fragment when
+ * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
+ * last freed fragment to do the sync_for_device operation for all fragments in
+ * the same page when a page is split, the API user must setup pool->p.max_len
+ * and pool->p.offset correctly and ensure that page_pool_put_page() is called
+ * with dma_sync_size being -1 for fragment API.
*/
#ifndef _NET_PAGE_POOL_HELPERS_H
#define _NET_PAGE_POOL_HELPERS_H
@@ -73,6 +96,17 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
+/**
+ * page_pool_dev_alloc_frag() - allocate a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: requested size
+ *
+ * Get a page fragment from the page allocator or page_pool caches.
+ *
+ * Return:
+ * Return allocated page fragment, otherwise return NULL.
+ */
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
unsigned int size)
@@ -82,6 +116,91 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
return page_pool_alloc_frag(pool, offset, size, gfp);
}
+static inline struct page *page_pool_alloc(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size, gfp_t gfp)
+{
+ unsigned int max_size = PAGE_SIZE << pool->p.order;
+ struct page *page;
+
+ if ((*size << 1) > max_size) {
+ *size = max_size;
+ *offset = 0;
+ return page_pool_alloc_pages(pool, gfp);
+ }
+
+ page = page_pool_alloc_frag(pool, offset, *size, gfp);
+ if (unlikely(!page))
+ return NULL;
+
+ /* There is very likely not enough space for another fragment, so append
+ * the remaining size to the current fragment to avoid truesize
+ * underestimate problem.
+ */
+ if (pool->frag_offset + *size > max_size) {
+ *size = max_size - *offset;
+ pool->frag_offset = max_size;
+ }
+
+ return page;
+}
+
+/**
+ * page_pool_dev_alloc() - allocate a page or a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: in as the requested size, out as the allocated size
+ *
+ * Get a page or a page fragment from the page allocator or page_pool caches
+ * depending on the requested size in order to allocate memory with least memory
+ * utilization and performance penalty.
+ *
+ * Return:
+ * Return allocated page or page fragment, otherwise return NULL.
+ */
+static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int *size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc(pool, offset, size, gfp);
+}
+
+static inline void *page_pool_alloc_va(struct page_pool *pool,
+ unsigned int *size, gfp_t gfp)
+{
+ unsigned int offset;
+ struct page *page;
+
+ /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
+ page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
+ if (unlikely(!page))
+ return NULL;
+
+ return page_address(page) + offset;
+}
+
+/**
+ * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its
+ * va.
+ * @pool: pool from which to allocate
+ * @size: in as the requested size, out as the allocated size
+ *
+ * This is just a thin wrapper around the page_pool_alloc() API, and
+ * it returns va of the allocated page or page fragment.
+ *
+ * Return:
+ * Return the va for the allocated page or page fragment, otherwise return NULL.
+ */
+static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
+ unsigned int *size)
+{
+ gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+ return page_pool_alloc_va(pool, size, gfp);
+}
+
/**
* page_pool_get_dma_dir() - Retrieve the stored DMA direction.
* @pool: pool from which page was allocated
@@ -115,28 +234,49 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
long ret;
/* If nr == pp_frag_count then we have cleared all remaining
- * references to the page. No need to actually overwrite it, instead
- * we can leave this to be overwritten by the calling function.
+ * references to the page:
+ * 1. 'n == 1': no need to actually overwrite it.
+ * 2. 'n != 1': overwrite it with one, which is the rare case
+ * for pp_frag_count draining.
*
- * The main advantage to doing this is that an atomic_read is
- * generally a much cheaper operation than an atomic update,
- * especially when dealing with a page that may be partitioned
- * into only 2 or 3 pieces.
+ * The main advantage to doing this is that not only we avoid a atomic
+ * update, as an atomic_read is generally a much cheaper operation than
+ * an atomic update, especially when dealing with a page that may be
+ * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count
+ * handling by ensuring all pages have partitioned into only 1 piece
+ * initially, and only overwrite it when the page is partitioned into
+ * more than one piece.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr)
+ if (atomic_long_read(&page->pp_frag_count) == nr) {
+ /* As we have ensured nr is always one for constant case using
+ * the BUILD_BUG_ON(), only need to handle the non-constant case
+ * here for pp_frag_count draining, which is a rare case.
+ */
+ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
+ if (!__builtin_constant_p(nr))
+ atomic_long_set(&page->pp_frag_count, 1);
+
return 0;
+ }
ret = atomic_long_sub_return(nr, &page->pp_frag_count);
WARN_ON(ret < 0);
+
+ /* We are the last user here too, reset pp_frag_count back to 1 to
+ * ensure all pages have been partitioned into 1 piece initially,
+ * this should be the rare case when the last two fragment users call
+ * page_pool_defrag_page() currently.
+ */
+ if (unlikely(!ret))
+ atomic_long_set(&page->pp_frag_count, 1);
+
return ret;
}
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
- struct page *page)
+static inline bool page_pool_is_last_frag(struct page *page)
{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- (page_pool_defrag_page(page, 1) == 0);
+ /* If page_pool_defrag_page() returns 0, we were the last user */
+ return page_pool_defrag_page(page, 1) == 0;
}
/**
@@ -161,7 +301,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(pool, page))
+ if (!page_pool_is_last_frag(page))
return;
page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
@@ -197,10 +337,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
+#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \
(sizeof(dma_addr_t) > sizeof(unsigned long))
/**
+ * page_pool_free_va() - free a va into the page_pool
+ * @pool: pool from which va was allocated
+ * @va: va to be freed
+ * @allow_direct: freed by the consumer, allow lockless caching
+ *
+ * Free a va allocated from page_pool_allo_va().
+ */
+static inline void page_pool_free_va(struct page_pool *pool, void *va,
+ bool allow_direct)
+{
+ page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
+}
+
+/**
* page_pool_get_dma_addr() - Retrieve the stored DMA address.
* @page: page allocated from a page pool
*
@@ -211,17 +365,25 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
dma_addr_t ret = page->dma_addr;
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+ ret <<= PAGE_SHIFT;
return ret;
}
-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
+ page->dma_addr = addr >> PAGE_SHIFT;
+
+ /* We assume page alignment to shave off bottom bits,
+ * if this "compression" doesn't work we need to drop.
+ */
+ return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
+ }
+
page->dma_addr = addr;
- if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
- page->dma_addr_upper = upper_32_bits(addr);
+ return false;
}
static inline bool page_pool_put(struct page_pool *pool)
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 887e7946a5..6fc5134095 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -17,10 +17,8 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
- PP_FLAG_DMA_SYNC_DEV |\
- PP_FLAG_PAGE_FRAG)
+ PP_FLAG_DMA_SYNC_DEV)
/*
* Fast allocation side cache array/stack
@@ -45,7 +43,7 @@ struct pp_alloc_cache {
/**
* struct page_pool_params - page pool parameters
- * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
+ * @flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
* @order: 2^order pages on allocation
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index f308e82686..a76c9171db 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -154,6 +154,12 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
return xchg(clp, cl);
}
+static inline void tcf_set_drop_reason(struct tcf_result *res,
+ enum skb_drop_reason reason)
+{
+ res->drop_reason = reason;
+}
+
static inline void
__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
{
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 15960564e0..9fa1d0794d 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -20,10 +20,10 @@ struct qdisc_walker {
int (*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
};
-static inline void *qdisc_priv(struct Qdisc *q)
-{
- return &q->privdata;
-}
+#define qdisc_priv(q) \
+ _Generic(q, \
+ const struct Qdisc * : (const void *)&q->privdata, \
+ struct Qdisc * : (void *)&q->privdata)
static inline struct Qdisc *qdisc_from_priv(void *priv)
{
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index b2cb4a9eb0..ebf9e028d1 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -213,6 +213,7 @@ struct ieee80211_reg_rule {
u32 flags;
u32 dfs_cac_ms;
bool has_wmm;
+ s8 psd;
};
struct ieee80211_regdomain {
diff --git a/include/net/route.h b/include/net/route.h
index 51a45b1887..980ab474ea 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -37,7 +37,7 @@
#define RTO_ONLINK 0x01
-#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
+#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE))
#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
static inline __u8 ip_sock_rt_scope(const struct sock *sk)
@@ -50,7 +50,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk)
static inline __u8 ip_sock_rt_tos(const struct sock *sk)
{
- return RT_TOS(inet_sk(sk)->tos);
+ return RT_TOS(READ_ONCE(inet_sk(sk)->tos));
}
struct ip_tunnel_info;
@@ -136,12 +136,6 @@ static inline struct rtable *__ip_route_output_key(struct net *net,
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
const struct sock *sk);
-struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net, __be32 *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol, bool use_cache);
-
struct dst_entry *ipv4_blackhole_route(struct net *net,
struct dst_entry *dst_orig);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e940debac4..959a7725c2 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -324,7 +324,6 @@ struct Qdisc_ops {
struct module *owner;
};
-
struct tcf_result {
union {
struct {
@@ -332,8 +331,8 @@ struct tcf_result {
u32 classid;
};
const struct tcf_proto *goto_tp;
-
};
+ enum skb_drop_reason drop_reason;
};
struct tcf_chain;
@@ -591,6 +590,7 @@ static inline void sch_tree_unlock(struct Qdisc *q)
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
+extern const u8 sch_default_prio2band[TC_PRIO_MAX + 1];
extern struct Qdisc_ops mq_qdisc_ops;
extern struct Qdisc_ops noqueue_qdisc_ops;
extern const struct Qdisc_ops *default_qdisc_ops;
diff --git a/include/net/sock.h b/include/net/sock.h
index e70c903b04..f9a9f61fa1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1821,12 +1821,11 @@ static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
static inline void sock_release_ownership(struct sock *sk)
{
- if (sock_owned_by_user_nocheck(sk)) {
- sk->sk_lock.owned = 0;
+ DEBUG_NET_WARN_ON_ONCE(!sock_owned_by_user_nocheck(sk));
+ sk->sk_lock.owned = 0;
- /* The sk_lock has mutex_unlock() semantics: */
- mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
- }
+ /* The sk_lock has mutex_unlock() semantics: */
+ mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
}
/* no reclassification while locks are held */
@@ -1865,11 +1864,13 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int sock_setsockopt(struct socket *sock, int level, int op,
sockptr_t optval, unsigned int optlen);
+int do_sock_setsockopt(struct socket *sock, bool compat, int level,
+ int optname, sockptr_t optval, int optlen);
+int do_sock_getsockopt(struct socket *sock, bool compat, int level,
+ int optname, sockptr_t optval, sockptr_t optlen);
int sk_getsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, sockptr_t optlen);
-int sock_getsockopt(struct socket *sock, int level, int op,
- char __user *optval, int __user *optlen);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32);
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
@@ -2249,7 +2250,7 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
}
}
-bool sk_mc_loop(struct sock *sk);
+bool sk_mc_loop(const struct sock *sk);
static inline bool sk_can_gso(const struct sock *sk)
{
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index a43062d4c7..8346b0d295 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -308,6 +308,9 @@ void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack);
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj);
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack);
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index 1dc2f827d0..77f87c622a 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -22,6 +22,7 @@ struct tcf_ct_params {
struct nf_nat_range2 range;
bool ipv4_range;
+ bool put_labels;
u16 ct_action;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9cbcfb3c95..2ac9475be1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -37,6 +37,7 @@
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
+#include <net/tcp_ao.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
#include <net/mptcp.h>
@@ -131,6 +132,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
+static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
+
#if HZ >= 100
#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN ((unsigned)(HZ/25))
@@ -164,7 +167,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_KEEPCNT 127
#define MAX_TCP_SYNCNT 127
-#define TCP_PAWS_24DAYS (60 * 60 * 24 * 24)
+/* Ensure that TCP PAWS checks are relaxed after ~2147 seconds
+ * to avoid overflows. This assumes a clock smaller than 1 Mhz.
+ * Default clock is 1 Khz, tcp_usec_ts uses 1 Mhz.
+ */
+#define TCP_PAWS_WRAP (INT_MAX / USEC_PER_SEC)
+
#define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
* after this time. It should be equal
* (or greater than) TCP_TIMEWAIT_LEN
@@ -187,6 +195,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_AO 29 /* Authentication Option (RFC5925) */
#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
@@ -429,7 +438,6 @@ int tcp_mmap(struct file *file, struct socket *sock,
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
-const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
* BPF SKB-less helpers
@@ -726,7 +734,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
u32 tcp_delack_max(const struct sock *sk);
/* Compute the actual rto_min value */
-static inline u32 tcp_rto_min(struct sock *sk)
+static inline u32 tcp_rto_min(const struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
u32 rto_min = inet_csk(sk)->icsk_rto_min;
@@ -736,7 +744,7 @@ static inline u32 tcp_rto_min(struct sock *sk)
return rto_min;
}
-static inline u32 tcp_rto_min_us(struct sock *sk)
+static inline u32 tcp_rto_min_us(const struct sock *sk)
{
return jiffies_to_usecs(tcp_rto_min(sk));
}
@@ -796,22 +804,31 @@ static inline u64 tcp_clock_us(void)
return div_u64(tcp_clock_ns(), NSEC_PER_USEC);
}
-/* This should only be used in contexts where tp->tcp_mstamp is up to date */
-static inline u32 tcp_time_stamp(const struct tcp_sock *tp)
+static inline u64 tcp_clock_ms(void)
+{
+ return div_u64(tcp_clock_ns(), NSEC_PER_MSEC);
+}
+
+/* TCP Timestamp included in TS option (RFC 1323) can either use ms
+ * or usec resolution. Each socket carries a flag to select one or other
+ * resolution, as the route attribute could change anytime.
+ * Each flow must stick to initial resolution.
+ */
+static inline u32 tcp_clock_ts(bool usec_ts)
{
- return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+ return usec_ts ? tcp_clock_us() : tcp_clock_ms();
}
-/* Convert a nsec timestamp into TCP TSval timestamp (ms based currently) */
-static inline u64 tcp_ns_to_ts(u64 ns)
+static inline u32 tcp_time_stamp_ms(const struct tcp_sock *tp)
{
- return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ);
+ return div_u64(tp->tcp_mstamp, USEC_PER_MSEC);
}
-/* Could use tcp_clock_us() / 1000, but this version uses a single divide */
-static inline u32 tcp_time_stamp_raw(void)
+static inline u32 tcp_time_stamp_ts(const struct tcp_sock *tp)
{
- return tcp_ns_to_ts(tcp_clock_ns());
+ if (tp->tcp_usec_ts)
+ return tp->tcp_mstamp;
+ return tcp_time_stamp_ms(tp);
}
void tcp_mstamp_refresh(struct tcp_sock *tp);
@@ -821,17 +838,30 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
return max_t(s64, t1 - t0, 0);
}
-static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
-{
- return tcp_ns_to_ts(skb->skb_mstamp_ns);
-}
-
/* provide the departure time in us unit */
static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
{
return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
}
+/* Provide skb TSval in usec or ms unit */
+static inline u32 tcp_skb_timestamp_ts(bool usec_ts, const struct sk_buff *skb)
+{
+ if (usec_ts)
+ return tcp_skb_timestamp_us(skb);
+
+ return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC);
+}
+
+static inline u32 tcp_tw_tsval(const struct tcp_timewait_sock *tcptw)
+{
+ return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset;
+}
+
+static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
+{
+ return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off;
+}
#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
@@ -1460,13 +1490,15 @@ static inline int tcp_space_from_win(const struct sock *sk, int win)
return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
}
+/* Assume a conservative default of 1200 bytes of payload per 4K page.
+ * This may be adjusted later in tcp_measure_rcv_mss().
+ */
+#define TCP_DEFAULT_SCALING_RATIO ((1200 << TCP_RMEM_TO_WIN_SCALE) / \
+ SKB_TRUESIZE(4096))
+
static inline void tcp_scaling_ratio_init(struct sock *sk)
{
- /* Assume a conservative default of 1200 bytes of payload per 4K page.
- * This may be adjusted later in tcp_measure_rcv_mss().
- */
- tcp_sk(sk)->scaling_ratio = (1200 << TCP_RMEM_TO_WIN_SCALE) /
- SKB_TRUESIZE(4096);
+ tcp_sk(sk)->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
}
/* Note: caller must be prepared to deal with negative returns */
@@ -1602,7 +1634,7 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
return true;
if (unlikely(!time_before32(ktime_get_seconds(),
- rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
+ rx_opt->ts_recent_stamp + TCP_PAWS_WRAP)))
return true;
/*
* Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1662,12 +1694,7 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp)
tp->retransmit_skb_hint = NULL;
}
-union tcp_md5_addr {
- struct in_addr a4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr a6;
-#endif
-};
+#define tcp_md5_addr tcp_ao_addr
/* - key database */
struct tcp_md5sig_key {
@@ -1711,12 +1738,39 @@ union tcp_md5sum_block {
#endif
};
-/* - pool: digest algorithm, hash description and scratch buffer */
-struct tcp_md5sig_pool {
- struct ahash_request *md5_req;
- void *scratch;
+/*
+ * struct tcp_sigpool - per-CPU pool of ahash_requests
+ * @scratch: per-CPU temporary area, that can be used between
+ * tcp_sigpool_start() and tcp_sigpool_end() to perform
+ * crypto request
+ * @req: pre-allocated ahash request
+ */
+struct tcp_sigpool {
+ void *scratch;
+ struct ahash_request *req;
};
+int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size);
+void tcp_sigpool_get(unsigned int id);
+void tcp_sigpool_release(unsigned int id);
+int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp,
+ const struct sk_buff *skb,
+ unsigned int header_len);
+
+/**
+ * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash()
+ * @c: returned tcp_sigpool for usage (uninitialized on failure)
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c);
+/**
+ * tcp_sigpool_end - enable bh and stop using tcp_sigpool
+ * @c: tcp_sigpool context that was returned by tcp_sigpool_start()
+ */
+void tcp_sigpool_end(struct tcp_sigpool *c);
+size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len);
/* - functions */
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
@@ -1729,28 +1783,36 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, int l3index, u8 flags);
+void tcp_clear_md5_list(struct sock *sk);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
#ifdef CONFIG_TCP_MD5SIG
-#include <linux/jump_label.h>
-extern struct static_key_false_deferred tcp_md5_needed;
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
- int family);
+ int family, bool any_l3index);
static inline struct tcp_md5sig_key *
tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr, int family)
{
if (!static_branch_unlikely(&tcp_md5_needed.key))
return NULL;
- return __tcp_md5_do_lookup(sk, l3index, addr, family);
+ return __tcp_md5_do_lookup(sk, l3index, addr, family, false);
+}
+
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup_any_l3index(const struct sock *sk,
+ const union tcp_md5_addr *addr, int family)
+{
+ if (!static_branch_unlikely(&tcp_md5_needed.key))
+ return NULL;
+ return __tcp_md5_do_lookup(sk, 0, addr, family, true);
}
enum skb_drop_reason
tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
const void *saddr, const void *daddr,
- int family, int dif, int sdif);
+ int family, int l3index, const __u8 *hash_location);
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
@@ -1762,27 +1824,29 @@ tcp_md5_do_lookup(const struct sock *sk, int l3index,
return NULL;
}
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup_any_l3index(const struct sock *sk,
+ const union tcp_md5_addr *addr, int family)
+{
+ return NULL;
+}
+
static inline enum skb_drop_reason
tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb,
const void *saddr, const void *daddr,
- int family, int dif, int sdif)
+ int family, int l3index, const __u8 *hash_location)
{
return SKB_NOT_DROPPED_YET;
}
#define tcp_twsk_md5_key(twsk) NULL
#endif
-bool tcp_alloc_md5sig_pool(void);
+int tcp_md5_alloc_sigpool(void);
+void tcp_md5_release_sigpool(void);
+void tcp_md5_add_sigpool(void);
+extern int tcp_md5_sigpool_id;
-struct tcp_md5sig_pool *tcp_get_md5sig_pool(void);
-static inline void tcp_put_md5sig_pool(void)
-{
- local_bh_enable();
-}
-
-int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
- unsigned int header_len);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
+int tcp_md5_hash_key(struct tcp_sigpool *hp,
const struct tcp_md5sig_key *key);
/* From tcp_fastopen.c */
@@ -2087,7 +2151,11 @@ INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff))
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb));
+#ifdef CONFIG_INET
void tcp_gro_complete(struct sk_buff *skb);
+#else
+static inline void tcp_gro_complete(struct sk_buff *skb) { }
+#endif
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
@@ -2127,6 +2195,18 @@ struct tcp_sock_af_ops {
sockptr_t optval,
int optlen);
#endif
+#ifdef CONFIG_TCP_AO
+ int (*ao_parse)(struct sock *sk, int optname, sockptr_t optval, int optlen);
+ struct tcp_ao_key *(*ao_lookup)(const struct sock *sk,
+ struct sock *addr_sk,
+ int sndid, int rcvid);
+ int (*ao_calc_key_sk)(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk,
+ __be32 sisn, __be32 disn, bool send);
+ int (*calc_ao_hash)(char *location, struct tcp_ao_key *ao,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+#endif
};
struct tcp_request_sock_ops {
@@ -2139,6 +2219,15 @@ struct tcp_request_sock_ops {
const struct sock *sk,
const struct sk_buff *skb);
#endif
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_key *(*ao_lookup)(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid);
+ int (*ao_calc_key)(struct tcp_ao_key *mkt, u8 *key, struct request_sock *sk);
+ int (*ao_synack_hash)(char *ao_hash, struct tcp_ao_key *mkt,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne);
+#endif
#ifdef CONFIG_SYN_COOKIES
__u32 (*cookie_init_seq)(const struct sk_buff *skb,
__u16 *mss);
@@ -2179,6 +2268,76 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
}
#endif
+struct tcp_key {
+ union {
+ struct {
+ struct tcp_ao_key *ao_key;
+ char *traffic_key;
+ u32 sne;
+ u8 rcv_next;
+ };
+ struct tcp_md5sig_key *md5_key;
+ };
+ enum {
+ TCP_KEY_NONE = 0,
+ TCP_KEY_MD5,
+ TCP_KEY_AO,
+ } type;
+};
+
+static inline void tcp_get_current_key(const struct sock *sk,
+ struct tcp_key *out)
+{
+#if defined(CONFIG_TCP_AO) || defined(CONFIG_TCP_MD5SIG)
+ const struct tcp_sock *tp = tcp_sk(sk);
+#endif
+
+#ifdef CONFIG_TCP_AO
+ if (static_branch_unlikely(&tcp_ao_needed.key)) {
+ struct tcp_ao_info *ao;
+
+ ao = rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held(sk));
+ if (ao) {
+ out->ao_key = READ_ONCE(ao->current_key);
+ out->type = TCP_KEY_AO;
+ return;
+ }
+ }
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ if (static_branch_unlikely(&tcp_md5_needed.key) &&
+ rcu_access_pointer(tp->md5sig_info)) {
+ out->md5_key = tp->af_specific->md5_lookup(sk, sk);
+ if (out->md5_key) {
+ out->type = TCP_KEY_MD5;
+ return;
+ }
+ }
+#endif
+ out->type = TCP_KEY_NONE;
+}
+
+static inline bool tcp_key_is_md5(const struct tcp_key *key)
+{
+#ifdef CONFIG_TCP_MD5SIG
+ if (static_branch_unlikely(&tcp_md5_needed.key) &&
+ key->type == TCP_KEY_MD5)
+ return true;
+#endif
+ return false;
+}
+
+static inline bool tcp_key_is_ao(const struct tcp_key *key)
+{
+#ifdef CONFIG_TCP_AO
+ if (static_branch_unlikely(&tcp_ao_needed.key) &&
+ key->type == TCP_KEY_AO)
+ return true;
+#endif
+ return false;
+}
+
int tcpv4_offload_init(void);
void tcp_v4_init(void);
@@ -2343,7 +2502,7 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
- int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+ int (*get_info)(struct sock *sk, struct sk_buff *skb);
size_t (*get_info_size)(const struct sock *sk);
/* clone ulp */
void (*clone)(const struct request_sock *req, struct sock *newsk,
@@ -2537,4 +2696,116 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
return 0;
}
+static inline int tcp_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const struct tcp_ao_hdr **aoh)
+{
+ const u8 *md5_tmp, *ao_tmp;
+ int ret;
+
+ ret = tcp_do_parse_auth_options(th, &md5_tmp, &ao_tmp);
+ if (ret)
+ return ret;
+
+ if (md5_hash)
+ *md5_hash = md5_tmp;
+
+ if (aoh) {
+ if (!ao_tmp)
+ *aoh = NULL;
+ else
+ *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2);
+ }
+
+ return 0;
+}
+
+static inline bool tcp_ao_required(struct sock *sk, const void *saddr,
+ int family, int l3index, bool stat_inc)
+{
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao_info;
+ struct tcp_ao_key *ao_key;
+
+ if (!static_branch_unlikely(&tcp_ao_needed.key))
+ return false;
+
+ ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info,
+ lockdep_sock_is_held(sk));
+ if (!ao_info)
+ return false;
+
+ ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1);
+ if (ao_info->ao_required || ao_key) {
+ if (stat_inc) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOREQUIRED);
+ atomic64_inc(&ao_info->counters.ao_required);
+ }
+ return true;
+ }
+#endif
+ return false;
+}
+
+/* Called with rcu_read_lock() */
+static inline enum skb_drop_reason
+tcp_inbound_hash(struct sock *sk, const struct request_sock *req,
+ const struct sk_buff *skb,
+ const void *saddr, const void *daddr,
+ int family, int dif, int sdif)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ const struct tcp_ao_hdr *aoh;
+ const __u8 *md5_location;
+ int l3index;
+
+ /* Invalid option or two times meet any of auth options */
+ if (tcp_parse_auth_options(th, &md5_location, &aoh)) {
+ tcp_hash_fail("TCP segment has incorrect auth options set",
+ family, skb, "");
+ return SKB_DROP_REASON_TCP_AUTH_HDR;
+ }
+
+ if (req) {
+ if (tcp_rsk_used_ao(req) != !!aoh) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD);
+ tcp_hash_fail("TCP connection can't start/end using TCP-AO",
+ family, skb, "%s",
+ !aoh ? "missing AO" : "AO signed");
+ return SKB_DROP_REASON_TCP_AOFAILURE;
+ }
+ }
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ /* Fast path: unsigned segments */
+ if (likely(!md5_location && !aoh)) {
+ /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid
+ * for the remote peer. On TCP-AO established connection
+ * the last key is impossible to remove, so there's
+ * always at least one current_key.
+ */
+ if (tcp_ao_required(sk, saddr, family, l3index, true)) {
+ tcp_hash_fail("AO hash is required, but not found",
+ family, skb, "L3 index %d", l3index);
+ return SKB_DROP_REASON_TCP_AONOTFOUND;
+ }
+ if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
+ tcp_hash_fail("MD5 Hash not found",
+ family, skb, "L3 index %d", l3index);
+ return SKB_DROP_REASON_TCP_MD5NOTFOUND;
+ }
+ return SKB_NOT_DROPPED_YET;
+ }
+
+ if (aoh)
+ return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh);
+
+ return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family,
+ l3index, md5_location);
+}
+
#endif /* _TCP_H */
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
new file mode 100644
index 0000000000..b04afced4c
--- /dev/null
+++ b/include/net/tcp_ao.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _TCP_AO_H
+#define _TCP_AO_H
+
+#define TCP_AO_KEY_ALIGN 1
+#define __tcp_ao_key_align __aligned(TCP_AO_KEY_ALIGN)
+
+union tcp_ao_addr {
+ struct in_addr a4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct in6_addr a6;
+#endif
+};
+
+struct tcp_ao_hdr {
+ u8 kind;
+ u8 length;
+ u8 keyid;
+ u8 rnext_keyid;
+};
+
+struct tcp_ao_counters {
+ atomic64_t pkt_good;
+ atomic64_t pkt_bad;
+ atomic64_t key_not_found;
+ atomic64_t ao_required;
+ atomic64_t dropped_icmp;
+};
+
+struct tcp_ao_key {
+ struct hlist_node node;
+ union tcp_ao_addr addr;
+ u8 key[TCP_AO_MAXKEYLEN] __tcp_ao_key_align;
+ unsigned int tcp_sigpool_id;
+ unsigned int digest_size;
+ int l3index;
+ u8 prefixlen;
+ u8 family;
+ u8 keylen;
+ u8 keyflags;
+ u8 sndid;
+ u8 rcvid;
+ u8 maclen;
+ struct rcu_head rcu;
+ atomic64_t pkt_good;
+ atomic64_t pkt_bad;
+ u8 traffic_keys[];
+};
+
+static inline u8 *rcv_other_key(struct tcp_ao_key *key)
+{
+ return key->traffic_keys;
+}
+
+static inline u8 *snd_other_key(struct tcp_ao_key *key)
+{
+ return key->traffic_keys + key->digest_size;
+}
+
+static inline int tcp_ao_maclen(const struct tcp_ao_key *key)
+{
+ return key->maclen;
+}
+
+/* Use tcp_ao_len_aligned() for TCP header calculations */
+static inline int tcp_ao_len(const struct tcp_ao_key *key)
+{
+ return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr);
+}
+
+static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key)
+{
+ return round_up(tcp_ao_len(key), 4);
+}
+
+static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key)
+{
+ return key->digest_size;
+}
+
+static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key)
+{
+ return sizeof(struct tcp_ao_key) + (key->digest_size << 1);
+}
+
+struct tcp_ao_info {
+ /* List of tcp_ao_key's */
+ struct hlist_head head;
+ /* current_key and rnext_key aren't maintained on listen sockets.
+ * Their purpose is to cache keys on established connections,
+ * saving needless lookups. Never dereference any of them from
+ * listen sockets.
+ * ::current_key may change in RX to the key that was requested by
+ * the peer, please use READ_ONCE()/WRITE_ONCE() in order to avoid
+ * load/store tearing.
+ * Do the same for ::rnext_key, if you don't hold socket lock
+ * (it's changed only by userspace request in setsockopt()).
+ */
+ struct tcp_ao_key *current_key;
+ struct tcp_ao_key *rnext_key;
+ struct tcp_ao_counters counters;
+ u32 ao_required :1,
+ accept_icmps :1,
+ __unused :30;
+ __be32 lisn;
+ __be32 risn;
+ /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
+ * that protect TCP-AO connection from replayed old TCP segments.
+ * See RFC5925 (6.2).
+ * In order to get correct SNE, there's a helper tcp_ao_compute_sne().
+ * It needs SEQ basis to understand whereabouts are lower SEQ numbers.
+ * According to that basis vector, it can provide incremented SNE
+ * when SEQ rolls over or provide decremented SNE when there's
+ * a retransmitted segment from before-rolling over.
+ * - for request sockets such basis is rcv_isn/snt_isn, which seems
+ * good enough as it's unexpected to receive 4 Gbytes on reqsk.
+ * - for full sockets the basis is rcv_nxt/snd_una. snd_una is
+ * taken instead of snd_nxt as currently it's easier to track
+ * in tcp_snd_una_update(), rather than updating SNE in all
+ * WRITE_ONCE(tp->snd_nxt, ...)
+ * - for time-wait sockets the basis is tw_rcv_nxt/tw_snd_nxt.
+ * tw_snd_nxt is not expected to change, while tw_rcv_nxt may.
+ */
+ u32 snd_sne;
+ u32 rcv_sne;
+ refcount_t refcnt; /* Protects twsk destruction */
+ struct rcu_head rcu;
+};
+
+#ifdef CONFIG_TCP_MD5SIG
+#include <linux/jump_label.h>
+extern struct static_key_false_deferred tcp_md5_needed;
+#define static_branch_tcp_md5() static_branch_unlikely(&tcp_md5_needed.key)
+#else
+#define static_branch_tcp_md5() false
+#endif
+#ifdef CONFIG_TCP_AO
+/* TCP-AO structures and functions */
+#include <linux/jump_label.h>
+extern struct static_key_false_deferred tcp_ao_needed;
+#define static_branch_tcp_ao() static_branch_unlikely(&tcp_ao_needed.key)
+#else
+#define static_branch_tcp_ao() false
+#endif
+
+static inline bool tcp_hash_should_produce_warnings(void)
+{
+ return static_branch_tcp_md5() || static_branch_tcp_ao();
+}
+
+#define tcp_hash_fail(msg, family, skb, fmt, ...) \
+do { \
+ const struct tcphdr *th = tcp_hdr(skb); \
+ char hdr_flags[6]; \
+ char *f = hdr_flags; \
+ \
+ if (!tcp_hash_should_produce_warnings()) \
+ break; \
+ if (th->fin) \
+ *f++ = 'F'; \
+ if (th->syn) \
+ *f++ = 'S'; \
+ if (th->rst) \
+ *f++ = 'R'; \
+ if (th->psh) \
+ *f++ = 'P'; \
+ if (th->ack) \
+ *f++ = '.'; \
+ *f = 0; \
+ if ((family) == AF_INET) { \
+ net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \
+ msg, &ip_hdr(skb)->saddr, ntohs(th->source), \
+ &ip_hdr(skb)->daddr, ntohs(th->dest), \
+ hdr_flags, ##__VA_ARGS__); \
+ } else { \
+ net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \
+ msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \
+ &ipv6_hdr(skb)->daddr, ntohs(th->dest), \
+ hdr_flags, ##__VA_ARGS__); \
+ } \
+} while (0)
+
+#ifdef CONFIG_TCP_AO
+/* TCP-AO structures and functions */
+struct tcp4_ao_context {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ __be32 sisn;
+ __be32 disn;
+};
+
+struct tcp6_ao_context {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be16 sport;
+ __be16 dport;
+ __be32 sisn;
+ __be32 disn;
+};
+
+struct tcp_sigpool;
+#define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \
+ TCPF_CLOSE | TCPF_CLOSE_WAIT | \
+ TCPF_LAST_ACK | TCPF_CLOSING)
+
+int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ struct tcp_ao_key *key, struct tcphdr *th,
+ __u8 *hash_location);
+int tcp_ao_hash_skb(unsigned short int family,
+ char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family,
+ sockptr_t optval, int optlen);
+struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao,
+ int sndid, int rcvid);
+int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
+ struct request_sock *req, struct sk_buff *skb,
+ int family);
+int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
+ unsigned int len, struct tcp_sigpool *hp);
+void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
+void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
+bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code);
+int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen);
+int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen);
+int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen);
+int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen);
+enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
+ const struct sk_buff *skb, unsigned short int family,
+ const struct request_sock *req, int l3index,
+ const struct tcp_ao_hdr *aoh);
+u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq);
+struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_ao_addr *addr,
+ int family, int sndid, int rcvid);
+int tcp_ao_hash_hdr(unsigned short family, char *ao_hash,
+ struct tcp_ao_key *key, const u8 *tkey,
+ const union tcp_ao_addr *daddr,
+ const union tcp_ao_addr *saddr,
+ const struct tcphdr *th, u32 sne);
+int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb,
+ const struct tcp_ao_hdr *aoh, int l3index, u32 seq,
+ struct tcp_ao_key **key, char **traffic_key,
+ bool *allocated_traffic_key, u8 *keyid, u32 *sne);
+
+/* ipv4 specific functions */
+int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen);
+struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk,
+ int sndid, int rcvid);
+int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *mkt,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne);
+int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk,
+ __be32 sisn, __be32 disn, bool send);
+int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req);
+struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid);
+int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+/* ipv6 specific functions */
+int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int nbytes);
+int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key,
+ const struct sk_buff *skb, __be32 sisn, __be32 disn);
+int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk, __be32 sisn,
+ __be32 disn, bool send);
+int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req);
+struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk,
+ struct sock *addr_sk, int sndid, int rcvid);
+struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid);
+int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne);
+int tcp_v6_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen);
+int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne);
+void tcp_ao_established(struct sock *sk);
+void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb);
+void tcp_ao_connect_init(struct sock *sk);
+void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_request_sock *treq,
+ unsigned short int family, int l3index);
+#else /* CONFIG_TCP_AO */
+
+static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ struct tcp_ao_key *key, struct tcphdr *th,
+ __u8 *hash_location)
+{
+ return 0;
+}
+
+static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
+ struct tcp_request_sock *treq,
+ unsigned short int family, int l3index)
+{
+}
+
+static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int family,
+ int type, int code)
+{
+ return false;
+}
+
+static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
+ const struct sk_buff *skb, unsigned short int family,
+ const struct request_sock *req, int l3index,
+ const struct tcp_ao_hdr *aoh)
+{
+ return SKB_NOT_DROPPED_YET;
+}
+
+static inline struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk,
+ int l3index, const union tcp_ao_addr *addr,
+ int family, int sndid, int rcvid)
+{
+ return NULL;
+}
+
+static inline void tcp_ao_destroy_sock(struct sock *sk, bool twsk)
+{
+}
+
+static inline void tcp_ao_established(struct sock *sk)
+{
+}
+
+static inline void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb)
+{
+}
+
+static inline void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw,
+ struct tcp_sock *tp)
+{
+}
+
+static inline void tcp_ao_connect_init(struct sock *sk)
+{
+}
+
+static inline int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static inline int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static inline int tcp_ao_get_repair(struct sock *sk,
+ sockptr_t optval, sockptr_t optlen)
+{
+ return -ENOPROTOOPT;
+}
+
+static inline int tcp_ao_set_repair(struct sock *sk,
+ sockptr_t optval, unsigned int optlen)
+{
+ return -ENOPROTOOPT;
+}
+#endif
+
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
+int tcp_do_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const u8 **ao_hash);
+#else
+static inline int tcp_do_parse_auth_options(const struct tcphdr *th,
+ const u8 **md5_hash, const u8 **ao_hash)
+{
+ *md5_hash = NULL;
+ *ao_hash = NULL;
+ return 0;
+}
+#endif
+
+#endif /* _TCP_AO_H */
diff --git a/include/net/tcx.h b/include/net/tcx.h
index 264f147953..04be937778 100644
--- a/include/net/tcx.h
+++ b/include/net/tcx.h
@@ -38,16 +38,11 @@ static inline struct tcx_entry *tcx_entry(struct bpf_mprog_entry *entry)
return container_of(bundle, struct tcx_entry, bundle);
}
-static inline struct tcx_link *tcx_link(struct bpf_link *link)
+static inline struct tcx_link *tcx_link(const struct bpf_link *link)
{
return container_of(link, struct tcx_link, link);
}
-static inline const struct tcx_link *tcx_link_const(const struct bpf_link *link)
-{
- return tcx_link((struct bpf_link *)link);
-}
-
void tcx_inc(void);
void tcx_dec(void);
diff --git a/include/net/tls.h b/include/net/tls.h
index a2b44578dc..340ad43971 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -61,7 +61,8 @@ struct tls_rec;
#define TLS_AAD_SPACE_SIZE 13
-#define MAX_IV_SIZE 16
+#define TLS_MAX_IV_SIZE 16
+#define TLS_MAX_SALT_SIZE 4
#define TLS_TAG_SIZE 16
#define TLS_MAX_REC_SEQ_SIZE 8
#define TLS_MAX_AAD_SIZE TLS_AAD_SPACE_SIZE
@@ -96,9 +97,6 @@ struct tls_sw_context_tx {
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
- /* protect crypto_wait with encrypt_pending */
- spinlock_t encrypt_compl_lock;
- int async_notify;
u8 async_capable:1;
#define BIT_TX_SCHEDULED 0
@@ -135,8 +133,6 @@ struct tls_sw_context_rx {
struct tls_strparser strp;
atomic_t decrypt_pending;
- /* protect crypto_wait with decrypt_pending*/
- spinlock_t decrypt_compl_lock;
struct sk_buff_head async_hold;
struct wait_queue_head wq;
};
@@ -149,6 +145,7 @@ struct tls_record_info {
skb_frag_t frags[MAX_SKB_FRAGS];
};
+#define TLS_DRIVER_STATE_SIZE_TX 16
struct tls_offload_context_tx {
struct crypto_aead *aead_send;
spinlock_t lock; /* protects records list */
@@ -162,17 +159,13 @@ struct tls_offload_context_tx {
void (*sk_destruct)(struct sock *sk);
struct work_struct destruct_work;
struct tls_context *ctx;
- u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
* Currently the belief is that there is not enough
* driver specific state to justify another layer of indirection
*/
-#define TLS_DRIVER_STATE_SIZE_TX 16
+ u8 driver_state[TLS_DRIVER_STATE_SIZE_TX] __aligned(8);
};
-#define TLS_OFFLOAD_CONTEXT_SIZE_TX \
- (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX)
-
enum tls_context_flags {
/* tls_device_down was called after the netdev went down, device state
* was released, and kTLS works in software, even though rx_conf is
@@ -193,8 +186,8 @@ enum tls_context_flags {
};
struct cipher_context {
- char *iv;
- char *rec_seq;
+ char iv[TLS_MAX_IV_SIZE + TLS_MAX_SALT_SIZE];
+ char rec_seq[TLS_MAX_REC_SEQ_SIZE];
};
union tls_crypto_context {
@@ -302,6 +295,7 @@ struct tls_offload_resync_async {
u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
};
+#define TLS_DRIVER_STATE_SIZE_RX 8
struct tls_offload_context_rx {
/* sw must be the first member of tls_offload_context_rx */
struct tls_sw_context_rx sw;
@@ -325,17 +319,13 @@ struct tls_offload_context_rx {
struct tls_offload_resync_async *resync_async;
};
};
- u8 driver_state[] __aligned(8);
/* The TLS layer reserves room for driver specific state
* Currently the belief is that there is not enough
* driver specific state to justify another layer of indirection
*/
-#define TLS_DRIVER_STATE_SIZE_RX 8
+ u8 driver_state[TLS_DRIVER_STATE_SIZE_RX] __aligned(8);
};
-#define TLS_OFFLOAD_CONTEXT_SIZE_RX \
- (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
-
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
u32 seq, u64 *p_record_sn);
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 29251c3519..d716214fe0 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -154,13 +154,30 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck);
void udp_tunnel_sock_release(struct socket *sock);
+struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net, int oif,
+ __be32 *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 tos,
+ struct dst_cache *dst_cache);
+struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net,
+ struct socket *sock, int oif,
+ struct in6_addr *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 dsfield,
+ struct dst_cache *dst_cache);
+
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
__be16 flags, __be64 tunnel_id,
int md_size);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index de08c8e0d1..349c36fb5f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -383,14 +383,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
+/* Define the relationship between xdp-rx-metadata kfunc and
+ * various other entities:
+ * - xdp_rx_metadata enum
+ * - netdev netlink enum (Documentation/netlink/specs/netdev.yaml)
+ * - kfunc name
+ * - xdp_metadata_ops field
+ */
#define XDP_METADATA_KFUNC_xxx \
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
- bpf_xdp_metadata_rx_timestamp) \
+ NETDEV_XDP_RX_METADATA_TIMESTAMP, \
+ bpf_xdp_metadata_rx_timestamp, \
+ xmo_rx_timestamp) \
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
- bpf_xdp_metadata_rx_hash) \
+ NETDEV_XDP_RX_METADATA_HASH, \
+ bpf_xdp_metadata_rx_hash, \
+ xmo_rx_hash) \
-enum {
-#define XDP_METADATA_KFUNC(name, _) name,
+enum xdp_rx_metadata {
+#define XDP_METADATA_KFUNC(name, _, __, ___) name,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
MAX_XDP_METADATA_KFUNC,
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69b472604b..f83128007f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -63,6 +63,13 @@ struct xdp_sock {
struct xsk_queue *tx ____cacheline_aligned_in_smp;
struct list_head tx_list;
+ /* record the number of tx descriptors sent by this xsk and
+ * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs
+ * to be given to other xsks for sending tx descriptors, thereby
+ * preventing other XSKs from being starved.
+ */
+ u32 tx_budget_spent;
+
/* Protects generic receive. */
spinlock_t rx_lock;
@@ -109,4 +116,13 @@ static inline void __xsk_map_flush(void)
#endif /* CONFIG_XDP_SOCKETS */
+#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
+bool xsk_map_check_flush(void);
+#else
+static inline bool xsk_map_check_flush(void)
+{
+ return false;
+}
+#endif
+
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 363c7d5105..c9bb0f892f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
-static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 0);
+ return __xfrm_decode_session(net, skb, fl, family, 0);
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 1);
+ return __xfrm_decode_session(net, skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
@@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
{
return 1;
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
@@ -1669,7 +1669,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
-int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
@@ -1689,7 +1688,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
@@ -1712,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen);
#else
@@ -2166,7 +2168,7 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
proto = sk->sk_protocol;
if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
- return inet6_sk(sk)->dontfrag;
+ return inet6_test_bit(DONTFRAG, sk);
return false;
}