author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
commit    ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree      b2d64bc10158fdd5497876388cd68142ca374ed3 /net/rxrpc
parent    Initial commit. (diff)
Adding upstream version 6.6.15. (tag: upstream/6.6.15)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/rxrpc')
-rw-r--r--  net/rxrpc/Kconfig  77
-rw-r--r--  net/rxrpc/Makefile  43
-rw-r--r--  net/rxrpc/af_rxrpc.c  1085
-rw-r--r--  net/rxrpc/ar-internal.h  1409
-rw-r--r--  net/rxrpc/call_accept.c  479
-rw-r--r--  net/rxrpc/call_event.c  547
-rw-r--r--  net/rxrpc/call_object.c  767
-rw-r--r--  net/rxrpc/call_state.c  69
-rw-r--r--  net/rxrpc/conn_client.c  817
-rw-r--r--  net/rxrpc/conn_event.c  435
-rw-r--r--  net/rxrpc/conn_object.c  489
-rw-r--r--  net/rxrpc/conn_service.c  194
-rw-r--r--  net/rxrpc/input.c  1027
-rw-r--r--  net/rxrpc/insecure.c  88
-rw-r--r--  net/rxrpc/io_thread.c  560
-rw-r--r--  net/rxrpc/key.c  699
-rw-r--r--  net/rxrpc/local_event.c  84
-rw-r--r--  net/rxrpc/local_object.c  488
-rw-r--r--  net/rxrpc/misc.c  62
-rw-r--r--  net/rxrpc/net_ns.c  120
-rw-r--r--  net/rxrpc/output.c  733
-rw-r--r--  net/rxrpc/peer_event.c  349
-rw-r--r--  net/rxrpc/peer_object.c  495
-rw-r--r--  net/rxrpc/proc.c  501
-rw-r--r--  net/rxrpc/protocol.h  179
-rw-r--r--  net/rxrpc/recvmsg.c  555
-rw-r--r--  net/rxrpc/rtt.c  195
-rw-r--r--  net/rxrpc/rxkad.c  1267
-rw-r--r--  net/rxrpc/rxperf.c  625
-rw-r--r--  net/rxrpc/security.c  207
-rw-r--r--  net/rxrpc/sendmsg.c  824
-rw-r--r--  net/rxrpc/server_key.c  171
-rw-r--r--  net/rxrpc/skbuff.c  83
-rw-r--r--  net/rxrpc/sysctl.c  144
-rw-r--r--  net/rxrpc/txbuf.c  134
-rw-r--r--  net/rxrpc/utils.c  44
36 files changed, 16045 insertions, 0 deletions
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
new file mode 100644
index 0000000000..a20986806f
--- /dev/null
+++ b/net/rxrpc/Kconfig
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# RxRPC session sockets
+#
+
+config AF_RXRPC
+ tristate "RxRPC session sockets"
+ depends on INET
+ select CRYPTO
+ select KEYS
+ select NET_UDP_TUNNEL
+ help
+ Say Y or M here to include support for RxRPC session sockets (just
+ the transport part, not the presentation part: (un)marshalling is
+ left to the application).
+
+ These are used for AFS kernel filesystem and userspace utilities.
+
+ This module at the moment only supports client operations and is
+ currently incomplete.
+
+ See Documentation/networking/rxrpc.rst.
+
+if AF_RXRPC
+
+config AF_RXRPC_IPV6
+ bool "IPv6 support for RxRPC"
+ depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
+ help
+ Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as
+ its network transport.
+
+config AF_RXRPC_INJECT_LOSS
+ bool "Inject packet loss into RxRPC packet stream"
+ help
+ Say Y here to inject packet loss by discarding some received and some
+ transmitted packets.
+
+config AF_RXRPC_INJECT_RX_DELAY
+ bool "Inject delay into packet reception"
+ depends on SYSCTL
+ help
+ Say Y here to inject a delay into packet reception, allowing an
+ extended RTT time to be modelled. The delay can be configured using
+ /proc/sys/net/rxrpc/rxrpc_inject_rx_delay, setting a number of
+ milliseconds up to 0.5s (note that the granularity is actually in
+ jiffies).
+
+config AF_RXRPC_DEBUG
+ bool "RxRPC dynamic debugging"
+ help
+ Say Y here to make runtime controllable debugging messages appear.
+
+ See Documentation/networking/rxrpc.rst.
+
+
+config RXKAD
+ bool "RxRPC Kerberos security"
+ select CRYPTO
+ select CRYPTO_MANAGER
+ select CRYPTO_SKCIPHER
+ select CRYPTO_PCBC
+ select CRYPTO_FCRYPT
+ help
+ Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC
+ through the use of the key retention service.
+
+ See Documentation/networking/rxrpc.rst.
+
+config RXPERF
+ tristate "RxRPC test service"
+ help
+ Provide an rxperf service tester. This listens on UDP port 7009 for
+ incoming calls from the rxperf program (an example of which can be
+ found in OpenAFS).
+
+endif
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
new file mode 100644
index 0000000000..ac5caf5a48
--- /dev/null
+++ b/net/rxrpc/Makefile
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Linux kernel RxRPC
+#
+
+obj-$(CONFIG_AF_RXRPC) += rxrpc.o
+
+rxrpc-y := \
+ af_rxrpc.o \
+ call_accept.o \
+ call_event.o \
+ call_object.o \
+ call_state.o \
+ conn_client.o \
+ conn_event.o \
+ conn_object.o \
+ conn_service.o \
+ input.o \
+ insecure.o \
+ io_thread.o \
+ key.o \
+ local_event.o \
+ local_object.o \
+ misc.o \
+ net_ns.o \
+ output.o \
+ peer_event.o \
+ peer_object.o \
+ recvmsg.o \
+ rtt.o \
+ security.o \
+ sendmsg.o \
+ server_key.o \
+ skbuff.o \
+ txbuf.o \
+ utils.o
+
+rxrpc-$(CONFIG_PROC_FS) += proc.o
+rxrpc-$(CONFIG_RXKAD) += rxkad.o
+rxrpc-$(CONFIG_SYSCTL) += sysctl.o
+
+
+obj-$(CONFIG_RXPERF) += rxperf.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
new file mode 100644
index 0000000000..fa8aec78f6
--- /dev/null
+++ b/net/rxrpc/af_rxrpc.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AF_RXRPC implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <linux/key-type.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#define CREATE_TRACE_POINTS
+#include "ar-internal.h"
+
+MODULE_DESCRIPTION("RxRPC network protocol");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_RXRPC);
+
+unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
+module_param_named(debug, rxrpc_debug, uint, 0644);
+MODULE_PARM_DESC(debug, "RxRPC debugging mask");
+
+static struct proto rxrpc_proto;
+static const struct proto_ops rxrpc_rpc_ops;
+
+/* current debugging ID */
+atomic_t rxrpc_debug_id;
+EXPORT_SYMBOL(rxrpc_debug_id);
+
+/* count of skbs currently in use */
+atomic_t rxrpc_n_rx_skbs;
+
+struct workqueue_struct *rxrpc_workqueue;
+
+static void rxrpc_sock_destructor(struct sock *);
+
+/*
+ * see if an RxRPC socket is currently writable
+ */
+static inline int rxrpc_writable(struct sock *sk)
+{
+ return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+}
+
+/*
+ * wait for write bufferage to become available
+ */
+static void rxrpc_write_space(struct sock *sk)
+{
+ _enter("%p", sk);
+ rcu_read_lock();
+ if (rxrpc_writable(sk)) {
+ struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+ if (skwq_has_sleeper(wq))
+ wake_up_interruptible(&wq->wait);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * validate an RxRPC address
+ */
+static int rxrpc_validate_address(struct rxrpc_sock *rx,
+ struct sockaddr_rxrpc *srx,
+ int len)
+{
+ unsigned int tail;
+
+ if (len < sizeof(struct sockaddr_rxrpc))
+ return -EINVAL;
+
+ if (srx->srx_family != AF_RXRPC)
+ return -EAFNOSUPPORT;
+
+ if (srx->transport_type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ len -= offsetof(struct sockaddr_rxrpc, transport);
+ if (srx->transport_len < sizeof(sa_family_t) ||
+ srx->transport_len > len)
+ return -EINVAL;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ if (rx->family != AF_INET &&
+ rx->family != AF_INET6)
+ return -EAFNOSUPPORT;
+ if (srx->transport_len < sizeof(struct sockaddr_in))
+ return -EINVAL;
+ tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ if (rx->family != AF_INET6)
+ return -EAFNOSUPPORT;
+ if (srx->transport_len < sizeof(struct sockaddr_in6))
+ return -EINVAL;
+ tail = offsetof(struct sockaddr_rxrpc, transport) +
+ sizeof(struct sockaddr_in6);
+ break;
+#endif
+
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ if (tail < len)
+ memset((void *)srx + tail, 0, len - tail);
+ _debug("INET: %pISp", &srx->transport);
+ return 0;
+}
+
+/*
+ * bind a local address to an RxRPC socket
+ */
+static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ u16 service_id;
+ int ret;
+
+ _enter("%p,%p,%d", rx, saddr, len);
+
+ ret = rxrpc_validate_address(rx, srx, len);
+ if (ret < 0)
+ goto error;
+ service_id = srx->srx_service;
+
+ lock_sock(&rx->sk);
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNBOUND:
+ rx->srx = *srx;
+ local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx);
+ if (IS_ERR(local)) {
+ ret = PTR_ERR(local);
+ goto error_unlock;
+ }
+
+ if (service_id) {
+ write_lock(&local->services_lock);
+ if (local->service)
+ goto service_in_use;
+ rx->local = local;
+ local->service = rx;
+ write_unlock(&local->services_lock);
+
+ rx->sk.sk_state = RXRPC_SERVER_BOUND;
+ } else {
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ }
+ break;
+
+ case RXRPC_SERVER_BOUND:
+ ret = -EINVAL;
+ if (service_id == 0)
+ goto error_unlock;
+ ret = -EADDRINUSE;
+ if (service_id == rx->srx.srx_service)
+ goto error_unlock;
+ ret = -EINVAL;
+ srx->srx_service = rx->srx.srx_service;
+ if (memcmp(srx, &rx->srx, sizeof(*srx)) != 0)
+ goto error_unlock;
+ rx->second_service = service_id;
+ rx->sk.sk_state = RXRPC_SERVER_BOUND2;
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto error_unlock;
+ }
+
+ release_sock(&rx->sk);
+ _leave(" = 0");
+ return 0;
+
+service_in_use:
+ write_unlock(&local->services_lock);
+ rxrpc_unuse_local(local, rxrpc_local_unuse_bind);
+ rxrpc_put_local(local, rxrpc_local_put_bind);
+ ret = -EADDRINUSE;
+error_unlock:
+ release_sock(&rx->sk);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * set the number of pending calls permitted on a listening socket
+ */
+static int rxrpc_listen(struct socket *sock, int backlog)
+{
+ struct sock *sk = sock->sk;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ unsigned int max, old;
+ int ret;
+
+ _enter("%p,%d", rx, backlog);
+
+ lock_sock(&rx->sk);
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNBOUND:
+ ret = -EADDRNOTAVAIL;
+ break;
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_BOUND2:
+ ASSERT(rx->local != NULL);
+ max = READ_ONCE(rxrpc_max_backlog);
+ ret = -EINVAL;
+ if (backlog == INT_MAX)
+ backlog = max;
+ else if (backlog < 0 || backlog > max)
+ break;
+ old = sk->sk_max_ack_backlog;
+ sk->sk_max_ack_backlog = backlog;
+ ret = rxrpc_service_prealloc(rx, GFP_KERNEL);
+ if (ret == 0)
+ rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+ else
+ sk->sk_max_ack_backlog = old;
+ break;
+ case RXRPC_SERVER_LISTENING:
+ if (backlog == 0) {
+ rx->sk.sk_state = RXRPC_SERVER_LISTEN_DISABLED;
+ sk->sk_max_ack_backlog = 0;
+ rxrpc_discard_prealloc(rx);
+ ret = 0;
+ break;
+ }
+ fallthrough;
+ default:
+ ret = -EBUSY;
+ break;
+ }
+
+ release_sock(&rx->sk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_begin_call - Allow a kernel service to begin a call
+ * @sock: The socket on which to make the call
+ * @srx: The address of the peer to contact
+ * @key: The security context to use (defaults to socket setting)
+ * @user_call_ID: The ID to use
+ * @tx_total_len: Total length of data to transmit during the call (or -1)
+ * @hard_timeout: The maximum lifespan of the call in sec
+ * @gfp: The allocation constraints
+ * @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
+ * @interruptibility: The call is interruptible, or can be canceled.
+ * @debug_id: The debug ID for tracing to be assigned to the call
+ *
+ * Allow a kernel service to begin a call on the nominated socket. This just
+ * sets up all the internal tracking structures and allocates connection and
+ * call IDs as appropriate. The call to be used is returned.
+ *
+ * The default socket destination address and security may be overridden by
+ * supplying @srx and @key.
+ */
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+ struct sockaddr_rxrpc *srx,
+ struct key *key,
+ unsigned long user_call_ID,
+ s64 tx_total_len,
+ u32 hard_timeout,
+ gfp_t gfp,
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade,
+ enum rxrpc_interruptibility interruptibility,
+ unsigned int debug_id)
+{
+ struct rxrpc_conn_parameters cp;
+ struct rxrpc_call_params p;
+ struct rxrpc_call *call;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ _enter(",,%x,%lx", key_serial(key), user_call_ID);
+
+ ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ lock_sock(&rx->sk);
+
+ if (!key)
+ key = rx->key;
+ if (key && !key->payload.data[0])
+ key = NULL; /* a no-security key */
+
+ memset(&p, 0, sizeof(p));
+ p.user_call_ID = user_call_ID;
+ p.tx_total_len = tx_total_len;
+ p.interruptibility = interruptibility;
+ p.kernel = true;
+ p.timeouts.hard = hard_timeout;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = key;
+ cp.security_level = rx->min_sec_level;
+ cp.exclusive = false;
+ cp.upgrade = upgrade;
+ cp.service_id = srx->srx_service;
+ call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id);
+ /* The socket has been unlocked. */
+ if (!IS_ERR(call)) {
+ call->notify_rx = notify_rx;
+ mutex_unlock(&call->user_mutex);
+ }
+
+ _leave(" = %p", call);
+ return call;
+}
+EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+
+/*
+ * Dummy function used to stop the notifier talking to recvmsg().
+ */
+static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+}
+
+/**
+ * rxrpc_kernel_shutdown_call - Allow a kernel service to shut down a call it was using
+ * @sock: The socket the call is on
+ * @call: The call to end
+ *
+ * Allow a kernel service to shut down a call it was using. The call must be
+ * complete before this is called (the call should be aborted if necessary).
+ */
+void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call)
+{
+ _enter("%d{%d}", call->debug_id, refcount_read(&call->ref));
+
+ mutex_lock(&call->user_mutex);
+ if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+ rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+ /* Make sure we're not going to call back into a kernel service */
+ if (call->notify_rx) {
+ spin_lock(&call->notify_lock);
+ call->notify_rx = rxrpc_dummy_notify_rx;
+ spin_unlock(&call->notify_lock);
+ }
+ }
+ mutex_unlock(&call->user_mutex);
+}
+EXPORT_SYMBOL(rxrpc_kernel_shutdown_call);
+
+/**
+ * rxrpc_kernel_put_call - Release a reference to a call
+ * @sock: The socket the call is on
+ * @call: The call to put
+ *
+ * Drop the application's ref on an rxrpc call.
+ */
+void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call)
+{
+ rxrpc_put_call(call, rxrpc_call_put_kernel);
+}
+EXPORT_SYMBOL(rxrpc_kernel_put_call);
+
+/**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - whether
+ * it has completed successfully and all received data has been consumed.
+ */
+bool rxrpc_kernel_check_life(const struct socket *sock,
+ const struct rxrpc_call *call)
+{
+ if (!rxrpc_call_is_complete(call))
+ return true;
+ if (call->completion != RXRPC_CALL_SUCCEEDED)
+ return false;
+ return !skb_queue_empty(&call->recvmsg_queue);
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
+ * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
+ * @sock: The socket the call is on
+ * @call: The call to query
+ *
+ * Allow a kernel service to retrieve the epoch value from a service call to
+ * see if the client at the other end rebooted.
+ */
+u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->conn->proto.epoch;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_epoch);
+
+/**
+ * rxrpc_kernel_new_call_notification - Get notifications of new calls
+ * @sock: The socket to intercept received messages on
+ * @notify_new_call: Function to be called when new calls appear
+ * @discard_new_call: Function to discard preallocated calls
+ *
+ * Allow a kernel service to be given notifications about new calls.
+ */
+void rxrpc_kernel_new_call_notification(
+ struct socket *sock,
+ rxrpc_notify_new_call_t notify_new_call,
+ rxrpc_discard_new_call_t discard_new_call)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+
+ rx->notify_new_call = notify_new_call;
+ rx->discard_new_call = discard_new_call;
+}
+EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
+
+/**
+ * rxrpc_kernel_set_max_life - Set maximum lifespan on a call
+ * @sock: The socket the call is on
+ * @call: The call to configure
+ * @hard_timeout: The maximum lifespan of the call in jiffies
+ *
+ * Set the maximum lifespan of a call. The call will end with ETIME or
+ * ETIMEDOUT if it takes longer than this.
+ */
+void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call,
+ unsigned long hard_timeout)
+{
+ unsigned long now;
+
+ mutex_lock(&call->user_mutex);
+
+ now = jiffies;
+ hard_timeout += now;
+ WRITE_ONCE(call->expect_term_by, hard_timeout);
+ rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard);
+
+ mutex_unlock(&call->user_mutex);
+}
+EXPORT_SYMBOL(rxrpc_kernel_set_max_life);
+
+/*
+ * connect an RxRPC socket
+ * - this just targets it at a specific destination; no actual connection
+ * negotiation takes place
+ */
+static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags)
+{
+ struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)addr;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ _enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+ ret = rxrpc_validate_address(rx, srx, addr_len);
+ if (ret < 0) {
+ _leave(" = %d [bad addr]", ret);
+ return ret;
+ }
+
+ lock_sock(&rx->sk);
+
+ ret = -EISCONN;
+ if (test_bit(RXRPC_SOCK_CONNECTED, &rx->flags))
+ goto error;
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNBOUND:
+ rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+ break;
+ case RXRPC_CLIENT_UNBOUND:
+ case RXRPC_CLIENT_BOUND:
+ break;
+ default:
+ ret = -EBUSY;
+ goto error;
+ }
+
+ rx->connect_srx = *srx;
+ set_bit(RXRPC_SOCK_CONNECTED, &rx->flags);
+ ret = 0;
+
+error:
+ release_sock(&rx->sk);
+ return ret;
+}
+
+/*
+ * send a message through an RxRPC socket
+ * - in a client this does a number of things:
+ * - finds/sets up a connection for the security specified (if any)
+ * - initiates a call (ID in control data)
+ * - ends the request phase of a call (if MSG_MORE is not set)
+ * - sends a call data packet
+ * - may send an abort (abort code in control data)
+ */
+static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
+{
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ _enter(",{%d},,%zu", rx->sk.sk_state, len);
+
+ if (m->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ if (m->msg_name) {
+ ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
+ if (ret < 0) {
+ _leave(" = %d [bad addr]", ret);
+ return ret;
+ }
+ }
+
+ lock_sock(&rx->sk);
+
+ switch (rx->sk.sk_state) {
+ case RXRPC_UNBOUND:
+ case RXRPC_CLIENT_UNBOUND:
+ rx->srx.srx_family = AF_RXRPC;
+ rx->srx.srx_service = 0;
+ rx->srx.transport_type = SOCK_DGRAM;
+ rx->srx.transport.family = rx->family;
+ switch (rx->family) {
+ case AF_INET:
+ rx->srx.transport_len = sizeof(struct sockaddr_in);
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ rx->srx.transport_len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ ret = -EAFNOSUPPORT;
+ goto error_unlock;
+ }
+ local = rxrpc_lookup_local(sock_net(sock->sk), &rx->srx);
+ if (IS_ERR(local)) {
+ ret = PTR_ERR(local);
+ goto error_unlock;
+ }
+
+ rx->local = local;
+ rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+ fallthrough;
+
+ case RXRPC_CLIENT_BOUND:
+ if (!m->msg_name &&
+ test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) {
+ m->msg_name = &rx->connect_srx;
+ m->msg_namelen = sizeof(rx->connect_srx);
+ }
+ fallthrough;
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_LISTENING:
+ ret = rxrpc_do_sendmsg(rx, m, len);
+ /* The socket has been unlocked */
+ goto out;
+ default:
+ ret = -EINVAL;
+ goto error_unlock;
+ }
+
+error_unlock:
+ release_sock(&rx->sk);
+out:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val)
+{
+ if (sk->sk_state != RXRPC_UNBOUND)
+ return -EISCONN;
+ if (val > RXRPC_SECURITY_MAX)
+ return -EINVAL;
+ lock_sock(sk);
+ rxrpc_sk(sk)->min_sec_level = val;
+ release_sock(sk);
+ return 0;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
+
+/*
+ * set RxRPC socket options
+ */
+static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ unsigned int min_sec_level;
+ u16 service_upgrade[2];
+ int ret;
+
+ _enter(",%d,%d,,%d", level, optname, optlen);
+
+ lock_sock(&rx->sk);
+ ret = -EOPNOTSUPP;
+
+ if (level == SOL_RXRPC) {
+ switch (optname) {
+ case RXRPC_EXCLUSIVE_CONNECTION:
+ ret = -EINVAL;
+ if (optlen != 0)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
+ goto error;
+ rx->exclusive = true;
+ goto success;
+
+ case RXRPC_SECURITY_KEY:
+ ret = -EINVAL;
+ if (rx->key)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
+ goto error;
+ ret = rxrpc_request_key(rx, optval, optlen);
+ goto error;
+
+ case RXRPC_SECURITY_KEYRING:
+ ret = -EINVAL;
+ if (rx->key)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
+ goto error;
+ ret = rxrpc_server_keyring(rx, optval, optlen);
+ goto error;
+
+ case RXRPC_MIN_SECURITY_LEVEL:
+ ret = -EINVAL;
+ if (optlen != sizeof(unsigned int))
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
+ goto error;
+ ret = copy_from_sockptr(&min_sec_level, optval,
+ sizeof(unsigned int));
+ if (ret < 0)
+ goto error;
+ ret = -EINVAL;
+ if (min_sec_level > RXRPC_SECURITY_MAX)
+ goto error;
+ rx->min_sec_level = min_sec_level;
+ goto success;
+
+ case RXRPC_UPGRADEABLE_SERVICE:
+ ret = -EINVAL;
+ if (optlen != sizeof(service_upgrade) ||
+ rx->service_upgrade.from != 0)
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_SERVER_BOUND2)
+ goto error;
+ ret = -EFAULT;
+ if (copy_from_sockptr(service_upgrade, optval,
+ sizeof(service_upgrade)) != 0)
+ goto error;
+ ret = -EINVAL;
+ if ((service_upgrade[0] != rx->srx.srx_service ||
+ service_upgrade[1] != rx->second_service) &&
+ (service_upgrade[0] != rx->second_service ||
+ service_upgrade[1] != rx->srx.srx_service))
+ goto error;
+ rx->service_upgrade.from = service_upgrade[0];
+ rx->service_upgrade.to = service_upgrade[1];
+ goto success;
+
+ default:
+ break;
+ }
+ }
+
+success:
+ ret = 0;
+error:
+ release_sock(&rx->sk);
+ return ret;
+}
+
+/*
+ * Get socket options.
+ */
+static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *_optlen)
+{
+ int optlen;
+
+ if (level != SOL_RXRPC)
+ return -EOPNOTSUPP;
+
+ if (get_user(optlen, _optlen))
+ return -EFAULT;
+
+ switch (optname) {
+ case RXRPC_SUPPORTED_CMSG:
+ if (optlen < sizeof(int))
+ return -ETOOSMALL;
+ if (put_user(RXRPC__SUPPORTED - 1, (int __user *)optval) ||
+ put_user(sizeof(int), _optlen))
+ return -EFAULT;
+ return 0;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/*
+ * permit an RxRPC socket to be polled
+ */
+static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ __poll_t mask;
+
+ sock_poll_wait(file, sock, wait);
+ mask = 0;
+
+ /* the socket is readable if there are any messages waiting on the Rx
+ * queue */
+ if (!list_empty(&rx->recvmsg_q))
+ mask |= EPOLLIN | EPOLLRDNORM;
+
+ /* the socket is writable if there is space to add new data to the
+ * socket; there is no guarantee that any particular call in progress
+ * on the socket may have space in the Tx ACK window */
+ if (rxrpc_writable(sk))
+ mask |= EPOLLOUT | EPOLLWRNORM;
+
+ return mask;
+}
+
+/*
+ * create an RxRPC socket
+ */
+static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
+ int kern)
+{
+ struct rxrpc_net *rxnet;
+ struct rxrpc_sock *rx;
+ struct sock *sk;
+
+ _enter("%p,%d", sock, protocol);
+
+ /* we support transport protocol UDP/UDP6 only */
+ if (protocol != PF_INET &&
+ !(IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol == PF_INET6))
+ return -EPROTONOSUPPORT;
+
+ if (sock->type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ sock->ops = &rxrpc_rpc_ops;
+ sock->state = SS_UNCONNECTED;
+
+ sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ sk->sk_state = RXRPC_UNBOUND;
+ sk->sk_write_space = rxrpc_write_space;
+ sk->sk_max_ack_backlog = 0;
+ sk->sk_destruct = rxrpc_sock_destructor;
+
+ rx = rxrpc_sk(sk);
+ rx->family = protocol;
+ rx->calls = RB_ROOT;
+
+ spin_lock_init(&rx->incoming_lock);
+ INIT_LIST_HEAD(&rx->sock_calls);
+ INIT_LIST_HEAD(&rx->to_be_accepted);
+ INIT_LIST_HEAD(&rx->recvmsg_q);
+ spin_lock_init(&rx->recvmsg_lock);
+ rwlock_init(&rx->call_lock);
+ memset(&rx->srx, 0, sizeof(rx->srx));
+
+ rxnet = rxrpc_net(sock_net(&rx->sk));
+ timer_reduce(&rxnet->peer_keepalive_timer, jiffies + 1);
+
+ _leave(" = 0 [%p]", rx);
+ return 0;
+}
+
+/*
+ * Kill all the calls on a socket and shut it down.
+ */
+static int rxrpc_shutdown(struct socket *sock, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret = 0;
+
+ _enter("%p,%d", sk, flags);
+
+ if (flags != SHUT_RDWR)
+ return -EOPNOTSUPP;
+ if (sk->sk_state == RXRPC_CLOSE)
+ return -ESHUTDOWN;
+
+ lock_sock(sk);
+
+ if (sk->sk_state < RXRPC_CLOSE) {
+ sk->sk_state = RXRPC_CLOSE;
+ sk->sk_shutdown = SHUTDOWN_MASK;
+ } else {
+ ret = -ESHUTDOWN;
+ }
+
+ rxrpc_discard_prealloc(rx);
+
+ release_sock(sk);
+ return ret;
+}
+
+/*
+ * RxRPC socket destructor
+ */
+static void rxrpc_sock_destructor(struct sock *sk)
+{
+ _enter("%p", sk);
+
+ rxrpc_purge_queue(&sk->sk_receive_queue);
+
+ WARN_ON(refcount_read(&sk->sk_wmem_alloc));
+ WARN_ON(!sk_unhashed(sk));
+ WARN_ON(sk->sk_socket);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ printk("Attempt to release alive rxrpc socket: %p\n", sk);
+ return;
+ }
+}
+
+/*
+ * release an RxRPC socket
+ */
+static int rxrpc_release_sock(struct sock *sk)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+
+ _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt));
+
+ /* declare the socket closed for business */
+ sock_orphan(sk);
+ sk->sk_shutdown = SHUTDOWN_MASK;
+
+ /* We want to kill off all connections from a service socket
+ * as fast as possible because we can't share these; client
+ * sockets, on the other hand, can share an endpoint.
+ */
+ switch (sk->sk_state) {
+ case RXRPC_SERVER_BOUND:
+ case RXRPC_SERVER_BOUND2:
+ case RXRPC_SERVER_LISTENING:
+ case RXRPC_SERVER_LISTEN_DISABLED:
+ rx->local->service_closed = true;
+ break;
+ }
+
+ sk->sk_state = RXRPC_CLOSE;
+
+ if (rx->local && rx->local->service == rx) {
+ write_lock(&rx->local->services_lock);
+ rx->local->service = NULL;
+ write_unlock(&rx->local->services_lock);
+ }
+
+ /* try to flush out this socket */
+ rxrpc_discard_prealloc(rx);
+ rxrpc_release_calls_on_socket(rx);
+ flush_workqueue(rxrpc_workqueue);
+ rxrpc_purge_queue(&sk->sk_receive_queue);
+
+ rxrpc_unuse_local(rx->local, rxrpc_local_unuse_release_sock);
+ rxrpc_put_local(rx->local, rxrpc_local_put_release_sock);
+ rx->local = NULL;
+ key_put(rx->key);
+ rx->key = NULL;
+ key_put(rx->securities);
+ rx->securities = NULL;
+ sock_put(sk);
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * release an RxRPC BSD socket on close() or equivalent
+ */
+static int rxrpc_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ _enter("%p{%p}", sock, sk);
+
+ if (!sk)
+ return 0;
+
+ sock->sk = NULL;
+
+ return rxrpc_release_sock(sk);
+}
+
+/*
+ * RxRPC network protocol
+ */
+static const struct proto_ops rxrpc_rpc_ops = {
+ .family = PF_RXRPC,
+ .owner = THIS_MODULE,
+ .release = rxrpc_release,
+ .bind = rxrpc_bind,
+ .connect = rxrpc_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = rxrpc_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = rxrpc_listen,
+ .shutdown = rxrpc_shutdown,
+ .setsockopt = rxrpc_setsockopt,
+ .getsockopt = rxrpc_getsockopt,
+ .sendmsg = rxrpc_sendmsg,
+ .recvmsg = rxrpc_recvmsg,
+ .mmap = sock_no_mmap,
+};
+
+static struct proto rxrpc_proto = {
+ .name = "RXRPC",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct rxrpc_sock),
+ .max_header = sizeof(struct rxrpc_wire_header),
+};
+
+static const struct net_proto_family rxrpc_family_ops = {
+ .family = PF_RXRPC,
+ .create = rxrpc_create,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * initialise and register the RxRPC protocol
+ */
+static int __init af_rxrpc_init(void)
+{
+ int ret = -1;
+
+ BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
+
+ ret = -ENOMEM;
+ rxrpc_gen_version_string();
+ rxrpc_call_jar = kmem_cache_create(
+ "rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!rxrpc_call_jar) {
+ pr_notice("Failed to allocate call jar\n");
+ goto error_call_jar;
+ }
+
+ rxrpc_workqueue = alloc_ordered_workqueue("krxrpcd", WQ_HIGHPRI | WQ_MEM_RECLAIM);
+ if (!rxrpc_workqueue) {
+ pr_notice("Failed to allocate work queue\n");
+ goto error_work_queue;
+ }
+
+ ret = rxrpc_init_security();
+ if (ret < 0) {
+ pr_crit("Cannot initialise security\n");
+ goto error_security;
+ }
+
+ ret = register_pernet_device(&rxrpc_net_ops);
+ if (ret)
+ goto error_pernet;
+
+ ret = proto_register(&rxrpc_proto, 1);
+ if (ret < 0) {
+ pr_crit("Cannot register protocol\n");
+ goto error_proto;
+ }
+
+ ret = sock_register(&rxrpc_family_ops);
+ if (ret < 0) {
+ pr_crit("Cannot register socket family\n");
+ goto error_sock;
+ }
+
+ ret = register_key_type(&key_type_rxrpc);
+ if (ret < 0) {
+ pr_crit("Cannot register client key type\n");
+ goto error_key_type;
+ }
+
+ ret = register_key_type(&key_type_rxrpc_s);
+ if (ret < 0) {
+ pr_crit("Cannot register server key type\n");
+ goto error_key_type_s;
+ }
+
+ ret = rxrpc_sysctl_init();
+ if (ret < 0) {
+ pr_crit("Cannot register sysctls\n");
+ goto error_sysctls;
+ }
+
+ return 0;
+
+error_sysctls:
+ unregister_key_type(&key_type_rxrpc_s);
+error_key_type_s:
+ unregister_key_type(&key_type_rxrpc);
+error_key_type:
+ sock_unregister(PF_RXRPC);
+error_sock:
+ proto_unregister(&rxrpc_proto);
+error_proto:
+ unregister_pernet_device(&rxrpc_net_ops);
+error_pernet:
+ rxrpc_exit_security();
+error_security:
+ destroy_workqueue(rxrpc_workqueue);
+error_work_queue:
+ kmem_cache_destroy(rxrpc_call_jar);
+error_call_jar:
+ return ret;
+}
+
+/*
+ * unregister the RxRPC protocol
+ */
+static void __exit af_rxrpc_exit(void)
+{
+ _enter("");
+ rxrpc_sysctl_exit();
+ unregister_key_type(&key_type_rxrpc_s);
+ unregister_key_type(&key_type_rxrpc);
+ sock_unregister(PF_RXRPC);
+ proto_unregister(&rxrpc_proto);
+ unregister_pernet_device(&rxrpc_net_ops);
+ ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
+
+ /* Make sure the local and peer records pinned by any dying connections
+ * are released.
+ */
+ rcu_barrier();
+
+ destroy_workqueue(rxrpc_workqueue);
+ rxrpc_exit_security();
+ kmem_cache_destroy(rxrpc_call_jar);
+ _leave("");
+}
+
+module_init(af_rxrpc_init);
+module_exit(af_rxrpc_exit);
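
As a usage illustration (not part of this patch): a minimal sketch of how a kernel service might drive the rxrpc_kernel_*() calls exported above. The socket setup uses the standard sock_create_kern()/kernel_bind() helpers; the service ID (1234), port (7000), loopback address and the example_* names are assumptions for illustration only.

/* Illustrative sketch only -- not part of the af_rxrpc.c file above. */
#include <linux/atomic.h>
#include <linux/in.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>

/* Called when data/events arrive for the call instead of the socket queue. */
static void example_notify_rx(struct sock *sk, struct rxrpc_call *call,
			      unsigned long user_call_ID)
{
	/* Typically: queue work that consumes the reply (e.g. via
	 * rxrpc_kernel_recv_data(), declared in <net/af_rxrpc.h> but not
	 * shown in this patch). */
}

static int example_make_call(void)
{
	struct sockaddr_rxrpc srx = {};
	struct rxrpc_call *call;
	struct socket *sock;
	int ret;

	/* Create and bind a client-only AF_RXRPC socket (srx_service == 0). */
	ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET, &sock);
	if (ret < 0)
		return ret;

	srx.srx_family		= AF_RXRPC;
	srx.srx_service		= 0;
	srx.transport_type	= SOCK_DGRAM;
	srx.transport_len	= sizeof(srx.transport.sin);
	srx.transport.sin.sin_family = AF_INET;
	ret = kernel_bind(sock, (struct sockaddr *)&srx, sizeof(srx));
	if (ret < 0)
		goto out_release;

	/* Point the call at an assumed service 1234 on port 7000, loopback. */
	srx.srx_service			  = 1234;
	srx.transport.sin.sin_port	  = htons(7000);
	srx.transport.sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	call = rxrpc_kernel_begin_call(sock, &srx,
				       NULL,		/* use the socket's key */
				       0,		/* user_call_ID */
				       -1,		/* tx_total_len unknown */
				       0,		/* no hard timeout */
				       GFP_KERNEL,
				       example_notify_rx,
				       false,		/* no service upgrade */
				       RXRPC_INTERRUPTIBLE,
				       atomic_inc_return(&rxrpc_debug_id));
	if (IS_ERR(call)) {
		ret = PTR_ERR(call);
		goto out_release;
	}

	/* ... send the request and consume the reply here ... */

	rxrpc_kernel_shutdown_call(sock, call);	/* call must be complete/aborted */
	rxrpc_kernel_put_call(sock, call);	/* drop the application's ref */
	ret = 0;

out_release:
	sock_release(sock);
	return ret;
}

The notify_rx callback stands in for delivery to the socket's recvmsg queue, matching the @notify_rx parameter documented on rxrpc_kernel_begin_call() above.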
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
new file mode 100644
index 0000000000..e8b4340813
--- /dev/null
+++ b/net/rxrpc/ar-internal.h
@@ -0,0 +1,1409 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* AF_RXRPC internal definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/atomic.h>
+#include <linux/seqlock.h>
+#include <linux/win_minmax.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include "protocol.h"
+
+#define FCRYPT_BSIZE 8
+struct rxrpc_crypt {
+ union {
+ u8 x[FCRYPT_BSIZE];
+ __be32 n[2];
+ };
+} __attribute__((aligned(8)));
+
+#define rxrpc_queue_work(WS) queue_work(rxrpc_workqueue, (WS))
+#define rxrpc_queue_delayed_work(WS,D) \
+ queue_delayed_work(rxrpc_workqueue, (WS), (D))
+
+struct key_preparsed_payload;
+struct rxrpc_connection;
+struct rxrpc_txbuf;
+
+/*
+ * Mark applied to socket buffers in skb->mark. skb->priority is used
+ * to pass supplementary information.
+ */
+enum rxrpc_skb_mark {
+ RXRPC_SKB_MARK_PACKET, /* Received packet */
+ RXRPC_SKB_MARK_ERROR, /* Error notification */
+ RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
+ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
+ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
+};
+
+/*
+ * sk_state for RxRPC sockets
+ */
+enum {
+ RXRPC_UNBOUND = 0,
+ RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */
+ RXRPC_CLIENT_BOUND, /* client local address bound */
+ RXRPC_SERVER_BOUND, /* server local address bound */
+ RXRPC_SERVER_BOUND2, /* second server local address bound */
+ RXRPC_SERVER_LISTENING, /* server listening for connections */
+ RXRPC_SERVER_LISTEN_DISABLED, /* server listening disabled */
+ RXRPC_CLOSE, /* socket is being closed */
+};
+
+/*
+ * Per-network namespace data.
+ */
+struct rxrpc_net {
+ struct proc_dir_entry *proc_net; /* Subdir in /proc/net */
+ u32 epoch; /* Local epoch for detecting local-end reset */
+ struct list_head calls; /* List of calls active in this namespace */
+ spinlock_t call_lock; /* Lock for ->calls */
+ atomic_t nr_calls; /* Count of allocated calls */
+
+ atomic_t nr_conns;
+ struct list_head conn_proc_list; /* List of conns in this namespace for proc */
+ struct list_head service_conns; /* Service conns in this namespace */
+ rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
+ struct work_struct service_conn_reaper;
+ struct timer_list service_conn_reap_timer;
+
+ bool live;
+
+ atomic_t nr_client_conns;
+
+ struct hlist_head local_endpoints;
+ struct mutex local_mutex; /* Lock for ->local_endpoints */
+
+ DECLARE_HASHTABLE (peer_hash, 10);
+ spinlock_t peer_hash_lock; /* Lock for ->peer_hash */
+
+#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */
+ u8 peer_keepalive_cursor;
+ time64_t peer_keepalive_base;
+ struct list_head peer_keepalive[32];
+ struct list_head peer_keepalive_new;
+ struct timer_list peer_keepalive_timer;
+ struct work_struct peer_keepalive_work;
+
+ atomic_t stat_tx_data;
+ atomic_t stat_tx_data_retrans;
+ atomic_t stat_tx_data_send;
+ atomic_t stat_tx_data_send_frag;
+ atomic_t stat_tx_data_send_fail;
+ atomic_t stat_tx_data_underflow;
+ atomic_t stat_tx_data_cwnd_reset;
+ atomic_t stat_rx_data;
+ atomic_t stat_rx_data_reqack;
+ atomic_t stat_rx_data_jumbo;
+
+ atomic_t stat_tx_ack_fill;
+ atomic_t stat_tx_ack_send;
+ atomic_t stat_tx_ack_skip;
+ atomic_t stat_tx_acks[256];
+ atomic_t stat_rx_acks[256];
+
+ atomic_t stat_why_req_ack[8];
+
+ atomic_t stat_io_loop;
+};
+
+/*
+ * Service backlog preallocation.
+ *
+ * This contains circular buffers of preallocated peers, connections and calls
+ * for incoming service calls and their head and tail pointers. This allows
+ * calls to be set up in the data_ready handler, thereby avoiding the need to
+ * shuffle packets around so much.
+ */
+struct rxrpc_backlog {
+ unsigned short peer_backlog_head;
+ unsigned short peer_backlog_tail;
+ unsigned short conn_backlog_head;
+ unsigned short conn_backlog_tail;
+ unsigned short call_backlog_head;
+ unsigned short call_backlog_tail;
+#define RXRPC_BACKLOG_MAX 32
+ struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX];
+ struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX];
+ struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX];
+};
+
+/*
+ * RxRPC socket definition
+ */
+struct rxrpc_sock {
+ /* WARNING: sk has to be the first member */
+ struct sock sk;
+ rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */
+ rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */
+ struct rxrpc_local *local; /* local endpoint */
+ struct rxrpc_backlog *backlog; /* Preallocation for services */
+ spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */
+ struct list_head sock_calls; /* List of calls owned by this socket */
+ struct list_head to_be_accepted; /* calls awaiting acceptance */
+ struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */
+ spinlock_t recvmsg_lock; /* Lock for recvmsg_q */
+ struct key *key; /* security for this socket */
+ struct key *securities; /* list of server security descriptors */
+ struct rb_root calls; /* User ID -> call mapping */
+ unsigned long flags;
+#define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */
+ rwlock_t call_lock; /* lock for calls */
+ u32 min_sec_level; /* minimum security level */
+#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
+ bool exclusive; /* Exclusive connection for a client socket */
+ u16 second_service; /* Additional service bound to the endpoint */
+ struct {
+ /* Service upgrade information */
+ u16 from; /* Service ID to upgrade (if not 0) */
+ u16 to; /* service ID to upgrade to */
+ } service_upgrade;
+ sa_family_t family; /* Protocol family created with */
+ struct sockaddr_rxrpc srx; /* Primary Service/local addresses */
+ struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */
+};
+
+#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
+
+/*
+ * CPU-byteorder normalised Rx packet header.
+ */
+struct rxrpc_host_header {
+ u32 epoch; /* client boot timestamp */
+ u32 cid; /* connection and channel ID */
+ u32 callNumber; /* call ID (0 for connection-level packets) */
+ u32 seq; /* sequence number of pkt in call stream */
+ u32 serial; /* serial number of pkt sent to network */
+ u8 type; /* packet type */
+ u8 flags; /* packet flags */
+ u8 userStatus; /* app-layer defined status */
+ u8 securityIndex; /* security protocol ID */
+ union {
+ u16 _rsvd; /* reserved */
+ u16 cksum; /* kerberos security checksum */
+ };
+ u16 serviceId; /* service ID */
+} __packed;
+
+/*
+ * RxRPC socket buffer private variables
+ * - max 48 bytes (struct sk_buff::cb)
+ */
+struct rxrpc_skb_priv {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
+#define RXRPC_RX_VERIFIED 0x01
+
+ struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
+};
+
+#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
+
+/*
+ * RxRPC security module interface
+ */
+struct rxrpc_security {
+ const char *name; /* name of this service */
+ u8 security_index; /* security type provided */
+ u32 no_key_abort; /* Abort code indicating no key */
+
+ /* Initialise a security service */
+ int (*init)(void);
+
+ /* Clean up a security service */
+ void (*exit)(void);
+
+ /* Parse the information from a server key */
+ int (*preparse_server_key)(struct key_preparsed_payload *);
+
+ /* Clean up the preparse buffer after parsing a server key */
+ void (*free_preparse_server_key)(struct key_preparsed_payload *);
+
+ /* Destroy the payload of a server key */
+ void (*destroy_server_key)(struct key *);
+
+ /* Describe a server key */
+ void (*describe_server_key)(const struct key *, struct seq_file *);
+
+ /* initialise a connection's security */
+ int (*init_connection_security)(struct rxrpc_connection *,
+ struct rxrpc_key_token *);
+
+ /* Work out how much data we can store in a packet, given an estimate
+ * of the amount of data remaining.
+ */
+ int (*how_much_data)(struct rxrpc_call *, size_t,
+ size_t *, size_t *, size_t *);
+
+ /* impose security on a packet */
+ int (*secure_packet)(struct rxrpc_call *, struct rxrpc_txbuf *);
+
+ /* verify the security on a received packet */
+ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *);
+
+ /* Free crypto request on a call */
+ void (*free_call_crypto)(struct rxrpc_call *);
+
+ /* issue a challenge */
+ int (*issue_challenge)(struct rxrpc_connection *);
+
+ /* respond to a challenge */
+ int (*respond_to_challenge)(struct rxrpc_connection *,
+ struct sk_buff *);
+
+ /* verify a response */
+ int (*verify_response)(struct rxrpc_connection *,
+ struct sk_buff *);
+
+ /* clear connection security */
+ void (*clear)(struct rxrpc_connection *);
+};
+
+/*
+ * RxRPC local transport endpoint description
+ * - owned by a single AF_RXRPC socket
+ * - pointed to by transport socket struct sk_user_data
+ */
+struct rxrpc_local {
+ struct rcu_head rcu;
+ atomic_t active_users; /* Number of users of the local endpoint */
+ refcount_t ref; /* Number of references to the structure */
+ struct net *net; /* The network namespace */
+ struct rxrpc_net *rxnet; /* Our bits in the network namespace */
+ struct hlist_node link;
+ struct socket *socket; /* my UDP socket */
+ struct task_struct *io_thread;
+ struct completion io_thread_ready; /* Indication that the I/O thread started */
+ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ struct sk_buff_head rx_delay_queue; /* Delay injection queue */
+#endif
+ struct sk_buff_head rx_queue; /* Received packets */
+ struct list_head conn_attend_q; /* Conns requiring immediate attention */
+ struct list_head call_attend_q; /* Calls requiring immediate attention */
+
+ struct rb_root client_bundles; /* Client connection bundles by socket params */
+ spinlock_t client_bundles_lock; /* Lock for client_bundles */
+ bool kill_all_client_conns;
+ struct list_head idle_client_conns;
+ struct timer_list client_conn_reap_timer;
+ unsigned long client_conn_flags;
+#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */
+
+ spinlock_t lock; /* access lock */
+ rwlock_t services_lock; /* lock for services list */
+ int debug_id; /* debug ID for printks */
+ bool dead;
+ bool service_closed; /* Service socket closed */
+ struct idr conn_ids; /* List of connection IDs */
+ struct list_head new_client_calls; /* Newly created client calls need connection */
+ spinlock_t client_call_lock; /* Lock for ->new_client_calls */
+ struct sockaddr_rxrpc srx; /* local address */
+};
+
+/*
+ * RxRPC remote transport endpoint definition
+ * - matched by local endpoint, remote port, address and protocol type
+ */
+struct rxrpc_peer {
+ struct rcu_head rcu; /* This must be first */
+ refcount_t ref;
+ unsigned long hash_key;
+ struct hlist_node hash_link;
+ struct rxrpc_local *local;
+ struct hlist_head error_targets; /* targets for net error distribution */
+ struct rb_root service_conns; /* Service connections */
+ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */
+ time64_t last_tx_at; /* Last time packet sent here */
+ seqlock_t service_conn_lock;
+ spinlock_t lock; /* access lock */
+ unsigned int if_mtu; /* interface MTU for this peer */
+ unsigned int mtu; /* network MTU for this peer */
+ unsigned int maxdata; /* data size (MTU - hdrsize) */
+ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */
+ int debug_id; /* debug ID for printks */
+ struct sockaddr_rxrpc srx; /* remote address */
+
+ /* calculated RTT cache */
+#define RXRPC_RTT_CACHE_SIZE 32
+ spinlock_t rtt_input_lock; /* RTT lock for input routine */
+ ktime_t rtt_last_req; /* Time of last RTT request */
+ unsigned int rtt_count; /* Number of samples we've got */
+
+ u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ u32 mdev_us; /* medium deviation */
+ u32 mdev_max_us; /* maximal mdev for the last rtt period */
+ u32 rttvar_us; /* smoothed mdev_max */
+ u32 rto_j; /* Retransmission timeout in jiffies */
+ u8 backoff; /* Backoff timeout */
+
+ u8 cong_ssthresh; /* Congestion slow-start threshold */
+};
+
+/*
+ * Keys for matching a connection.
+ */
+struct rxrpc_conn_proto {
+ union {
+ struct {
+ u32 epoch; /* epoch of this connection */
+ u32 cid; /* connection ID */
+ };
+ u64 index_key;
+ };
+};
+
+struct rxrpc_conn_parameters {
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct key *key; /* Security details */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
+ u16 service_id; /* Service ID for this connection */
+ u32 security_level; /* Security level selected */
+};
+
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED, /* - Normal termination */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
+ * Bits in the connection flags.
+ */
+enum rxrpc_conn_flag {
+ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
+ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
+ RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
+ RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */
+ RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */
+ RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */
+ RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */
+};
+
+#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_1) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_2) | \
+ (1UL << RXRPC_CONN_FINAL_ACK_3))
+
+/*
+ * Events that can be raised upon a connection.
+ */
+enum rxrpc_conn_event {
+ RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+ RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */
+};
+
+/*
+ * The connection protocol state.
+ */
+enum rxrpc_conn_proto_state {
+ RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */
+ RXRPC_CONN_CLIENT, /* Client connection */
+ RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */
+ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
+ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
+ RXRPC_CONN_SERVICE, /* Service secured connection */
+ RXRPC_CONN_ABORTED, /* Conn aborted */
+ RXRPC_CONN__NR_STATES
+};
+
+/*
+ * RxRPC client connection bundle.
+ */
+struct rxrpc_bundle {
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct key *key; /* Security details */
+ const struct rxrpc_security *security; /* applied security module */
+ refcount_t ref;
+ atomic_t active; /* Number of active users */
+ unsigned int debug_id;
+ u32 security_level; /* Security level selected */
+ u16 service_id; /* Service ID for this connection */
+ bool try_upgrade; /* True if the bundle is attempting upgrade */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
+ unsigned short alloc_error; /* Error from last conn allocation */
+ struct rb_node local_node; /* Node in local->client_conns */
+ struct list_head waiting_calls; /* Calls waiting for channels */
+ unsigned long avail_chans; /* Mask of available channels */
+ struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */
+};
+
+/*
+ * RxRPC connection definition
+ * - matched by { local, peer, epoch, conn_id, direction }
+ * - each connection can only handle four simultaneous calls
+ */
+struct rxrpc_connection {
+ struct rxrpc_conn_proto proto;
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Remote endpoint */
+ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ struct key *key; /* Security details */
+ struct list_head attend_link; /* Link in local->conn_attend_q */
+
+ refcount_t ref;
+ atomic_t active; /* Active count for service conns */
+ struct rcu_head rcu;
+ struct list_head cache_link;
+
+ unsigned char act_chans; /* Mask of active channels */
+ struct rxrpc_channel {
+ unsigned long final_ack_at; /* Time at which to issue final ACK */
+ struct rxrpc_call *call; /* Active call */
+ unsigned int call_debug_id; /* call->debug_id */
+ u32 call_id; /* ID of current call */
+ u32 call_counter; /* Call ID counter */
+ u32 last_call; /* ID of last call */
+ u8 last_type; /* Type of last packet */
+ union {
+ u32 last_seq;
+ u32 last_abort;
+ };
+ } channels[RXRPC_MAXCALLS];
+
+ struct timer_list timer; /* Conn event timer */
+ struct work_struct processor; /* connection event processor */
+ struct work_struct destructor; /* In-process-context destroyer */
+ struct rxrpc_bundle *bundle; /* Client connection bundle */
+ struct rb_node service_node; /* Node in peer->service_conns */
+ struct list_head proc_link; /* link in procfs list */
+ struct list_head link; /* link in master connection list */
+ struct sk_buff_head rx_queue; /* received conn-level packets */
+
+ struct mutex security_lock; /* Lock for security management */
+ const struct rxrpc_security *security; /* applied security module */
+ union {
+ struct {
+ struct crypto_sync_skcipher *cipher; /* encryption handle */
+ struct rxrpc_crypt csum_iv; /* packet checksum base */
+ u32 nonce; /* response re-use preventer */
+ } rxkad;
+ };
+ unsigned long flags;
+ unsigned long events;
+ unsigned long idle_timestamp; /* Time at which last became idle */
+ spinlock_t state_lock; /* state-change lock */
+ enum rxrpc_conn_proto_state state; /* current state of connection */
+ enum rxrpc_call_completion completion; /* Completion condition */
+ s32 abort_code; /* Abort code of connection abort */
+ int debug_id; /* debug ID for printks */
+ atomic_t serial; /* packet serial number counter */
+ unsigned int hi_serial; /* highest serial number received */
+ u32 service_id; /* Service ID, possibly upgraded */
+ u32 security_level; /* Security level selected */
+ u8 security_ix; /* security type */
+ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
+ u8 bundle_shift; /* Index into bundle->avail_chans */
+ bool exclusive; /* T if conn is exclusive */
+ bool upgrade; /* T if service ID can be upgraded */
+ u16 orig_service_id; /* Originally requested service ID */
+ short error; /* Local error code */
+};
+
+static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp)
+{
+ return sp->hdr.flags & RXRPC_CLIENT_INITIATED;
+}
+
+static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp)
+{
+ return !rxrpc_to_server(sp);
+}
+
+/*
+ * Flags in call->flags.
+ */
+enum rxrpc_call_flag {
+ RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */
+ RXRPC_CALL_HAS_USERID, /* has a user ID attached */
+ RXRPC_CALL_IS_SERVICE, /* Call is service call */
+ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
+ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
+ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_TX_ALL_ACKED, /* Last packet has been hard-acked */
+ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */
+ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
+ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
+ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
+ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */
+ RXRPC_CALL_KERNEL, /* The call was made by the kernel */
+ RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
+ RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
+ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */
+ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */
+};
+
+/*
+ * Events that can be raised on a call.
+ */
+enum rxrpc_call_event {
+ RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */
+ RXRPC_CALL_EV_INITIAL_PING, /* Send initial ping for a new service call */
+};
+
+/*
+ * The states that a call can be in.
+ */
+enum rxrpc_call_state {
+ RXRPC_CALL_UNINITIALISED,
+ RXRPC_CALL_CLIENT_AWAIT_CONN, /* - client waiting for connection to become available */
+ RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
+ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
+ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
+ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */
+ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
+ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
+ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
+ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
+ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
+ RXRPC_CALL_COMPLETE, /* - call complete */
+ NR__RXRPC_CALL_STATES
+};
+
+/*
+ * Call Tx congestion management modes.
+ */
+enum rxrpc_congest_mode {
+ RXRPC_CALL_SLOW_START,
+ RXRPC_CALL_CONGEST_AVOIDANCE,
+ RXRPC_CALL_PACKET_LOSS,
+ RXRPC_CALL_FAST_RETRANSMIT,
+ NR__RXRPC_CONGEST_MODES
+};
+
+/*
+ * RxRPC call definition
+ * - matched by { connection, call_id }
+ */
+struct rxrpc_call {
+ struct rcu_head rcu;
+ struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_bundle *bundle; /* Connection bundle to use */
+ struct rxrpc_peer *peer; /* Peer record for remote address */
+ struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_sock __rcu *socket; /* socket responsible */
+ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ struct key *key; /* Security details */
+ const struct rxrpc_security *security; /* applied security module */
+ struct mutex user_mutex; /* User access mutex */
+ struct sockaddr_rxrpc dest_srx; /* Destination address */
+ unsigned long delay_ack_at; /* When DELAY ACK needs to happen */
+ unsigned long ack_lost_at; /* When ACK is figured as lost */
+ unsigned long resend_at; /* When next resend needs to happen */
+ unsigned long ping_at; /* When next to send a ping */
+ unsigned long keepalive_at; /* When next to send a keepalive ping */
+ unsigned long expect_rx_by; /* When we expect to get a packet by */
+ unsigned long expect_req_by; /* When we expect to get a request DATA packet by */
+ unsigned long expect_term_by; /* When we expect call termination by */
+ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */
+ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */
+ u32 hard_timo; /* Maximum lifetime or 0 (jif) */
+ struct timer_list timer; /* Combined event timer */
+ struct work_struct destroyer; /* In-process-context destroyer */
+ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
+ struct list_head link; /* link in master call list */
+ struct list_head wait_link; /* Link in local->new_client_calls */
+ struct hlist_node error_link; /* link in error distribution list */
+ struct list_head accept_link; /* Link in rx->acceptq */
+ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
+ struct list_head sock_link; /* Link in rx->sock_calls */
+ struct rb_node sock_node; /* Node in rx->calls */
+ struct list_head attend_link; /* Link in local->call_attend_q */
+ struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */
+ wait_queue_head_t waitq; /* Wait queue for channel or Tx */
+ s64 tx_total_len; /* Total length left to be transmitted (or -1) */
+ unsigned long user_call_ID; /* user-defined call ID */
+ unsigned long flags;
+ unsigned long events;
+ spinlock_t notify_lock; /* Kernel notification lock */
+ unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */
+ s32 send_abort; /* Abort code to be sent */
+ short send_abort_err; /* Error to be associated with the abort */
+ rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */
+ s32 abort_code; /* Local/remote abort code */
+ int error; /* Local error incurred */
+ enum rxrpc_call_state _state; /* Current state of call (needs barrier) */
+ enum rxrpc_call_completion completion; /* Call completion condition */
+ refcount_t ref;
+ u8 security_ix; /* Security type */
+ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */
+ u32 call_id; /* call ID on connection */
+ u32 cid; /* connection ID plus channel index */
+ u32 security_level; /* Security level selected */
+ int debug_id; /* debug ID for printks */
+ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
+ unsigned short rx_pkt_len; /* Current recvmsg packet len */
+
+ /* Transmitted data tracking. */
+ spinlock_t tx_lock; /* Transmit queue lock */
+ struct list_head tx_sendmsg; /* Sendmsg prepared packets */
+ struct list_head tx_buffer; /* Buffer of transmissible packets */
+ rxrpc_seq_t tx_bottom; /* First packet in buffer */
+ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */
+ rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */
+ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
+ u16 tx_backoff; /* Delay to insert due to Tx failure */
+ u8 tx_winsize; /* Maximum size of Tx window */
+#define RXRPC_TX_MAX_WINDOW 128
+ ktime_t tx_last_sent; /* Last time a transmission occurred */
+
+ /* Received data tracking */
+ struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */
+ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */
+
+	rxrpc_seq_t		rx_highest_seq;	/* Highest sequence number received */
+ rxrpc_seq_t rx_consumed; /* Highest packet consumed */
+ rxrpc_serial_t rx_serial; /* Highest serial received for this call */
+ u8 rx_winsize; /* Size of Rx window */
+
+ /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
+ * is fixed, we keep these numbers in terms of segments (ie. DATA
+ * packets) rather than bytes.
+ */
+#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
+#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4)
+ u8 cong_cwnd; /* Congestion window size */
+ u8 cong_extra; /* Extra to send for congestion management */
+ u8 cong_ssthresh; /* Slow-start threshold */
+ enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */
+ u8 cong_dup_acks; /* Count of ACKs showing missing packets */
+ u8 cong_cumul_acks; /* Cumulative ACK count */
+ ktime_t cong_tstamp; /* Last time cwnd was changed */
+
+ /* Receive-phase ACK management (ACKs we send). */
+ u8 ackr_reason; /* reason to ACK */
+ u16 ackr_sack_base; /* Starting slot in SACK table ring */
+ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
+ rxrpc_seq_t ackr_window; /* Base of SACK window */
+	rxrpc_seq_t		ackr_wtop;	/* Top of SACK window */
+ unsigned int ackr_nr_unacked; /* Number of unacked packets */
+ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
+ struct {
+#define RXRPC_SACK_SIZE 256
+ /* SACK table for soft-acked packets */
+ u8 ackr_sack_table[RXRPC_SACK_SIZE];
+ } __aligned(8);
+
+ /* RTT management */
+ rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */
+ ktime_t rtt_sent_at[4]; /* Time packet sent */
+ unsigned long rtt_avail; /* Mask of available slots in bits 0-3,
+ * Mask of pending samples in 8-11 */
+#define RXRPC_CALL_RTT_AVAIL_MASK 0xf
+#define RXRPC_CALL_RTT_PEND_SHIFT 8
+
+ /* Transmission-phase ACK management (ACKs we've received). */
+ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
+ rxrpc_seq_t acks_first_seq; /* first sequence number received */
+ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */
+ rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */
+ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
+ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */
+};
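
The slow-start fields above count DATA segments rather than bytes, as the comment notes, because the sender MSS is pinned to RXRPC_JUMBO_DATALEN. A minimal sketch of how the initial window is chosen from RXRPC_TX_SMSS, mirroring the selection rxrpc_alloc_call() performs later in this patch (the ~1412-byte value for RXRPC_JUMBO_DATALEN is an assumption for illustration):

/* Sketch: RFC 5681 s3.1 initial-window selection, expressed in segments.
 * Assuming RXRPC_TX_SMSS == RXRPC_JUMBO_DATALEN (~1412 bytes), the middle
 * branch is taken and a new call starts with a congestion window of 3.
 */
static u8 rxrpc_initial_cwnd_sketch(void)
{
	if (RXRPC_TX_SMSS > 2190)
		return 2;
	else if (RXRPC_TX_SMSS > 1095)
		return 3;
	else
		return 4;
}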
+
+/*
+ * Summary of a new ACK and the changes it made to the Tx buffer packet states.
+ */
+struct rxrpc_ack_summary {
+ u16 nr_acks; /* Number of ACKs in packet */
+ u16 nr_new_acks; /* Number of new ACKs in packet */
+ u16 nr_rot_new_acks; /* Number of rotated new ACKs */
+ u8 ack_reason;
+ bool saw_nacks; /* Saw NACKs in packet */
+ bool new_low_nack; /* T if new low NACK found */
+ bool retrans_timeo; /* T if reTx due to timeout happened */
+ u8 flight_size; /* Number of unreceived transmissions */
+ /* Place to stash values for tracing */
+ enum rxrpc_congest_mode mode:8;
+ u8 cwnd;
+ u8 ssthresh;
+ u8 dup_acks;
+ u8 cumulative_acks;
+};
+
+/*
+ * sendmsg() cmsg-specified parameters.
+ */
+enum rxrpc_command {
+ RXRPC_CMD_SEND_DATA, /* send data message */
+ RXRPC_CMD_SEND_ABORT, /* request abort generation */
+ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
+ RXRPC_CMD_CHARGE_ACCEPT, /* [server] charge accept preallocation */
+};
+
+struct rxrpc_call_params {
+ s64 tx_total_len; /* Total Tx data length (if send data) */
+ unsigned long user_call_ID; /* User's call ID */
+ struct {
+ u32 hard; /* Maximum lifetime (sec) */
+ u32 idle; /* Max time since last data packet (msec) */
+ u32 normal; /* Max time since last call packet (msec) */
+ } timeouts;
+ u8 nr_timeouts; /* Number of timeouts specified */
+ bool kernel; /* T if kernel is making the call */
+	enum rxrpc_interruptibility interruptibility; /* How interruptible is the call? */
+};
+
+struct rxrpc_send_params {
+ struct rxrpc_call_params call;
+ u32 abort_code; /* Abort code to Tx (if abort) */
+ enum rxrpc_command command : 8; /* The command to implement */
+ bool exclusive; /* Shared or exclusive call */
+ bool upgrade; /* If the connection is upgradeable */
+};
+
+/*
+ * Buffer of data to be output as a packet.
+ */
+struct rxrpc_txbuf {
+ struct rcu_head rcu;
+ struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */
+ struct list_head tx_link; /* Link in live Enc queue or Tx queue */
+ ktime_t last_sent; /* Time at which last transmitted */
+ refcount_t ref;
+ rxrpc_seq_t seq; /* Sequence number of this packet */
+ unsigned int call_debug_id;
+ unsigned int debug_id;
+ unsigned int len; /* Amount of data in buffer */
+ unsigned int space; /* Remaining data space */
+ unsigned int offset; /* Offset of fill point */
+ unsigned long flags;
+#define RXRPC_TXBUF_LAST 0 /* Set if last packet in Tx phase */
+#define RXRPC_TXBUF_RESENT 1 /* Set if has been resent */
+ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */
+ struct {
+ /* The packet for encrypting and DMA'ing. We align it such
+ * that data[] aligns correctly for any crypto blocksize.
+ */
+ u8 pad[64 - sizeof(struct rxrpc_wire_header)];
+ struct rxrpc_wire_header wire; /* Network-ready header */
+ union {
+ u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */
+ struct {
+ struct rxrpc_ackpacket ack;
+ DECLARE_FLEX_ARRAY(u8, acks);
+ };
+ };
+ } __aligned(64);
+};
+
+static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb)
+{
+ return txb->wire.flags & RXRPC_CLIENT_INITIATED;
+}
+
+static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
+{
+ return !rxrpc_sending_to_server(txb);
+}
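
The pad[]/wire/data layout in the union above exists so that data[] begins on the union's 64-byte boundary, keeping the to-be-encrypted payload aligned for any crypto block size. A small compile-time check sketching the constraint this relies on (the 28-byte packed header size mentioned is an assumption about the usual wire format):

/* Sketch: pad[] is sized as 64 - sizeof(struct rxrpc_wire_header), so the
 * wire header ends exactly at offset 64 and data[] starts 64-byte aligned.
 * With the usual packed 28-byte header, pad[] works out to 36 bytes.
 */
static_assert(sizeof(struct rxrpc_wire_header) <= 64,
	      "wire header must fit inside the 64-byte aligned prefix");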
+
+#include <trace/events/rxrpc.h>
+
+/*
+ * af_rxrpc.c
+ */
+extern atomic_t rxrpc_n_rx_skbs;
+extern struct workqueue_struct *rxrpc_workqueue;
+
+/*
+ * call_accept.c
+ */
+int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
+void rxrpc_discard_prealloc(struct rxrpc_sock *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
+void rxrpc_accept_incoming_calls(struct rxrpc_local *);
+int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
+
+/*
+ * call_event.c
+ */
+void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
+ enum rxrpc_propose_ack_trace why);
+void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ack_trace);
+void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t,
+ enum rxrpc_propose_ack_trace);
+void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *);
+void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb);
+
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why);
+
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
+
+/*
+ * call_object.c
+ */
+extern const char *const rxrpc_call_states[];
+extern const char *const rxrpc_call_completions[];
+extern struct kmem_cache *rxrpc_call_jar;
+
+void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what);
+struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
+struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
+ struct rxrpc_conn_parameters *,
+ struct sockaddr_rxrpc *,
+ struct rxrpc_call_params *, gfp_t,
+ unsigned int);
+void rxrpc_start_call_timer(struct rxrpc_call *call);
+void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
+ struct sk_buff *);
+void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+void rxrpc_see_call(struct rxrpc_call *, enum rxrpc_call_trace);
+struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_cleanup_call(struct rxrpc_call *);
+void rxrpc_destroy_all_calls(struct rxrpc_net *);
+
+static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
+{
+ return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags);
+}
+
+static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
+{
+ return !rxrpc_is_service_call(call);
+}
+
+/*
+ * call_state.c
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error);
+bool rxrpc_call_completed(struct rxrpc_call *call);
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why);
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error);
+
+static inline void rxrpc_set_call_state(struct rxrpc_call *call,
+ enum rxrpc_call_state state)
+{
+ /* Order write of completion info before write of ->state. */
+ smp_store_release(&call->_state, state);
+ wake_up(&call->waitq);
+}
+
+static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call)
+{
+ return call->_state; /* Only inside I/O thread */
+}
+
+static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call)
+{
+ /* Order read ->state before read of completion info. */
+ return smp_load_acquire(&call->_state);
+}
+
+static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call)
+{
+ return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED;
+}
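
The accessors above pair smp_store_release() in rxrpc_set_call_state() with smp_load_acquire() in rxrpc_call_state(), so completion information written before the state change is visible to anyone who observes RXRPC_CALL_COMPLETE. A minimal sketch of the intended pairing, ignoring the single-writer rules the real code (rxrpc_set_call_completion() in call_state.c) also relies on; the example_* functions are illustrative only:

/* Writer side (I/O thread): publish the completion info, then the state. */
static void example_complete_call(struct rxrpc_call *call, int error)
{
	call->completion = RXRPC_CALL_LOCAL_ERROR;	/* completion info first */
	call->error	 = error;
	rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE); /* release-publishes it */
}

/* Reader side (e.g. recvmsg): observe the state, then the info is stable. */
static int example_read_result(struct rxrpc_call *call)
{
	if (!rxrpc_call_is_complete(call))		/* acquire-loads ->_state */
		return -EINPROGRESS;
	return call->error;		/* ordered after the state load */
}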
+
+/*
+ * conn_client.c
+ */
+extern unsigned int rxrpc_reap_client_connections;
+extern unsigned long rxrpc_conn_idle_client_expiry;
+extern unsigned long rxrpc_conn_idle_client_fast_expiry;
+
+void rxrpc_purge_client_connections(struct rxrpc_local *local);
+struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
+void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp);
+void rxrpc_connect_client_calls(struct rxrpc_local *local);
+void rxrpc_expose_client_call(struct rxrpc_call *);
+void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
+void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
+void rxrpc_clean_up_local_conns(struct rxrpc_local *);
+
+/*
+ * conn_event.c
+ */
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int channel);
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why);
+void rxrpc_process_connection(struct work_struct *);
+void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
+
+static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn)
+{
+ /* Order reading the abort info after the state check. */
+ return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED;
+}
+
+/*
+ * conn_object.c
+ */
+extern unsigned int rxrpc_connection_expiry;
+extern unsigned int rxrpc_closed_conn_expiry;
+
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why);
+struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
+struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
+ struct sockaddr_rxrpc *,
+ struct sk_buff *);
+void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
+void rxrpc_disconnect_call(struct rxrpc_call *);
+void rxrpc_kill_client_conn(struct rxrpc_connection *);
+void rxrpc_queue_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
+void rxrpc_see_connection(struct rxrpc_connection *, enum rxrpc_conn_trace);
+struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *,
+ enum rxrpc_conn_trace);
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *,
+ enum rxrpc_conn_trace);
+void rxrpc_put_connection(struct rxrpc_connection *, enum rxrpc_conn_trace);
+void rxrpc_service_connection_reaper(struct work_struct *);
+void rxrpc_destroy_all_connections(struct rxrpc_net *);
+
+static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
+{
+ return conn->out_clientflag;
+}
+
+static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
+{
+ return !rxrpc_conn_is_client(conn);
+}
+
+static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
+ unsigned long expire_at)
+{
+ timer_reduce(&conn->timer, expire_at);
+}
+
+/*
+ * conn_service.c
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
+ struct sk_buff *);
+struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t);
+void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *,
+ const struct rxrpc_security *, struct sk_buff *);
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
+
+/*
+ * input.c
+ */
+void rxrpc_congestion_degrade(struct rxrpc_call *);
+void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *);
+void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * io_thread.c
+ */
+int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
+void rxrpc_error_report(struct sock *);
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err);
+int rxrpc_io_thread(void *data);
+static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
+{
+ wake_up_process(local->io_thread);
+}
+
+static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO);
+}
+
+/*
+ * insecure.c
+ */
+extern const struct rxrpc_security rxrpc_no_security;
+
+/*
+ * key.c
+ */
+extern struct key_type key_type_rxrpc;
+
+int rxrpc_request_key(struct rxrpc_sock *, sockptr_t, int);
+int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t,
+ u32);
+
+/*
+ * local_event.c
+ */
+void rxrpc_gen_version_string(void);
+void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb);
+
+/*
+ * local_object.c
+ */
+void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set);
+struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace);
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_put_local(struct rxrpc_local *, enum rxrpc_local_trace);
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_unuse_local(struct rxrpc_local *, enum rxrpc_local_trace);
+void rxrpc_destroy_local(struct rxrpc_local *local);
+void rxrpc_destroy_all_locals(struct rxrpc_net *);
+
+static inline bool __rxrpc_use_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
+{
+ int r, u;
+
+ r = refcount_read(&local->ref);
+ u = atomic_fetch_add_unless(&local->active_users, 1, 0);
+ trace_rxrpc_local(local->debug_id, why, r, u);
+ return u != 0;
+}
+
+static inline void rxrpc_see_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
+{
+ int r, u;
+
+ r = refcount_read(&local->ref);
+ u = atomic_read(&local->active_users);
+ trace_rxrpc_local(local->debug_id, why, r, u);
+}
+
+/*
+ * misc.c
+ */
+extern unsigned int rxrpc_max_backlog __read_mostly;
+extern unsigned long rxrpc_soft_ack_delay;
+extern unsigned long rxrpc_idle_ack_delay;
+extern unsigned int rxrpc_rx_window_size;
+extern unsigned int rxrpc_rx_mtu;
+extern unsigned int rxrpc_rx_jumbo_max;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+extern unsigned long rxrpc_inject_rx_delay;
+#endif
+
+/*
+ * net_ns.c
+ */
+extern unsigned int rxrpc_net_id;
+extern struct pernet_operations rxrpc_net_ops;
+
+static inline struct rxrpc_net *rxrpc_net(struct net *net)
+{
+ return net_generic(net, rxrpc_net_id);
+}
+
+/*
+ * output.c
+ */
+int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
+int rxrpc_send_abort_packet(struct rxrpc_call *);
+int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
+void rxrpc_send_keepalive(struct rxrpc_peer *);
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
+
+/*
+ * peer_event.c
+ */
+void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *);
+void rxrpc_peer_keepalive_worker(struct work_struct *);
+
+/*
+ * peer_object.c
+ */
+struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
+ const struct sockaddr_rxrpc *);
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
+ enum rxrpc_peer_trace);
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer);
+void rxrpc_destroy_all_peers(struct rxrpc_net *);
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
+void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
+
+/*
+ * proc.c
+ */
+extern const struct seq_operations rxrpc_call_seq_ops;
+extern const struct seq_operations rxrpc_connection_seq_ops;
+extern const struct seq_operations rxrpc_peer_seq_ops;
+extern const struct seq_operations rxrpc_local_seq_ops;
+
+/*
+ * recvmsg.c
+ */
+void rxrpc_notify_socket(struct rxrpc_call *);
+int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
+
+/*
+ * Abort a call due to a protocol error.
+ */
+static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ s32 abort_code,
+ enum rxrpc_abort_reason why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why);
+ return -EPROTO;
+}
+
+/*
+ * rtt.c
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int,
+ rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
+unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
+void rxrpc_peer_init_rtt(struct rxrpc_peer *);
+
+/*
+ * rxkad.c
+ */
+#ifdef CONFIG_RXKAD
+extern const struct rxrpc_security rxkad;
+#endif
+
+/*
+ * security.c
+ */
+int __init rxrpc_init_security(void);
+const struct rxrpc_security *rxrpc_security_lookup(u8);
+void rxrpc_exit_security(void);
+int rxrpc_init_client_call_security(struct rxrpc_call *);
+int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *,
+ struct sk_buff *);
+struct key *rxrpc_look_up_server_security(struct rxrpc_connection *,
+ struct sk_buff *, u32, u32);
+
+/*
+ * sendmsg.c
+ */
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why);
+int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+
+/*
+ * server_key.c
+ */
+extern struct key_type key_type_rxrpc_s;
+
+int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int);
+
+/*
+ * skbuff.c
+ */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
+void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_purge_queue(struct sk_buff_head *);
+
+/*
+ * stats.c
+ */
+int rxrpc_stats_show(struct seq_file *seq, void *v);
+int rxrpc_stats_clear(struct file *file, char *buf, size_t size);
+
+#define rxrpc_inc_stat(rxnet, s) atomic_inc(&(rxnet)->s)
+#define rxrpc_dec_stat(rxnet, s) atomic_dec(&(rxnet)->s)
+
+/*
+ * sysctl.c
+ */
+#ifdef CONFIG_SYSCTL
+extern int __init rxrpc_sysctl_init(void);
+extern void rxrpc_sysctl_exit(void);
+#else
+static inline int __init rxrpc_sysctl_init(void) { return 0; }
+static inline void rxrpc_sysctl_exit(void) {}
+#endif
+
+/*
+ * txbuf.c
+ */
+extern atomic_t rxrpc_nr_txbuf;
+struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
+ gfp_t gfp);
+void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
+void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
+void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what);
+
+/*
+ * utils.c
+ */
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+
+static inline bool before(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) < 0;
+}
+static inline bool before_eq(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) <= 0;
+}
+static inline bool after(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) > 0;
+}
+static inline bool after_eq(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) >= 0;
+}
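
These helpers compare 32-bit sequence numbers modulo 2^32, so ordering stays correct across wrap provided the two values are within 2^31 of each other. A worked example:

/* Worked example: sequence numbers just either side of the wrap point. */
u32 a = 0xfffffffe;		/* sent just before the counter wraps */
u32 b = 0x00000001;		/* sent just after the counter wraps  */
/* (s32)(a - b) == (s32)0xfffffffd, which is negative, so before(a, b) and
 * after(b, a) are both true: b is correctly treated as the later packet. */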
+
+/*
+ * debug tracing
+ */
+extern unsigned int rxrpc_debug;
+
+#define dbgprintk(FMT,...) \
+ printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__)
+
+#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AF_RXRPC_DEBUG)
+#define RXRPC_DEBUG_KENTER 0x01
+#define RXRPC_DEBUG_KLEAVE 0x02
+#define RXRPC_DEBUG_KDEBUG 0x04
+
+#define _enter(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER)) \
+ kenter(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _leave(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE)) \
+ kleave(FMT,##__VA_ARGS__); \
+} while (0)
+
+#define _debug(FMT,...) \
+do { \
+ if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG)) \
+ kdebug(FMT,##__VA_ARGS__); \
+} while (0)
+
+#else
+#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
+#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__)
+#endif
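
With CONFIG_AF_RXRPC_DEBUG each trace class is gated by its own bit in rxrpc_debug. A small illustration of combining the mask bits; how the mask is actually set at runtime is outside this header and the helper below is purely hypothetical:

static void example_enable_rxrpc_tracing(void)
{
	/* 0x01 | 0x04 == 0x05: _enter() and _debug() print, _leave() doesn't. */
	rxrpc_debug = RXRPC_DEBUG_KENTER | RXRPC_DEBUG_KDEBUG;
}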
+
+/*
+ * debug assertion checking
+ */
+#if 1 // defined(__KDEBUGALL)
+
+#define ASSERT(X) \
+do { \
+ if (unlikely(!(X))) { \
+ pr_err("Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+ __typeof__(X) _x = (X); \
+ __typeof__(Y) _y = (__typeof__(X))(Y); \
+ if (unlikely(!(_x OP _y))) { \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ (unsigned long)_x, (unsigned long)_x, #OP, \
+ (unsigned long)_y, (unsigned long)_y); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIF(C, X) \
+do { \
+ if (unlikely((C) && !(X))) { \
+ pr_err("Assertion failed\n"); \
+ BUG(); \
+ } \
+} while (0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+ __typeof__(X) _x = (X); \
+ __typeof__(Y) _y = (__typeof__(X))(Y); \
+ if (unlikely((C) && !(_x OP _y))) { \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ (unsigned long)_x, (unsigned long)_x, #OP, \
+ (unsigned long)_y, (unsigned long)_y); \
+ BUG(); \
+ } \
+} while (0)
+
+#else
+
+#define ASSERT(X) \
+do { \
+} while (0)
+
+#define ASSERTCMP(X, OP, Y) \
+do { \
+} while (0)
+
+#define ASSERTIF(C, X) \
+do { \
+} while (0)
+
+#define ASSERTIFCMP(C, X, OP, Y) \
+do { \
+} while (0)
+
+#endif /* __KDEBUGALL */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
new file mode 100644
index 0000000000..0f5a1d77b8
--- /dev/null
+++ b/net/rxrpc/call_accept.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* incoming call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <linux/gfp.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call,
+ unsigned long user_call_ID)
+{
+}
+
+/*
+ * Preallocate a single service call, connection and peer and, if possible,
+ * give them a user ID and attach the user's side of the ID to them.
+ */
+static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+ struct rxrpc_backlog *b,
+ rxrpc_notify_rx_t notify_rx,
+ rxrpc_user_attach_call_t user_attach_call,
+ unsigned long user_call_ID, gfp_t gfp,
+ unsigned int debug_id)
+{
+ struct rxrpc_call *call, *xcall;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
+ struct rb_node *parent, **pp;
+ int max, tmp;
+ unsigned int size = RXRPC_BACKLOG_MAX;
+ unsigned int head, tail, call_head, call_tail;
+
+ max = rx->sk.sk_max_ack_backlog;
+ tmp = rx->sk.sk_ack_backlog;
+ if (tmp >= max) {
+ _leave(" = -ENOBUFS [full %u]", max);
+ return -ENOBUFS;
+ }
+ max -= tmp;
+
+ /* We don't need more conns and peers than we have calls, but on the
+ * other hand, we shouldn't ever use more peers than conns or conns
+ * than calls.
+ */
+ call_head = b->call_backlog_head;
+ call_tail = READ_ONCE(b->call_backlog_tail);
+ tmp = CIRC_CNT(call_head, call_tail, size);
+ if (tmp >= max) {
+ _leave(" = -ENOBUFS [enough %u]", tmp);
+ return -ENOBUFS;
+ }
+ max = tmp + 1;
+
+ head = b->peer_backlog_head;
+ tail = READ_ONCE(b->peer_backlog_tail);
+ if (CIRC_CNT(head, tail, size) < max) {
+ struct rxrpc_peer *peer;
+
+ peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc);
+ if (!peer)
+ return -ENOMEM;
+ b->peer_backlog[head] = peer;
+ smp_store_release(&b->peer_backlog_head,
+ (head + 1) & (size - 1));
+ }
+
+ head = b->conn_backlog_head;
+ tail = READ_ONCE(b->conn_backlog_tail);
+ if (CIRC_CNT(head, tail, size) < max) {
+ struct rxrpc_connection *conn;
+
+ conn = rxrpc_prealloc_service_connection(rxnet, gfp);
+ if (!conn)
+ return -ENOMEM;
+ b->conn_backlog[head] = conn;
+ smp_store_release(&b->conn_backlog_head,
+ (head + 1) & (size - 1));
+ }
+
+ /* Now it gets complicated, because calls get registered with the
+ * socket here, with a user ID preassigned by the user.
+ */
+ call = rxrpc_alloc_call(rx, gfp, debug_id);
+ if (!call)
+ return -ENOMEM;
+ call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC);
+ __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events);
+
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ user_call_ID, rxrpc_call_new_prealloc_service);
+
+ write_lock(&rx->call_lock);
+
+ /* Check the user ID isn't already in use */
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ xcall = rb_entry(parent, struct rxrpc_call, sock_node);
+ if (user_call_ID < xcall->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > xcall->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto id_in_use;
+ }
+
+ call->user_call_ID = user_call_ID;
+ call->notify_rx = notify_rx;
+ if (user_attach_call) {
+ rxrpc_get_call(call, rxrpc_call_get_kernel_service);
+ user_attach_call(call, user_call_ID);
+ }
+
+ rxrpc_get_call(call, rxrpc_call_get_userid);
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+
+ list_add(&call->sock_link, &rx->sock_calls);
+
+ write_unlock(&rx->call_lock);
+
+ rxnet = call->rxnet;
+ spin_lock(&rxnet->call_lock);
+ list_add_tail_rcu(&call->link, &rxnet->calls);
+ spin_unlock(&rxnet->call_lock);
+
+ b->call_backlog[call_head] = call;
+ smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
+ _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID);
+ return 0;
+
+id_in_use:
+ write_unlock(&rx->call_lock);
+ rxrpc_cleanup_call(call);
+ _leave(" = -EBADSLT");
+ return -EBADSLT;
+}
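
The peer/conn/call backlog rings filled above follow the usual single-producer/single-consumer discipline: write the slot, then publish the new head with smp_store_release(); the consumer pairs that with smp_load_acquire() on the head before reading the slot, and CIRC_CNT()/CIRC_SPACE() give the fill level. A generic sketch of the same pattern with hypothetical names (RING_SIZE standing in for RXRPC_BACKLOG_MAX, which must likewise be a power of two):

#define RING_SIZE 32
static void *ring[RING_SIZE];
static unsigned int ring_head, ring_tail;

static bool ring_produce(void *item)
{
	unsigned int head = ring_head;
	unsigned int tail = READ_ONCE(ring_tail);

	if (CIRC_SPACE(head, tail, RING_SIZE) < 1)
		return false;			/* ring full */
	ring[head] = item;
	/* Publish the slot contents before advancing the head. */
	smp_store_release(&ring_head, (head + 1) & (RING_SIZE - 1));
	return true;
}

static void *ring_consume(void)
{
	unsigned int head = smp_load_acquire(&ring_head);
	unsigned int tail = ring_tail;
	void *item;

	if (CIRC_CNT(head, tail, RING_SIZE) < 1)
		return NULL;			/* ring empty */
	item = ring[tail];
	/* Finish reading the slot before releasing it for reuse. */
	smp_store_release(&ring_tail, (tail + 1) & (RING_SIZE - 1));
	return item;
}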
+
+/*
+ * Allocate the preallocation buffers for incoming service calls. These must
+ * be charged manually.
+ */
+int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
+{
+ struct rxrpc_backlog *b = rx->backlog;
+
+ if (!b) {
+ b = kzalloc(sizeof(struct rxrpc_backlog), gfp);
+ if (!b)
+ return -ENOMEM;
+ rx->backlog = b;
+ }
+
+ return 0;
+}
+
+/*
+ * Discard the preallocation on a service.
+ */
+void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
+{
+ struct rxrpc_backlog *b = rx->backlog;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
+ unsigned int size = RXRPC_BACKLOG_MAX, head, tail;
+
+ if (!b)
+ return;
+ rx->backlog = NULL;
+
+ /* Make sure that there aren't any incoming calls in progress before we
+ * clear the preallocation buffers.
+ */
+ spin_lock(&rx->incoming_lock);
+ spin_unlock(&rx->incoming_lock);
+
+ head = b->peer_backlog_head;
+ tail = b->peer_backlog_tail;
+ while (CIRC_CNT(head, tail, size) > 0) {
+ struct rxrpc_peer *peer = b->peer_backlog[tail];
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer);
+ kfree(peer);
+ tail = (tail + 1) & (size - 1);
+ }
+
+ head = b->conn_backlog_head;
+ tail = b->conn_backlog_tail;
+ while (CIRC_CNT(head, tail, size) > 0) {
+ struct rxrpc_connection *conn = b->conn_backlog[tail];
+ write_lock(&rxnet->conn_lock);
+ list_del(&conn->link);
+ list_del(&conn->proc_link);
+ write_unlock(&rxnet->conn_lock);
+ kfree(conn);
+ if (atomic_dec_and_test(&rxnet->nr_conns))
+ wake_up_var(&rxnet->nr_conns);
+ tail = (tail + 1) & (size - 1);
+ }
+
+ head = b->call_backlog_head;
+ tail = b->call_backlog_tail;
+ while (CIRC_CNT(head, tail, size) > 0) {
+ struct rxrpc_call *call = b->call_backlog[tail];
+ rcu_assign_pointer(call->socket, rx);
+ if (rx->discard_new_call) {
+ _debug("discard %lx", call->user_call_ID);
+ rx->discard_new_call(call, call->user_call_ID);
+ if (call->notify_rx)
+ call->notify_rx = rxrpc_dummy_notify;
+ rxrpc_put_call(call, rxrpc_call_put_kernel);
+ }
+ rxrpc_call_completed(call);
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put_discard_prealloc);
+ tail = (tail + 1) & (size - 1);
+ }
+
+ kfree(b);
+}
+
+/*
+ * Allocate a new incoming call from the prealloc pool, along with a connection
+ * and a peer as necessary.
+ */
+static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
+ struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ const struct rxrpc_security *sec,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_backlog *b = rx->backlog;
+ struct rxrpc_call *call;
+ unsigned short call_head, conn_head, peer_head;
+ unsigned short call_tail, conn_tail, peer_tail;
+ unsigned short call_count, conn_count;
+
+ /* #calls >= #conns >= #peers must hold true. */
+ call_head = smp_load_acquire(&b->call_backlog_head);
+ call_tail = b->call_backlog_tail;
+ call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX);
+ conn_head = smp_load_acquire(&b->conn_backlog_head);
+ conn_tail = b->conn_backlog_tail;
+ conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX);
+ ASSERTCMP(conn_count, >=, call_count);
+ peer_head = smp_load_acquire(&b->peer_backlog_head);
+ peer_tail = b->peer_backlog_tail;
+ ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=,
+ conn_count);
+
+ if (call_count == 0)
+ return NULL;
+
+ if (!conn) {
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn))
+ peer = NULL;
+ if (!peer) {
+ peer = b->peer_backlog[peer_tail];
+ peer->srx = *peer_srx;
+ b->peer_backlog[peer_tail] = NULL;
+ smp_store_release(&b->peer_backlog_tail,
+ (peer_tail + 1) &
+ (RXRPC_BACKLOG_MAX - 1));
+
+ rxrpc_new_incoming_peer(local, peer);
+ }
+
+ /* Now allocate and set up the connection */
+ conn = b->conn_backlog[conn_tail];
+ b->conn_backlog[conn_tail] = NULL;
+ smp_store_release(&b->conn_backlog_tail,
+ (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+ conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn);
+ conn->peer = peer;
+ rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn);
+ rxrpc_new_incoming_connection(rx, conn, sec, skb);
+ } else {
+ rxrpc_get_connection(conn, rxrpc_conn_get_service_conn);
+ atomic_inc(&conn->active);
+ }
+
+ /* And now we can allocate and set up a new call */
+ call = b->call_backlog[call_tail];
+ b->call_backlog[call_tail] = NULL;
+ smp_store_release(&b->call_backlog_tail,
+ (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+
+ rxrpc_see_call(call, rxrpc_call_see_accept);
+ call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call);
+ call->conn = conn;
+ call->security = conn->security;
+ call->security_ix = conn->security_ix;
+ call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept);
+ call->dest_srx = peer->srx;
+ call->cong_ssthresh = call->peer->cong_ssthresh;
+ call->tx_last_sent = ktime_get_real();
+ return call;
+}
+
+/*
+ * Set up a new incoming call. Called from the I/O thread.
+ *
+ * If this is for a kernel service, when we allocate the call, it will have
+ * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the
+ * retainer ref obtained from the backlog buffer. Prealloc calls for userspace
+ * services only have the ref from the backlog buffer.
+ *
+ * If we want to report an error, we mark the skb with the packet type and
+ * abort code and return false.
+ */
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
+{
+ const struct rxrpc_security *sec = NULL;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_call *call = NULL;
+ struct rxrpc_sock *rx;
+
+ _enter("");
+
+ /* Don't set up a call for anything other than a DATA packet. */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call);
+
+ read_lock(&local->services_lock);
+
+ /* Weed out packets to services we're not offering. Packets that would
+ * begin a call are explicitly rejected and the rest are just
+ * discarded.
+ */
+ rx = local->service;
+ if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
+ sp->hdr.serviceId != rx->second_service)
+ ) {
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+ sp->hdr.seq == 1)
+ goto unsupported_service;
+ goto discard;
+ }
+
+ if (!conn) {
+ sec = rxrpc_get_incoming_security(rx, skb);
+ if (!sec)
+ goto unsupported_security;
+ }
+
+ spin_lock(&rx->incoming_lock);
+ if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
+ rx->sk.sk_state == RXRPC_CLOSE) {
+ rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
+ RX_INVALID_OPERATION, -ESHUTDOWN);
+ goto no_call;
+ }
+
+ call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx,
+ skb);
+ if (!call) {
+ skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
+ goto no_call;
+ }
+
+ trace_rxrpc_receive(call, rxrpc_receive_incoming,
+ sp->hdr.serial, sp->hdr.seq);
+
+ /* Make the call live. */
+ rxrpc_incoming_call(rx, call, skb);
+ conn = call->conn;
+
+ if (rx->notify_new_call)
+ rx->notify_new_call(&rx->sk, call, call->user_call_ID);
+
+ spin_lock(&conn->state_lock);
+ if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
+ conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+ set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
+ rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge);
+ }
+ spin_unlock(&conn->state_lock);
+
+ spin_unlock(&rx->incoming_lock);
+ read_unlock(&local->services_lock);
+
+ if (hlist_unhashed(&call->error_link)) {
+ spin_lock(&call->peer->lock);
+ hlist_add_head(&call->error_link, &call->peer->error_targets);
+ spin_unlock(&call->peer->lock);
+ }
+
+ _leave(" = %p{%d}", call, call->debug_id);
+ rxrpc_input_call_event(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return true;
+
+unsupported_service:
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EOPNOTSUPP);
+unsupported_security:
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
+no_call:
+ spin_unlock(&rx->incoming_lock);
+ read_unlock(&local->services_lock);
+ _leave(" = f [%u]", skb->mark);
+ return false;
+discard:
+ read_unlock(&local->services_lock);
+ return true;
+}
+
+/*
+ * Charge up socket with preallocated calls, attaching user call IDs.
+ */
+int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID)
+{
+ struct rxrpc_backlog *b = rx->backlog;
+
+ if (rx->sk.sk_state == RXRPC_CLOSE)
+ return -ESHUTDOWN;
+
+ return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID,
+ GFP_KERNEL,
+ atomic_inc_return(&rxrpc_debug_id));
+}
+
+/*
+ * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls
+ * @sock: The socket on which to preallocate
+ * @notify_rx: Event notification function for the call
+ * @user_attach_call: Func to attach call to user_call_ID
+ * @user_call_ID: The tag to attach to the preallocated call
+ * @gfp: The allocation conditions.
+ * @debug_id: The tracing debug ID.
+ *
+ * Charge up the socket with preallocated calls, each with a user ID. A
+ * function should be provided to effect the attachment from the user's side.
+ * The user is given a ref to hold on the call.
+ *
+ * Note that the call may become connected before this function returns.
+ */
+int rxrpc_kernel_charge_accept(struct socket *sock,
+ rxrpc_notify_rx_t notify_rx,
+ rxrpc_user_attach_call_t user_attach_call,
+ unsigned long user_call_ID, gfp_t gfp,
+ unsigned int debug_id)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct rxrpc_backlog *b = rx->backlog;
+
+ if (sock->sk->sk_state == RXRPC_CLOSE)
+ return -ESHUTDOWN;
+
+ return rxrpc_service_prealloc_one(rx, b, notify_rx,
+ user_attach_call, user_call_ID,
+ gfp, debug_id);
+}
+EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
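
A kernel service keeps its listening socket charged by calling rxrpc_kernel_charge_accept() once per call it is prepared to accept, typically re-charging whenever a preallocated call is consumed. The sketch below shows that usage pattern; the example_* names and the per-call record are illustrative assumptions, not the in-tree AFS code:

/* Hypothetical per-call record and callbacks, for illustration only. */
struct example_call { struct rxrpc_call *rxcall; };

static void example_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
			      unsigned long call_user_ID)
{
	/* Poke the service's state machine / workqueue here. */
}

static void example_attach_call(struct rxrpc_call *rxcall,
				unsigned long call_user_ID)
{
	struct example_call *call = (struct example_call *)call_user_ID;

	call->rxcall = rxcall;	/* the service now holds a ref on rxcall */
}

static atomic_t example_debug_ids;

/* Keep charging the backlog until it reports an error (e.g. -ENOBUFS when full). */
static void example_charge_preallocation(struct socket *rxrpc_socket)
{
	struct example_call *call;

	for (;;) {
		call = kzalloc(sizeof(*call), GFP_KERNEL);
		if (!call)
			break;
		if (rxrpc_kernel_charge_accept(rxrpc_socket,
					       example_notify_rx,
					       example_attach_call,
					       (unsigned long)call,
					       GFP_KERNEL,
					       atomic_inc_return(&example_debug_ids)) < 0) {
			kfree(call);
			break;
		}
	}
}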
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
new file mode 100644
index 0000000000..e363f21a20
--- /dev/null
+++ b/net/rxrpc/call_event.c
@@ -0,0 +1,547 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/udp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * Propose a PING ACK be sent.
+ */
+void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial,
+ enum rxrpc_propose_ack_trace why)
+{
+ unsigned long now = jiffies;
+ unsigned long ping_at = now + rxrpc_idle_ack_delay;
+
+ if (time_before(ping_at, call->ping_at)) {
+ WRITE_ONCE(call->ping_at, ping_at);
+ rxrpc_reduce_call_timer(call, ping_at, now,
+ rxrpc_timer_set_for_ping);
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_PING, serial);
+ }
+}
+
+/*
+ * Propose a DELAY ACK be sent in the future.
+ */
+void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
+ enum rxrpc_propose_ack_trace why)
+{
+ unsigned long expiry = rxrpc_soft_ack_delay;
+ unsigned long now = jiffies, ack_at;
+
+ call->ackr_serial = serial;
+
+ if (rxrpc_soft_ack_delay < expiry)
+ expiry = rxrpc_soft_ack_delay;
+ if (call->peer->srtt_us != 0)
+ ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
+ else
+ ack_at = expiry;
+
+ ack_at += READ_ONCE(call->tx_backoff);
+ ack_at += now;
+ if (time_before(ack_at, call->delay_ack_at)) {
+ WRITE_ONCE(call->delay_ack_at, ack_at);
+ rxrpc_reduce_call_timer(call, ack_at, now,
+ rxrpc_timer_set_for_ack);
+ }
+
+ trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial);
+}
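
The deadline computed above is roughly one smoothed RTT from now when RTT samples exist, or the fixed soft-ACK delay otherwise; srtt_us is kept scaled by eight (as in the TCP estimator this derives from), hence the >> 3. A worked example with assumed numbers:

/* Sketch: assume a 20ms smoothed RTT, i.e. srtt_us == 160000 (8x scaled).
 * ack_at = jiffies + usecs_to_jiffies(160000 >> 3) + tx_backoff
 *        = jiffies + usecs_to_jiffies(20000) + tx_backoff.
 * With no RTT samples yet, ack_at = jiffies + rxrpc_soft_ack_delay + tx_backoff.
 */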
+
+/*
+ * Queue an ACK for immediate transmission.
+ */
+void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
+ rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return;
+
+ rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]);
+
+ txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK,
+ rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS);
+ if (!txb) {
+ kleave(" = -ENOMEM");
+ return;
+ }
+
+ txb->ack_why = why;
+ txb->wire.seq = 0;
+ txb->wire.type = RXRPC_PACKET_TYPE_ACK;
+ txb->wire.flags |= RXRPC_SLOW_START_OK;
+ txb->ack.bufferSpace = 0;
+ txb->ack.maxSkew = 0;
+ txb->ack.firstPacket = 0;
+ txb->ack.previousPacket = 0;
+ txb->ack.serial = htonl(serial);
+ txb->ack.reason = ack_reason;
+ txb->ack.nAcks = 0;
+
+ trace_rxrpc_send_ack(call, why, ack_reason, serial);
+ rxrpc_send_ack_packet(call, txb);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx);
+}
+
+/*
+ * Handle congestion being detected by the retransmit timeout.
+ */
+static void rxrpc_congestion_timeout(struct rxrpc_call *call)
+{
+ set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
+}
+
+/*
+ * Perform retransmission of NAK'd and unack'd packets.
+ */
+void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
+{
+ struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_txbuf *txb;
+ unsigned long resend_at;
+ rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
+ ktime_t now, max_age, oldest, ack_ts;
+ bool unacked = false;
+ unsigned int i;
+ LIST_HEAD(retrans_queue);
+
+ _enter("{%d,%d}", call->acks_hard_ack, call->tx_top);
+
+ now = ktime_get_real();
+ max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j));
+ oldest = now;
+
+ if (list_empty(&call->tx_buffer))
+ goto no_resend;
+
+ if (list_empty(&call->tx_buffer))
+ goto no_further_resend;
+
+ trace_rxrpc_resend(call, ack_skb);
+ txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link);
+
+ /* Scan the soft ACK table without dropping the lock and resend any
+ * explicitly NAK'd packets.
+ */
+ if (ack_skb) {
+ ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
+
+ for (i = 0; i < ack->nAcks; i++) {
+ rxrpc_seq_t seq;
+
+ if (ack->acks[i] & 1)
+ continue;
+ seq = ntohl(ack->firstPacket) + i;
+ if (after(txb->seq, transmitted))
+ break;
+ if (after(txb->seq, seq))
+ continue; /* A new hard ACK probably came in */
+ list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
+ if (txb->seq == seq)
+ goto found_txb;
+ }
+ goto no_further_resend;
+
+ found_txb:
+ if (after(ntohl(txb->wire.serial), call->acks_highest_serial))
+ continue; /* Ack point not yet reached */
+
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked);
+
+ if (list_empty(&txb->tx_link)) {
+ list_add_tail(&txb->tx_link, &retrans_queue);
+ set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
+ }
+
+ trace_rxrpc_retransmit(call, txb->seq,
+ ktime_to_ns(ktime_sub(txb->last_sent,
+ max_age)));
+
+ if (list_is_last(&txb->call_link, &call->tx_buffer))
+ goto no_further_resend;
+ txb = list_next_entry(txb, call_link);
+ }
+ }
+
+ /* Fast-forward through the Tx queue to the point the peer says it has
+ * seen. Anything between the soft-ACK table and that point will get
+ * ACK'd or NACK'd in due course, so don't worry about it here; here we
+ * need to consider retransmitting anything beyond that point.
+ *
+ * Note that ACK for a packet can beat the update of tx_transmitted.
+ */
+ if (after_eq(READ_ONCE(call->acks_prev_seq), READ_ONCE(call->tx_transmitted)))
+ goto no_further_resend;
+
+ list_for_each_entry_from(txb, &call->tx_buffer, call_link) {
+ if (before_eq(txb->seq, READ_ONCE(call->acks_prev_seq)))
+ continue;
+ if (after(txb->seq, READ_ONCE(call->tx_transmitted)))
+ break; /* Not transmitted yet */
+
+ if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE &&
+ before(ntohl(txb->wire.serial), ntohl(ack->serial)))
+ goto do_resend; /* Wasn't accounted for by a more recent ping. */
+
+ if (ktime_after(txb->last_sent, max_age)) {
+ if (ktime_before(txb->last_sent, oldest))
+ oldest = txb->last_sent;
+ continue;
+ }
+
+ do_resend:
+ unacked = true;
+ if (list_empty(&txb->tx_link)) {
+ list_add_tail(&txb->tx_link, &retrans_queue);
+ set_bit(RXRPC_TXBUF_RESENT, &txb->flags);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans);
+ }
+ }
+
+no_further_resend:
+no_resend:
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
+ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer,
+ !list_empty(&retrans_queue));
+ WRITE_ONCE(call->resend_at, resend_at);
+
+ if (unacked)
+ rxrpc_congestion_timeout(call);
+
+ /* If there was nothing that needed retransmission then it's likely
+ * that an ACK got lost somewhere. Send a ping to find out instead of
+ * retransmitting data.
+ */
+ if (list_empty(&retrans_queue)) {
+ rxrpc_reduce_call_timer(call, resend_at, jiffies,
+ rxrpc_timer_set_for_resend);
+ ack_ts = ktime_sub(now, call->acks_latest_ts);
+ if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
+ goto out;
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_lost_ack);
+ goto out;
+ }
+
+ /* Retransmit the queue */
+ while ((txb = list_first_entry_or_null(&retrans_queue,
+ struct rxrpc_txbuf, tx_link))) {
+ list_del_init(&txb->tx_link);
+ rxrpc_transmit_one(call, txb);
+ }
+
+out:
+ _leave("");
+}
+
+/*
+ * Start transmitting the reply to a service. This cancels the need to ACK the
+ * request if we haven't yet done so.
+ */
+static void rxrpc_begin_service_reply(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies;
+
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
+ WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
+ if (call->ackr_reason == RXRPC_ACK_DELAY)
+ call->ackr_reason = 0;
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+}
+
+/*
+ * Close the transmission phase. After this point there is no more data to be
+ * transmitted in the call.
+ */
+static void rxrpc_close_tx_phase(struct rxrpc_call *call)
+{
+ _debug("________awaiting reply/ACK__________");
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ break;
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK);
+ break;
+ default:
+ break;
+ }
+}
+
+static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
+{
+ unsigned int winsize = min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra);
+ rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize;
+ rxrpc_seq_t tx_top = call->tx_top;
+ int space;
+
+ space = wtop - tx_top;
+ return space > 0;
+}
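
rxrpc_tx_window_has_space() treats the transmit window as starting at the hard-ACK point and extending min(tx_winsize, cwnd + extra) packets beyond it, and asks whether tx_top has reached that edge. A worked example with assumed values:

/* Sketch: assume acks_hard_ack == 100, tx_winsize == 64, cong_cwnd == 10 and
 * cong_extra == 0.  Then winsize == 10 and wtop == 110; with tx_top == 105,
 * space == 5 so more DATA may go out, and at tx_top == 110 space == 0 and
 * transmission pauses until the hard-ACK point advances.
 */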
+
+/*
+ * Decant some of the sendmsg prepared queue into the transmission buffer.
+ */
+static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
+{
+ struct rxrpc_txbuf *txb;
+
+ if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ if (list_empty(&call->tx_sendmsg))
+ return;
+ rxrpc_expose_client_call(call);
+ }
+
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ spin_lock(&call->tx_lock);
+ list_del(&txb->call_link);
+ spin_unlock(&call->tx_lock);
+
+ call->tx_top = txb->seq;
+ list_add_tail(&txb->call_link, &call->tx_buffer);
+
+ if (txb->wire.flags & RXRPC_LAST_PACKET)
+ rxrpc_close_tx_phase(call);
+
+ rxrpc_transmit_one(call, txb);
+
+ if (!rxrpc_tx_window_has_space(call))
+ break;
+ }
+}
+
+static void rxrpc_transmit_some_data(struct rxrpc_call *call)
+{
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ if (list_empty(&call->tx_sendmsg))
+ return;
+ rxrpc_begin_service_reply(call);
+ fallthrough;
+
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ if (!rxrpc_tx_window_has_space(call))
+ return;
+ if (list_empty(&call->tx_sendmsg)) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow);
+ return;
+ }
+ rxrpc_decant_prepared_tx(call);
+ break;
+ default:
+ return;
+ }
+}
+
+/*
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
+ */
+static void rxrpc_send_initial_ping(struct rxrpc_call *call)
+{
+ if (call->peer->rtt_count < 3 ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_params);
+}
+
+/*
+ * Handle retransmission and deferred ACK/abort generation.
+ */
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ unsigned long now, next, t;
+ rxrpc_serial_t ackr_serial;
+ bool resend = false, expired = false;
+ s32 abort_code;
+
+ rxrpc_see_call(call, rxrpc_call_see_input);
+
+ //printk("\n--------------------\n");
+ _enter("{%d,%s,%lx}",
+ call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)],
+ call->events);
+
+ if (__rxrpc_call_is_complete(call))
+ goto out;
+
+ /* Handle abort request locklessly, vs rxrpc_propose_abort(). */
+ abort_code = smp_load_acquire(&call->send_abort);
+ if (abort_code) {
+ rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err,
+ call->send_abort_why);
+ goto out;
+ }
+
+ if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
+ goto out;
+
+ /* If we see our async-event poke, check for timeout trippage. */
+ now = jiffies;
+ t = READ_ONCE(call->expect_rx_by);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now);
+ expired = true;
+ }
+
+ t = READ_ONCE(call->expect_req_by);
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
+ time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
+ expired = true;
+ }
+
+ t = READ_ONCE(call->expect_term_by);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now);
+ expired = true;
+ }
+
+ t = READ_ONCE(call->delay_ack_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
+ cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
+ ackr_serial = xchg(&call->ackr_serial, 0);
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+ rxrpc_propose_ack_ping_for_lost_ack);
+ }
+
+ t = READ_ONCE(call->ack_lost_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now);
+ cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET);
+ set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events);
+ }
+
+ t = READ_ONCE(call->keepalive_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now);
+ cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
+ }
+
+ t = READ_ONCE(call->ping_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now);
+ cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET);
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_keepalive);
+ }
+
+ t = READ_ONCE(call->resend_at);
+ if (time_after_eq(now, t)) {
+ trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now);
+ cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET);
+ resend = true;
+ }
+
+ if (skb)
+ rxrpc_input_call_packet(call, skb);
+
+ rxrpc_transmit_some_data(call);
+
+ if (skb) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK)
+ rxrpc_congestion_degrade(call);
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events))
+ rxrpc_send_initial_ping(call);
+
+ /* Process events */
+ if (expired) {
+ if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
+ (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
+ trace_rxrpc_call_reset(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
+ } else {
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
+ }
+ goto out;
+ }
+
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_lost_ack);
+
+ if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
+ rxrpc_resend(call, NULL);
+
+ if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_rx_idle);
+
+ if (call->ackr_nr_unacked > 2) {
+ if (call->peer->rtt_count < 3)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_rtt);
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_old_rtt);
+ else
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
+ }
+
+ /* Make sure the timer is restarted */
+ if (!__rxrpc_call_is_complete(call)) {
+ next = call->expect_rx_by;
+
+#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
+
+ set(call->expect_req_by);
+ set(call->expect_term_by);
+ set(call->delay_ack_at);
+ set(call->ack_lost_at);
+ set(call->resend_at);
+ set(call->keepalive_at);
+ set(call->ping_at);
+
+ now = jiffies;
+ if (time_after_eq(now, next))
+ rxrpc_poke_call(call, rxrpc_call_poke_timer_now);
+
+ rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
+ }
+
+out:
+ if (__rxrpc_call_is_complete(call)) {
+ del_timer_sync(&call->timer);
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ rxrpc_disconnect_call(call);
+ if (call->security)
+ call->security->free_call_crypto(call);
+ }
+ if (call->acks_hard_ack != call->tx_bottom)
+ rxrpc_shrink_call_tx_buffer(call);
+ _leave("");
+ return true;
+}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
new file mode 100644
index 0000000000..f10b37c147
--- /dev/null
+++ b/net/rxrpc/call_object.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC individual remote procedure call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <linux/spinlock_types.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
+ [RXRPC_CALL_UNINITIALISED] = "Uninit ",
+ [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn",
+ [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
+ [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
+ [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
+ [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc",
+ [RXRPC_CALL_SERVER_SECURING] = "SvSecure",
+ [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
+ [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq",
+ [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
+ [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK",
+ [RXRPC_CALL_COMPLETE] = "Complete",
+};
+
+const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
+ [RXRPC_CALL_SUCCEEDED] = "Complete",
+ [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort",
+ [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CALL_LOCAL_ERROR] = "LocError",
+ [RXRPC_CALL_NETWORK_ERROR] = "NetError",
+};
+
+struct kmem_cache *rxrpc_call_jar;
+
+static DEFINE_SEMAPHORE(rxrpc_call_limiter, 1000);
+static DEFINE_SEMAPHORE(rxrpc_kernel_call_limiter, 1000);
+
+void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
+{
+ struct rxrpc_local *local = call->local;
+ bool busy;
+
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) {
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&call->attend_link);
+ trace_rxrpc_poke_call(call, busy, what);
+ if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke))
+ busy = true;
+ if (!busy) {
+ list_add_tail(&call->attend_link, &local->call_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ if (!busy)
+ rxrpc_wake_up_io_thread(local);
+ }
+}
+
+static void rxrpc_call_timer_expired(struct timer_list *t)
+{
+ struct rxrpc_call *call = from_timer(call, t, timer);
+
+ _enter("%d", call->debug_id);
+
+ if (!__rxrpc_call_is_complete(call)) {
+ trace_rxrpc_timer_expired(call, jiffies);
+ rxrpc_poke_call(call, rxrpc_call_poke_timer);
+ }
+}
+
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+ unsigned long expire_at,
+ unsigned long now,
+ enum rxrpc_timer_trace why)
+{
+ trace_rxrpc_timer(call, why, now);
+ timer_reduce(&call->timer, expire_at);
+}
+
+static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
+
+static void rxrpc_destroy_call(struct work_struct *);
+
+/*
+ * find an extant call by its user ID
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
+ unsigned long user_call_ID)
+{
+ struct rxrpc_call *call;
+ struct rb_node *p;
+
+ _enter("%p,%lx", rx, user_call_ID);
+
+ read_lock(&rx->call_lock);
+
+ p = rx->calls.rb_node;
+ while (p) {
+ call = rb_entry(p, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ p = p->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ p = p->rb_right;
+ else
+ goto found_extant_call;
+ }
+
+ read_unlock(&rx->call_lock);
+ _leave(" = NULL");
+ return NULL;
+
+found_extant_call:
+ rxrpc_get_call(call, rxrpc_call_get_sendmsg);
+ read_unlock(&rx->call_lock);
+ _leave(" = %p [%d]", call, refcount_read(&call->ref));
+ return call;
+}
+
+/*
+ * allocate a new call
+ */
+struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
+ unsigned int debug_id)
+{
+ struct rxrpc_call *call;
+ struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk));
+
+ call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
+ if (!call)
+ return NULL;
+
+ mutex_init(&call->user_mutex);
+
+ /* Prevent lockdep reporting a deadlock false positive between the afs
+ * filesystem and sys_sendmsg() via the mmap sem.
+ */
+ if (rx->sk.sk_kern_sock)
+ lockdep_set_class(&call->user_mutex,
+ &rxrpc_call_user_mutex_lock_class_key);
+
+ timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
+ INIT_WORK(&call->destroyer, rxrpc_destroy_call);
+ INIT_LIST_HEAD(&call->link);
+ INIT_LIST_HEAD(&call->wait_link);
+ INIT_LIST_HEAD(&call->accept_link);
+ INIT_LIST_HEAD(&call->recvmsg_link);
+ INIT_LIST_HEAD(&call->sock_link);
+ INIT_LIST_HEAD(&call->attend_link);
+ INIT_LIST_HEAD(&call->tx_sendmsg);
+ INIT_LIST_HEAD(&call->tx_buffer);
+ skb_queue_head_init(&call->recvmsg_queue);
+ skb_queue_head_init(&call->rx_oos_queue);
+ init_waitqueue_head(&call->waitq);
+ spin_lock_init(&call->notify_lock);
+ spin_lock_init(&call->tx_lock);
+ refcount_set(&call->ref, 1);
+ call->debug_id = debug_id;
+ call->tx_total_len = -1;
+ call->next_rx_timo = 20 * HZ;
+ call->next_req_timo = 1 * HZ;
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
+
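+ /* Poison the sock_node so that use of the node before the call is
+ * inserted into a socket's call tree is detectable.
+ */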
+ memset(&call->sock_node, 0xed, sizeof(call->sock_node));
+
+ call->rx_winsize = rxrpc_rx_window_size;
+ call->tx_winsize = 16;
+
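+ /* Set the initial congestion window according to RFC 3390: two segments
+ * for a large sender MSS, three for a medium one and four for a small
+ * one.
+ */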
+ if (RXRPC_TX_SMSS > 2190)
+ call->cong_cwnd = 2;
+ else if (RXRPC_TX_SMSS > 1095)
+ call->cong_cwnd = 3;
+ else
+ call->cong_cwnd = 4;
+ call->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
+
+ call->rxnet = rxnet;
+ call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK;
+ atomic_inc(&rxnet->nr_calls);
+ return call;
+}
+
+/*
+ * Allocate a new client call.
+ */
+static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
+ struct sockaddr_rxrpc *srx,
+ struct rxrpc_conn_parameters *cp,
+ struct rxrpc_call_params *p,
+ gfp_t gfp,
+ unsigned int debug_id)
+{
+ struct rxrpc_call *call;
+ ktime_t now;
+ int ret;
+
+ _enter("");
+
+ call = rxrpc_alloc_call(rx, gfp, debug_id);
+ if (!call)
+ return ERR_PTR(-ENOMEM);
+ now = ktime_get_real();
+ call->acks_latest_ts = now;
+ call->cong_tstamp = now;
+ call->dest_srx = *srx;
+ call->interruptibility = p->interruptibility;
+ call->tx_total_len = p->tx_total_len;
+ call->key = key_get(cp->key);
+ call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call);
+ call->security_level = cp->security_level;
+ if (p->kernel)
+ __set_bit(RXRPC_CALL_KERNEL, &call->flags);
+ if (cp->upgrade)
+ __set_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ if (cp->exclusive)
+ __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+
+ if (p->timeouts.normal)
+ call->next_rx_timo = max(msecs_to_jiffies(p->timeouts.normal), 1UL);
+ if (p->timeouts.idle)
+ call->next_req_timo = max(msecs_to_jiffies(p->timeouts.idle), 1UL);
+ if (p->timeouts.hard)
+ call->hard_timo = p->timeouts.hard * HZ;
+
+ ret = rxrpc_init_client_call_security(call);
+ if (ret < 0) {
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret);
+ rxrpc_put_call(call, rxrpc_call_put_discard_error);
+ return ERR_PTR(ret);
+ }
+
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN);
+
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ p->user_call_ID, rxrpc_call_new_client);
+
+ _leave(" = %p", call);
+ return call;
+}
+
+/*
+ * Initiate the call ack/resend/expiry timer.
+ */
+void rxrpc_start_call_timer(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies;
+ unsigned long j = now + MAX_JIFFY_OFFSET;
+
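+ /* Park every per-event expiry time in the far future; each event will
+ * pull its own expiry forward when it becomes relevant.
+ */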
+ call->delay_ack_at = j;
+ call->ack_lost_at = j;
+ call->resend_at = j;
+ call->ping_at = j;
+ call->keepalive_at = j;
+ call->expect_rx_by = j;
+ call->expect_req_by = j;
+ call->expect_term_by = j + call->hard_timo;
+ call->timer.expires = now;
+}
+
+/*
+ * Wait for a call slot to become available.
+ */
+static struct semaphore *rxrpc_get_call_slot(struct rxrpc_call_params *p, gfp_t gfp)
+{
+ struct semaphore *limiter = &rxrpc_call_limiter;
+
+ if (p->kernel)
+ limiter = &rxrpc_kernel_call_limiter;
+ if (p->interruptibility == RXRPC_UNINTERRUPTIBLE) {
+ down(limiter);
+ return limiter;
+ }
+ return down_interruptible(limiter) < 0 ? NULL : limiter;
+}
+
+/*
+ * Release a call slot.
+ */
+static void rxrpc_put_call_slot(struct rxrpc_call *call)
+{
+ struct semaphore *limiter = &rxrpc_call_limiter;
+
+ if (test_bit(RXRPC_CALL_KERNEL, &call->flags))
+ limiter = &rxrpc_kernel_call_limiter;
+ up(limiter);
+}
+
+/*
+ * Start the process of connecting a call. We obtain a peer and a connection
+ * bundle, but the actual association of a call with a connection is offloaded
+ * to the I/O thread to simplify locking.
+ */
+static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
+{
+ struct rxrpc_local *local = call->local;
+ int ret = -ENOMEM;
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
+ if (!call->peer)
+ goto error;
+
+ ret = rxrpc_look_up_bundle(call, gfp);
+ if (ret < 0)
+ goto error;
+
+ trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call);
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+ spin_lock(&local->client_call_lock);
+ list_add_tail(&call->wait_link, &local->new_client_calls);
+ spin_unlock(&local->client_call_lock);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+
+error:
+ __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ return ret;
+}
+
+/*
+ * Set up a call for the given parameters.
+ * - Called with the socket lock held, which it must release.
+ * - If it returns a call, the call's lock will need releasing by the caller.
+ */
+struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ struct rxrpc_call_params *p,
+ gfp_t gfp,
+ unsigned int debug_id)
+ __releases(&rx->sk.sk_lock.slock)
+ __acquires(&call->user_mutex)
+{
+ struct rxrpc_call *call, *xcall;
+ struct rxrpc_net *rxnet;
+ struct semaphore *limiter;
+ struct rb_node *parent, **pp;
+ int ret;
+
+ _enter("%p,%lx", rx, p->user_call_ID);
+
+ limiter = rxrpc_get_call_slot(p, gfp);
+ if (!limiter) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-ERESTARTSYS);
+ }
+
+ call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id);
+ if (IS_ERR(call)) {
+ release_sock(&rx->sk);
+ up(limiter);
+ _leave(" = %ld", PTR_ERR(call));
+ return call;
+ }
+
+ /* We need to protect a partially set up call against the user as we
+ * will be acting outside the socket lock.
+ */
+ mutex_lock(&call->user_mutex);
+
+ /* Publish the call, even though it is incompletely set up as yet */
+ write_lock(&rx->call_lock);
+
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ xcall = rb_entry(parent, struct rxrpc_call, sock_node);
+
+ if (p->user_call_ID < xcall->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (p->user_call_ID > xcall->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto error_dup_user_ID;
+ }
+
+ rcu_assign_pointer(call->socket, rx);
+ call->user_call_ID = p->user_call_ID;
+ __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ rxrpc_get_call(call, rxrpc_call_get_userid);
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ list_add(&call->sock_link, &rx->sock_calls);
+
+ write_unlock(&rx->call_lock);
+
+ rxnet = call->rxnet;
+ spin_lock(&rxnet->call_lock);
+ list_add_tail_rcu(&call->link, &rxnet->calls);
+ spin_unlock(&rxnet->call_lock);
+
+ /* From this point on, the call is protected by its own lock. */
+ release_sock(&rx->sk);
+
+ /* Set up or get a connection record and set the protocol parameters,
+ * including channel number and call ID.
+ */
+ ret = rxrpc_connect_call(call, gfp);
+ if (ret < 0)
+ goto error_attached_to_socket;
+
+ _leave(" = %p [new]", call);
+ return call;
+
+ /* We unexpectedly found the user ID in the list after taking
+ * the call_lock. This shouldn't happen unless the user races
+ * with itself and tries to add the same user ID twice at the
+ * same time in different threads.
+ */
+error_dup_user_ID:
+ write_unlock(&rx->call_lock);
+ release_sock(&rx->sk);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST);
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
+ rxrpc_call_see_userid_exists);
+ mutex_unlock(&call->user_mutex);
+ rxrpc_put_call(call, rxrpc_call_put_userid_exists);
+ _leave(" = -EEXIST");
+ return ERR_PTR(-EEXIST);
+
+ /* We got an error, but the call is attached to the socket and is in
+ * need of release. However, we might now race with recvmsg() when call
+ * completion notifies the socket. Return 0 from sys_sendmsg() and
+ * leave the error to recvmsg() to deal with.
+ */
+error_attached_to_socket:
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
+ rxrpc_call_see_connect_failed);
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
+ _leave(" = c=%08x [err]", call->debug_id);
+ return call;
+}
+
+/*
+ * Set up an incoming call. call->conn points to the connection.
+ * This is called in BH context and isn't allowed to fail.
+ */
+void rxrpc_incoming_call(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ u32 chan;
+
+ _enter(",%d", call->conn->debug_id);
+
+ rcu_assign_pointer(call->socket, rx);
+ call->call_id = sp->hdr.callNumber;
+ call->dest_srx.srx_service = sp->hdr.serviceId;
+ call->cid = sp->hdr.cid;
+ call->cong_tstamp = skb->tstamp;
+
+ __set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+
+ spin_lock(&conn->state_lock);
+
+ switch (conn->state) {
+ case RXRPC_CONN_SERVICE_UNSECURED:
+ case RXRPC_CONN_SERVICE_CHALLENGING:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+ break;
+ case RXRPC_CONN_SERVICE:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ break;
+
+ case RXRPC_CONN_ABORTED:
+ rxrpc_set_call_completion(call, conn->completion,
+ conn->abort_code, conn->error);
+ break;
+ default:
+ BUG();
+ }
+
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+
+ /* Set the channel for this call. We don't get channel_lock as we're
+ * only defending against the data_ready handler (which we're called
+ * from) and the RESPONSE packet parser (which is only really
+ * interested in call_counter and can cope with a disagreement with the
+ * call pointer).
+ */
+ chan = sp->hdr.cid & RXRPC_CHANNELMASK;
+ conn->channels[chan].call_counter = call->call_id;
+ conn->channels[chan].call_id = call->call_id;
+ conn->channels[chan].call = call;
+ spin_unlock(&conn->state_lock);
+
+ spin_lock(&conn->peer->lock);
+ hlist_add_head(&call->error_link, &conn->peer->error_targets);
+ spin_unlock(&conn->peer->lock);
+
+ rxrpc_start_call_timer(call);
+ _leave("");
+}
+
+/*
+ * Note the re-emergence of a call.
+ */
+void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
+{
+ if (call) {
+ int r = refcount_read(&call->ref);
+
+ trace_rxrpc_call(call->debug_id, r, 0, why);
+ }
+}
+
+struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *call,
+ enum rxrpc_call_trace why)
+{
+ int r;
+
+ if (!call || !__refcount_inc_not_zero(&call->ref, &r))
+ return NULL;
+ trace_rxrpc_call(call->debug_id, r + 1, 0, why);
+ return call;
+}
+
+/*
+ * Note the addition of a ref on a call.
+ */
+void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
+{
+ int r;
+
+ __refcount_inc(&call->ref, &r);
+ trace_rxrpc_call(call->debug_id, r + 1, 0, why);
+}
+
+/*
+ * Clean up the Rx skb ring.
+ */
+static void rxrpc_cleanup_ring(struct rxrpc_call *call)
+{
+ rxrpc_purge_queue(&call->recvmsg_queue);
+ rxrpc_purge_queue(&call->rx_oos_queue);
+}
+
+/*
+ * Detach a call from its owning socket.
+ */
+void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = call->conn;
+ bool put = false, putu = false;
+
+ _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
+
+ trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
+ call->flags, rxrpc_call_see_release);
+
+ if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
+ BUG();
+
+ rxrpc_put_call_slot(call);
+
+ /* Make sure we don't get any more notifications */
+ spin_lock(&rx->recvmsg_lock);
+
+ if (!list_empty(&call->recvmsg_link)) {
+ _debug("unlinking once-pending call %p { e=%lx f=%lx }",
+ call, call->events, call->flags);
+ list_del(&call->recvmsg_link);
+ put = true;
+ }
+
+ /* list_empty() must return false in rxrpc_notify_socket() */
+ call->recvmsg_link.next = NULL;
+ call->recvmsg_link.prev = NULL;
+
+ spin_unlock(&rx->recvmsg_lock);
+ if (put)
+ rxrpc_put_call(call, rxrpc_call_put_unnotify);
+
+ write_lock(&rx->call_lock);
+
+ if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ rb_erase(&call->sock_node, &rx->calls);
+ memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
+ putu = true;
+ }
+
+ list_del(&call->sock_link);
+ write_unlock(&rx->call_lock);
+
+ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+
+ if (putu)
+ rxrpc_put_call(call, rxrpc_call_put_userid);
+
+ _leave("");
+}
+
+/*
+ * release all the calls associated with a socket
+ */
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
+{
+ struct rxrpc_call *call;
+
+ _enter("%p", rx);
+
+ while (!list_empty(&rx->to_be_accepted)) {
+ call = list_entry(rx->to_be_accepted.next,
+ struct rxrpc_call, accept_link);
+ list_del(&call->accept_link);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release_tba);
+ rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
+ }
+
+ while (!list_empty(&rx->sock_calls)) {
+ call = list_entry(rx->sock_calls.next,
+ struct rxrpc_call, sock_link);
+ rxrpc_get_call(call, rxrpc_call_get_release_sock);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release);
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put_release_sock);
+ }
+
+ _leave("");
+}
+
+/*
+ * release a call
+ */
+void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
+{
+ struct rxrpc_net *rxnet = call->rxnet;
+ unsigned int debug_id = call->debug_id;
+ bool dead;
+ int r;
+
+ ASSERT(call != NULL);
+
+ dead = __refcount_dec_and_test(&call->ref, &r);
+ trace_rxrpc_call(debug_id, r - 1, 0, why);
+ if (dead) {
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
+
+ if (!list_empty(&call->link)) {
+ spin_lock(&rxnet->call_lock);
+ list_del_init(&call->link);
+ spin_unlock(&rxnet->call_lock);
+ }
+
+ rxrpc_cleanup_call(call);
+ }
+}
+
+/*
+ * Free up the call under RCU.
+ */
+static void rxrpc_rcu_free_call(struct rcu_head *rcu)
+{
+ struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+ struct rxrpc_net *rxnet = READ_ONCE(call->rxnet);
+
+ kmem_cache_free(rxrpc_call_jar, call);
+ if (atomic_dec_and_test(&rxnet->nr_calls))
+ wake_up_var(&rxnet->nr_calls);
+}
+
+/*
+ * Final call destruction - but must be done in process context.
+ */
+static void rxrpc_destroy_call(struct work_struct *work)
+{
+ struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer);
+ struct rxrpc_txbuf *txb;
+
+ del_timer_sync(&call->timer);
+
+ rxrpc_cleanup_ring(call);
+ while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
+ }
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
+ list_del(&txb->call_link);
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned);
+ }
+
+ rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
+ rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+ rxrpc_deactivate_bundle(call->bundle);
+ rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call);
+ rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
+ rxrpc_put_local(call->local, rxrpc_local_put_call);
+ call_rcu(&call->rcu, rxrpc_rcu_free_call);
+}
+
+/*
+ * clean up a call
+ */
+void rxrpc_cleanup_call(struct rxrpc_call *call)
+{
+ memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
+
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
+ ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
+
+ del_timer(&call->timer);
+
+ if (rcu_read_lock_held())
+ /* Can't use the rxrpc workqueue as we need to cancel/flush
+ * something that may be running/waiting there.
+ */
+ schedule_work(&call->destroyer);
+ else
+ rxrpc_destroy_call(&call->destroyer);
+}
+
+/*
+ * Make sure that all calls are gone from a network namespace. To reach this
+ * point, any open UDP sockets in that namespace must have been closed, so any
+ * outstanding calls cannot be doing I/O.
+ */
+void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
+{
+ struct rxrpc_call *call;
+
+ _enter("");
+
+ if (!list_empty(&rxnet->calls)) {
+ spin_lock(&rxnet->call_lock);
+
+ while (!list_empty(&rxnet->calls)) {
+ call = list_entry(rxnet->calls.next,
+ struct rxrpc_call, link);
+ _debug("Zapping call %p", call);
+
+ rxrpc_see_call(call, rxrpc_call_see_zap);
+ list_del_init(&call->link);
+
+ pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+ call, refcount_read(&call->ref),
+ rxrpc_call_states[__rxrpc_call_state(call)],
+ call->flags, call->events);
+
+ spin_unlock(&rxnet->call_lock);
+ cond_resched();
+ spin_lock(&rxnet->call_lock);
+ }
+
+ spin_unlock(&rxnet->call_lock);
+ }
+
+ atomic_dec(&rxnet->nr_calls);
+ wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
+}
diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c
new file mode 100644
index 0000000000..6afb54373e
--- /dev/null
+++ b/net/rxrpc/call_state.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Call state changing functions.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "ar-internal.h"
+
+/*
+ * Transition a call to the complete state.
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE)
+ return false;
+
+ call->abort_code = abort_code;
+ call->error = error;
+ call->completion = compl;
+ /* Allow reader of completion state to operate locklessly */
+ rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE);
+ trace_rxrpc_call_complete(call);
+ wake_up(&call->waitq);
+ rxrpc_notify_socket(call);
+ return true;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+ return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
+{
+ trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
+ abort_code, error);
+ if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+ abort_code, error))
+ return false;
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_send_abort_packet(call);
+ return true;
+}
+
+/*
+ * Record that a call errored out before even getting off the ground, thereby
+ * setting the state to allow it to be destroyed.
+ */
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error)
+{
+ call->abort_code = RX_CALL_DEAD;
+ call->error = error;
+ call->completion = compl;
+ call->_state = RXRPC_CALL_COMPLETE;
+ trace_rxrpc_call_complete(call);
+ WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags));
+}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
new file mode 100644
index 0000000000..1d95f8bc76
--- /dev/null
+++ b/net/rxrpc/conn_client.c
@@ -0,0 +1,817 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Client connection-specific management code.
+ *
+ * Copyright (C) 2016, 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Client connections need to be cached for a little while after they've made a
+ * call so as to handle retransmitted DATA packets in case the server didn't
+ * receive the final ACK or terminating ABORT we sent it.
+ *
+ * There is one flag of relevance to the cache:
+ *
+ * DONT_REUSE - The connection should be discarded as soon as possible and
+ * should not be reused. This is set when an exclusive connection is used
+ * or a call ID counter overflows.
+ *
+ * The caching state may only be changed if the cache lock is held.
+ *
+ * There are two idle client connection expiry durations. If the total number
+ * of connections is below the reap threshold, we use the normal duration; if
+ * it's above, we use the fast duration.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/timer.h>
+#include <linux/sched/signal.h>
+
+#include "ar-internal.h"
+
+__read_mostly unsigned int rxrpc_reap_client_connections = 900;
+__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+
+static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle)
+{
+ atomic_inc(&bundle->active);
+}
+
+/*
+ * Release a connection ID for a client connection.
+ */
+static void rxrpc_put_client_connection_id(struct rxrpc_local *local,
+ struct rxrpc_connection *conn)
+{
+ idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT);
+}
+
+/*
+ * Destroy the client connection ID tree.
+ */
+static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
+{
+ struct rxrpc_connection *conn;
+ int id;
+
+ if (!idr_is_empty(&local->conn_ids)) {
+ idr_for_each_entry(&local->conn_ids, conn, id) {
+ pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
+ conn, refcount_read(&conn->ref));
+ }
+ BUG();
+ }
+
+ idr_destroy(&local->conn_ids);
+}
+
+/*
+ * Allocate a connection bundle.
+ */
+static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
+ gfp_t gfp)
+{
+ static atomic_t rxrpc_bundle_id;
+ struct rxrpc_bundle *bundle;
+
+ bundle = kzalloc(sizeof(*bundle), gfp);
+ if (bundle) {
+ bundle->local = call->local;
+ bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle);
+ bundle->key = key_get(call->key);
+ bundle->security = call->security;
+ bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+ bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ bundle->service_id = call->dest_srx.srx_service;
+ bundle->security_level = call->security_level;
+ bundle->debug_id = atomic_inc_return(&rxrpc_bundle_id);
+ refcount_set(&bundle->ref, 1);
+ atomic_set(&bundle->active, 1);
+ INIT_LIST_HEAD(&bundle->waiting_calls);
+ trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
+ }
+ return bundle;
+}
+
+struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle,
+ enum rxrpc_bundle_trace why)
+{
+ int r;
+
+ __refcount_inc(&bundle->ref, &r);
+ trace_rxrpc_bundle(bundle->debug_id, r + 1, why);
+ return bundle;
+}
+
+static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
+{
+ trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref),
+ rxrpc_bundle_free);
+ rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
+ key_put(bundle->key);
+ kfree(bundle);
+}
+
+void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
+{
+ unsigned int id;
+ bool dead;
+ int r;
+
+ if (bundle) {
+ id = bundle->debug_id;
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+ trace_rxrpc_bundle(id, r - 1, why);
+ if (dead)
+ rxrpc_free_bundle(bundle);
+ }
+}
+
+/*
+ * Get rid of outstanding client connection preallocations when a local
+ * endpoint is destroyed.
+ */
+void rxrpc_purge_client_connections(struct rxrpc_local *local)
+{
+ rxrpc_destroy_client_conn_ids(local);
+}
+
+/*
+ * Allocate a client connection.
+ */
+static struct rxrpc_connection *
+rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_local *local = bundle->local;
+ struct rxrpc_net *rxnet = local->rxnet;
+ int id;
+
+ _enter("");
+
+ conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
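+ /* Cyclically allocate a client connection ID in the range [1, 2^30);
+ * the ID is shifted up so that the bottom bits of the on-wire CID can
+ * carry the channel number.
+ */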
+ id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (id < 0) {
+ kfree(conn);
+ return ERR_PTR(id);
+ }
+
+ refcount_set(&conn->ref, 1);
+ conn->proto.cid = id << RXRPC_CIDSHIFT;
+ conn->proto.epoch = local->rxnet->epoch;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+ conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn);
+ conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn);
+ conn->key = key_get(bundle->key);
+ conn->security = bundle->security;
+ conn->exclusive = bundle->exclusive;
+ conn->upgrade = bundle->upgrade;
+ conn->orig_service_id = bundle->service_id;
+ conn->security_level = bundle->security_level;
+ conn->state = RXRPC_CONN_CLIENT_UNSECURED;
+ conn->service_id = conn->orig_service_id;
+
+ if (conn->security == &rxrpc_no_security)
+ conn->state = RXRPC_CONN_CLIENT;
+
+ atomic_inc(&rxnet->nr_conns);
+ write_lock(&rxnet->conn_lock);
+ list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+ write_unlock(&rxnet->conn_lock);
+
+ rxrpc_see_connection(conn, rxrpc_conn_new_client);
+
+ atomic_inc(&rxnet->nr_client_conns);
+ trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
+ return conn;
+}
+
+/*
+ * Determine if a connection may be reused.
+ */
+static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_net *rxnet;
+ int id_cursor, id, distance, limit;
+
+ if (!conn)
+ goto dont_reuse;
+
+ rxnet = conn->rxnet;
+ if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
+ goto dont_reuse;
+
+ if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED &&
+ conn->state != RXRPC_CONN_CLIENT) ||
+ conn->proto.epoch != rxnet->epoch)
+ goto mark_dont_reuse;
+
+ /* The IDR tree gets very expensive on memory if the connection IDs are
+ * widely scattered throughout the number space, so we shall want to
+ * kill off connections that, say, have an ID more than about four
+ * times the maximum number of client conns away from the current
+ * allocation point to try and keep the IDs concentrated.
+ */
+ id_cursor = idr_get_cursor(&conn->local->conn_ids);
+ id = conn->proto.cid >> RXRPC_CIDSHIFT;
+ distance = id - id_cursor;
+ if (distance < 0)
+ distance = -distance;
+ limit = max_t(unsigned long, atomic_read(&rxnet->nr_conns) * 4, 1024);
+ if (distance > limit)
+ goto mark_dont_reuse;
+
+ return true;
+
+mark_dont_reuse:
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+dont_reuse:
+ return false;
+}
+
+/*
+ * Look up the conn bundle that matches the connection parameters, adding it if
+ * it doesn't yet exist.
+ */
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
+{
+ struct rxrpc_bundle *bundle, *candidate;
+ struct rxrpc_local *local = call->local;
+ struct rb_node *p, **pp, *parent;
+ long diff;
+ bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
+
+ _enter("{%px,%x,%u,%u}",
+ call->peer, key_serial(call->key), call->security_level,
+ upgrade);
+
+ if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) {
+ call->bundle = rxrpc_alloc_bundle(call, gfp);
+ return call->bundle ? 0 : -ENOMEM;
+ }
+
+ /* First, see if the bundle is already there. */
+ _debug("search 1");
+ spin_lock(&local->client_bundles_lock);
+ p = local->client_bundles.rb_node;
+ while (p) {
+ bundle = rb_entry(p, struct rxrpc_bundle, local_node);
+
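+ /* Bundles are keyed on (peer, key, security level, upgrade), compared
+ * here as raw values simply to give a stable tree ordering.
+ */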
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
+#undef cmp
+ if (diff < 0)
+ p = p->rb_left;
+ else if (diff > 0)
+ p = p->rb_right;
+ else
+ goto found_bundle;
+ }
+ spin_unlock(&local->client_bundles_lock);
+ _debug("not found");
+
+ /* It wasn't. We need to add one. */
+ candidate = rxrpc_alloc_bundle(call, gfp);
+ if (!candidate)
+ return -ENOMEM;
+
+ _debug("search 2");
+ spin_lock(&local->client_bundles_lock);
+ pp = &local->client_bundles.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
+
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
+#undef cmp
+ if (diff < 0)
+ pp = &(*pp)->rb_left;
+ else if (diff > 0)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_bundle_free;
+ }
+
+ _debug("new bundle");
+ rb_link_node(&candidate->local_node, parent, pp);
+ rb_insert_color(&candidate->local_node, &local->client_bundles);
+ call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
+ spin_unlock(&local->client_bundles_lock);
+ _leave(" = B=%u [new]", call->bundle->debug_id);
+ return 0;
+
+found_bundle_free:
+ rxrpc_free_bundle(candidate);
+found_bundle:
+ call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
+ rxrpc_activate_bundle(bundle);
+ spin_unlock(&local->client_bundles_lock);
+ _leave(" = B=%u [found]", call->bundle->debug_id);
+ return 0;
+}
+
+/*
+ * Allocate a new connection and add it into a bundle.
+ */
+static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
+ unsigned int slot)
+{
+ struct rxrpc_connection *conn, *old;
+ unsigned int shift = slot * RXRPC_MAXCALLS;
+ unsigned int i;
+
+ old = bundle->conns[slot];
+ if (old) {
+ bundle->conns[slot] = NULL;
+ trace_rxrpc_client(old, -1, rxrpc_client_replace);
+ rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
+ }
+
+ conn = rxrpc_alloc_client_connection(bundle);
+ if (IS_ERR(conn)) {
+ bundle->alloc_error = PTR_ERR(conn);
+ return false;
+ }
+
+ rxrpc_activate_bundle(bundle);
+ conn->bundle_shift = shift;
+ bundle->conns[slot] = conn;
+ for (i = 0; i < RXRPC_MAXCALLS; i++)
+ set_bit(shift + i, &bundle->avail_chans);
+ return true;
+}
+
+/*
+ * Add a connection to a bundle if there are no usable connections or we have
+ * calls waiting for extra capacity.
+ */
+static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle)
+{
+ int slot = -1, i, usable;
+
+ _enter("");
+
+ bundle->alloc_error = 0;
+
+ /* See if there are any usable connections. */
+ usable = 0;
+ for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
+ if (rxrpc_may_reuse_conn(bundle->conns[i]))
+ usable++;
+ else if (slot == -1)
+ slot = i;
+ }
+
+ if (!usable && bundle->upgrade)
+ bundle->try_upgrade = true;
+
+ if (!usable)
+ goto alloc_conn;
+
+ if (!bundle->avail_chans &&
+ !bundle->try_upgrade &&
+ usable < ARRAY_SIZE(bundle->conns))
+ goto alloc_conn;
+
+ _leave("");
+ return usable;
+
+alloc_conn:
+ return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false;
+}
+
+/*
+ * Assign a channel to the call at the front of the queue and wake the call up.
+ * We don't increment the callNumber counter until this number has been exposed
+ * to the world.
+ */
+static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
+ unsigned int channel)
+{
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_bundle *bundle = conn->bundle;
+ struct rxrpc_call *call = list_entry(bundle->waiting_calls.next,
+ struct rxrpc_call, wait_link);
+ u32 call_id = chan->call_counter + 1;
+
+ _enter("C=%x,%u", conn->debug_id, channel);
+
+ list_del_init(&call->wait_link);
+
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
+
+ /* Cancel the final ACK on the previous call if it hasn't been sent yet
+ * as the DATA packet will implicitly ACK it.
+ */
+ clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+ clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
+
+ rxrpc_see_call(call, rxrpc_call_see_activate_client);
+ call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
+ call->cid = conn->proto.cid | channel;
+ call->call_id = call_id;
+ call->dest_srx.srx_service = conn->service_id;
+ call->cong_ssthresh = call->peer->cong_ssthresh;
+ if (call->cong_cwnd >= call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ else
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+
+ chan->call_id = call_id;
+ chan->call_debug_id = call->debug_id;
+ chan->call = call;
+
+ rxrpc_see_call(call, rxrpc_call_see_connected);
+ trace_rxrpc_connect_call(call);
+ call->tx_last_sent = ktime_get_real();
+ rxrpc_start_call_timer(call);
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST);
+ wake_up(&call->waitq);
+}
+
+/*
+ * Remove a connection from the idle list if it's on it.
+ */
+static void rxrpc_unidle_conn(struct rxrpc_connection *conn)
+{
+ if (!list_empty(&conn->cache_link)) {
+ list_del_init(&conn->cache_link);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
+ }
+}
+
+/*
+ * Assign channels and callNumbers to waiting calls.
+ */
+static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
+{
+ struct rxrpc_connection *conn;
+ unsigned long avail, mask;
+ unsigned int channel, slot;
+
+ trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
+
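+ /* Whilst we're probing for a service upgrade, only the very first
+ * channel may be activated so that a single call carries the probe.
+ */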
+ if (bundle->try_upgrade)
+ mask = 1;
+ else
+ mask = ULONG_MAX;
+
+ while (!list_empty(&bundle->waiting_calls)) {
+ avail = bundle->avail_chans & mask;
+ if (!avail)
+ break;
+ channel = __ffs(avail);
+ clear_bit(channel, &bundle->avail_chans);
+
+ slot = channel / RXRPC_MAXCALLS;
+ conn = bundle->conns[slot];
+ if (!conn)
+ break;
+
+ if (bundle->try_upgrade)
+ set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
+ rxrpc_unidle_conn(conn);
+
+ channel &= (RXRPC_MAXCALLS - 1);
+ conn->act_chans |= 1 << channel;
+ rxrpc_activate_one_channel(conn, channel);
+ }
+}
+
+/*
+ * Connect waiting channels (called from the I/O thread).
+ */
+void rxrpc_connect_client_calls(struct rxrpc_local *local)
+{
+ struct rxrpc_call *call;
+
+ while ((call = list_first_entry_or_null(&local->new_client_calls,
+ struct rxrpc_call, wait_link))
+ ) {
+ struct rxrpc_bundle *bundle = call->bundle;
+
+ spin_lock(&local->client_call_lock);
+ list_move_tail(&call->wait_link, &bundle->waiting_calls);
+ spin_unlock(&local->client_call_lock);
+
+ if (rxrpc_bundle_has_space(bundle))
+ rxrpc_activate_channels(bundle);
+ }
+}
+
+/*
+ * Note that a call, and thus a connection, is about to be exposed to the
+ * world.
+ */
+void rxrpc_expose_client_call(struct rxrpc_call *call)
+{
+ unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+
+ if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ /* Mark the call ID as being used. If the callNumber counter
+ * exceeds ~2 billion, we kill the connection after its
+ * outstanding calls have finished so that the counter doesn't
+ * wrap.
+ */
+ chan->call_counter++;
+ if (chan->call_counter >= INT_MAX)
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ trace_rxrpc_client(conn, channel, rxrpc_client_exposed);
+
+ spin_lock(&call->peer->lock);
+ hlist_add_head(&call->error_link, &call->peer->error_targets);
+ spin_unlock(&call->peer->lock);
+ }
+}
+
+/*
+ * Set the reap timer.
+ */
+static void rxrpc_set_client_reap_timer(struct rxrpc_local *local)
+{
+ if (!local->kill_all_client_conns) {
+ unsigned long now = jiffies;
+ unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
+
+ if (local->rxnet->live)
+ timer_reduce(&local->client_conn_reap_timer, reap_at);
+ }
+}
+
+/*
+ * Disconnect a client call.
+ */
+void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_channel *chan = NULL;
+ struct rxrpc_local *local = bundle->local;
+ unsigned int channel;
+ bool may_reuse;
+ u32 cid;
+
+ _enter("c=%x", call->debug_id);
+
+ /* Calls that have never actually been assigned a channel can simply be
+ * discarded.
+ */
+ conn = call->conn;
+ if (!conn) {
+ _debug("call is waiting");
+ ASSERTCMP(call->call_id, ==, 0);
+ ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
+ list_del_init(&call->wait_link);
+ return;
+ }
+
+ cid = call->cid;
+ channel = cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
+
+ if (WARN_ON(chan->call != call))
+ return;
+
+ may_reuse = rxrpc_may_reuse_conn(conn);
+
+ /* If a client call was exposed to the world, we save the result for
+ * retransmission.
+ *
+ * We use a barrier here so that the call number and abort code can be
+ * read without needing to take a lock.
+ *
+ * TODO: Make the incoming packet handler check this and handle
+ * terminal retransmission without requiring access to the call.
+ */
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ _debug("exposed %u,%u", call->call_id, call->abort_code);
+ __rxrpc_disconnect_call(conn, call);
+
+ if (test_and_clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) {
+ trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
+ bundle->try_upgrade = false;
+ if (may_reuse)
+ rxrpc_activate_channels(bundle);
+ }
+ }
+
+ /* See if we can pass the channel directly to another call. */
+ if (may_reuse && !list_empty(&bundle->waiting_calls)) {
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
+ rxrpc_activate_one_channel(conn, channel);
+ return;
+ }
+
+ /* Schedule the final ACK to be transmitted in a short while so that it
+ * can be skipped if we find a follow-on call. The first DATA packet
+ * of the follow on call will implicitly ACK this call.
+ */
+ if (call->completion == RXRPC_CALL_SUCCEEDED &&
+ test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ unsigned long final_ack_at = jiffies + 2;
+
+ WRITE_ONCE(chan->final_ack_at, final_ack_at);
+ smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */
+ set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags);
+ rxrpc_reduce_conn_timer(conn, final_ack_at);
+ }
+
+ /* Deactivate the channel. */
+ chan->call = NULL;
+ set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans);
+ conn->act_chans &= ~(1 << channel);
+
+ /* If no channels remain active, then put the connection on the idle
+ * list for a short while. Give it a ref to stop it going away if it
+ * becomes unbundled.
+ */
+ if (!conn->act_chans) {
+ trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
+ conn->idle_timestamp = jiffies;
+
+ rxrpc_get_connection(conn, rxrpc_conn_get_idle);
+ list_move_tail(&conn->cache_link, &local->idle_client_conns);
+
+ rxrpc_set_client_reap_timer(local);
+ }
+}
+
+/*
+ * Remove a connection from a bundle.
+ */
+static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_bundle *bundle = conn->bundle;
+ unsigned int bindex;
+ int i;
+
+ _enter("C=%x", conn->debug_id);
+
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn, true);
+
+ bindex = conn->bundle_shift / RXRPC_MAXCALLS;
+ if (bundle->conns[bindex] == conn) {
+ _debug("clear slot %u", bindex);
+ bundle->conns[bindex] = NULL;
+ for (i = 0; i < RXRPC_MAXCALLS; i++)
+ clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
+ rxrpc_put_client_connection_id(bundle->local, conn);
+ rxrpc_deactivate_bundle(bundle);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
+ }
+}
+
+/*
+ * Drop the active count on a bundle.
+ */
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+{
+ struct rxrpc_local *local;
+ bool need_put = false;
+
+ if (!bundle)
+ return;
+
+ local = bundle->local;
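+ /* When the last active user goes away, unlink the bundle from the
+ * endpoint's tree and drop the tree's ref. Exclusive bundles were never
+ * inserted into the tree, so there's nothing to unlink for them.
+ */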
+ if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
+ if (!bundle->exclusive) {
+ _debug("erase bundle");
+ rb_erase(&bundle->local_node, &local->client_bundles);
+ need_put = true;
+ }
+
+ spin_unlock(&local->client_bundles_lock);
+ if (need_put)
+ rxrpc_put_bundle(bundle, rxrpc_bundle_put_discard);
+ }
+}
+
+/*
+ * Clean up a dead client connection.
+ */
+void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_local *local = conn->local;
+ struct rxrpc_net *rxnet = local->rxnet;
+
+ _enter("C=%x", conn->debug_id);
+
+ trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
+ atomic_dec(&rxnet->nr_client_conns);
+
+ rxrpc_put_client_connection_id(local, conn);
+}
+
+/*
+ * Discard expired client connections from the idle list. Each conn in the
+ * idle list has been exposed and holds an extra ref because of that.
+ *
+ * This may be called from conn setup or from a work item, so it must be
+ * treated as re-entrant.
+ */
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local)
+{
+ struct rxrpc_connection *conn;
+ unsigned long expiry, conn_expires_at, now;
+ unsigned int nr_conns;
+
+ _enter("");
+
+ /* We keep an estimate of what the number of conns ought to be after
+ * we've discarded some so that we don't overdo the discarding.
+ */
+ nr_conns = atomic_read(&local->rxnet->nr_client_conns);
+
+next:
+ conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link);
+ if (!conn)
+ return;
+
+ if (!local->kill_all_client_conns) {
+ /* If the number of connections is over the reap limit, we
+ * expedite discard by reducing the expiry timeout. We must,
+ * however, have at least a short grace period to be able to do
+ * final-ACK or ABORT retransmission.
+ */
+ expiry = rxrpc_conn_idle_client_expiry;
+ if (nr_conns > rxrpc_reap_client_connections)
+ expiry = rxrpc_conn_idle_client_fast_expiry;
+ if (conn->local->service_closed)
+ expiry = rxrpc_closed_conn_expiry * HZ;
+
+ conn_expires_at = conn->idle_timestamp + expiry;
+
+ now = READ_ONCE(jiffies);
+ if (time_after(conn_expires_at, now))
+ goto not_yet_expired;
+ }
+
+ atomic_dec(&conn->active);
+ trace_rxrpc_client(conn, -1, rxrpc_client_discard);
+ list_del_init(&conn->cache_link);
+
+ rxrpc_unbundle_conn(conn);
+ /* Drop the ->cache_link ref */
+ rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
+
+ nr_conns--;
+ goto next;
+
+not_yet_expired:
+ /* The connection at the front of the queue hasn't yet expired, so
+ * re-arm the reap timer to fire at that point.
+ *
+ * We don't worry if the timer is already pending - reducing it only
+ * ever brings its expiry forward. We could cancel it first, but then
+ * things get messier.
+ */
+ */
+ _debug("not yet");
+ if (!local->kill_all_client_conns)
+ timer_reduce(&local->client_conn_reap_timer, conn_expires_at);
+
+ _leave("");
+}
+
+/*
+ * Clean up the client connections on a local endpoint.
+ */
+void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
+{
+ struct rxrpc_connection *conn;
+
+ _enter("");
+
+ local->kill_all_client_conns = true;
+
+ del_timer_sync(&local->client_conn_reap_timer);
+
+ while ((conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link))) {
+ list_del_init(&conn->cache_link);
+ atomic_dec(&conn->active);
+ trace_rxrpc_client(conn, -1, rxrpc_client_discard);
+ rxrpc_unbundle_conn(conn);
+ rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
+ }
+
+ _leave(" [culled]");
+}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
new file mode 100644
index 0000000000..95f4bc206b
--- /dev/null
+++ b/net/rxrpc/conn_event.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* connection-level event handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * Set the completion state on an aborted connection.
+ */
+static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err,
+ enum rxrpc_call_completion compl)
+{
+ bool aborted = false;
+
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ spin_lock(&conn->state_lock);
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ conn->abort_code = abort_code;
+ conn->error = err;
+ conn->completion = compl;
+ /* Order the abort info before the state change. */
+ smp_store_release(&conn->state, RXRPC_CONN_ABORTED);
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events);
+ aborted = true;
+ }
+ spin_unlock(&conn->state_lock);
+ }
+
+ return aborted;
+}
+
+/*
+ * Mark a socket buffer to indicate that the connection it's on should be aborted.
+ */
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (rxrpc_set_conn_aborted(conn, skb, abort_code, err,
+ RXRPC_CALL_LOCALLY_ABORTED)) {
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber,
+ sp->hdr.seq, abort_code, err);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort);
+ }
+ return -EPROTO;
+}
+
+/*
+ * Mark a connection as being remotely aborted.
+ */
+static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ RXRPC_CALL_REMOTELY_ABORTED);
+}
+
+/*
+ * Retransmit terminal ACK or ABORT of the previous call.
+ */
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ unsigned int channel)
+{
+ struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
+ struct rxrpc_channel *chan;
+ struct msghdr msg;
+ struct kvec iov[3];
+ struct {
+ struct rxrpc_wire_header whdr;
+ union {
+ __be32 abort_code;
+ struct rxrpc_ackpacket ack;
+ };
+ } __attribute__((packed)) pkt;
+ struct rxrpc_ackinfo ack_info;
+ size_t len;
+ int ret, ioc;
+ u32 serial, mtu, call_id, padding;
+
+ _enter("%d", conn->debug_id);
+
+ chan = &conn->channels[channel];
+
+ /* If the last call got moved on whilst we were waiting to run, just
+ * ignore this packet.
+ */
+ call_id = chan->last_call;
+ if (skb && call_id != sp->hdr.callNumber)
+ return;
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt.whdr);
+ iov[1].iov_base = &padding;
+ iov[1].iov_len = 3;
+ iov[2].iov_base = &ack_info;
+ iov[2].iov_len = sizeof(ack_info);
+
+ serial = atomic_inc_return(&conn->serial);
+
+ pkt.whdr.epoch = htonl(conn->proto.epoch);
+ pkt.whdr.cid = htonl(conn->proto.cid | channel);
+ pkt.whdr.callNumber = htonl(call_id);
+ pkt.whdr.serial = htonl(serial);
+ pkt.whdr.seq = 0;
+ pkt.whdr.type = chan->last_type;
+ pkt.whdr.flags = conn->out_clientflag;
+ pkt.whdr.userStatus = 0;
+ pkt.whdr.securityIndex = conn->security_ix;
+ pkt.whdr._rsvd = 0;
+ pkt.whdr.serviceId = htons(conn->service_id);
+
+ len = sizeof(pkt.whdr);
+ switch (chan->last_type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ pkt.abort_code = htonl(chan->last_abort);
+ iov[0].iov_len += sizeof(pkt.abort_code);
+ len += sizeof(pkt.abort_code);
+ ioc = 1;
+ break;
+
+ case RXRPC_PACKET_TYPE_ACK:
+ mtu = conn->peer->if_mtu;
+ mtu -= conn->peer->hdrsize;
+ pkt.ack.bufferSpace = 0;
+ pkt.ack.maxSkew = htons(skb ? skb->priority : 0);
+ pkt.ack.firstPacket = htonl(chan->last_seq + 1);
+ pkt.ack.previousPacket = htonl(chan->last_seq);
+ pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0);
+ pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE;
+ pkt.ack.nAcks = 0;
+ ack_info.rxMTU = htonl(rxrpc_rx_mtu);
+ ack_info.maxMTU = htonl(mtu);
+ ack_info.rwind = htonl(rxrpc_rx_window_size);
+ ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ pkt.whdr.flags |= RXRPC_SLOW_START_OK;
+ padding = 0;
+ iov[0].iov_len += sizeof(pkt.ack);
+ len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
+ ioc = 3;
+
+ trace_rxrpc_tx_ack(chan->call_debug_id, serial,
+ ntohl(pkt.ack.firstPacket),
+ ntohl(pkt.ack.serial),
+ pkt.ack.reason, 0, rxrpc_rx_window_size);
+ break;
+
+ default:
+ return;
+ }
+
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
+ if (ret < 0)
+ trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
+ rxrpc_tx_point_call_final_resend);
+ else
+ trace_rxrpc_tx_packet(chan->call_debug_id, &pkt.whdr,
+ rxrpc_tx_point_call_final_resend);
+
+ _leave("");
+}
+
+/*
+ * pass a connection-level abort onto all calls on that connection
+ */
+static void rxrpc_abort_calls(struct rxrpc_connection *conn)
+{
+ struct rxrpc_call *call;
+ int i;
+
+ _enter("{%d},%x", conn->debug_id, conn->abort_code);
+
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ call = conn->channels[i].call;
+ if (call)
+ rxrpc_set_call_completion(call,
+ conn->completion,
+ conn->abort_code,
+ conn->error);
+ }
+
+ _leave("");
+}
+
+/*
+ * mark a call as being on a now-secured channel
+ * - must be called with BH's disabled.
+ */
+static void rxrpc_call_is_secure(struct rxrpc_call *call)
+{
+ if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ rxrpc_notify_socket(call);
+ }
+}
+
+/*
+ * connection-level Rx packet processor
+ */
+static int rxrpc_process_event(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ int ret;
+
+ if (conn->state == RXRPC_CONN_ABORTED)
+ return -ECONNABORTED;
+
+ _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ return conn->security->respond_to_challenge(conn, skb);
+
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ ret = conn->security->verify_response(conn, skb);
+ if (ret < 0)
+ return ret;
+
+ ret = conn->security->init_connection_security(
+ conn, conn->key->payload.data[0]);
+ if (ret < 0)
+ return ret;
+
+ spin_lock(&conn->state_lock);
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING)
+ conn->state = RXRPC_CONN_SERVICE;
+ spin_unlock(&conn->state_lock);
+
+ if (conn->state == RXRPC_CONN_SERVICE) {
+ /* Offload call state flipping to the I/O thread. As
+ * we've already received the packet, put it on the
+ * front of the queue.
+ */
+ skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+ skb_queue_head(&conn->local->rx_queue, skb);
+ rxrpc_wake_up_io_thread(conn->local);
+ }
+ return 0;
+
+ default:
+ WARN_ON_ONCE(1);
+ return -EPROTO;
+ }
+}
+
+/*
+ * set up security and issue a challenge
+ */
+static void rxrpc_secure_connection(struct rxrpc_connection *conn)
+{
+ if (conn->security->issue_challenge(conn) < 0)
+ rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM,
+ rxrpc_abort_nomem);
+}
+
+/*
+ * Process delayed final ACKs that we haven't subsumed into a subsequent call.
+ */
+void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force)
+{
+ unsigned long j = jiffies, next_j;
+ unsigned int channel;
+ bool set;
+
+again:
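+ /* Start with the furthest-off expiry time representable; any pending
+ * final ACK will pull it earlier.
+ */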
+ next_j = j + LONG_MAX;
+ set = false;
+ for (channel = 0; channel < RXRPC_MAXCALLS; channel++) {
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ unsigned long ack_at;
+
+ if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
+ continue;
+
+ ack_at = chan->final_ack_at;
+ if (time_before(j, ack_at) && !force) {
+ if (time_before(ack_at, next_j)) {
+ next_j = ack_at;
+ set = true;
+ }
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel,
+ &conn->flags))
+ rxrpc_conn_retransmit_call(conn, NULL, channel);
+ }
+
+ j = jiffies;
+ if (time_before_eq(next_j, j))
+ goto again;
+ if (set)
+ rxrpc_reduce_conn_timer(conn, next_j);
+}
+
+/*
+ * connection-level event processor
+ */
+static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
+{
+ struct sk_buff *skb;
+ int ret;
+
+ if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
+ rxrpc_secure_connection(conn);
+
+ /* go through the conn-level event packets, releasing the ref on this
+ * connection that each one has when we've finished with it */
+ while ((skb = skb_dequeue(&conn->rx_queue))) {
+ rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
+ ret = rxrpc_process_event(conn, skb);
+ switch (ret) {
+ case -ENOMEM:
+ case -EAGAIN:
+ skb_queue_head(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work);
+ break;
+ default:
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
+ break;
+ }
+ }
+}
+
+void rxrpc_process_connection(struct work_struct *work)
+{
+ struct rxrpc_connection *conn =
+ container_of(work, struct rxrpc_connection, processor);
+
+ rxrpc_see_connection(conn, rxrpc_conn_see_work);
+
+ if (__rxrpc_use_local(conn->local, rxrpc_local_use_conn_work)) {
+ rxrpc_do_process_connection(conn);
+ rxrpc_unuse_local(conn->local, rxrpc_local_unuse_conn_work);
+ }
+}
+
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses, some aborts and call terminal packet
+ * retransmission.
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", conn, skb);
+
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_work);
+ skb_queue_tail(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_rx_work);
+}
+
+/*
+ * Input a connection-level packet.
+ */
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_BUSY:
+ /* Just ignore BUSY packets for now. */
+ return true;
+
+ case RXRPC_PACKET_TYPE_ABORT:
+ if (rxrpc_is_conn_aborted(conn))
+ return true;
+ rxrpc_input_conn_abort(conn, skb);
+ rxrpc_abort_calls(conn);
+ return true;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_is_conn_aborted(conn)) {
+ if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
+ rxrpc_send_conn_abort(conn);
+ return true;
+ }
+ rxrpc_post_packet_to_conn(conn, skb);
+ return true;
+
+ default:
+ WARN_ON_ONCE(1);
+ return true;
+ }
+}
+
+/*
+ * Input a connection event.
+ */
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ unsigned int loop;
+
+ if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+ rxrpc_abort_calls(conn);
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ if (conn->state != RXRPC_CONN_SERVICE)
+ break;
+
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop].call);
+ break;
+ }
+
+ /* Process delayed ACKs whose time has come. */
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn, false);
+}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
new file mode 100644
index 0000000000..df8a271948
--- /dev/null
+++ b/net/rxrpc/conn_object.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC virtual connection handler, common bits.
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include "ar-internal.h"
+
+/*
+ * Time till a connection expires after last use (in seconds).
+ */
+unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60;
+unsigned int __read_mostly rxrpc_closed_conn_expiry = 10;
+
+static void rxrpc_clean_up_connection(struct work_struct *work);
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+ unsigned long reap_at);
+
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
+{
+ struct rxrpc_local *local = conn->local;
+ bool busy;
+
+ if (WARN_ON_ONCE(!local))
+ return;
+
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&conn->attend_link);
+ if (!busy) {
+ rxrpc_get_connection(conn, why);
+ list_add_tail(&conn->attend_link, &local->conn_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ rxrpc_wake_up_io_thread(local);
+}
+
+static void rxrpc_connection_timer(struct timer_list *timer)
+{
+ struct rxrpc_connection *conn =
+ container_of(timer, struct rxrpc_connection, timer);
+
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer);
+}
+
+/*
+ * allocate a new connection
+ */
+struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn;
+
+ _enter("");
+
+ conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
+ if (conn) {
+ INIT_LIST_HEAD(&conn->cache_link);
+ timer_setup(&conn->timer, &rxrpc_connection_timer, 0);
+ INIT_WORK(&conn->processor, rxrpc_process_connection);
+ INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
+ INIT_LIST_HEAD(&conn->proc_link);
+ INIT_LIST_HEAD(&conn->link);
+ mutex_init(&conn->security_lock);
+ skb_queue_head_init(&conn->rx_queue);
+ conn->rxnet = rxnet;
+ conn->security = &rxrpc_no_security;
+ spin_lock_init(&conn->state_lock);
+ conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ conn->idle_timestamp = jiffies;
+ }
+
+ _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
+ return conn;
+}
+
+/*
+ * Look up a connection in the cache by protocol parameters.
+ *
+ * If successful, a pointer to the connection is returned, but no ref is taken.
+ * NULL is returned if there is no match.
+ *
+ * When searching for a service call, if we find a peer but no connection, we
+ * return that through *_peer in case we need to create a new service call.
+ *
+ * The caller must be holding the RCU read lock.
+ */
+struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_peer *peer;
+
+ _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
+
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
+ */
+ conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
+ if (!conn || refcount_read(&conn->ref) == 0) {
+ _debug("no conn");
+ goto not_found;
+ }
+
+ if (conn->proto.epoch != sp->hdr.epoch ||
+ conn->local != local)
+ goto not_found;
+
+ peer = conn->peer;
+ switch (srx->transport.family) {
+ case AF_INET:
+ if (peer->srx.transport.sin.sin_port !=
+ srx->transport.sin.sin_port ||
+ peer->srx.transport.sin.sin_addr.s_addr !=
+ srx->transport.sin.sin_addr.s_addr)
+ goto not_found;
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ if (peer->srx.transport.sin6.sin6_port !=
+ srx->transport.sin6.sin6_port ||
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr)) != 0)
+ goto not_found;
+ break;
+#endif
+ default:
+ BUG();
+ }
+
+ _leave(" = %p", conn);
+ return conn;
+
+not_found:
+ _leave(" = NULL");
+ return NULL;
+}
+
+/*
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates. The caller must hold the channel_lock and must release the
+ * call's ref on the connection.
+ */
+void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
+{
+ struct rxrpc_channel *chan =
+ &conn->channels[call->cid & RXRPC_CHANNELMASK];
+
+ _enter("%d,%x", conn->debug_id, call->cid);
+
+ if (chan->call == call) {
+ /* Save the result of the call so that we can repeat it if necessary
+ * through the channel, whilst disposing of the actual call record.
+ */
+ trace_rxrpc_disconnect_call(call);
+ switch (call->completion) {
+ case RXRPC_CALL_SUCCEEDED:
+ chan->last_seq = call->rx_highest_seq;
+ chan->last_type = RXRPC_PACKET_TYPE_ACK;
+ break;
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ chan->last_abort = call->abort_code;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ break;
+ default:
+ chan->last_abort = RX_CALL_DEAD;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ break;
+ }
+
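+		/* last_call keeps the ID of the call we've just detached so that
+		 * the terminal ACK or ABORT saved above can be regenerated if the
+		 * peer retransmits packets for it.
+		 */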
+ chan->last_call = chan->call_id;
+ chan->call_id = chan->call_counter;
+ chan->call = NULL;
+ }
+
+ _leave("");
+}
+
+/*
+ * Disconnect a call and clear any channel it occupies when that call
+ * terminates.
+ */
+void rxrpc_disconnect_call(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn = call->conn;
+
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ rxrpc_see_call(call, rxrpc_call_see_disconnected);
+
+ call->peer->cong_ssthresh = call->cong_ssthresh;
+
+ if (!hlist_unhashed(&call->error_link)) {
+ spin_lock(&call->peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&call->peer->lock);
+ }
+
+ if (rxrpc_is_client_call(call)) {
+ rxrpc_disconnect_client_call(call->bundle, call);
+ } else {
+ __rxrpc_disconnect_call(conn, call);
+ conn->idle_timestamp = jiffies;
+ if (atomic_dec_and_test(&conn->active))
+ rxrpc_set_service_reap_timer(conn->rxnet,
+ jiffies + rxrpc_connection_expiry * HZ);
+ }
+
+ rxrpc_put_call(call, rxrpc_call_put_io_thread);
+}
+
+/*
+ * Queue a connection's work processor, getting a ref to pass to the work
+ * queue.
+ */
+void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
+{
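+	/* The active count is driven negative when the connection is taken out
+	 * of service, so don't requeue the processor in that case.
+	 */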
+ if (atomic_read(&conn->active) >= 0 &&
+ rxrpc_queue_work(&conn->processor))
+ rxrpc_see_connection(conn, why);
+}
+
+/*
+ * Note the re-emergence of a connection.
+ */
+void rxrpc_see_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
+{
+ if (conn) {
+ int r = refcount_read(&conn->ref);
+
+ trace_rxrpc_conn(conn->debug_id, r, why);
+ }
+}
+
+/*
+ * Get a ref on a connection.
+ */
+struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
+{
+ int r;
+
+ __refcount_inc(&conn->ref, &r);
+ trace_rxrpc_conn(conn->debug_id, r + 1, why);
+ return conn;
+}
+
+/*
+ * Try to get a ref on a connection.
+ */
+struct rxrpc_connection *
+rxrpc_get_connection_maybe(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
+{
+ int r;
+
+ if (conn) {
+ if (__refcount_inc_not_zero(&conn->ref, &r))
+ trace_rxrpc_conn(conn->debug_id, r + 1, why);
+ else
+ conn = NULL;
+ }
+ return conn;
+}
+
+/*
+ * Set the service connection reap timer.
+ */
+static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
+ unsigned long reap_at)
+{
+ if (rxnet->live)
+ timer_reduce(&rxnet->service_conn_reap_timer, reap_at);
+}
+
+/*
+ * destroy a virtual connection
+ */
+static void rxrpc_rcu_free_connection(struct rcu_head *rcu)
+{
+ struct rxrpc_connection *conn =
+ container_of(rcu, struct rxrpc_connection, rcu);
+ struct rxrpc_net *rxnet = conn->rxnet;
+
+ _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref));
+
+ trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
+ rxrpc_conn_free);
+ kfree(conn);
+
+ if (atomic_dec_and_test(&rxnet->nr_conns))
+ wake_up_var(&rxnet->nr_conns);
+}
+
+/*
+ * Clean up a dead connection.
+ */
+static void rxrpc_clean_up_connection(struct work_struct *work)
+{
+ struct rxrpc_connection *conn =
+ container_of(work, struct rxrpc_connection, destructor);
+ struct rxrpc_net *rxnet = conn->rxnet;
+
+ ASSERT(!conn->channels[0].call &&
+ !conn->channels[1].call &&
+ !conn->channels[2].call &&
+ !conn->channels[3].call);
+ ASSERT(list_empty(&conn->cache_link));
+
+ del_timer_sync(&conn->timer);
+ cancel_work_sync(&conn->processor); /* Processing may restart the timer */
+ del_timer_sync(&conn->timer);
+
+ write_lock(&rxnet->conn_lock);
+ list_del_init(&conn->proc_link);
+ write_unlock(&rxnet->conn_lock);
+
+ rxrpc_purge_queue(&conn->rx_queue);
+
+ rxrpc_kill_client_conn(conn);
+
+ conn->security->clear(conn);
+ key_put(conn->key);
+ rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn);
+ rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn);
+ rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn);
+
+ /* Drain the Rx queue. Note that even though we've unpublished, an
+ * incoming packet could still be being added to our Rx queue, so we
+ * will need to drain it again in the RCU cleanup handler.
+ */
+ rxrpc_purge_queue(&conn->rx_queue);
+
+ call_rcu(&conn->rcu, rxrpc_rcu_free_connection);
+}
+
+/*
+ * Drop a ref on a connection.
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn,
+ enum rxrpc_conn_trace why)
+{
+ unsigned int debug_id;
+ bool dead;
+ int r;
+
+ if (!conn)
+ return;
+
+ debug_id = conn->debug_id;
+ dead = __refcount_dec_and_test(&conn->ref, &r);
+ trace_rxrpc_conn(debug_id, r - 1, why);
+ if (dead) {
+ del_timer(&conn->timer);
+ cancel_work(&conn->processor);
+
+ if (in_softirq() || work_busy(&conn->processor) ||
+ timer_pending(&conn->timer))
+ /* Can't use the rxrpc workqueue as we need to cancel/flush
+ * something that may be running/waiting there.
+ */
+ schedule_work(&conn->destructor);
+ else
+ rxrpc_clean_up_connection(&conn->destructor);
+ }
+}
+
+/*
+ * reap dead service connections
+ */
+void rxrpc_service_connection_reaper(struct work_struct *work)
+{
+ struct rxrpc_connection *conn, *_p;
+ struct rxrpc_net *rxnet =
+ container_of(work, struct rxrpc_net, service_conn_reaper);
+ unsigned long expire_at, earliest, idle_timestamp, now;
+ int active;
+
+ LIST_HEAD(graveyard);
+
+ _enter("");
+
+ now = jiffies;
+ earliest = now + MAX_JIFFY_OFFSET;
+
+ write_lock(&rxnet->conn_lock);
+ list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
+ ASSERTCMP(atomic_read(&conn->active), >=, 0);
+ if (likely(atomic_read(&conn->active) > 0))
+ continue;
+ if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
+ continue;
+
+ if (rxnet->live && !conn->local->dead) {
+ idle_timestamp = READ_ONCE(conn->idle_timestamp);
+ expire_at = idle_timestamp + rxrpc_connection_expiry * HZ;
+ if (conn->local->service_closed)
+ expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ;
+
+ _debug("reap CONN %d { a=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->active),
+ (long)expire_at - (long)now);
+
+ if (time_before(now, expire_at)) {
+ if (time_before(expire_at, earliest))
+ earliest = expire_at;
+ continue;
+ }
+ }
+
+ /* The activity count sits at 0 whilst the conn is unused on
+ * the list; we reduce that to -1 to make the conn unavailable.
+ */
+ active = 0;
+ if (!atomic_try_cmpxchg(&conn->active, &active, -1))
+ continue;
+ rxrpc_see_connection(conn, rxrpc_conn_see_reap_service);
+
+ if (rxrpc_conn_is_client(conn))
+ BUG();
+ else
+ rxrpc_unpublish_service_conn(conn);
+
+ list_move_tail(&conn->link, &graveyard);
+ }
+ write_unlock(&rxnet->conn_lock);
+
+ if (earliest != now + MAX_JIFFY_OFFSET) {
+ _debug("reschedule reaper %ld", (long)earliest - (long)now);
+ ASSERT(time_after(earliest, now));
+ rxrpc_set_service_reap_timer(rxnet, earliest);
+ }
+
+ while (!list_empty(&graveyard)) {
+ conn = list_entry(graveyard.next, struct rxrpc_connection,
+ link);
+ list_del_init(&conn->link);
+
+ ASSERTCMP(atomic_read(&conn->active), ==, -1);
+ rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped);
+ }
+
+ _leave("");
+}
+
+/*
+ * preemptively destroy all the service connection records rather than
+ * waiting for them to time out
+ */
+void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
+{
+ struct rxrpc_connection *conn, *_p;
+ bool leak = false;
+
+ _enter("");
+
+ atomic_dec(&rxnet->nr_conns);
+
+ del_timer_sync(&rxnet->service_conn_reap_timer);
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
+ flush_workqueue(rxrpc_workqueue);
+
+ write_lock(&rxnet->conn_lock);
+ list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) {
+ pr_err("AF_RXRPC: Leaked conn %p {%d}\n",
+ conn, refcount_read(&conn->ref));
+ leak = true;
+ }
+ write_unlock(&rxnet->conn_lock);
+ BUG_ON(leak);
+
+ ASSERT(list_empty(&rxnet->conn_proc_list));
+
+ /* We need to wait for the connections to be destroyed by RCU as they
+ * pin things that we still need to get rid of.
+ */
+ wait_var_event(&rxnet->nr_conns, !atomic_read(&rxnet->nr_conns));
+ _leave("");
+}
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
new file mode 100644
index 0000000000..89ac05a711
--- /dev/null
+++ b/net/rxrpc/conn_service.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Service connection management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+/*
+ * Find a service connection under RCU conditions.
+ *
+ * We could use a hash table, but that is subject to bucket stuffing by an
+ * attacker as the client gets to pick the epoch and cid values and would know
+ * the hash function. So, instead, we use a hash table for the peer and from
+ * that an rbtree to find the service connection. Under ordinary circumstances
+ * it might be slower than a large hash table, but it is at least limited in
+ * depth.
+ */
+struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
+ struct sk_buff *skb)
+{
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_conn_proto k;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rb_node *p;
+ unsigned int seq = 0;
+
+ k.epoch = sp->hdr.epoch;
+ k.cid = sp->hdr.cid & RXRPC_CIDMASK;
+
+ do {
+ /* Unfortunately, rbtree walking doesn't give reliable results
+ * under just the RCU read lock, so we have to check for
+ * changes.
+ */
+ read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
+
+ p = rcu_dereference_raw(peer->service_conns.rb_node);
+ while (p) {
+ conn = rb_entry(p, struct rxrpc_connection, service_node);
+
+ if (conn->proto.index_key < k.index_key)
+ p = rcu_dereference_raw(p->rb_left);
+ else if (conn->proto.index_key > k.index_key)
+ p = rcu_dereference_raw(p->rb_right);
+ else
+ break;
+ conn = NULL;
+ }
+ } while (need_seqretry(&peer->service_conn_lock, seq));
+
+ done_seqretry(&peer->service_conn_lock, seq);
+ _leave(" = %d", conn ? conn->debug_id : -1);
+ return conn;
+}
+
+/*
+ * Insert a service connection into a peer's tree, thereby making it a target
+ * for incoming packets.
+ */
+static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn)
+{
+ struct rxrpc_connection *cursor = NULL;
+ struct rxrpc_conn_proto k = conn->proto;
+ struct rb_node **pp, *parent;
+
+ write_seqlock(&peer->service_conn_lock);
+
+ pp = &peer->service_conns.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ cursor = rb_entry(parent,
+ struct rxrpc_connection, service_node);
+
+ if (cursor->proto.index_key < k.index_key)
+ pp = &(*pp)->rb_left;
+ else if (cursor->proto.index_key > k.index_key)
+ pp = &(*pp)->rb_right;
+ else
+ goto found_extant_conn;
+ }
+
+ rb_link_node_rcu(&conn->service_node, parent, pp);
+ rb_insert_color(&conn->service_node, &peer->service_conns);
+conn_published:
+ set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
+ write_sequnlock(&peer->service_conn_lock);
+ _leave(" = %d [new]", conn->debug_id);
+ return;
+
+found_extant_conn:
+ if (refcount_read(&cursor->ref) == 0)
+ goto replace_old_connection;
+ write_sequnlock(&peer->service_conn_lock);
+ /* We should not be able to get here. rxrpc_incoming_connection() is
+ * called in a non-reentrant context, so there can't be a race to
+ * insert a new connection.
+ */
+ BUG();
+
+replace_old_connection:
+ /* The old connection is from an outdated epoch. */
+ _debug("replace conn");
+ rb_replace_node_rcu(&cursor->service_node,
+ &conn->service_node,
+ &peer->service_conns);
+ clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags);
+ goto conn_published;
+}
+
+/*
+ * Preallocate a service connection. The connection is placed on the proc and
+ * reap lists so that we don't have to get the lock from BH context.
+ */
+struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet,
+ gfp_t gfp)
+{
+ struct rxrpc_connection *conn = rxrpc_alloc_connection(rxnet, gfp);
+
+ if (conn) {
+ /* We maintain an extra ref on the connection whilst it is on
+ * the rxrpc_connections list.
+ */
+ conn->state = RXRPC_CONN_SERVICE_PREALLOC;
+ refcount_set(&conn->ref, 2);
+
+ atomic_inc(&rxnet->nr_conns);
+ write_lock(&rxnet->conn_lock);
+ list_add_tail(&conn->link, &rxnet->service_conns);
+ list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
+ write_unlock(&rxnet->conn_lock);
+
+ rxrpc_see_connection(conn, rxrpc_conn_new_service);
+ }
+
+ return conn;
+}
+
+/*
+ * Set up an incoming connection. This is called in BH context with the RCU
+ * read lock held.
+ */
+void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
+ struct rxrpc_connection *conn,
+ const struct rxrpc_security *sec,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ _enter("");
+
+ conn->proto.epoch = sp->hdr.epoch;
+ conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
+ conn->orig_service_id = sp->hdr.serviceId;
+ conn->service_id = sp->hdr.serviceId;
+ conn->security_ix = sp->hdr.securityIndex;
+ conn->out_clientflag = 0;
+ conn->security = sec;
+ if (conn->security_ix)
+ conn->state = RXRPC_CONN_SERVICE_UNSECURED;
+ else
+ conn->state = RXRPC_CONN_SERVICE;
+
+ /* See if we should upgrade the service. This can only happen on the
+ * first packet on a new connection. Once done, it applies to all
+ * subsequent calls on that connection.
+ */
+ if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE &&
+ conn->service_id == rx->service_upgrade.from)
+ conn->service_id = rx->service_upgrade.to;
+
+ atomic_set(&conn->active, 1);
+
+ /* Make the connection a target for incoming packets. */
+ rxrpc_publish_service_conn(conn->peer, conn);
+}
+
+/*
+ * Remove the service connection from the peer's tree, thereby removing it as a
+ * target for incoming packets.
+ */
+void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn)
+{
+ struct rxrpc_peer *peer = conn->peer;
+
+ write_seqlock(&peer->service_conn_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags))
+ rb_erase(&conn->service_node, &peer->service_conns);
+ write_sequnlock(&peer->service_conn_lock);
+}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
new file mode 100644
index 0000000000..92495e73b8
--- /dev/null
+++ b/net/rxrpc/input.c
@@ -0,0 +1,1027 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Processing of received RxRPC packets
+ *
+ * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ar-internal.h"
+
+static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
+ enum rxrpc_abort_reason why)
+{
+ rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why);
+}
+
+/*
+ * Do TCP-style congestion management [RFC 5681].
+ */
+static void rxrpc_congestion_management(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_serial_t acked_serial)
+{
+ enum rxrpc_congest_change change = rxrpc_cong_no_change;
+ unsigned int cumulative_acks = call->cong_cumul_acks;
+ unsigned int cwnd = call->cong_cwnd;
+ bool resend = false;
+
+ summary->flight_size =
+ (call->tx_top - call->acks_hard_ack) - summary->nr_acks;
+
+ if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
+ summary->retrans_timeo = true;
+ call->cong_ssthresh = max_t(unsigned int,
+ summary->flight_size / 2, 2);
+ cwnd = 1;
+ if (cwnd >= call->cong_ssthresh &&
+ call->cong_mode == RXRPC_CALL_SLOW_START) {
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ call->cong_tstamp = skb->tstamp;
+ cumulative_acks = 0;
+ }
+ }
+
+ cumulative_acks += summary->nr_new_acks;
+ cumulative_acks += summary->nr_rot_new_acks;
+ if (cumulative_acks > 255)
+ cumulative_acks = 255;
+
+ summary->mode = call->cong_mode;
+ summary->cwnd = call->cong_cwnd;
+ summary->ssthresh = call->cong_ssthresh;
+ summary->cumulative_acks = cumulative_acks;
+ summary->dup_acks = call->cong_dup_acks;
+
+ switch (call->cong_mode) {
+ case RXRPC_CALL_SLOW_START:
+ if (summary->saw_nacks)
+ goto packet_loss_detected;
+ if (summary->cumulative_acks > 0)
+ cwnd += 1;
+ if (cwnd >= call->cong_ssthresh) {
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ call->cong_tstamp = skb->tstamp;
+ }
+ goto out;
+
+ case RXRPC_CALL_CONGEST_AVOIDANCE:
+ if (summary->saw_nacks)
+ goto packet_loss_detected;
+
+ /* We analyse the number of packets that get ACK'd per RTT
+ * period and increase the window if we managed to fill it.
+ */
+ if (call->peer->rtt_count == 0)
+ goto out;
+ if (ktime_before(skb->tstamp,
+ ktime_add_us(call->cong_tstamp,
+ call->peer->srtt_us >> 3)))
+ goto out_no_clear_ca;
+ change = rxrpc_cong_rtt_window_end;
+ call->cong_tstamp = skb->tstamp;
+ if (cumulative_acks >= cwnd)
+ cwnd++;
+ goto out;
+
+ case RXRPC_CALL_PACKET_LOSS:
+ if (!summary->saw_nacks)
+ goto resume_normality;
+
+ if (summary->new_low_nack) {
+ change = rxrpc_cong_new_low_nack;
+ call->cong_dup_acks = 1;
+ if (call->cong_extra > 1)
+ call->cong_extra = 1;
+ goto send_extra_data;
+ }
+
+ call->cong_dup_acks++;
+ if (call->cong_dup_acks < 3)
+ goto send_extra_data;
+
+ change = rxrpc_cong_begin_retransmission;
+ call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
+ call->cong_ssthresh = max_t(unsigned int,
+ summary->flight_size / 2, 2);
+ cwnd = call->cong_ssthresh + 3;
+ call->cong_extra = 0;
+ call->cong_dup_acks = 0;
+ resend = true;
+ goto out;
+
+ case RXRPC_CALL_FAST_RETRANSMIT:
+ if (!summary->new_low_nack) {
+ if (summary->nr_new_acks == 0)
+ cwnd += 1;
+ call->cong_dup_acks++;
+ if (call->cong_dup_acks == 2) {
+ change = rxrpc_cong_retransmit_again;
+ call->cong_dup_acks = 0;
+ resend = true;
+ }
+ } else {
+ change = rxrpc_cong_progress;
+ cwnd = call->cong_ssthresh;
+ if (!summary->saw_nacks)
+ goto resume_normality;
+ }
+ goto out;
+
+ default:
+ BUG();
+ goto out;
+ }
+
+resume_normality:
+ change = rxrpc_cong_cleared_nacks;
+ call->cong_dup_acks = 0;
+ call->cong_extra = 0;
+ call->cong_tstamp = skb->tstamp;
+ if (cwnd < call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+ else
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+out:
+ cumulative_acks = 0;
+out_no_clear_ca:
+ if (cwnd >= RXRPC_TX_MAX_WINDOW)
+ cwnd = RXRPC_TX_MAX_WINDOW;
+ call->cong_cwnd = cwnd;
+ call->cong_cumul_acks = cumulative_acks;
+ trace_rxrpc_congest(call, summary, acked_serial, change);
+ if (resend)
+ rxrpc_resend(call, skb);
+ return;
+
+packet_loss_detected:
+ change = rxrpc_cong_saw_nack;
+ call->cong_mode = RXRPC_CALL_PACKET_LOSS;
+ call->cong_dup_acks = 0;
+ goto send_extra_data;
+
+send_extra_data:
+ /* Send some previously unsent DATA if we have some to advance the ACK
+ * state.
+ */
+ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ||
+ summary->nr_acks != call->tx_top - call->acks_hard_ack) {
+ call->cong_extra++;
+ wake_up(&call->waitq);
+ }
+ goto out_no_clear_ca;
+}
+
+/*
+ * Degrade the congestion window if we haven't transmitted a packet for >1RTT.
+ */
+void rxrpc_congestion_degrade(struct rxrpc_call *call)
+{
+ ktime_t rtt, now;
+
+ if (call->cong_mode != RXRPC_CALL_SLOW_START &&
+ call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
+ return;
+ if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ return;
+
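+	/* srtt_us is maintained scaled up by 8, so scaling it back down and
+	 * converting microseconds to nanoseconds collapses to a multiply by 125.
+	 */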
+ rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
+ now = ktime_get_real();
+ if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now))
+ return;
+
+ trace_rxrpc_reset_cwnd(call, now);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset);
+ call->tx_last_sent = now;
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+ call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh,
+ call->cong_cwnd * 3 / 4);
+ call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND);
+}
+
+/*
+ * Apply a hard ACK by advancing the Tx window.
+ */
+static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
+ struct rxrpc_ack_summary *summary)
+{
+ struct rxrpc_txbuf *txb;
+ bool rot_last = false;
+
+ list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
+ if (before_eq(txb->seq, call->acks_hard_ack))
+ continue;
+ summary->nr_rot_new_acks++;
+ if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
+ set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+ rot_last = true;
+ }
+ if (txb->seq == to)
+ break;
+ }
+
+ if (rot_last)
+ set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags);
+
+ _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last);
+
+ if (call->acks_lowest_nak == call->acks_hard_ack) {
+ call->acks_lowest_nak = to;
+ } else if (after(to, call->acks_lowest_nak)) {
+ summary->new_low_nack = true;
+ call->acks_lowest_nak = to;
+ }
+
+ smp_store_release(&call->acks_hard_ack, to);
+
+ trace_rxrpc_txqueue(call, (rot_last ?
+ rxrpc_txqueue_rotate_last :
+ rxrpc_txqueue_rotate));
+ wake_up(&call->waitq);
+ return rot_last;
+}
+
+/*
+ * End the transmission phase of a call.
+ *
+ * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
+ * or a final ACK packet.
+ */
+static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+ enum rxrpc_abort_reason abort_why)
+{
+ ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ if (reply_begun) {
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
+ break;
+ }
+
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
+ break;
+
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ rxrpc_call_completed(call);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
+ break;
+
+ default:
+ kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]);
+ rxrpc_proto_abort(call, call->tx_top, abort_why);
+ break;
+ }
+}
+
+/*
+ * Begin the reply reception phase of a call.
+ */
+static bool rxrpc_receiving_reply(struct rxrpc_call *call)
+{
+ struct rxrpc_ack_summary summary = { 0 };
+ unsigned long now, timo;
+ rxrpc_seq_t top = READ_ONCE(call->tx_top);
+
+ if (call->ackr_reason) {
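+		/* Push any pending delayed ACK out to the far future, effectively
+		 * cancelling it now that the reply is beginning.
+		 */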
+ now = jiffies;
+ timo = now + MAX_JIFFY_OFFSET;
+
+ WRITE_ONCE(call->delay_ack_at, timo);
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
+ }
+
+ if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+ if (!rxrpc_rotate_tx_window(call, top, &summary)) {
+ rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
+ return false;
+ }
+ }
+
+ rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply);
+ return true;
+}
+
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
+{
+ rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
+
+ _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]);
+
+ trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
+ rxrpc_call_completed(call);
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
+ call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
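+	/* Add the connection to the local endpoint's attention queue, taking a
+	 * ref only if it wasn't already queued, then kick the I/O thread.
+	 */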
+ rxrpc_seq_t window, rxrpc_seq_t wtop)
+{
+ call->ackr_window = window;
+ call->ackr_wtop = wtop;
+}
+
+/*
+ * Push a DATA packet onto the Rx queue.
+ */
+static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
+ rxrpc_seq_t window, rxrpc_seq_t wtop,
+ enum rxrpc_receive_trace why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
+
+ __skb_queue_tail(&call->recvmsg_queue, skb);
+ rxrpc_input_update_ack_window(call, window, wtop);
+ trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
+ if (last)
+ rxrpc_end_rx_phase(call, sp->hdr.serial);
+}
+
+/*
+ * Process a DATA packet.
+ */
+static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
+ bool *_notify)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sk_buff *oos;
+ rxrpc_serial_t serial = sp->hdr.serial;
+ unsigned int sack = call->ackr_sack_base;
+ rxrpc_seq_t window = call->ackr_window;
+ rxrpc_seq_t wtop = call->ackr_wtop;
+ rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
+ rxrpc_seq_t seq = sp->hdr.seq;
+ bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
+ int ack_reason = -1;
+
+ rxrpc_inc_stat(call->rxnet, stat_rx_data);
+ if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+ rxrpc_inc_stat(call->rxnet, stat_rx_data_reqack);
+ if (sp->hdr.flags & RXRPC_JUMBO_PACKET)
+ rxrpc_inc_stat(call->rxnet, stat_rx_data_jumbo);
+
+ if (last) {
+ if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+ seq + 1 != wtop)
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last);
+ } else {
+ if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+ after_eq(seq, wtop)) {
+ pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
+ call->debug_id, seq, window, wtop, wlimit);
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last);
+ }
+ }
+
+ if (after(seq, call->rx_highest_seq))
+ call->rx_highest_seq = seq;
+
+ trace_rxrpc_rx_data(call->debug_id, seq, serial, sp->hdr.flags);
+
+ if (before(seq, window)) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
+ }
+ if (after(seq, wlimit)) {
+ ack_reason = RXRPC_ACK_EXCEEDS_WINDOW;
+ goto send_ack;
+ }
+
+ /* Queue the packet. */
+ if (seq == window) {
+ if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+ ack_reason = RXRPC_ACK_REQUESTED;
+ /* Send an immediate ACK if we fill in a hole */
+ else if (!skb_queue_empty(&call->rx_oos_queue))
+ ack_reason = RXRPC_ACK_DELAY;
+ else
+ call->ackr_nr_unacked++;
+
+ window++;
+ if (after(window, wtop)) {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none);
+ wtop = window;
+ } else {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+ }
+
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
+
+ spin_lock(&call->recvmsg_queue.lock);
+ rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue);
+ *_notify = true;
+
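+		/* Transfer any out-of-sequence packets that are now contiguous
+		 * with the window to the recvmsg queue as well.
+		 */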
+ while ((oos = skb_peek(&call->rx_oos_queue))) {
+ struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
+
+ if (after(osp->hdr.seq, window))
+ break;
+
+ __skb_unlink(oos, &call->rx_oos_queue);
+ last = osp->hdr.flags & RXRPC_LAST_PACKET;
+ seq = osp->hdr.seq;
+ call->ackr_sack_table[sack] = 0;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+
+ window++;
+ rxrpc_input_queue_data(call, oos, window, wtop,
+ rxrpc_receive_queue_oos);
+ }
+
+ spin_unlock(&call->recvmsg_queue.lock);
+
+ call->ackr_sack_base = sack;
+ } else {
+ unsigned int slot;
+
+ ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
+
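+		/* Mark the slot in the circular SACK table that corresponds to
+		 * this out-of-order packet; a duplicate shows up as a bit that is
+		 * already set.
+		 */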
+ slot = seq - window;
+ sack = (sack + slot) % RXRPC_SACK_SIZE;
+
+ if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
+ }
+
+ call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos);
+
+ if (after(seq + 1, wtop)) {
+ wtop = seq + 1;
+ rxrpc_input_update_ack_window(call, window, wtop);
+ }
+
+ skb_queue_walk(&call->rx_oos_queue, oos) {
+ struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
+
+ if (after(osp->hdr.seq, seq)) {
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos);
+ __skb_queue_before(&call->rx_oos_queue, oos, skb);
+ goto oos_queued;
+ }
+ }
+
+ rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos);
+ __skb_queue_tail(&call->rx_oos_queue, skb);
+ oos_queued:
+ trace_rxrpc_receive(call, last ? rxrpc_receive_oos_last : rxrpc_receive_oos,
+ sp->hdr.serial, sp->hdr.seq);
+ }
+
+send_ack:
+ if (ack_reason >= 0)
+ rxrpc_send_ACK(call, ack_reason, serial,
+ rxrpc_propose_ack_input_data);
+ else
+ rxrpc_propose_delay_ACK(call, serial,
+ rxrpc_propose_ack_input_data);
+}
+
+/*
+ * Split a jumbo packet and file the bits separately.
+ */
+static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_jumbo_header jhdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb), *jsp;
+ struct sk_buff *jskb;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int len = skb->len - offset;
+ bool notify = false;
+
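+	/* Each non-terminal subpacket carries RXRPC_JUMBO_DATALEN bytes of data
+	 * followed by a secondary header giving the flags and reserved field of
+	 * the subpacket that follows it.
+	 */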
+ while (sp->hdr.flags & RXRPC_JUMBO_PACKET) {
+ if (len < RXRPC_JUMBO_SUBPKTLEN)
+ goto protocol_error;
+ if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ goto protocol_error;
+ if (skb_copy_bits(skb, offset + RXRPC_JUMBO_DATALEN,
+ &jhdr, sizeof(jhdr)) < 0)
+ goto protocol_error;
+
+ jskb = skb_clone(skb, GFP_NOFS);
+ if (!jskb) {
+ kdebug("couldn't clone");
+ return false;
+ }
+ rxrpc_new_skb(jskb, rxrpc_skb_new_jumbo_subpacket);
+ jsp = rxrpc_skb(jskb);
+ jsp->offset = offset;
+ jsp->len = RXRPC_JUMBO_DATALEN;
+ rxrpc_input_data_one(call, jskb, &notify);
+ rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket);
+
+ sp->hdr.flags = jhdr.flags;
+ sp->hdr._rsvd = ntohs(jhdr._rsvd);
+ sp->hdr.seq++;
+ sp->hdr.serial++;
+ offset += RXRPC_JUMBO_SUBPKTLEN;
+ len -= RXRPC_JUMBO_SUBPKTLEN;
+ }
+
+ sp->offset = offset;
+ sp->len = len;
+ rxrpc_input_data_one(call, skb, &notify);
+ if (notify) {
+ trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
+ rxrpc_notify_socket(call);
+ }
+ return true;
+
+protocol_error:
+ return false;
+}
+
+/*
+ * Process a DATA packet, adding the packet to the Rx ring. The caller's
+ * packet ref must be passed on or discarded.
+ */
+static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ rxrpc_serial_t serial = sp->hdr.serial;
+ rxrpc_seq_t seq0 = sp->hdr.seq;
+
+ _enter("{%x,%x,%x},{%u,%x}",
+ call->ackr_window, call->ackr_wtop, call->rx_highest_seq,
+ skb->len, seq0);
+
+ if (__rxrpc_call_is_complete(call))
+ return;
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ /* Received data implicitly ACKs all of the request
+ * packets we sent when we're acting as a client.
+ */
+ if (!rxrpc_receiving_reply(call))
+ goto out_notify;
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST: {
+ unsigned long timo = READ_ONCE(call->next_req_timo);
+ unsigned long now, expect_req_by;
+
+ if (timo) {
+ now = jiffies;
+ expect_req_by = now + timo;
+ WRITE_ONCE(call->expect_req_by, expect_req_by);
+ rxrpc_reduce_call_timer(call, expect_req_by, now,
+ rxrpc_timer_set_for_idle);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ if (!rxrpc_input_split_jumbo(call, skb)) {
+ rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo);
+ goto out_notify;
+ }
+ return;
+
+out_notify:
+ trace_rxrpc_notify_socket(call->debug_id, serial);
+ rxrpc_notify_socket(call);
+ _leave(" [queued]");
+}
+
+/*
+ * See if there's a cached RTT probe to complete.
+ */
+static void rxrpc_complete_rtt_probe(struct rxrpc_call *call,
+ ktime_t resp_time,
+ rxrpc_serial_t acked_serial,
+ rxrpc_serial_t ack_serial,
+ enum rxrpc_rtt_rx_trace type)
+{
+ rxrpc_serial_t orig_serial;
+ unsigned long avail;
+ ktime_t sent_at;
+ bool matched = false;
+ int i;
+
+ avail = READ_ONCE(call->rtt_avail);
+ smp_rmb(); /* Read avail bits before accessing data. */
+
+ for (i = 0; i < ARRAY_SIZE(call->rtt_serial); i++) {
+ if (!test_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &avail))
+ continue;
+
+ sent_at = call->rtt_sent_at[i];
+ orig_serial = call->rtt_serial[i];
+
+ if (orig_serial == acked_serial) {
+ clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
+ smp_mb(); /* Read data before setting avail bit */
+ set_bit(i, &call->rtt_avail);
+ rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial,
+ sent_at, resp_time);
+ matched = true;
+ }
+
+ /* If a later serial is being acked, then mark this slot as
+ * being available.
+ */
+ if (after(acked_serial, orig_serial)) {
+ trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_obsolete, i,
+ orig_serial, acked_serial, 0, 0);
+ clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
+ smp_wmb();
+ set_bit(i, &call->rtt_avail);
+ }
+ }
+
+ if (!matched)
+ trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0);
+}
+
+/*
+ * Process the extra information that may be appended to an ACK packet
+ */
+static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
+ struct rxrpc_ackinfo *ackinfo)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_peer *peer;
+ unsigned int mtu;
+ bool wake = false;
+ u32 rwind = ntohl(ackinfo->rwind);
+
+ if (rwind > RXRPC_TX_MAX_WINDOW)
+ rwind = RXRPC_TX_MAX_WINDOW;
+ if (call->tx_winsize != rwind) {
+ if (rwind > call->tx_winsize)
+ wake = true;
+ trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake);
+ call->tx_winsize = rwind;
+ }
+
+ if (call->cong_ssthresh > rwind)
+ call->cong_ssthresh = rwind;
+
+ mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+
+ peer = call->peer;
+ if (mtu < peer->maxdata) {
+ spin_lock(&peer->lock);
+ peer->maxdata = mtu;
+ peer->mtu = mtu + peer->hdrsize;
+ spin_unlock(&peer->lock);
+ }
+
+ if (wake)
+ wake_up(&call->waitq);
+}
+
+/*
+ * Process individual soft ACKs.
+ *
+ * Each ACK in the array corresponds to one packet and can be either an ACK or
+ * a NAK. If we find an explicitly NAK'd packet we resend immediately;
+ * packets that lie beyond the end of the ACK list are scheduled for resend by
+ * the timer on the basis that the peer might just not have processed them at
+ * the time the ACK was sent.
+ */
+static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
+ rxrpc_seq_t seq, int nr_acks,
+ struct rxrpc_ack_summary *summary)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_ACK) {
+ summary->nr_acks++;
+ summary->nr_new_acks++;
+ } else {
+ if (!summary->saw_nacks &&
+ call->acks_lowest_nak != seq + i) {
+ call->acks_lowest_nak = seq + i;
+ summary->new_low_nack = true;
+ }
+ summary->saw_nacks = true;
+ }
+ }
+}
+
+/*
+ * Return true if the ACK is valid - ie. it doesn't appear to have regressed
+ * with respect to the ack state conveyed by preceding ACKs.
+ */
+static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
+ rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
+{
+ rxrpc_seq_t base = READ_ONCE(call->acks_first_seq);
+
+ if (after(first_pkt, base))
+ return true; /* The window advanced */
+
+ if (before(first_pkt, base))
+ return false; /* firstPacket regressed */
+
+ if (after_eq(prev_pkt, call->acks_prev_seq))
+ return true; /* previousPacket hasn't regressed. */
+
+ /* Some rx implementations put a serial number in previousPacket. */
+ if (after_eq(prev_pkt, base + call->tx_winsize))
+ return false;
+ return true;
+}
+
+/*
+ * Process an ACK packet.
+ *
+ * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
+ * in the ACK array. Anything before that is hard-ACK'd and may be discarded.
+ *
+ * A hard-ACK means that a packet has been processed and may be discarded; a
+ * soft-ACK means that the packet may be discarded and retransmission
+ * requested. A phase is complete when all packets are hard-ACK'd.
+ */
+static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_ack_summary summary = { 0 };
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_ackinfo info;
+ rxrpc_serial_t ack_serial, acked_serial;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+ int nr_acks, offset, ioffset;
+
+ _enter("");
+
+ offset = sizeof(struct rxrpc_wire_header);
+ if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
+ offset += sizeof(ack);
+
+ ack_serial = sp->hdr.serial;
+ acked_serial = ntohl(ack.serial);
+ first_soft_ack = ntohl(ack.firstPacket);
+ prev_pkt = ntohl(ack.previousPacket);
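+	/* Everything before ack.firstPacket has been hard-ACK'd. */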
+ hard_ack = first_soft_ack - 1;
+ nr_acks = ack.nAcks;
+ summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
+ ack.reason : RXRPC_ACK__INVALID);
+
+ trace_rxrpc_rx_ack(call, ack_serial, acked_serial,
+ first_soft_ack, prev_pkt,
+ summary.ack_reason, nr_acks);
+ rxrpc_inc_stat(call->rxnet, stat_rx_acks[ack.reason]);
+
+ if (acked_serial != 0) {
+ switch (ack.reason) {
+ case RXRPC_ACK_PING_RESPONSE:
+ rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+ rxrpc_rtt_rx_ping_response);
+ break;
+ case RXRPC_ACK_REQUESTED:
+ rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+ rxrpc_rtt_rx_requested_ack);
+ break;
+ default:
+ rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial,
+ rxrpc_rtt_rx_other_ack);
+ break;
+ }
+ }
+
+ /* If we get an EXCEEDS_WINDOW ACK from the server, it probably
+ * indicates that the client address changed due to NAT. The server
+ * lost the call because it switched to a different peer.
+ */
+ if (unlikely(ack.reason == RXRPC_ACK_EXCEEDS_WINDOW) &&
+ first_soft_ack == 1 &&
+ prev_pkt == 0 &&
+ rxrpc_is_client_call(call)) {
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ 0, -ENETRESET);
+ goto send_response;
+ }
+
+ /* If we get an OUT_OF_SEQUENCE ACK from the server, that can also
+ * indicate a change of address. However, we can retransmit the call
+ * if we still have it buffered to the beginning.
+ */
+ if (unlikely(ack.reason == RXRPC_ACK_OUT_OF_SEQUENCE) &&
+ first_soft_ack == 1 &&
+ prev_pkt == 0 &&
+ call->acks_hard_ack == 0 &&
+ rxrpc_is_client_call(call)) {
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ 0, -ENETRESET);
+ goto send_response;
+ }
+
+ /* Discard any out-of-order or duplicate ACKs (outside lock). */
+ if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
+ trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial,
+ first_soft_ack, call->acks_first_seq,
+ prev_pkt, call->acks_prev_seq);
+ goto send_response;
+ }
+
+ info.rxMTU = 0;
+ ioffset = offset + nr_acks + 3;
+ if (skb->len >= ioffset + sizeof(info) &&
+ skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
+
+ if (nr_acks > 0)
+ skb_condense(skb);
+
+ call->acks_latest_ts = skb->tstamp;
+ call->acks_first_seq = first_soft_ack;
+ call->acks_prev_seq = prev_pkt;
+
+ switch (ack.reason) {
+ case RXRPC_ACK_PING:
+ break;
+ default:
+ if (after(acked_serial, call->acks_highest_serial))
+ call->acks_highest_serial = acked_serial;
+ break;
+ }
+
+ /* Parse rwind and mtu sizes if provided. */
+ if (info.rxMTU)
+ rxrpc_input_ackinfo(call, skb, &info);
+
+ if (first_soft_ack == 0)
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
+
+ /* Ignore ACKs unless we are or have just been transmitting. */
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ break;
+ default:
+ goto send_response;
+ }
+
+ if (before(hard_ack, call->acks_hard_ack) ||
+ after(hard_ack, call->tx_top))
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window);
+ if (nr_acks > call->tx_top - hard_ack)
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
+
+ if (after(hard_ack, call->acks_hard_ack)) {
+ if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
+ goto send_response;
+ }
+ }
+
+ if (nr_acks > 0) {
+ if (offset > (int)skb->len - nr_acks)
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
+ rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
+ nr_acks, &summary);
+ }
+
+ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
+ summary.nr_acks == call->tx_top - hard_ack &&
+ rxrpc_is_client_call(call))
+ rxrpc_propose_ping(call, ack_serial,
+ rxrpc_propose_ack_ping_for_lost_reply);
+
+ rxrpc_congestion_management(call, skb, &summary, acked_serial);
+
+send_response:
+ if (ack.reason == RXRPC_ACK_PING)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial,
+ rxrpc_propose_ack_respond_to_ping);
+ else if (sp->hdr.flags & RXRPC_REQUEST_ACK)
+ rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial,
+ rxrpc_propose_ack_respond_to_ack);
+}
+
+/*
+ * Process an ACKALL packet.
+ */
+static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_ack_summary summary = { 0 };
+
+ if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
+}
+
+/*
+ * Process an ABORT packet directed at a call.
+ */
+static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_abort(call, sp->hdr.serial, skb->priority);
+
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ skb->priority, -ECONNABORTED);
+}
+
+/*
+ * Process an incoming call packet.
+ */
+void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned long timo;
+
+ _enter("%p,%p", call, skb);
+
+ if (sp->hdr.serviceId != call->dest_srx.srx_service)
+ call->dest_srx.srx_service = sp->hdr.serviceId;
+ if ((int)sp->hdr.serial - (int)call->rx_serial > 0)
+ call->rx_serial = sp->hdr.serial;
+ if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags))
+ set_bit(RXRPC_CALL_RX_HEARD, &call->flags);
+
+ timo = READ_ONCE(call->next_rx_timo);
+ if (timo) {
+ unsigned long now = jiffies, expect_rx_by;
+
+ expect_rx_by = now + timo;
+ WRITE_ONCE(call->expect_rx_by, expect_rx_by);
+ rxrpc_reduce_call_timer(call, expect_rx_by, now,
+ rxrpc_timer_set_for_normal);
+ }
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_DATA:
+ return rxrpc_input_data(call, skb);
+
+ case RXRPC_PACKET_TYPE_ACK:
+ return rxrpc_input_ack(call, skb);
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ /* Just ignore BUSY packets from the server; the retry and
+ * lifespan timers will take care of business. BUSY packets
+ * from the client don't make sense.
+ */
+ return;
+
+ case RXRPC_PACKET_TYPE_ABORT:
+ return rxrpc_input_abort(call, skb);
+
+ case RXRPC_PACKET_TYPE_ACKALL:
+ return rxrpc_input_ackall(call, skb);
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Handle a new service call on a channel implicitly completing the preceding
+ * call on that channel. This does not apply to client conns.
+ *
+ * TODO: If callNumber > call_id + 1, renegotiate security.
+ */
+void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ rxrpc_call_completed(call);
+ fallthrough;
+ case RXRPC_CALL_COMPLETE:
+ break;
+ default:
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN,
+ rxrpc_eproto_improper_term);
+ trace_rxrpc_improper_term(call);
+ break;
+ }
+
+ rxrpc_input_call_event(call, skb);
+}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
new file mode 100644
index 0000000000..34353b6e58
--- /dev/null
+++ b/net/rxrpc/insecure.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Null security operations.
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static int none_init_connection_security(struct rxrpc_connection *conn,
+ struct rxrpc_key_token *token)
+{
+ return 0;
+}
+
+/*
+ * Work out how much data we can put in an unsecured packet.
+ */
+static int none_how_much_data(struct rxrpc_call *call, size_t remain,
+ size_t *_buf_size, size_t *_data_size, size_t *_offset)
+{
+ *_buf_size = *_data_size = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
+ *_offset = 0;
+ return 0;
+}
+
+static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ return 0;
+}
+
+static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ sp->flags |= RXRPC_RX_VERIFIED;
+ return 0;
+}
+
+static void none_free_call_crypto(struct rxrpc_call *call)
+{
+}
+
+static int none_respond_to_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_challenge);
+}
+
+static int none_verify_response(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_response);
+}
+
+static void none_clear(struct rxrpc_connection *conn)
+{
+}
+
+static int none_init(void)
+{
+ return 0;
+}
+
+static void none_exit(void)
+{
+}
+
+/*
+ * RxRPC null security: no packet protection is applied
+ */
+const struct rxrpc_security rxrpc_no_security = {
+ .name = "none",
+ .security_index = RXRPC_SECURITY_NONE,
+ .init = none_init,
+ .exit = none_exit,
+ .init_connection_security = none_init_connection_security,
+ .free_call_crypto = none_free_call_crypto,
+ .how_much_data = none_how_much_data,
+ .secure_packet = none_secure_packet,
+ .verify_packet = none_verify_packet,
+ .respond_to_challenge = none_respond_to_challenge,
+ .verify_response = none_verify_response,
+ .clear = none_clear,
+};
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
new file mode 100644
index 0000000000..4a3a08a0e2
--- /dev/null
+++ b/net/rxrpc/io_thread.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007, 2016, 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "ar-internal.h"
+
+static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
+
+/*
+ * handle data received on the local endpoint
+ * - may be called in interrupt context
+ *
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP
+ * socket from being cleared in the middle of processing this function.
+ *
+ * Called with the RCU read lock held from the IP layer via UDP.
+ */
+int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
+{
+ struct sk_buff_head *rx_queue;
+ struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
+
+ if (unlikely(!local)) {
+ kfree_skb(skb);
+ return 0;
+ }
+ if (skb->tstamp == 0)
+ skb->tstamp = ktime_get_real();
+
+ skb->mark = RXRPC_SKB_MARK_PACKET;
+ rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
+ rx_queue = &local->rx_queue;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ if (rxrpc_inject_rx_delay ||
+ !skb_queue_empty(&local->rx_delay_queue)) {
+ skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay);
+ rx_queue = &local->rx_delay_queue;
+ }
+#endif
+
+ skb_queue_tail(rx_queue, skb);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+}
+
+/*
+ * Handle an error received on the local endpoint.
+ */
+void rxrpc_error_report(struct sock *sk)
+{
+ struct rxrpc_local *local;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+ local = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!local)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ while ((skb = skb_dequeue(&sk->sk_error_queue))) {
+ skb->mark = RXRPC_SKB_MARK_ERROR;
+ rxrpc_new_skb(skb, rxrpc_skb_new_error_report);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+
+ rxrpc_wake_up_io_thread(local);
+ rcu_read_unlock();
+}
+
+/*
+ * Directly produce an abort from a packet.
+ */
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ abort_code, err);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = abort_code;
+ return false;
+}
+
+static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
+}
+
+#define just_discard true
+
+/*
+ * Process event packets targeted at a local endpoint.
+ */
+static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ char v;
+
+ _enter("");
+
+ rxrpc_see_skb(skb, rxrpc_skb_see_version);
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &v, 1) >= 0) {
+ if (v == 0)
+ rxrpc_send_version_request(local, &sp->hdr, skb);
+ }
+
+ return true;
+}
+
+/*
+ * Extract the wire header from a packet and translate the byte order.
+ */
+static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
+ struct sk_buff *skb)
+{
+ struct rxrpc_wire_header whdr;
+
+ /* dig out the RxRPC connection details */
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr);
+
+ memset(sp, 0, sizeof(*sp));
+ sp->hdr.epoch = ntohl(whdr.epoch);
+ sp->hdr.cid = ntohl(whdr.cid);
+ sp->hdr.callNumber = ntohl(whdr.callNumber);
+ sp->hdr.seq = ntohl(whdr.seq);
+ sp->hdr.serial = ntohl(whdr.serial);
+ sp->hdr.flags = whdr.flags;
+ sp->hdr.type = whdr.type;
+ sp->hdr.userStatus = whdr.userStatus;
+ sp->hdr.securityIndex = whdr.securityIndex;
+ sp->hdr._rsvd = ntohs(whdr._rsvd);
+ sp->hdr.serviceId = ntohs(whdr.serviceId);
+ return true;
+}
+
+/*
+ * Extract the abort code from an ABORT packet and stash it in skb->priority.
+ */
+static bool rxrpc_extract_abort(struct sk_buff *skb)
+{
+ __be32 wtmp;
+
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wtmp, sizeof(wtmp)) < 0)
+ return false;
+ skb->priority = ntohl(wtmp);
+ return true;
+}
+
+/*
+ * Process packets received on the local endpoint
+ */
+static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+{
+ struct rxrpc_connection *conn;
+ struct sockaddr_rxrpc peer_srx;
+ struct rxrpc_skb_priv *sp;
+ struct rxrpc_peer *peer = NULL;
+ struct sk_buff *skb = *_skb;
+ bool ret = false;
+
+ skb_pull(skb, sizeof(struct udphdr));
+
+ sp = rxrpc_skb(skb);
+
+ /* dig out the RxRPC connection details */
+ if (!rxrpc_extract_header(sp, skb))
+ return just_discard;
+
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
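+		/* Drop every eighth packet when loss injection is configured. */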
+ static int lose;
+ if ((lose++ & 7) == 7) {
+ trace_rxrpc_rx_lose(sp);
+ return just_discard;
+ }
+ }
+
+ trace_rxrpc_rx_packet(sp);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_VERSION:
+ if (rxrpc_to_client(sp))
+ return just_discard;
+ return rxrpc_input_version(local, skb);
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ if (rxrpc_to_server(sp))
+ return just_discard;
+ fallthrough;
+ case RXRPC_PACKET_TYPE_ACK:
+ case RXRPC_PACKET_TYPE_ACKALL:
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
+ break;
+ case RXRPC_PACKET_TYPE_ABORT:
+ if (!rxrpc_extract_abort(skb))
+ return just_discard; /* Just discard if malformed */
+ break;
+
+ case RXRPC_PACKET_TYPE_DATA:
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
+ if (sp->hdr.seq == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq);
+
+ /* Unshare the packet so that it can be modified for in-place
+ * decryption.
+ */
+ if (sp->hdr.securityIndex != 0) {
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb) {
+ rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
+ *_skb = NULL;
+ return just_discard;
+ }
+
+ if (skb != *_skb) {
+ rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare);
+ *_skb = skb;
+ rxrpc_new_skb(skb, rxrpc_skb_new_unshared);
+ sp = rxrpc_skb(skb);
+ }
+ }
+ break;
+
+ case RXRPC_PACKET_TYPE_CHALLENGE:
+ if (rxrpc_to_server(sp))
+ return just_discard;
+ break;
+ case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_to_client(sp))
+ return just_discard;
+ break;
+
+ /* Packet types 9-11 should just be ignored. */
+ case RXRPC_PACKET_TYPE_PARAMS:
+ case RXRPC_PACKET_TYPE_10:
+ case RXRPC_PACKET_TYPE_11:
+ return just_discard;
+
+ default:
+ return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet);
+ }
+
+ if (sp->hdr.serviceId == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service);
+
+ if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
+ return just_discard; /* Unsupported address type. */
+
+ if (peer_srx.transport.family != local->srx.transport.family &&
+ (peer_srx.transport.family == AF_INET &&
+ local->srx.transport.family != AF_INET6)) {
+ pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
+ peer_srx.transport.family,
+ local->srx.transport.family);
+ return just_discard; /* Wrong address type. */
+ }
+
+ if (rxrpc_to_client(sp)) {
+ rcu_read_lock();
+ conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
+ conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
+ rcu_read_unlock();
+ if (!conn)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn);
+
+ ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
+ rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
+ return ret;
+ }
+
+ /* We need to look up service connections by the full protocol
+ * parameter set. We look up the peer first as an intermediate step
+ * and then the connection from the peer's tree.
+ */
+ rcu_read_lock();
+
+ peer = rxrpc_lookup_peer_rcu(local, &peer_srx);
+ if (!peer) {
+ rcu_read_unlock();
+ return rxrpc_new_incoming_call(local, NULL, NULL, &peer_srx, skb);
+ }
+
+ conn = rxrpc_find_service_conn_rcu(peer, skb);
+ conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
+ if (conn) {
+ rcu_read_unlock();
+ ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
+ rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
+ return ret;
+ }
+
+ peer = rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input);
+ rcu_read_unlock();
+
+ ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
+ rxrpc_put_peer(peer, rxrpc_peer_put_input);
+ return ret;
+}
+
+/*
+ * Deal with a packet that's associated with an extant connection.
+ */
+static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_channel *chan;
+ struct rxrpc_call *call = NULL;
+ unsigned int channel;
+ bool ret;
+
+ if (sp->hdr.securityIndex != conn->security_ix)
+ return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security,
+ RXKADINCONSISTENCY, -EBADMSG);
+
+ if (sp->hdr.serviceId != conn->service_id) {
+ int old_id;
+
+ if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
+ return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade);
+
+ old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
+ sp->hdr.serviceId);
+ if (old_id != conn->orig_service_id &&
+ old_id != sp->hdr.serviceId)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade);
+ }
+
+ if (after(sp->hdr.serial, conn->hi_serial))
+ conn->hi_serial = sp->hdr.serial;
+
+ /* It's a connection-level packet if the call number is 0. */
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_input_conn_packet(conn, skb);
+
+ /* Call-bound packets are routed by connection channel. */
+ channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+
+ /* Ignore really old calls */
+ if (sp->hdr.callNumber < chan->last_call)
+ return just_discard;
+
+ if (sp->hdr.callNumber == chan->last_call) {
+ if (chan->call ||
+ sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
+ return just_discard;
+
+ /* For the previous service call, if completed successfully, we
+ * discard all further packets.
+ */
+ if (rxrpc_conn_is_service(conn) &&
+ chan->last_type == RXRPC_PACKET_TYPE_ACK)
+ return just_discard;
+
+ /* But otherwise we need to retransmit the final packet from
+ * data cached in the connection record.
+ */
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+ trace_rxrpc_rx_data(chan->call_debug_id,
+ sp->hdr.seq,
+ sp->hdr.serial,
+ sp->hdr.flags);
+ rxrpc_conn_retransmit_call(conn, skb, channel);
+ return just_discard;
+ }
+
+ call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input);
+
+ if (sp->hdr.callNumber > chan->call_id) {
+ if (rxrpc_to_client(sp)) {
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return rxrpc_protocol_error(skb,
+ rxrpc_eproto_unexpected_implicit_end);
+ }
+
+ if (call) {
+ rxrpc_implicit_end_call(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ call = NULL;
+ }
+ }
+
+ if (!call) {
+ if (rxrpc_to_client(sp))
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call);
+ return rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+ peer_srx, skb);
+ }
+
+ ret = rxrpc_input_call_event(call, skb);
+ rxrpc_put_call(call, rxrpc_call_put_input);
+ return ret;
+}
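+
+/*
+ * A worked example of the channel routing above (illustrative values):
+ * RXRPC_CHANNELMASK is 3, so a packet with cid 0x00000006 is directed to
+ * channel 2 of the connection identified by the upper bits of the cid; each
+ * connection therefore multiplexes up to four concurrent calls.
+ */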
+
+/*
+ * I/O and event handling thread.
+ */
+int rxrpc_io_thread(void *data)
+{
+ struct rxrpc_connection *conn;
+ struct sk_buff_head rx_queue;
+ struct rxrpc_local *local = data;
+ struct rxrpc_call *call;
+ struct sk_buff *skb;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ ktime_t now;
+#endif
+ bool should_stop;
+
+ complete(&local->io_thread_ready);
+
+ skb_queue_head_init(&rx_queue);
+
+ set_user_nice(current, MIN_NICE);
+
+ for (;;) {
+ rxrpc_inc_stat(local->rxnet, stat_io_loop);
+
+ /* Deal with connections that want immediate attention. */
+ conn = list_first_entry_or_null(&local->conn_attend_q,
+ struct rxrpc_connection,
+ attend_link);
+ if (conn) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&conn->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ rxrpc_input_conn_event(conn, NULL);
+ rxrpc_put_connection(conn, rxrpc_conn_put_poke);
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags))
+ rxrpc_discard_expired_client_conns(local);
+
+ /* Deal with calls that want immediate attention. */
+ if ((call = list_first_entry_or_null(&local->call_attend_q,
+ struct rxrpc_call,
+ attend_link))) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&call->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ trace_rxrpc_call_poked(call);
+ rxrpc_input_call_event(call, NULL);
+ rxrpc_put_call(call, rxrpc_call_put_poke);
+ continue;
+ }
+
+ if (!list_empty(&local->new_client_calls))
+ rxrpc_connect_client_calls(local);
+
+ /* Process received packets and errors. */
+ if ((skb = __skb_dequeue(&rx_queue))) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_PACKET:
+ skb->priority = 0;
+ if (!rxrpc_input_packet(local, &skb))
+ rxrpc_reject_packet(local, skb);
+ trace_rxrpc_rx_done(skb->mark, skb->priority);
+ rxrpc_free_skb(skb, rxrpc_skb_put_input);
+ break;
+ case RXRPC_SKB_MARK_ERROR:
+ rxrpc_input_error(local, skb);
+ rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
+ break;
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ rxrpc_input_conn_event(sp->conn, skb);
+ rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
+ break;
+ }
+ continue;
+ }
+
+ /* Inject a delay into packets if requested. */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ now = ktime_get_real();
+ while ((skb = skb_peek(&local->rx_delay_queue))) {
+ if (ktime_before(now, skb->tstamp))
+ break;
+ skb = skb_dequeue(&local->rx_delay_queue);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+#endif
+
+ if (!skb_queue_empty(&local->rx_queue)) {
+ spin_lock_irq(&local->rx_queue.lock);
+ skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
+ spin_unlock_irq(&local->rx_queue.lock);
+ continue;
+ }
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ should_stop = kthread_should_stop();
+ if (!skb_queue_empty(&local->rx_queue) ||
+ !list_empty(&local->call_attend_q) ||
+ !list_empty(&local->conn_attend_q) ||
+ !list_empty(&local->new_client_calls) ||
+ test_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags)) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ if (should_stop)
+ break;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb = skb_peek(&local->rx_delay_queue);
+ if (skb) {
+ unsigned long timeout;
+ ktime_t tstamp = skb->tstamp;
+ ktime_t now = ktime_get_real();
+ s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now));
+
+ if (delay_ns <= 0) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ timeout = nsecs_to_jiffies(delay_ns);
+ timeout = max(timeout, 1UL);
+ schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+#endif
+
+ schedule();
+ }
+
+ __set_current_state(TASK_RUNNING);
+ rxrpc_see_local(local, rxrpc_local_stop);
+ rxrpc_destroy_local(local);
+ local->io_thread = NULL;
+ rxrpc_see_local(local, rxrpc_local_stopped);
+ return 0;
+}
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
new file mode 100644
index 0000000000..33e8302a79
--- /dev/null
+++ b/net/rxrpc/key.c
@@ -0,0 +1,699 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC key management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * RxRPC keys should have a description describing their purpose, e.g.:
+ * "afs@example.com"
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/skcipher.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/key-type.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include <keys/user-type.h>
+#include "ar-internal.h"
+
+static int rxrpc_preparse(struct key_preparsed_payload *);
+static void rxrpc_free_preparse(struct key_preparsed_payload *);
+static void rxrpc_destroy(struct key *);
+static void rxrpc_describe(const struct key *, struct seq_file *);
+static long rxrpc_read(const struct key *, char *, size_t);
+
+/*
+ * rxrpc defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ */
+struct key_type key_type_rxrpc = {
+ .name = "rxrpc",
+ .flags = KEY_TYPE_NET_DOMAIN,
+ .preparse = rxrpc_preparse,
+ .free_preparse = rxrpc_free_preparse,
+ .instantiate = generic_key_instantiate,
+ .destroy = rxrpc_destroy,
+ .describe = rxrpc_describe,
+ .read = rxrpc_read,
+};
+EXPORT_SYMBOL(key_type_rxrpc);
+
+/*
+ * parse an RxKAD type XDR format token
+ * - the caller guarantees we have at least 4 words
+ */
+static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
+ size_t datalen,
+ const __be32 *xdr, unsigned int toklen)
+{
+ struct rxrpc_key_token *token, **pptoken;
+ time64_t expiry;
+ size_t plen;
+ u32 tktlen;
+
+ _enter(",{%x,%x,%x,%x},%u",
+ ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]),
+ toklen);
+
+ if (toklen <= 8 * 4)
+ return -EKEYREJECTED;
+ tktlen = ntohl(xdr[7]);
+ _debug("tktlen: %x", tktlen);
+ if (tktlen > AFSTOKEN_RK_TIX_MAX)
+ return -EKEYREJECTED;
+ if (toklen < 8 * 4 + tktlen)
+ return -EKEYREJECTED;
+
+ plen = sizeof(*token) + sizeof(*token->kad) + tktlen;
+ prep->quotalen = datalen + plen;
+
+ plen -= sizeof(*token);
+ token = kzalloc(sizeof(*token), GFP_KERNEL);
+ if (!token)
+ return -ENOMEM;
+
+ token->kad = kzalloc(plen, GFP_KERNEL);
+ if (!token->kad) {
+ kfree(token);
+ return -ENOMEM;
+ }
+
+ token->security_index = RXRPC_SECURITY_RXKAD;
+ token->kad->ticket_len = tktlen;
+ token->kad->vice_id = ntohl(xdr[0]);
+ token->kad->kvno = ntohl(xdr[1]);
+ token->kad->start = ntohl(xdr[4]);
+ token->kad->expiry = ntohl(xdr[5]);
+ token->kad->primary_flag = ntohl(xdr[6]);
+ memcpy(&token->kad->session_key, &xdr[2], 8);
+ memcpy(&token->kad->ticket, &xdr[8], tktlen);
+
+ _debug("SCIX: %u", token->security_index);
+ _debug("TLEN: %u", token->kad->ticket_len);
+ _debug("EXPY: %x", token->kad->expiry);
+ _debug("KVNO: %u", token->kad->kvno);
+ _debug("PRIM: %u", token->kad->primary_flag);
+ _debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
+ token->kad->session_key[0], token->kad->session_key[1],
+ token->kad->session_key[2], token->kad->session_key[3],
+ token->kad->session_key[4], token->kad->session_key[5],
+ token->kad->session_key[6], token->kad->session_key[7]);
+ if (token->kad->ticket_len >= 8)
+ _debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
+ token->kad->ticket[0], token->kad->ticket[1],
+ token->kad->ticket[2], token->kad->ticket[3],
+ token->kad->ticket[4], token->kad->ticket[5],
+ token->kad->ticket[6], token->kad->ticket[7]);
+
+ /* count the number of tokens attached */
+ prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1);
+
+ /* attach the data */
+ for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0];
+ *pptoken;
+ pptoken = &(*pptoken)->next)
+ continue;
+ *pptoken = token;
+ expiry = rxrpc_u32_to_time64(token->kad->expiry);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * attempt to parse the data as the XDR format
+ * - the caller guarantees we have more than 7 words
+ */
+static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
+{
+ const __be32 *xdr = prep->data, *token, *p;
+ const char *cp;
+ unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix;
+ size_t datalen = prep->datalen;
+ int ret, ret2;
+
+ _enter(",{%x,%x,%x,%x},%zu",
+ ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]),
+ prep->datalen);
+
+ if (datalen > AFSTOKEN_LENGTH_MAX)
+ goto not_xdr;
+
+ /* XDR is an array of __be32's */
+ if (datalen & 3)
+ goto not_xdr;
+
+ /* the flags should be 0 (the setpag bit must be handled by
+ * userspace) */
+ if (ntohl(*xdr++) != 0)
+ goto not_xdr;
+ datalen -= 4;
+
+ /* check the cell name */
+ len = ntohl(*xdr++);
+ if (len < 1 || len > AFSTOKEN_CELL_MAX)
+ goto not_xdr;
+ datalen -= 4;
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > datalen)
+ goto not_xdr;
+
+ cp = (const char *) xdr;
+ for (loop = 0; loop < len; loop++)
+ if (!isprint(cp[loop]))
+ goto not_xdr;
+ for (; loop < paddedlen; loop++)
+ if (cp[loop])
+ goto not_xdr;
+ _debug("cellname: [%u/%u] '%*.*s'",
+ len, paddedlen, len, len, (const char *) xdr);
+ datalen -= paddedlen;
+ xdr += paddedlen >> 2;
+
+ /* get the token count */
+ if (datalen < 12)
+ goto not_xdr;
+ ntoken = ntohl(*xdr++);
+ datalen -= 4;
+ _debug("ntoken: %x", ntoken);
+ if (ntoken < 1 || ntoken > AFSTOKEN_MAX)
+ goto not_xdr;
+
+ /* check each token wrapper */
+ p = xdr;
+ loop = ntoken;
+ do {
+ if (datalen < 8)
+ goto not_xdr;
+ toklen = ntohl(*p++);
+ sec_ix = ntohl(*p);
+ datalen -= 4;
+ _debug("token: [%x/%zx] %x", toklen, datalen, sec_ix);
+ paddedlen = (toklen + 3) & ~3;
+ if (toklen < 20 || toklen > datalen || paddedlen > datalen)
+ goto not_xdr;
+ datalen -= paddedlen;
+ p += paddedlen >> 2;
+
+ } while (--loop > 0);
+
+ _debug("remainder: %zu", datalen);
+ if (datalen != 0)
+ goto not_xdr;
+
+ /* okay: we're going to assume it's valid XDR format
+ * - we ignore the cellname, relying on the key to be correctly named
+ */
+ ret = -EPROTONOSUPPORT;
+ do {
+ toklen = ntohl(*xdr++);
+ token = xdr;
+ xdr += (toklen + 3) / 4;
+
+ sec_ix = ntohl(*token++);
+ toklen -= 4;
+
+ _debug("TOKEN type=%x len=%x", sec_ix, toklen);
+
+ switch (sec_ix) {
+ case RXRPC_SECURITY_RXKAD:
+ ret2 = rxrpc_preparse_xdr_rxkad(prep, datalen, token, toklen);
+ break;
+ default:
+ ret2 = -EPROTONOSUPPORT;
+ break;
+ }
+
+ switch (ret2) {
+ case 0:
+ ret = 0;
+ break;
+ case -EPROTONOSUPPORT:
+ break;
+ case -ENOPKG:
+ if (ret != 0)
+ ret = -ENOPKG;
+ break;
+ default:
+ ret = ret2;
+ goto error;
+ }
+
+ } while (--ntoken > 0);
+
+error:
+ _leave(" = %d", ret);
+ return ret;
+
+not_xdr:
+ _leave(" = -EPROTO");
+ return -EPROTO;
+}
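+
+/*
+ * Illustrative sketch only (not part of the original parser): one way the
+ * XDR blob accepted by rxrpc_preparse_xdr() could be packed, carrying a
+ * single rxkad token for cell "example.com".  The helper name is made up
+ * and the caller is assumed to pass a zeroed buffer large enough to hold
+ * the result.
+ */
+static inline size_t example_pack_rxkad_xdr(__be32 *xdr,
+ const u8 session_key[8],
+ const u8 *ticket, u32 tktlen,
+ u32 kvno, u32 start, u32 expiry)
+{
+ static const char cell[] = "example.com";
+ u32 cnlen = sizeof(cell) - 1;
+ u32 toklen = 4 + 8 * 4 + round_up(tktlen, 4);
+ __be32 *p = xdr;
+
+ *p++ = htonl(0); /* flags - must be 0, setpag is a userspace matter */
+ *p++ = htonl(cnlen); /* cell name length */
+ memcpy(p, cell, cnlen); /* cell name, padding left as zeroes */
+ p += round_up(cnlen, 4) >> 2;
+ *p++ = htonl(1); /* number of tokens */
+ *p++ = htonl(toklen); /* length of the one token body */
+ *p++ = htonl(RXRPC_SECURITY_RXKAD); /* security index */
+ *p++ = htonl(0); /* vice ID */
+ *p++ = htonl(kvno); /* key version number */
+ memcpy(p, session_key, 8); /* session key */
+ p += 2;
+ *p++ = htonl(start); /* start time */
+ *p++ = htonl(expiry); /* expiry time */
+ *p++ = htonl(1); /* primary flag */
+ *p++ = htonl(tktlen); /* ticket length */
+ memcpy(p, ticket, tktlen); /* ticket, padding left as zeroes */
+ p += round_up(tktlen, 4) >> 2;
+ return (p - xdr) * sizeof(__be32); /* pass this as the payload length */
+}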
+
+/*
+ * Preparse an rxrpc defined key.
+ *
+ * Data should be of the form:
+ * OFFSET LEN CONTENT
+ * 0 4 key interface version number
+ * 4 2 security index (type)
+ * 6 2 ticket length
+ * 8 4 key expiry time (time_t)
+ * 12 4 kvno
+ * 16 8 session key
+ * 24 [len] ticket
+ *
+ * if no data is provided, then a no-security key is made
+ */
+static int rxrpc_preparse(struct key_preparsed_payload *prep)
+{
+ const struct rxrpc_key_data_v1 *v1;
+ struct rxrpc_key_token *token, **pp;
+ time64_t expiry;
+ size_t plen;
+ u32 kver;
+ int ret;
+
+ _enter("%zu", prep->datalen);
+
+ /* handle a no-security key */
+ if (!prep->data && prep->datalen == 0)
+ return 0;
+
+ /* determine if the XDR payload format is being used */
+ if (prep->datalen > 7 * 4) {
+ ret = rxrpc_preparse_xdr(prep);
+ if (ret != -EPROTO)
+ return ret;
+ }
+
+ /* get the key interface version number */
+ ret = -EINVAL;
+ if (prep->datalen <= 4 || !prep->data)
+ goto error;
+ memcpy(&kver, prep->data, sizeof(kver));
+ prep->data += sizeof(kver);
+ prep->datalen -= sizeof(kver);
+
+ _debug("KEY I/F VERSION: %u", kver);
+
+ ret = -EKEYREJECTED;
+ if (kver != 1)
+ goto error;
+
+ /* deal with a version 1 key */
+ ret = -EINVAL;
+ if (prep->datalen < sizeof(*v1))
+ goto error;
+
+ v1 = prep->data;
+ if (prep->datalen != sizeof(*v1) + v1->ticket_length)
+ goto error;
+
+ _debug("SCIX: %u", v1->security_index);
+ _debug("TLEN: %u", v1->ticket_length);
+ _debug("EXPY: %x", v1->expiry);
+ _debug("KVNO: %u", v1->kvno);
+ _debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
+ v1->session_key[0], v1->session_key[1],
+ v1->session_key[2], v1->session_key[3],
+ v1->session_key[4], v1->session_key[5],
+ v1->session_key[6], v1->session_key[7]);
+ if (v1->ticket_length >= 8)
+ _debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
+ v1->ticket[0], v1->ticket[1],
+ v1->ticket[2], v1->ticket[3],
+ v1->ticket[4], v1->ticket[5],
+ v1->ticket[6], v1->ticket[7]);
+
+ ret = -EPROTONOSUPPORT;
+ if (v1->security_index != RXRPC_SECURITY_RXKAD)
+ goto error;
+
+ plen = sizeof(*token->kad) + v1->ticket_length;
+ prep->quotalen = plen + sizeof(*token);
+
+ ret = -ENOMEM;
+ token = kzalloc(sizeof(*token), GFP_KERNEL);
+ if (!token)
+ goto error;
+ token->kad = kzalloc(plen, GFP_KERNEL);
+ if (!token->kad)
+ goto error_free;
+
+ token->security_index = RXRPC_SECURITY_RXKAD;
+ token->kad->ticket_len = v1->ticket_length;
+ token->kad->expiry = v1->expiry;
+ token->kad->kvno = v1->kvno;
+ memcpy(&token->kad->session_key, &v1->session_key, 8);
+ memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length);
+
+ /* count the number of tokens attached */
+ prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1);
+
+ /* attach the data */
+ pp = (struct rxrpc_key_token **)&prep->payload.data[0];
+ while (*pp)
+ pp = &(*pp)->next;
+ *pp = token;
+ expiry = rxrpc_u32_to_time64(token->kad->expiry);
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
+ token = NULL;
+ ret = 0;
+
+error_free:
+ kfree(token);
+error:
+ return ret;
+}
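+
+/*
+ * Purely for illustration (hypothetical userspace snippet, not taken from
+ * this file): a version-1 payload matching the layout documented above
+ * could be built and attached with add_key(2) from libkeyutils:
+ *
+ * struct {
+ * uint32_t kver; // must be 1
+ * uint16_t security_index; // RXRPC_SECURITY_RXKAD == 2
+ * uint16_t ticket_length;
+ * uint32_t expiry; // time_t
+ * uint32_t kvno;
+ * uint8_t session_key[8];
+ * uint8_t ticket[256]; // size illustrative
+ * } blob = { .kver = 1, .security_index = 2 };
+ *
+ * // ... fill in ticket_length, expiry, kvno, session_key and ticket ...
+ * add_key("rxrpc", "afs@example.com", &blob,
+ * 24 + blob.ticket_length, // 24 == offset of the ticket field
+ * KEY_SPEC_PROCESS_KEYRING);
+ */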
+
+/*
+ * Free token list.
+ */
+static void rxrpc_free_token_list(struct rxrpc_key_token *token)
+{
+ struct rxrpc_key_token *next;
+
+ for (; token; token = next) {
+ next = token->next;
+ switch (token->security_index) {
+ case RXRPC_SECURITY_RXKAD:
+ kfree(token->kad);
+ break;
+ default:
+ pr_err("Unknown token type %x on rxrpc key\n",
+ token->security_index);
+ BUG();
+ }
+
+ kfree(token);
+ }
+}
+
+/*
+ * Clean up preparse data.
+ */
+static void rxrpc_free_preparse(struct key_preparsed_payload *prep)
+{
+ rxrpc_free_token_list(prep->payload.data[0]);
+}
+
+/*
+ * dispose of the data dangling from the corpse of an rxrpc key
+ */
+static void rxrpc_destroy(struct key *key)
+{
+ rxrpc_free_token_list(key->payload.data[0]);
+}
+
+/*
+ * describe the rxrpc key
+ */
+static void rxrpc_describe(const struct key *key, struct seq_file *m)
+{
+ const struct rxrpc_key_token *token;
+ const char *sep = ": ";
+
+ seq_puts(m, key->description);
+
+ for (token = key->payload.data[0]; token; token = token->next) {
+ seq_puts(m, sep);
+
+ switch (token->security_index) {
+ case RXRPC_SECURITY_RXKAD:
+ seq_puts(m, "ka");
+ break;
+ default: /* we have a ticket we can't encode */
+ seq_printf(m, "%u", token->security_index);
+ break;
+ }
+
+ sep = " ";
+ }
+}
+
+/*
+ * grab the security key for a socket
+ */
+int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
+{
+ struct key *key;
+ char *description;
+
+ _enter("");
+
+ if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities)
+ return -EINVAL;
+
+ description = memdup_sockptr_nul(optval, optlen);
+ if (IS_ERR(description))
+ return PTR_ERR(description);
+
+ key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk), NULL);
+ if (IS_ERR(key)) {
+ kfree(description);
+ _leave(" = %ld", PTR_ERR(key));
+ return PTR_ERR(key);
+ }
+
+ rx->key = key;
+ kfree(description);
+ _leave(" = 0 [key %x]", key->serial);
+ return 0;
+}
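+
+/*
+ * For context (typical userspace usage, shown for illustration): the
+ * description handled here normally arrives via setsockopt(), e.g.
+ *
+ * const char desc[] = "afs@example.com";
+ * setsockopt(fd, SOL_RXRPC, RXRPC_SECURITY_KEY, desc, strlen(desc));
+ *
+ * after which request_key_net() above searches the calling process's
+ * keyrings for an "rxrpc"-type key with that description.
+ */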
+
+/*
+ * generate a server data key
+ */
+int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
+ const void *session_key,
+ time64_t expiry,
+ u32 kvno)
+{
+ const struct cred *cred = current_cred();
+ struct key *key;
+ int ret;
+
+ struct {
+ u32 kver;
+ struct rxrpc_key_data_v1 v1;
+ } data;
+
+ _enter("");
+
+ key = key_alloc(&key_type_rxrpc, "x",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0,
+ KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ if (IS_ERR(key)) {
+ _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
+ return -ENOMEM;
+ }
+
+ _debug("key %d", key_serial(key));
+
+ data.kver = 1;
+ data.v1.security_index = RXRPC_SECURITY_RXKAD;
+ data.v1.ticket_length = 0;
+ data.v1.expiry = rxrpc_time64_to_u32(expiry);
+ data.v1.kvno = 0;
+
+ memcpy(&data.v1.session_key, session_key, sizeof(data.v1.session_key));
+
+ ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL);
+ if (ret < 0)
+ goto error;
+
+ conn->key = key;
+ _leave(" = 0 [%d]", key_serial(key));
+ return 0;
+
+error:
+ key_revoke(key);
+ key_put(key);
+ _leave(" = -ENOMEM [ins %d]", ret);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(rxrpc_get_server_data_key);
+
+/**
+ * rxrpc_get_null_key - Generate a null RxRPC key
+ * @keyname: The name to give the key.
+ *
+ * Generate a null RxRPC key that can be used to indicate anonymous security is
+ * required for a particular domain.
+ */
+struct key *rxrpc_get_null_key(const char *keyname)
+{
+ const struct cred *cred = current_cred();
+ struct key *key;
+ int ret;
+
+ key = key_alloc(&key_type_rxrpc, keyname,
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
+ KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ if (IS_ERR(key))
+ return key;
+
+ ret = key_instantiate_and_link(key, NULL, 0, NULL, NULL);
+ if (ret < 0) {
+ key_revoke(key);
+ key_put(key);
+ return ERR_PTR(ret);
+ }
+
+ return key;
+}
+EXPORT_SYMBOL(rxrpc_get_null_key);
+
+/*
+ * read the contents of an rxrpc key
+ * - this returns the result in XDR form
+ */
+static long rxrpc_read(const struct key *key,
+ char *buffer, size_t buflen)
+{
+ const struct rxrpc_key_token *token;
+ size_t size;
+ __be32 *xdr, *oldxdr;
+ u32 cnlen, toksize, ntoks, tok, zero;
+ u16 toksizes[AFSTOKEN_MAX];
+
+ _enter("");
+
+ /* we don't know what form we should return non-AFS keys in */
+ if (memcmp(key->description, "afs@", 4) != 0)
+ return -EOPNOTSUPP;
+ cnlen = strlen(key->description + 4);
+
+#define RND(X) (((X) + 3) & ~3)
+
+ /* AFS keys we return in XDR form, so we need to work out the size of
+ * the XDR */
+ size = 2 * 4; /* flags, cellname len */
+ size += RND(cnlen); /* cellname */
+ size += 1 * 4; /* token count */
+
+ ntoks = 0;
+ for (token = key->payload.data[0]; token; token = token->next) {
+ toksize = 4; /* sec index */
+
+ switch (token->security_index) {
+ case RXRPC_SECURITY_RXKAD:
+ toksize += 8 * 4; /* viceid, kvno, key*2, begin,
+ * end, primary, tktlen */
+ if (!token->no_leak_key)
+ toksize += RND(token->kad->ticket_len);
+ break;
+
+ default: /* we have a ticket we can't encode */
+ pr_err("Unsupported key token type (%u)\n",
+ token->security_index);
+ return -ENOPKG;
+ }
+
+ _debug("token[%u]: toksize=%u", ntoks, toksize);
+ if (WARN_ON(toksize > AFSTOKEN_LENGTH_MAX))
+ return -EIO;
+
+ toksizes[ntoks++] = toksize;
+ size += toksize + 4; /* each token has a length word */
+ }
+
+#undef RND
+
+ if (!buffer || buflen < size)
+ return size;
+
+ xdr = (__be32 *)buffer;
+ zero = 0;
+#define ENCODE(x) \
+ do { \
+ *xdr++ = htonl(x); \
+ } while(0)
+#define ENCODE_DATA(l, s) \
+ do { \
+ u32 _l = (l); \
+ ENCODE(l); \
+ memcpy(xdr, (s), _l); \
+ if (_l & 3) \
+ memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \
+ xdr += (_l + 3) >> 2; \
+ } while(0)
+#define ENCODE_BYTES(l, s) \
+ do { \
+ u32 _l = (l); \
+ memcpy(xdr, (s), _l); \
+ if (_l & 3) \
+ memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \
+ xdr += (_l + 3) >> 2; \
+ } while(0)
+#define ENCODE64(x) \
+ do { \
+ __be64 y = cpu_to_be64(x); \
+ memcpy(xdr, &y, 8); \
+ xdr += 8 >> 2; \
+ } while(0)
+#define ENCODE_STR(s) \
+ do { \
+ const char *_s = (s); \
+ ENCODE_DATA(strlen(_s), _s); \
+ } while(0)
+
+ ENCODE(0); /* flags */
+ ENCODE_DATA(cnlen, key->description + 4); /* cellname */
+ ENCODE(ntoks);
+
+ tok = 0;
+ for (token = key->payload.data[0]; token; token = token->next) {
+ toksize = toksizes[tok++];
+ ENCODE(toksize);
+ oldxdr = xdr;
+ ENCODE(token->security_index);
+
+ switch (token->security_index) {
+ case RXRPC_SECURITY_RXKAD:
+ ENCODE(token->kad->vice_id);
+ ENCODE(token->kad->kvno);
+ ENCODE_BYTES(8, token->kad->session_key);
+ ENCODE(token->kad->start);
+ ENCODE(token->kad->expiry);
+ ENCODE(token->kad->primary_flag);
+ if (token->no_leak_key)
+ ENCODE(0);
+ else
+ ENCODE_DATA(token->kad->ticket_len, token->kad->ticket);
+ break;
+
+ default:
+ pr_err("Unsupported key token type (%u)\n",
+ token->security_index);
+ return -ENOPKG;
+ }
+
+ if (WARN_ON((unsigned long)xdr - (unsigned long)oldxdr !=
+ toksize))
+ return -EIO;
+ }
+
+#undef ENCODE_STR
+#undef ENCODE_DATA
+#undef ENCODE64
+#undef ENCODE
+
+ if (WARN_ON(tok != ntoks))
+ return -EIO;
+ if (WARN_ON((unsigned long)xdr - (unsigned long)buffer != size))
+ return -EIO;
+ _leave(" = %zu", size);
+ return size;
+}
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
new file mode 100644
index 0000000000..993c69f974
--- /dev/null
+++ b/net/rxrpc/local_event.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AF_RXRPC local endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <generated/utsrelease.h>
+#include "ar-internal.h"
+
+static char rxrpc_version_string[65]; // "linux-" UTS_RELEASE " AF_RXRPC";
+
+/*
+ * Generate the VERSION packet string.
+ */
+void rxrpc_gen_version_string(void)
+{
+ snprintf(rxrpc_version_string, sizeof(rxrpc_version_string),
+ "linux-%.49s AF_RXRPC", UTS_RELEASE);
+}
+
+/*
+ * Reply to a version request
+ */
+void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_host_header *hdr,
+ struct sk_buff *skb)
+{
+ struct rxrpc_wire_header whdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct sockaddr_rxrpc srx;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ return;
+
+ msg.msg_name = &srx.transport;
+ msg.msg_namelen = srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(sp->hdr.epoch);
+ whdr.cid = htonl(sp->hdr.cid);
+ whdr.callNumber = htonl(sp->hdr.callNumber);
+ whdr.seq = 0;
+ whdr.serial = 0;
+ whdr.type = RXRPC_PACKET_TYPE_VERSION;
+ whdr.flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
+ whdr.userStatus = 0;
+ whdr.securityIndex = 0;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(sp->hdr.serviceId);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = (char *)rxrpc_version_string;
+ iov[1].iov_len = sizeof(rxrpc_version_string);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+ rxrpc_tx_point_version_reply);
+ else
+ trace_rxrpc_tx_packet(local->debug_id, &whdr,
+ rxrpc_tx_point_version_reply);
+
+ _leave("");
+}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
new file mode 100644
index 0000000000..34d3073681
--- /dev/null
+++ b/net/rxrpc/local_object.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Local endpoint object management
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <linux/hashtable.h>
+#include <net/sock.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_local_rcu(struct rcu_head *);
+
+/*
+ * Handle an ICMP/ICMP6 error turning up at the tunnel. Push it through the
+ * usual mechanism so that it gets parsed and presented through the UDP
+ * socket's error_report().
+ */
+static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload)
+{
+ if (ip_hdr(skb)->version == IPVERSION)
+ return ip_icmp_error(sk, skb, err, port, info, payload);
+ if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6))
+ return ipv6_icmp_error(sk, skb, err, port, info, payload);
+}
+
+/*
+ * Set or clear the Don't Fragment flag on a socket.
+ */
+void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set)
+{
+ if (set)
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
+ else
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DONT);
+}
+
+/*
+ * Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
+ * same or greater than.
+ *
+ * We explicitly don't compare the RxRPC service ID as we want to reject
+ * conflicting uses by differing services. Further, we don't want to share
+ * addresses with different options (IPv6), so we don't compare those bits
+ * either.
+ */
+static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx)
+{
+ long diff;
+
+ diff = ((local->srx.transport_type - srx->transport_type) ?:
+ (local->srx.transport_len - srx->transport_len) ?:
+ (local->srx.transport.family - srx->transport.family));
+ if (diff != 0)
+ return diff;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ /* If the choice of UDP port is left up to the transport, then
+ * the endpoint record doesn't match.
+ */
+ return ((u16 __force)local->srx.transport.sin.sin_port -
+ (u16 __force)srx->transport.sin.sin_port) ?:
+ memcmp(&local->srx.transport.sin.sin_addr,
+ &srx->transport.sin.sin_addr,
+ sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ /* If the choice of UDP6 port is left up to the transport, then
+ * the endpoint record doesn't match.
+ */
+ return ((u16 __force)local->srx.transport.sin6.sin6_port -
+ (u16 __force)srx->transport.sin6.sin6_port) ?:
+ memcmp(&local->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+#endif
+ default:
+ BUG();
+ }
+}
+
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_local *local =
+ container_of(timer, struct rxrpc_local, client_conn_reap_timer);
+
+ if (!local->kill_all_client_conns &&
+ test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags))
+ rxrpc_wake_up_io_thread(local);
+}
+
+/*
+ * Allocate a new local endpoint.
+ */
+static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
+ const struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+ u32 tmp;
+
+ local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
+ if (local) {
+ refcount_set(&local->ref, 1);
+ atomic_set(&local->active_users, 1);
+ local->net = net;
+ local->rxnet = rxrpc_net(net);
+ INIT_HLIST_NODE(&local->link);
+ init_completion(&local->io_thread_ready);
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb_queue_head_init(&local->rx_delay_queue);
+#endif
+ skb_queue_head_init(&local->rx_queue);
+ INIT_LIST_HEAD(&local->conn_attend_q);
+ INIT_LIST_HEAD(&local->call_attend_q);
+
+ local->client_bundles = RB_ROOT;
+ spin_lock_init(&local->client_bundles_lock);
+ local->kill_all_client_conns = false;
+ INIT_LIST_HEAD(&local->idle_client_conns);
+ timer_setup(&local->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
+
+ spin_lock_init(&local->lock);
+ rwlock_init(&local->services_lock);
+ local->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ memcpy(&local->srx, srx, sizeof(*srx));
+ local->srx.srx_service = 0;
+ idr_init(&local->conn_ids);
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp &= 0x3fffffff;
+ if (tmp == 0)
+ tmp = 1;
+ idr_set_cursor(&local->conn_ids, tmp);
+ INIT_LIST_HEAD(&local->new_client_calls);
+ spin_lock_init(&local->client_call_lock);
+
+ trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
+ }
+
+ _leave(" = %p", local);
+ return local;
+}
+
+/*
+ * create the local socket
+ * - must be called with rxrpc_local_mutex locked
+ */
+static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
+{
+ struct udp_tunnel_sock_cfg tuncfg = {NULL};
+ struct sockaddr_rxrpc *srx = &local->srx;
+ struct udp_port_cfg udp_conf = {0};
+ struct task_struct *io_thread;
+ struct sock *usk;
+ int ret;
+
+ _enter("%p{%d,%d}",
+ local, srx->transport_type, srx->transport.family);
+
+ udp_conf.family = srx->transport.family;
+ udp_conf.use_udp_checksums = true;
+ if (udp_conf.family == AF_INET) {
+ udp_conf.local_ip = srx->transport.sin.sin_addr;
+ udp_conf.local_udp_port = srx->transport.sin.sin_port;
+#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6)
+ } else {
+ udp_conf.local_ip6 = srx->transport.sin6.sin6_addr;
+ udp_conf.local_udp_port = srx->transport.sin6.sin6_port;
+ udp_conf.use_udp6_tx_checksums = true;
+ udp_conf.use_udp6_rx_checksums = true;
+#endif
+ }
+ ret = udp_sock_create(net, &udp_conf, &local->socket);
+ if (ret < 0) {
+ _leave(" = %d [socket]", ret);
+ return ret;
+ }
+
+ tuncfg.encap_type = UDP_ENCAP_RXRPC;
+ tuncfg.encap_rcv = rxrpc_encap_rcv;
+ tuncfg.encap_err_rcv = rxrpc_encap_err_rcv;
+ tuncfg.sk_user_data = local;
+ setup_udp_tunnel_sock(net, local->socket, &tuncfg);
+
+ /* set the socket up */
+ usk = local->socket->sk;
+ usk->sk_error_report = rxrpc_error_report;
+
+ switch (srx->transport.family) {
+ case AF_INET6:
+ /* we want to receive ICMPv6 errors */
+ ip6_sock_set_recverr(usk);
+
+ /* Fall through and set IPv4 options too otherwise we don't get
+ * errors from IPv4 packets sent through the IPv6 socket.
+ */
+ fallthrough;
+ case AF_INET:
+ /* we want to receive ICMP errors */
+ ip_sock_set_recverr(usk);
+
+ /* we want to set the don't fragment bit */
+ rxrpc_local_dont_fragment(local, true);
+
+ /* We want receive timestamps. */
+ sock_enable_timestamps(usk);
+ break;
+
+ default:
+ BUG();
+ }
+
+ io_thread = kthread_run(rxrpc_io_thread, local,
+ "krxrpcio/%u", ntohs(udp_conf.local_udp_port));
+ if (IS_ERR(io_thread)) {
+ ret = PTR_ERR(io_thread);
+ goto error_sock;
+ }
+
+ wait_for_completion(&local->io_thread_ready);
+ local->io_thread = io_thread;
+ _leave(" = 0");
+ return 0;
+
+error_sock:
+ kernel_sock_shutdown(local->socket, SHUT_RDWR);
+ local->socket->sk->sk_user_data = NULL;
+ sock_release(local->socket);
+ local->socket = NULL;
+ return ret;
+}
+
+/*
+ * Look up or create a new local endpoint using the specified local address.
+ */
+struct rxrpc_local *rxrpc_lookup_local(struct net *net,
+ const struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_local *local;
+ struct rxrpc_net *rxnet = rxrpc_net(net);
+ struct hlist_node *cursor;
+ long diff;
+ int ret;
+
+ _enter("{%d,%d,%pISp}",
+ srx->transport_type, srx->transport.family, &srx->transport);
+
+ mutex_lock(&rxnet->local_mutex);
+
+ hlist_for_each(cursor, &rxnet->local_endpoints) {
+ local = hlist_entry(cursor, struct rxrpc_local, link);
+
+ diff = rxrpc_local_cmp_key(local, srx);
+ if (diff != 0)
+ continue;
+
+ /* Services aren't allowed to share transport sockets, so
+ * reject that here. It is possible that the object is dying -
+ * but it may also still have the local transport address that
+ * we want bound.
+ */
+ if (srx->srx_service) {
+ local = NULL;
+ goto addr_in_use;
+ }
+
+ /* Found a match. We want to replace a dying object.
+ * Attempting to bind the transport socket may still fail if
+ * we're attempting to use a local address that the dying
+ * object is still using.
+ */
+ if (!rxrpc_use_local(local, rxrpc_local_use_lookup))
+ break;
+
+ goto found;
+ }
+
+ local = rxrpc_alloc_local(net, srx);
+ if (!local)
+ goto nomem;
+
+ ret = rxrpc_open_socket(local, net);
+ if (ret < 0)
+ goto sock_error;
+
+ if (cursor) {
+ hlist_replace_rcu(cursor, &local->link);
+ cursor->pprev = NULL;
+ } else {
+ hlist_add_head_rcu(&local->link, &rxnet->local_endpoints);
+ }
+
+found:
+ mutex_unlock(&rxnet->local_mutex);
+ _leave(" = %p", local);
+ return local;
+
+nomem:
+ ret = -ENOMEM;
+sock_error:
+ mutex_unlock(&rxnet->local_mutex);
+ if (local)
+ call_rcu(&local->rcu, rxrpc_local_rcu);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+
+addr_in_use:
+ mutex_unlock(&rxnet->local_mutex);
+ _leave(" = -EADDRINUSE");
+ return ERR_PTR(-EADDRINUSE);
+}
+
+/*
+ * Get a ref on a local endpoint.
+ */
+struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
+{
+ int r, u;
+
+ u = atomic_read(&local->active_users);
+ __refcount_inc(&local->ref, &r);
+ trace_rxrpc_local(local->debug_id, why, r + 1, u);
+ return local;
+}
+
+/*
+ * Get a ref on a local endpoint unless its usage has already reached 0.
+ */
+struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
+{
+ int r, u;
+
+ if (local && __refcount_inc_not_zero(&local->ref, &r)) {
+ u = atomic_read(&local->active_users);
+ trace_rxrpc_local(local->debug_id, why, r + 1, u);
+ return local;
+ }
+
+ return NULL;
+}
+
+/*
+ * Drop a ref on a local endpoint.
+ */
+void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
+{
+ unsigned int debug_id;
+ bool dead;
+ int r, u;
+
+ if (local) {
+ debug_id = local->debug_id;
+
+ u = atomic_read(&local->active_users);
+ dead = __refcount_dec_and_test(&local->ref, &r);
+ trace_rxrpc_local(debug_id, why, r, u);
+
+ if (dead)
+ call_rcu(&local->rcu, rxrpc_local_rcu);
+ }
+}
+
+/*
+ * Start using a local endpoint.
+ */
+struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local,
+ enum rxrpc_local_trace why)
+{
+ local = rxrpc_get_local_maybe(local, rxrpc_local_get_for_use);
+ if (!local)
+ return NULL;
+
+ if (!__rxrpc_use_local(local, why)) {
+ rxrpc_put_local(local, rxrpc_local_put_for_use);
+ return NULL;
+ }
+
+ return local;
+}
+
+/*
+ * Cease using a local endpoint. Once the number of active users reaches 0, we
+ * start the closure of the transport in the I/O thread.
+ */
+void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
+{
+ unsigned int debug_id;
+ int r, u;
+
+ if (local) {
+ debug_id = local->debug_id;
+ r = refcount_read(&local->ref);
+ u = atomic_dec_return(&local->active_users);
+ trace_rxrpc_local(debug_id, why, r, u);
+ if (u == 0)
+ kthread_stop(local->io_thread);
+ }
+}
+
+/*
+ * Destroy a local endpoint's socket and then hand the record to RCU to dispose
+ * of.
+ *
+ * Closing the socket cannot be done from bottom half context or RCU callback
+ * context because it might sleep.
+ */
+void rxrpc_destroy_local(struct rxrpc_local *local)
+{
+ struct socket *socket = local->socket;
+ struct rxrpc_net *rxnet = local->rxnet;
+
+ _enter("%d", local->debug_id);
+
+ local->dead = true;
+
+ mutex_lock(&rxnet->local_mutex);
+ hlist_del_init_rcu(&local->link);
+ mutex_unlock(&rxnet->local_mutex);
+
+ rxrpc_clean_up_local_conns(local);
+ rxrpc_service_connection_reaper(&rxnet->service_conn_reaper);
+ ASSERT(!local->service);
+
+ if (socket) {
+ local->socket = NULL;
+ kernel_sock_shutdown(socket, SHUT_RDWR);
+ socket->sk->sk_user_data = NULL;
+ sock_release(socket);
+ }
+
+ /* At this point, there should be no more packets coming in to the
+ * local endpoint.
+ */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ rxrpc_purge_queue(&local->rx_delay_queue);
+#endif
+ rxrpc_purge_queue(&local->rx_queue);
+ rxrpc_purge_client_connections(local);
+}
+
+/*
+ * Destroy a local endpoint after the RCU grace period expires.
+ */
+static void rxrpc_local_rcu(struct rcu_head *rcu)
+{
+ struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu);
+
+ rxrpc_see_local(local, rxrpc_local_free);
+ kfree(local);
+}
+
+/*
+ * Verify the local endpoint list is empty by this point.
+ */
+void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet)
+{
+ struct rxrpc_local *local;
+
+ _enter("");
+
+ flush_workqueue(rxrpc_workqueue);
+
+ if (!hlist_empty(&rxnet->local_endpoints)) {
+ mutex_lock(&rxnet->local_mutex);
+ hlist_for_each_entry(local, &rxnet->local_endpoints, link) {
+ pr_err("AF_RXRPC: Leaked local %p {%d}\n",
+ local, refcount_read(&local->ref));
+ }
+ mutex_unlock(&rxnet->local_mutex);
+ BUG();
+ }
+}
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
new file mode 100644
index 0000000000..825b811830
--- /dev/null
+++ b/net/rxrpc/misc.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Miscellaneous bits
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * The maximum listening backlog queue size that may be set on a socket by
+ * listen().
+ */
+unsigned int rxrpc_max_backlog __read_mostly = 10;
+
+/*
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ *
+ * We use this when we've received new data packets. If those packets aren't
+ * all consumed within this time and an ACK was not explicitly requested, we
+ * will send a DELAY ACK to let the sender know it doesn't need to resend.
+ */
+unsigned long rxrpc_soft_ack_delay = HZ;
+
+/*
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ *
+ * We use this when we've consumed some previously soft-ACK'd packets and no
+ * further packets have arrived immediately, to decide when to send an IDLE
+ * ACK to let the other end know that it can free up its Tx buffer space.
+ */
+unsigned long rxrpc_idle_ack_delay = HZ / 2;
+
+/*
+ * Receive window size in packets. This indicates the maximum number of
+ * unconsumed received packets we're willing to retain in memory. Once this
+ * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
+ * packets.
+ */
+unsigned int rxrpc_rx_window_size = 255;
+
+/*
+ * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
+ * made by gluing normal packets together that we're willing to handle.
+ */
+unsigned int rxrpc_rx_mtu = 5692;
+
+/*
+ * The maximum number of fragments in a received jumbo packet that we tell the
+ * sender that we're willing to handle.
+ */
+unsigned int rxrpc_rx_jumbo_max = 4;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+/*
+ * The delay to inject into packet reception.
+ */
+unsigned long rxrpc_inject_rx_delay;
+#endif
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
new file mode 100644
index 0000000000..a0319c040c
--- /dev/null
+++ b/net/rxrpc/net_ns.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* rxrpc network namespace handling.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/proc_fs.h>
+#include "ar-internal.h"
+
+unsigned int rxrpc_net_id;
+
+static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, service_conn_reap_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->service_conn_reaper);
+}
+
+static void rxrpc_peer_keepalive_timeout(struct timer_list *timer)
+{
+ struct rxrpc_net *rxnet =
+ container_of(timer, struct rxrpc_net, peer_keepalive_timer);
+
+ if (rxnet->live)
+ rxrpc_queue_work(&rxnet->peer_keepalive_work);
+}
+
+/*
+ * Initialise a per-network namespace record.
+ */
+static __net_init int rxrpc_init_net(struct net *net)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(net);
+ int ret, i;
+
+ rxnet->live = true;
+ get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
+ rxnet->epoch |= RXRPC_RANDOM_EPOCH;
+
+ INIT_LIST_HEAD(&rxnet->calls);
+ spin_lock_init(&rxnet->call_lock);
+ atomic_set(&rxnet->nr_calls, 1);
+
+ atomic_set(&rxnet->nr_conns, 1);
+ INIT_LIST_HEAD(&rxnet->conn_proc_list);
+ INIT_LIST_HEAD(&rxnet->service_conns);
+ rwlock_init(&rxnet->conn_lock);
+ INIT_WORK(&rxnet->service_conn_reaper,
+ rxrpc_service_connection_reaper);
+ timer_setup(&rxnet->service_conn_reap_timer,
+ rxrpc_service_conn_reap_timeout, 0);
+
+ atomic_set(&rxnet->nr_client_conns, 0);
+
+ INIT_HLIST_HEAD(&rxnet->local_endpoints);
+ mutex_init(&rxnet->local_mutex);
+
+ hash_init(rxnet->peer_hash);
+ spin_lock_init(&rxnet->peer_hash_lock);
+ for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++)
+ INIT_LIST_HEAD(&rxnet->peer_keepalive[i]);
+ INIT_LIST_HEAD(&rxnet->peer_keepalive_new);
+ timer_setup(&rxnet->peer_keepalive_timer,
+ rxrpc_peer_keepalive_timeout, 0);
+ INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
+ rxnet->peer_keepalive_base = ktime_get_seconds();
+
+ ret = -ENOMEM;
+ rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
+ if (!rxnet->proc_net)
+ goto err_proc;
+
+ proc_create_net("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_ops,
+ sizeof(struct seq_net_private));
+ proc_create_net("conns", 0444, rxnet->proc_net,
+ &rxrpc_connection_seq_ops,
+ sizeof(struct seq_net_private));
+ proc_create_net("peers", 0444, rxnet->proc_net,
+ &rxrpc_peer_seq_ops,
+ sizeof(struct seq_net_private));
+ proc_create_net("locals", 0444, rxnet->proc_net,
+ &rxrpc_local_seq_ops,
+ sizeof(struct seq_net_private));
+ proc_create_net_single_write("stats", S_IFREG | 0644, rxnet->proc_net,
+ rxrpc_stats_show, rxrpc_stats_clear, NULL);
+ return 0;
+
+err_proc:
+ rxnet->live = false;
+ return ret;
+}
+
+/*
+ * Clean up a per-network namespace record.
+ */
+static __net_exit void rxrpc_exit_net(struct net *net)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(net);
+
+ rxnet->live = false;
+ del_timer_sync(&rxnet->peer_keepalive_timer);
+ cancel_work_sync(&rxnet->peer_keepalive_work);
+ /* Remove the timer again as the worker may have restarted it. */
+ del_timer_sync(&rxnet->peer_keepalive_timer);
+ rxrpc_destroy_all_calls(rxnet);
+ rxrpc_destroy_all_connections(rxnet);
+ rxrpc_destroy_all_peers(rxnet);
+ rxrpc_destroy_all_locals(rxnet);
+ proc_remove(rxnet->proc_net);
+}
+
+struct pernet_operations rxrpc_net_ops = {
+ .init = rxrpc_init_net,
+ .exit = rxrpc_exit_net,
+ .id = &rxrpc_net_id,
+ .size = sizeof(struct rxrpc_net),
+};
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
new file mode 100644
index 0000000000..a0906145e8
--- /dev/null
+++ b/net/rxrpc/output.c
@@ -0,0 +1,733 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC packet transmission
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/udp.h>
+#include "ar-internal.h"
+
+extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
+
+static ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
+{
+ struct sockaddr *sa = msg->msg_name;
+ struct sock *sk = socket->sk;
+
+ if (IS_ENABLED(CONFIG_AF_RXRPC_IPV6)) {
+ if (sa->sa_family == AF_INET6) {
+ if (sk->sk_family != AF_INET6) {
+ pr_warn("AF_INET6 address on AF_INET socket\n");
+ return -ENOPROTOOPT;
+ }
+ return udpv6_sendmsg(sk, msg, len);
+ }
+ }
+ return udp_sendmsg(sk, msg, len);
+}
+
+struct rxrpc_abort_buffer {
+ struct rxrpc_wire_header whdr;
+ __be32 abort_code;
+};
+
+static const char rxrpc_keepalive_string[] = "";
+
+/*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+ if (ret < 0) {
+ u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+ if (tx_backoff < HZ)
+ WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+ } else {
+ WRITE_ONCE(call->tx_backoff, 0);
+ }
+}
+
+/*
+ * Arrange for a keepalive ping a certain time after we last transmitted. This
+ * lets the far side know we're still interested in this call and helps keep
+ * the route through any intervening firewall open.
+ *
+ * Receiving a response to the ping will prevent the ->expect_rx_by timer from
+ * expiring.
+ */
+static void rxrpc_set_keepalive(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6;
+
+ keepalive_at += now;
+ WRITE_ONCE(call->keepalive_at, keepalive_at);
+ rxrpc_reduce_call_timer(call, keepalive_at, now,
+ rxrpc_timer_set_for_keepalive);
+}
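+
+/*
+ * Worked example (illustrative figure): with a next_rx_timo of 12 seconds,
+ * the calculation above schedules the keepalive ping 2 seconds after the
+ * last transmission, so a ping response should arrive long before the
+ * ->expect_rx_by timer could expire.
+ */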
+
+/*
+ * Fill out an ACK packet.
+ */
+static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
+ struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ u16 *_rwind)
+{
+ struct rxrpc_ackinfo ackinfo;
+ unsigned int qsize, sack, wrap, to;
+ rxrpc_seq_t window, wtop;
+ int rsize;
+ u32 mtu, jmax;
+ u8 *ackp = txb->acks;
+
+ call->ackr_nr_unacked = 0;
+ atomic_set(&call->ackr_nr_consumed, 0);
+ rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
+
+ window = call->ackr_window;
+ wtop = call->ackr_wtop;
+ sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
+ txb->ack.firstPacket = htonl(window);
+ txb->ack.nAcks = wtop - window;
+
+ if (after(wtop, window)) {
+ wrap = RXRPC_SACK_SIZE - sack;
+ to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
+
+ if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
+ } else {
+ memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
+ memcpy(txb->acks + wrap, call->ackr_sack_table,
+ to - wrap);
+ }
+
+ ackp += to;
+ } else if (before(wtop, window)) {
+ pr_warn("ack window backward %x %x", window, wtop);
+ } else if (txb->ack.reason == RXRPC_ACK_DELAY) {
+ txb->ack.reason = RXRPC_ACK_IDLE;
+ }
+
+ mtu = conn->peer->if_mtu;
+ mtu -= conn->peer->hdrsize;
+ jmax = rxrpc_rx_jumbo_max;
+ qsize = (window - 1) - call->rx_consumed;
+ rsize = max_t(int, call->rx_winsize - qsize, 0);
+ *_rwind = rsize;
+ ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
+ ackinfo.maxMTU = htonl(mtu);
+ ackinfo.rwind = htonl(rsize);
+ ackinfo.jumbo_max = htonl(jmax);
+
+ *ackp++ = 0;
+ *ackp++ = 0;
+ *ackp++ = 0;
+ memcpy(ackp, &ackinfo, sizeof(ackinfo));
+ return txb->ack.nAcks + 3 + sizeof(ackinfo);
+}
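+
+/*
+ * Worked example of the soft-ACK table copy above (illustrative numbers):
+ * with RXRPC_SACK_SIZE 256, window 1000, wtop 1010 and an ackr_sack_base of
+ * 250, nAcks is 10 and sack is 250.  The ring wraps, so the first memcpy
+ * takes the 6 entries in slots 250-255 and the second takes the remaining
+ * 4 entries from slots 0-3.
+ */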
+
+/*
+ * Record the beginning of an RTT probe.
+ */
+static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial,
+ enum rxrpc_rtt_tx_trace why)
+{
+ unsigned long avail = call->rtt_avail;
+ int rtt_slot = 9;
+
+ if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK))
+ goto no_slot;
+
+ rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK);
+ if (!test_and_clear_bit(rtt_slot, &call->rtt_avail))
+ goto no_slot;
+
+ call->rtt_serial[rtt_slot] = serial;
+ call->rtt_sent_at[rtt_slot] = ktime_get_real();
+ smp_wmb(); /* Write data before avail bit */
+ set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
+
+ trace_rxrpc_rtt_tx(call, why, rtt_slot, serial);
+ return rtt_slot;
+
+no_slot:
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial);
+ return -1;
+}
+
+/*
+ * Cancel an RTT probe.
+ */
+static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call,
+ rxrpc_serial_t serial, int rtt_slot)
+{
+ if (rtt_slot != -1) {
+ clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail);
+ smp_wmb(); /* Clear pending bit before setting slot */
+ set_bit(rtt_slot, &call->rtt_avail);
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial);
+ }
+}
+
+/*
+ * Transmit an ACK packet.
+ */
+int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ struct rxrpc_connection *conn;
+ struct msghdr msg;
+ struct kvec iov[1];
+ rxrpc_serial_t serial;
+ size_t len, n;
+ int ret, rtt_slot = -1;
+ u16 rwind;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return -ECONNRESET;
+
+ conn = call->conn;
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ if (txb->ack.reason == RXRPC_ACK_PING)
+ txb->wire.flags |= RXRPC_REQUEST_ACK;
+
+ n = rxrpc_fill_out_ack(conn, call, txb, &rwind);
+ if (n == 0)
+ return 0;
+
+ iov[0].iov_base = &txb->wire;
+ iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
+ len = iov[0].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ txb->wire.serial = htonl(serial);
+ trace_rxrpc_tx_ack(call->debug_id, serial,
+ ntohl(txb->ack.firstPacket),
+ ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks,
+ rwind);
+
+ if (txb->ack.reason == RXRPC_ACK_PING)
+ rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
+
+ rxrpc_inc_stat(call->rxnet, stat_tx_ack_send);
+
+ /* Grab the highest received seq as late as possible */
+ txb->ack.previousPacket = htonl(call->rx_highest_seq);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ call->peer->last_tx_at = ktime_get_seconds();
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_point_call_ack);
+ } else {
+ trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
+ rxrpc_tx_point_call_ack);
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = ktime_get_real();
+ }
+ rxrpc_tx_backoff(call, ret);
+
+ if (!__rxrpc_call_is_complete(call)) {
+ if (ret < 0)
+ rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
+ rxrpc_set_keepalive(call);
+ }
+
+ return ret;
+}
+
+/*
+ * Send an ABORT call packet.
+ */
+int rxrpc_send_abort_packet(struct rxrpc_call *call)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_abort_buffer pkt;
+ struct msghdr msg;
+ struct kvec iov[1];
+ rxrpc_serial_t serial;
+ int ret;
+
+ /* Don't bother sending aborts for a client call once the server has
+ * hard-ACK'd all of its request data. After that point, we're not
+ * going to stop the operation proceeding, and whilst we might limit
+ * the reply, it's not worth it if we can send a new call on the same
+ * channel instead, thereby closing off this call.
+ */
+ if (rxrpc_is_client_call(call) &&
+ test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags))
+ return 0;
+
+ if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ return -ECONNRESET;
+
+ conn = call->conn;
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ pkt.whdr.epoch = htonl(conn->proto.epoch);
+ pkt.whdr.cid = htonl(call->cid);
+ pkt.whdr.callNumber = htonl(call->call_id);
+ pkt.whdr.seq = 0;
+ pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ pkt.whdr.flags = conn->out_clientflag;
+ pkt.whdr.userStatus = 0;
+ pkt.whdr.securityIndex = call->security_ix;
+ pkt.whdr._rsvd = 0;
+ pkt.whdr.serviceId = htons(call->dest_srx.srx_service);
+ pkt.abort_code = htonl(call->abort_code);
+
+ iov[0].iov_base = &pkt;
+ iov[0].iov_len = sizeof(pkt);
+
+ serial = atomic_inc_return(&conn->serial);
+ pkt.whdr.serial = htonl(serial);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
+ ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt));
+ conn->peer->last_tx_at = ktime_get_seconds();
+ if (ret < 0)
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_point_call_abort);
+ else
+ trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
+ rxrpc_tx_point_call_abort);
+ rxrpc_tx_backoff(call, ret);
+ return ret;
+}
+
+/*
+ * send a packet through the transport endpoint
+ */
+int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ enum rxrpc_req_ack_trace why;
+ struct rxrpc_connection *conn = call->conn;
+ struct msghdr msg;
+ struct kvec iov[1];
+ rxrpc_serial_t serial;
+ size_t len;
+ int ret, rtt_slot = -1;
+
+ _enter("%x,{%d}", txb->seq, txb->len);
+
+ /* Each transmission of a Tx packet needs a new serial number */
+ serial = atomic_inc_return(&conn->serial);
+ txb->wire.serial = htonl(serial);
+
+ if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
+ txb->seq == 1)
+ txb->wire.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE;
+
+ iov[0].iov_base = &txb->wire;
+ iov[0].iov_len = sizeof(txb->wire) + txb->len;
+ len = iov[0].iov_len;
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ /* If our RTT cache needs working on, request an ACK. Also request
+ * ACKs if a DATA packet appears to have been lost.
+ *
+ * However, we mustn't request an ACK on the last reply packet of a
+ * service call, lest OpenAFS incorrectly send us an ACK with some
+ * soft-ACKs in it and then never follow up with a proper hard ACK.
+ */
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ why = rxrpc_reqack_already_on;
+ else if (test_bit(RXRPC_TXBUF_LAST, &txb->flags) && rxrpc_sending_to_client(txb))
+ why = rxrpc_reqack_no_srv_last;
+ else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events))
+ why = rxrpc_reqack_ack_lost;
+ else if (test_bit(RXRPC_TXBUF_RESENT, &txb->flags))
+ why = rxrpc_reqack_retrans;
+ else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2)
+ why = rxrpc_reqack_slow_start;
+ else if (call->tx_winsize <= 2)
+ why = rxrpc_reqack_small_txwin;
+ else if (call->peer->rtt_count < 3 && txb->seq & 1)
+ why = rxrpc_reqack_more_rtt;
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))
+ why = rxrpc_reqack_old_rtt;
+ else
+ goto dont_set_request_ack;
+
+ rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]);
+ trace_rxrpc_req_ack(call->debug_id, txb->seq, why);
+ if (why != rxrpc_reqack_no_srv_last)
+ txb->wire.flags |= RXRPC_REQUEST_ACK;
+dont_set_request_ack:
+
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+ static int lose;
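+		/* Discard one packet in every eight to simulate loss. */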
+ if ((lose++ & 7) == 7) {
+ ret = 0;
+ trace_rxrpc_tx_data(call, txb->seq, serial,
+ txb->wire.flags,
+ test_bit(RXRPC_TXBUF_RESENT, &txb->flags),
+ true);
+ goto done;
+ }
+ }
+
+ trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags,
+ test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false);
+
+ /* Track what we've attempted to transmit at least once so that the
+ * retransmission algorithm doesn't try to resend what we haven't sent
+ * yet. However, this can race as we can receive an ACK before we get
+ * to this point. But, OTOH, if we won't get an ACK mentioning this
+ * packet unless the far side received it (though it could have
+ * discarded it anyway and NAK'd it).
+ */
+ cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq);
+
+ /* send the packet with the don't fragment bit set if we currently
+ * think it's small enough */
+ if (txb->len >= call->peer->maxdata)
+ goto send_fragmentable;
+
+ txb->last_sent = ktime_get_real();
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
+
+ /* send the packet by UDP
+ * - returns -EMSGSIZE if UDP would have to fragment the packet
+ * to go out of the interface
+ * - in which case, we'll have processed the ICMP error
+ * message and updated the peer record
+ */
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
+
+ if (ret < 0) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
+ rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_point_call_data_nofrag);
+ } else {
+ trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
+ rxrpc_tx_point_call_data_nofrag);
+ }
+
+ rxrpc_tx_backoff(call, ret);
+ if (ret == -EMSGSIZE)
+ goto send_fragmentable;
+
+done:
+ if (ret >= 0) {
+ call->tx_last_sent = txb->last_sent;
+ if (txb->wire.flags & RXRPC_REQUEST_ACK) {
+ call->peer->rtt_last_req = txb->last_sent;
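+			/* With more than one RTT sample, arm a timer to spot a
+			 * lost ACK one backed-off RTO from now.
+			 */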
+ if (call->peer->rtt_count > 1) {
+ unsigned long nowj = jiffies, ack_lost_at;
+
+ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
+ ack_lost_at += nowj;
+ WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+ rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+ rxrpc_timer_set_for_lost_ack);
+ }
+ }
+
+ if (txb->seq == 1 &&
+ !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER,
+ &call->flags)) {
+ unsigned long nowj = jiffies, expect_rx_by;
+
+ expect_rx_by = nowj + call->next_rx_timo;
+ WRITE_ONCE(call->expect_rx_by, expect_rx_by);
+ rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
+ rxrpc_timer_set_for_normal);
+ }
+
+ rxrpc_set_keepalive(call);
+ } else {
+ /* Cancel the call if the initial transmission fails,
+ * particularly if that's due to network routing issues that
+ * aren't going away anytime soon. The layer above can arrange
+ * the retransmission.
+ */
+ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_USER_ABORT, ret);
+ }
+
+ _leave(" = %d [%u]", ret, call->peer->maxdata);
+ return ret;
+
+send_fragmentable:
+ /* attempt to send this message with fragmentation enabled */
+ _debug("send fragment");
+
+ txb->last_sent = ktime_get_real();
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
+
+ switch (conn->local->srx.transport.family) {
+ case AF_INET6:
+ case AF_INET:
+ rxrpc_local_dont_fragment(conn->local, false);
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ conn->peer->last_tx_at = ktime_get_seconds();
+
+ rxrpc_local_dont_fragment(conn->local, true);
+ break;
+
+ default:
+ BUG();
+ }
+
+ if (ret < 0) {
+ rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
+ rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
+ trace_rxrpc_tx_fail(call->debug_id, serial, ret,
+ rxrpc_tx_point_call_data_frag);
+ } else {
+ trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
+ rxrpc_tx_point_call_data_frag);
+ }
+ rxrpc_tx_backoff(call, ret);
+ goto done;
+}
+
+/*
+ * Transmit a connection-level abort.
+ */
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ __be32 word;
+ size_t len;
+ u32 serial;
+ int ret;
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ whdr.flags = conn->out_clientflag;
+ whdr.userStatus = 0;
+ whdr.securityIndex = conn->security_ix;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(conn->service_id);
+
+ word = htonl(conn->abort_code);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &word;
+ iov[1].iov_len = sizeof(word);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ whdr.serial = htonl(serial);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_conn_abort);
+ _debug("sendmsg failed: %d", ret);
+ return;
+ }
+
+ trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+}
+
+/*
+ * Reject a packet through the local endpoint.
+ */
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ struct rxrpc_wire_header whdr;
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t size;
+ __be32 code;
+ int ret, ioc;
+
+ rxrpc_see_skb(skb, rxrpc_skb_see_reject);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &code;
+ iov[1].iov_len = sizeof(code);
+
+ msg.msg_name = &srx.transport;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ memset(&whdr, 0, sizeof(whdr));
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_REJECT_BUSY:
+ whdr.type = RXRPC_PACKET_TYPE_BUSY;
+ size = sizeof(whdr);
+ ioc = 1;
+ break;
+ case RXRPC_SKB_MARK_REJECT_ABORT:
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ code = htonl(skb->priority);
+ size = sizeof(whdr) + sizeof(code);
+ ioc = 2;
+ break;
+ default:
+ return;
+ }
+
+ if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ msg.msg_namelen = srx.transport_len;
+
+ whdr.epoch = htonl(sp->hdr.epoch);
+ whdr.cid = htonl(sp->hdr.cid);
+ whdr.callNumber = htonl(sp->hdr.callNumber);
+ whdr.serviceId = htons(sp->hdr.serviceId);
+ whdr.flags = sp->hdr.flags;
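+		/* Invert the client-initiated flag so the reply goes in the
+		 * opposite direction, and discard all other flags.
+		 */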
+ whdr.flags ^= RXRPC_CLIENT_INITIATED;
+ whdr.flags &= RXRPC_CLIENT_INITIATED;
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size);
+ ret = do_udp_sendmsg(local->socket, &msg, size);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(local->debug_id, 0, ret,
+ rxrpc_tx_point_reject);
+ else
+ trace_rxrpc_tx_packet(local->debug_id, &whdr,
+ rxrpc_tx_point_reject);
+ }
+}
+
+/*
+ * Send a VERSION reply to a peer as a keepalive.
+ */
+void rxrpc_send_keepalive(struct rxrpc_peer *peer)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ msg.msg_name = &peer->srx.transport;
+ msg.msg_namelen = peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(peer->local->rxnet->epoch);
+ whdr.cid = 0;
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.serial = 0;
+ whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */
+ whdr.flags = RXRPC_LAST_PACKET;
+ whdr.userStatus = 0;
+ whdr.securityIndex = 0;
+ whdr._rsvd = 0;
+ whdr.serviceId = 0;
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = (char *)rxrpc_keepalive_string;
+ iov[1].iov_len = sizeof(rxrpc_keepalive_string);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+ ret = do_udp_sendmsg(peer->local->socket, &msg, len);
+ if (ret < 0)
+ trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
+ rxrpc_tx_point_version_keepalive);
+ else
+ trace_rxrpc_tx_packet(peer->debug_id, &whdr,
+ rxrpc_tx_point_version_keepalive);
+
+ peer->last_tx_at = ktime_get_seconds();
+ _leave("");
+}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb)
+{
+ if (!__rxrpc_call_is_complete(call))
+ kdebug("resend");
+}
+
+/*
+ * Transmit one packet.
+ */
+void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ int ret;
+
+ ret = rxrpc_send_data_packet(call, txb);
+ if (ret < 0) {
+ switch (ret) {
+ case -ENETUNREACH:
+ case -EHOSTUNREACH:
+ case -ECONNREFUSED:
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ 0, ret);
+ break;
+ default:
+ _debug("need instant resend %d", ret);
+ rxrpc_instant_resend(call, txb);
+ }
+ } else {
+ unsigned long now = jiffies;
+ unsigned long resend_at = now + call->peer->rto_j;
+
+ WRITE_ONCE(call->resend_at, resend_at);
+ rxrpc_reduce_call_timer(call, resend_at, now,
+ rxrpc_timer_set_for_send);
+ }
+}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
new file mode 100644
index 0000000000..552ba84a25
--- /dev/null
+++ b/net/rxrpc/peer_event.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Peer event handling, typically ICMP messages.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+static void rxrpc_store_error(struct rxrpc_peer *, struct sk_buff *);
+static void rxrpc_distribute_error(struct rxrpc_peer *, struct sk_buff *,
+ enum rxrpc_call_completion, int);
+
+/*
+ * Find the peer associated with a local error.
+ */
+static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local,
+ const struct sk_buff *skb,
+ struct sockaddr_rxrpc *srx)
+{
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+
+ _enter("");
+
+ memset(srx, 0, sizeof(*srx));
+ srx->transport_type = local->srx.transport_type;
+ srx->transport_len = local->srx.transport_len;
+ srx->transport.family = local->srx.transport.family;
+
+ /* Can we see an ICMP4 packet on an ICMP6 listening socket, and vice
+ * versa?
+ */
+ switch (srx->transport.family) {
+ case AF_INET:
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = serr->port;
+ switch (serr->ee.ee_origin) {
+ case SO_EE_ORIGIN_ICMP:
+ memcpy(&srx->transport.sin.sin_addr,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in_addr));
+ break;
+ case SO_EE_ORIGIN_ICMP6:
+ memcpy(&srx->transport.sin.sin_addr,
+ skb_network_header(skb) + serr->addr_offset + 12,
+ sizeof(struct in_addr));
+ break;
+ default:
+ memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr,
+ sizeof(struct in_addr));
+ break;
+ }
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ switch (serr->ee.ee_origin) {
+ case SO_EE_ORIGIN_ICMP6:
+ srx->transport.sin6.sin6_port = serr->port;
+ memcpy(&srx->transport.sin6.sin6_addr,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in6_addr));
+ break;
+ case SO_EE_ORIGIN_ICMP:
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.family = AF_INET;
+ srx->transport.sin.sin_port = serr->port;
+ memcpy(&srx->transport.sin.sin_addr,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in_addr));
+ break;
+ default:
+ memcpy(&srx->transport.sin6.sin6_addr,
+ &ipv6_hdr(skb)->saddr,
+ sizeof(struct in6_addr));
+ break;
+ }
+ break;
+#endif
+
+ default:
+ BUG();
+ }
+
+ return rxrpc_lookup_peer_rcu(local, srx);
+}
+
+/*
+ * Handle an MTU/fragmentation problem.
+ */
+static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu)
+{
+ /* wind down the local interface MTU */
+ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu)
+ peer->if_mtu = mtu;
+
+ if (mtu == 0) {
+ /* they didn't give us a size, estimate one */
+ mtu = peer->if_mtu;
+ if (mtu > 1500) {
+ mtu >>= 1;
+ if (mtu < 1500)
+ mtu = 1500;
+ } else {
+ mtu -= 100;
+ if (mtu < peer->hdrsize)
+ mtu = peer->hdrsize + 4;
+ }
+ }
+
+ if (mtu < peer->mtu) {
+ spin_lock(&peer->lock);
+ peer->mtu = mtu;
+ peer->maxdata = peer->mtu - peer->hdrsize;
+ spin_unlock(&peer->lock);
+ }
+}
+
+/*
+ * Handle an error received on the local endpoint.
+ */
+void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ struct sockaddr_rxrpc srx;
+ struct rxrpc_peer *peer = NULL;
+
+ _enter("L=%x", local->debug_id);
+
+ if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
+ _leave("UDP empty message");
+ return;
+ }
+
+ rcu_read_lock();
+ peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx);
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error))
+ peer = NULL;
+ rcu_read_unlock();
+ if (!peer)
+ return;
+
+ trace_rxrpc_rx_icmp(peer, &serr->ee, &srx);
+
+ if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_type == ICMP_DEST_UNREACH &&
+ serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
+ rxrpc_adjust_mtu(peer, serr->ee.ee_info);
+ goto out;
+ }
+
+ rxrpc_store_error(peer, skb);
+out:
+ rxrpc_put_peer(peer, rxrpc_peer_put_input_error);
+}
+
+/*
+ * Map an error report to error codes on the peer record.
+ */
+static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb)
+{
+ enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR;
+ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
+ struct sock_extended_err *ee = &serr->ee;
+ int err = ee->ee_errno;
+
+ _enter("");
+
+ switch (ee->ee_origin) {
+ case SO_EE_ORIGIN_NONE:
+ case SO_EE_ORIGIN_LOCAL:
+ compl = RXRPC_CALL_LOCAL_ERROR;
+ break;
+
+ case SO_EE_ORIGIN_ICMP6:
+ if (err == EACCES)
+ err = EHOSTUNREACH;
+ fallthrough;
+ case SO_EE_ORIGIN_ICMP:
+ default:
+ break;
+ }
+
+ rxrpc_distribute_error(peer, skb, compl, err);
+}
+
+/*
+ * Distribute an error that occurred on a peer.
+ */
+static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb,
+ enum rxrpc_call_completion compl, int err)
+{
+ struct rxrpc_call *call;
+ HLIST_HEAD(error_targets);
+
+ spin_lock(&peer->lock);
+ hlist_move_list(&peer->error_targets, &error_targets);
+
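+	/* Take each call off the list and complete it with the peer lock
+	 * dropped.
+	 */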
+ while (!hlist_empty(&error_targets)) {
+ call = hlist_entry(error_targets.first,
+ struct rxrpc_call, error_link);
+ hlist_del_init(&call->error_link);
+ spin_unlock(&peer->lock);
+
+ rxrpc_see_call(call, rxrpc_call_see_distribute_error);
+ rxrpc_set_call_completion(call, compl, 0, -err);
+ rxrpc_input_call_event(call, skb);
+
+ spin_lock(&peer->lock);
+ }
+
+ spin_unlock(&peer->lock);
+}
+
+/*
+ * Perform keep-alive pings.
+ */
+static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
+ struct list_head *collector,
+ time64_t base,
+ u8 cursor)
+{
+ struct rxrpc_peer *peer;
+ const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
+ time64_t keepalive_at;
+ bool use;
+ int slot;
+
+ spin_lock(&rxnet->peer_hash_lock);
+
+ while (!list_empty(collector)) {
+ peer = list_entry(collector->next,
+ struct rxrpc_peer, keepalive_link);
+
+ list_del_init(&peer->keepalive_link);
+ if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive))
+ continue;
+
+ use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive);
+ spin_unlock(&rxnet->peer_hash_lock);
+
+ if (use) {
+ keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
+ slot = keepalive_at - base;
+ _debug("%02x peer %u t=%d {%pISp}",
+ cursor, peer->debug_id, slot, &peer->srx.transport);
+
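+			/* Send a keepalive now if one is due (or the due time
+			 * looks implausible) and schedule the next one a full
+			 * period ahead.
+			 */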
+ if (keepalive_at <= base ||
+ keepalive_at > base + RXRPC_KEEPALIVE_TIME) {
+ rxrpc_send_keepalive(peer);
+ slot = RXRPC_KEEPALIVE_TIME;
+ }
+
+ /* A transmission to this peer occurred since last we
+ * examined it so put it into the appropriate future
+ * bucket.
+ */
+ slot += cursor;
+ slot &= mask;
+ spin_lock(&rxnet->peer_hash_lock);
+ list_add_tail(&peer->keepalive_link,
+ &rxnet->peer_keepalive[slot & mask]);
+ spin_unlock(&rxnet->peer_hash_lock);
+ rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
+ }
+ rxrpc_put_peer(peer, rxrpc_peer_put_keepalive);
+ spin_lock(&rxnet->peer_hash_lock);
+ }
+
+ spin_unlock(&rxnet->peer_hash_lock);
+}
+
+/*
+ * Perform keep-alive pings with VERSION packets to keep any NAT alive.
+ */
+void rxrpc_peer_keepalive_worker(struct work_struct *work)
+{
+ struct rxrpc_net *rxnet =
+ container_of(work, struct rxrpc_net, peer_keepalive_work);
+ const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
+ time64_t base, now, delay;
+ u8 cursor, stop;
+ LIST_HEAD(collector);
+
+ now = ktime_get_seconds();
+ base = rxnet->peer_keepalive_base;
+ cursor = rxnet->peer_keepalive_cursor;
+ _enter("%lld,%u", base - now, cursor);
+
+ if (!rxnet->live)
+ return;
+
+ /* Move all the peers currently lodged in expired buckets, plus all new
+ * peers, onto a temporary list.
+ *
+ * Everything in the bucket at the cursor is processed this
+ * second; the bucket at cursor + 1 goes at now + 1s and so
+ * on...
+ */
+ spin_lock(&rxnet->peer_hash_lock);
+ list_splice_init(&rxnet->peer_keepalive_new, &collector);
+
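+	/* Sweep at most one full turn of the bucket ring; the (s8) difference
+	 * copes with the 8-bit cursor wrapping.
+	 */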
+ stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive);
+ while (base <= now && (s8)(cursor - stop) < 0) {
+ list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask],
+ &collector);
+ base++;
+ cursor++;
+ }
+
+ base = now;
+ spin_unlock(&rxnet->peer_hash_lock);
+
+ rxnet->peer_keepalive_base = base;
+ rxnet->peer_keepalive_cursor = cursor;
+ rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor);
+ ASSERT(list_empty(&collector));
+
+ /* Schedule the timer for the next occupied timeslot. */
+ cursor = rxnet->peer_keepalive_cursor;
+ stop = cursor + RXRPC_KEEPALIVE_TIME - 1;
+ for (; (s8)(cursor - stop) < 0; cursor++) {
+ if (!list_empty(&rxnet->peer_keepalive[cursor & mask]))
+ break;
+ base++;
+ }
+
+ now = ktime_get_seconds();
+ delay = base - now;
+ if (delay < 1)
+ delay = 1;
+ delay *= HZ;
+ if (rxnet->live)
+ timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay);
+
+ _leave("");
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
new file mode 100644
index 0000000000..8d7a715a0b
--- /dev/null
+++ b/net/rxrpc/peer_object.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC remote transport endpoint record management
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+#include <linux/hashtable.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+#include "ar-internal.h"
+
+/*
+ * Hash a peer key.
+ */
+static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx)
+{
+ const u16 *p;
+ unsigned int i, size;
+ unsigned long hash_key;
+
+ _enter("");
+
+ hash_key = (unsigned long)local / __alignof__(*local);
+ hash_key += srx->transport_type;
+ hash_key += srx->transport_len;
+ hash_key += srx->transport.family;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ hash_key += (u16 __force)srx->transport.sin.sin_port;
+ size = sizeof(srx->transport.sin.sin_addr);
+ p = (u16 *)&srx->transport.sin.sin_addr;
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ hash_key += (u16 __force)srx->transport.sin.sin_port;
+ size = sizeof(srx->transport.sin6.sin6_addr);
+ p = (u16 *)&srx->transport.sin6.sin6_addr;
+ break;
+#endif
+ default:
+ WARN(1, "AF_RXRPC: Unsupported transport address family\n");
+ return 0;
+ }
+
+ /* Step through the peer address in 16-bit portions for speed */
+ for (i = 0; i < size; i += sizeof(*p), p++)
+ hash_key += *p;
+
+ _leave(" 0x%lx", hash_key);
+ return hash_key;
+}
+
+/*
+ * Compare a peer to a key. Return -ve, 0 or +ve to indicate less than, same
+ * or greater than.
+ *
+ * Unfortunately, the primitives in linux/hashtable.h don't allow for sorted
+ * buckets and mid-bucket insertion, so we don't make full use of this
+ * information at this point.
+ */
+static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer,
+ struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx,
+ unsigned long hash_key)
+{
+ long diff;
+
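+	/* The ?: chain picks the first non-zero comparison result. */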
+ diff = ((peer->hash_key - hash_key) ?:
+ ((unsigned long)peer->local - (unsigned long)local) ?:
+ (peer->srx.transport_type - srx->transport_type) ?:
+ (peer->srx.transport_len - srx->transport_len) ?:
+ (peer->srx.transport.family - srx->transport.family));
+ if (diff != 0)
+ return diff;
+
+ switch (srx->transport.family) {
+ case AF_INET:
+ return ((u16 __force)peer->srx.transport.sin.sin_port -
+ (u16 __force)srx->transport.sin.sin_port) ?:
+ memcmp(&peer->srx.transport.sin.sin_addr,
+ &srx->transport.sin.sin_addr,
+ sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ return ((u16 __force)peer->srx.transport.sin6.sin6_port -
+ (u16 __force)srx->transport.sin6.sin6_port) ?:
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+#endif
+ default:
+ BUG();
+ }
+}
+
+/*
+ * Look up a remote transport endpoint for the specified address using RCU.
+ */
+static struct rxrpc_peer *__rxrpc_lookup_peer_rcu(
+ struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx,
+ unsigned long hash_key)
+{
+ struct rxrpc_peer *peer;
+ struct rxrpc_net *rxnet = local->rxnet;
+
+ hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) {
+ if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 &&
+ refcount_read(&peer->ref) > 0)
+ return peer;
+ }
+
+ return NULL;
+}
+
+/*
+ * Look up a remote transport endpoint for the specified address using RCU.
+ */
+struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
+ const struct sockaddr_rxrpc *srx)
+{
+ struct rxrpc_peer *peer;
+ unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
+
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer)
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
+ return peer;
+}
+
+/*
+ * assess the MTU size for the network interface through which this peer is
+ * reached
+ */
+static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
+ struct rxrpc_peer *peer)
+{
+ struct net *net = local->net;
+ struct dst_entry *dst;
+ struct rtable *rt;
+ struct flowi fl;
+ struct flowi4 *fl4 = &fl.u.ip4;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ struct flowi6 *fl6 = &fl.u.ip6;
+#endif
+
+ peer->if_mtu = 1500;
+
+ memset(&fl, 0, sizeof(fl));
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ rt = ip_route_output_ports(
+ net, fl4, NULL,
+ peer->srx.transport.sin.sin_addr.s_addr, 0,
+ htons(7000), htons(7001), IPPROTO_UDP, 0, 0);
+ if (IS_ERR(rt)) {
+ _leave(" [route err %ld]", PTR_ERR(rt));
+ return;
+ }
+ dst = &rt->dst;
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ fl6->flowi6_scope = RT_SCOPE_UNIVERSE;
+ fl6->flowi6_proto = IPPROTO_UDP;
+ memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+ fl6->fl6_dport = htons(7001);
+ fl6->fl6_sport = htons(7000);
+ dst = ip6_route_output(net, NULL, fl6);
+ if (dst->error) {
+ _leave(" [route err %d]", dst->error);
+ return;
+ }
+ break;
+#endif
+
+ default:
+ BUG();
+ }
+
+ peer->if_mtu = dst_mtu(dst);
+ dst_release(dst);
+
+ _leave(" [if_mtu %u]", peer->if_mtu);
+}
+
+/*
+ * Allocate a peer.
+ */
+struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
+ enum rxrpc_peer_trace why)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+ if (peer) {
+ refcount_set(&peer->ref, 1);
+ peer->local = rxrpc_get_local(local, rxrpc_local_get_peer);
+ INIT_HLIST_HEAD(&peer->error_targets);
+ peer->service_conns = RB_ROOT;
+ seqlock_init(&peer->service_conn_lock);
+ spin_lock_init(&peer->lock);
+ spin_lock_init(&peer->rtt_input_lock);
+ peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+
+ rxrpc_peer_init_rtt(peer);
+
+ peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
+ trace_rxrpc_peer(peer->debug_id, 1, why);
+ }
+
+ _leave(" = %p", peer);
+ return peer;
+}
+
+/*
+ * Initialise peer record.
+ */
+static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
+ unsigned long hash_key)
+{
+ peer->hash_key = hash_key;
+ rxrpc_assess_MTU_size(local, peer);
+ peer->mtu = peer->if_mtu;
+ peer->rtt_last_req = ktime_get_real();
+
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ peer->hdrsize = sizeof(struct iphdr);
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ peer->hdrsize = sizeof(struct ipv6hdr);
+ break;
+#endif
+ default:
+ BUG();
+ }
+
+ switch (peer->srx.transport_type) {
+ case SOCK_DGRAM:
+ peer->hdrsize += sizeof(struct udphdr);
+ break;
+ default:
+ BUG();
+ }
+
+ peer->hdrsize += sizeof(struct rxrpc_wire_header);
+ peer->maxdata = peer->mtu - peer->hdrsize;
+}
+
+/*
+ * Set up a new peer.
+ */
+static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx,
+ unsigned long hash_key,
+ gfp_t gfp)
+{
+ struct rxrpc_peer *peer;
+
+ _enter("");
+
+ peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client);
+ if (peer) {
+ memcpy(&peer->srx, srx, sizeof(*srx));
+ rxrpc_init_peer(local, peer, hash_key);
+ }
+
+ _leave(" = %p", peer);
+ return peer;
+}
+
+static void rxrpc_free_peer(struct rxrpc_peer *peer)
+{
+ trace_rxrpc_peer(peer->debug_id, 0, rxrpc_peer_free);
+ rxrpc_put_local(peer->local, rxrpc_local_put_peer);
+ kfree_rcu(peer, rcu);
+}
+
+/*
+ * Set up a new incoming peer. There shouldn't be any other matching peers
+ * since we've already done a search in the list from the non-reentrant context
+ * (the data_ready handler), which is the only place we can add new peers.
+ */
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
+{
+ struct rxrpc_net *rxnet = local->rxnet;
+ unsigned long hash_key;
+
+ hash_key = rxrpc_peer_hash_key(local, &peer->srx);
+ rxrpc_init_peer(local, peer, hash_key);
+
+ spin_lock(&rxnet->peer_hash_lock);
+ hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
+ list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new);
+ spin_unlock(&rxnet->peer_hash_lock);
+}
+
+/*
+ * obtain a remote transport endpoint for the specified address
+ */
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_peer *peer, *candidate;
+ struct rxrpc_net *rxnet = local->rxnet;
+ unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
+
+ _enter("{%pISp}", &srx->transport);
+
+ /* search the peer list first */
+ rcu_read_lock();
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client))
+ peer = NULL;
+ rcu_read_unlock();
+
+ if (!peer) {
+ /* The peer is not yet present in hash - create a candidate
+ * for a new record and then redo the search.
+ */
+ candidate = rxrpc_create_peer(local, srx, hash_key, gfp);
+ if (!candidate) {
+ _leave(" = NULL [nomem]");
+ return NULL;
+ }
+
+ spin_lock(&rxnet->peer_hash_lock);
+
+ /* Need to check that we aren't racing with someone else */
+ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
+ if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client))
+ peer = NULL;
+ if (!peer) {
+ hash_add_rcu(rxnet->peer_hash,
+ &candidate->hash_link, hash_key);
+ list_add_tail(&candidate->keepalive_link,
+ &rxnet->peer_keepalive_new);
+ }
+
+ spin_unlock(&rxnet->peer_hash_lock);
+
+ if (peer)
+ rxrpc_free_peer(candidate);
+ else
+ peer = candidate;
+ }
+
+ _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref));
+ return peer;
+}
+
+/*
+ * Get a ref on a peer record.
+ */
+struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
+{
+ int r;
+
+ __refcount_inc(&peer->ref, &r);
+ trace_rxrpc_peer(peer->debug_id, r + 1, why);
+ return peer;
+}
+
+/*
+ * Get a ref on a peer record unless its usage has already reached 0.
+ */
+struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer,
+ enum rxrpc_peer_trace why)
+{
+ int r;
+
+ if (peer) {
+ if (__refcount_inc_not_zero(&peer->ref, &r))
+ trace_rxrpc_peer(peer->debug_id, r + 1, why);
+ else
+ peer = NULL;
+ }
+ return peer;
+}
+
+/*
+ * Discard a peer record.
+ */
+static void __rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+ struct rxrpc_net *rxnet = peer->local->rxnet;
+
+ ASSERT(hlist_empty(&peer->error_targets));
+
+ spin_lock(&rxnet->peer_hash_lock);
+ hash_del_rcu(&peer->hash_link);
+ list_del_init(&peer->keepalive_link);
+ spin_unlock(&rxnet->peer_hash_lock);
+
+ rxrpc_free_peer(peer);
+}
+
+/*
+ * Drop a ref on a peer record.
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
+{
+ unsigned int debug_id;
+ bool dead;
+ int r;
+
+ if (peer) {
+ debug_id = peer->debug_id;
+ dead = __refcount_dec_and_test(&peer->ref, &r);
+ trace_rxrpc_peer(debug_id, r - 1, why);
+ if (dead)
+ __rxrpc_put_peer(peer);
+ }
+}
+
+/*
+ * Make sure all peer records have been discarded.
+ */
+void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
+{
+ struct rxrpc_peer *peer;
+ int i;
+
+ for (i = 0; i < HASH_SIZE(rxnet->peer_hash); i++) {
+ if (hlist_empty(&rxnet->peer_hash[i]))
+ continue;
+
+ hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) {
+ pr_err("Leaked peer %u {%u} %pISp\n",
+ peer->debug_id,
+ refcount_read(&peer->ref),
+ &peer->srx.transport);
+ }
+ }
+}
+
+/**
+ * rxrpc_kernel_get_peer - Get the peer address of a call
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ * @_srx: Where to place the result
+ *
+ * Get the address of the remote peer in a call.
+ */
+void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+ struct sockaddr_rxrpc *_srx)
+{
+ *_srx = call->peer->srx;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ * @_srtt: Where to store the SRTT value.
+ *
+ * Get the call's peer smoothed RTT in microseconds.
+ */
+bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call,
+ u32 *_srtt)
+{
+ struct rxrpc_peer *peer = call->peer;
+
+ if (peer->rtt_count == 0) {
+ *_srtt = 1000000; /* 1S */
+ return false;
+ }
+
+ *_srtt = call->peer->srtt_us >> 3;
+ return true;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
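+
+/* A minimal usage sketch: a kernel service that has set up a call (e.g. via
+ * rxrpc_kernel_begin_call()) might query the peer address and SRTT like
+ * this, assuming sock and call come from that setup:
+ *
+ *	struct sockaddr_rxrpc srx;
+ *	u32 srtt;
+ *
+ *	rxrpc_kernel_get_peer(sock, call, &srx);
+ *	if (rxrpc_kernel_get_srtt(sock, call, &srtt))
+ *		pr_debug("peer %pISp srtt=%uus\n", &srx.transport, srtt);
+ */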
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
new file mode 100644
index 0000000000..682636d3b0
--- /dev/null
+++ b/net/rxrpc/proc.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* /proc/net/ support for AF_RXRPC
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
+ [RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ",
+ [RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc",
+ [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
+ [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
+ [RXRPC_CONN_SERVICE] = "SvSecure",
+ [RXRPC_CONN_ABORTED] = "Aborted ",
+};
+
+/*
+ * generate a list of extant and dead calls in /proc/net/rxrpc_calls
+ */
+static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ rcu_read_lock();
+ return seq_list_start_head_rcu(&rxnet->calls, *_pos);
+}
+
+static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next_rcu(v, &rxnet->calls, pos);
+}
+
+static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_local *local;
+ struct rxrpc_call *call;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ enum rxrpc_call_state state;
+ unsigned long timeout = 0;
+ rxrpc_seq_t acks_hard_ack;
+ char lbuff[50], rbuff[50];
+
+ if (v == &rxnet->calls) {
+ seq_puts(seq,
+ "Proto Local "
+ " Remote "
+ " SvID ConnID CallID End Use State Abort "
+ " DebugId TxSeq TW RxSeq RW RxSerial CW RxTimo\n");
+ return 0;
+ }
+
+ call = list_entry(v, struct rxrpc_call, link);
+
+ local = call->local;
+ if (local)
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
+ else
+ strcpy(lbuff, "no_local");
+
+ sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
+
+ state = rxrpc_call_state(call);
+ if (state != RXRPC_CALL_SERVER_PREALLOC) {
+ timeout = READ_ONCE(call->expect_rx_by);
+ timeout -= jiffies;
+ }
+
+ acks_hard_ack = READ_ONCE(call->acks_hard_ack);
+ seq_printf(seq,
+ "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
+ " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
+ lbuff,
+ rbuff,
+ call->dest_srx.srx_service,
+ call->cid,
+ call->call_id,
+ rxrpc_is_service_call(call) ? "Svc" : "Clt",
+ refcount_read(&call->ref),
+ rxrpc_call_states[state],
+ call->abort_code,
+ call->debug_id,
+ acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
+ call->ackr_window, call->ackr_wtop - call->ackr_window,
+ call->rx_serial,
+ call->cong_cwnd,
+ timeout);
+
+ return 0;
+}
+
+const struct seq_operations rxrpc_call_seq_ops = {
+ .start = rxrpc_call_seq_start,
+ .next = rxrpc_call_seq_next,
+ .stop = rxrpc_call_seq_stop,
+ .show = rxrpc_call_seq_show,
+};
+
+/*
+ * generate a list of extant virtual connections in /proc/net/rxrpc_conns
+ */
+static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_lock(&rxnet->conn_lock);
+ return seq_list_start_head(&rxnet->conn_proc_list, *_pos);
+}
+
+static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next(v, &rxnet->conn_proc_list, pos);
+}
+
+static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+ __releases(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_unlock(&rxnet->conn_lock);
+}
+
+static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ const char *state;
+ char lbuff[50], rbuff[50];
+
+ if (v == &rxnet->conn_proc_list) {
+ seq_puts(seq,
+ "Proto Local "
+ " Remote "
+ " SvID ConnID End Ref Act State Key "
+ " Serial ISerial CallId0 CallId1 CallId2 CallId3\n"
+ );
+ return 0;
+ }
+
+ conn = list_entry(v, struct rxrpc_connection, proc_link);
+ if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) {
+ strcpy(lbuff, "no_local");
+ strcpy(rbuff, "no_connection");
+ goto print;
+ }
+
+ sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
+ sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
+print:
+ state = rxrpc_is_conn_aborted(conn) ?
+ rxrpc_call_completions[conn->completion] :
+ rxrpc_conn_states[conn->state];
+ seq_printf(seq,
+ "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d"
+ " %s %08x %08x %08x %08x %08x %08x %08x\n",
+ lbuff,
+ rbuff,
+ conn->service_id,
+ conn->proto.cid,
+ rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
+ refcount_read(&conn->ref),
+ atomic_read(&conn->active),
+ state,
+ key_serial(conn->key),
+ atomic_read(&conn->serial),
+ conn->hi_serial,
+ conn->channels[0].call_id,
+ conn->channels[1].call_id,
+ conn->channels[2].call_id,
+ conn->channels[3].call_id);
+
+ return 0;
+}
+
+const struct seq_operations rxrpc_connection_seq_ops = {
+ .start = rxrpc_connection_seq_start,
+ .next = rxrpc_connection_seq_next,
+ .stop = rxrpc_connection_seq_stop,
+ .show = rxrpc_connection_seq_show,
+};
+
+/*
+ * generate a list of extant virtual peers in /proc/net/rxrpc/peers
+ */
+static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_peer *peer;
+ time64_t now;
+ char lbuff[50], rbuff[50];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local "
+ " Remote "
+ " Use SST MTU LastUse RTT RTO\n"
+ );
+ return 0;
+ }
+
+ peer = list_entry(v, struct rxrpc_peer, hash_link);
+
+ sprintf(lbuff, "%pISpc", &peer->local->srx.transport);
+
+ sprintf(rbuff, "%pISpc", &peer->srx.transport);
+
+ now = ktime_get_seconds();
+ seq_printf(seq,
+ "UDP %-47.47s %-47.47s %3u"
+ " %3u %5u %6llus %8u %8u\n",
+ lbuff,
+ rbuff,
+ refcount_read(&peer->ref),
+ peer->cong_ssthresh,
+ peer->mtu,
+ now - peer->last_tx_at,
+ peer->srtt_us >> 3,
+ jiffies_to_usecs(peer->rto_j));
+
+ return 0;
+}
+
+static void *rxrpc_peer_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ unsigned int bucket, n;
+ unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash);
+ void *p;
+
+ rcu_read_lock();
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
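+	/* The seq_file position packs the bucket number into the top bits and
+	 * a one-based index within the bucket into the low bits.
+	 */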
+ n = *_pos & ((1U << shift) - 1);
+ bucket = *_pos >> shift;
+ for (;;) {
+ if (bucket >= HASH_SIZE(rxnet->peer_hash)) {
+ *_pos = UINT_MAX;
+ return NULL;
+ }
+ if (n == 0) {
+ if (bucket == 0)
+ return SEQ_START_TOKEN;
+ *_pos += 1;
+ n++;
+ }
+
+ p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1);
+ if (p)
+ return p;
+ bucket++;
+ n = 1;
+ *_pos = (bucket << shift) | n;
+ }
+}
+
+static void *rxrpc_peer_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ unsigned int bucket, n;
+ unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash);
+ void *p;
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ bucket = *_pos >> shift;
+
+ p = seq_hlist_next_rcu(v, &rxnet->peer_hash[bucket], _pos);
+ if (p)
+ return p;
+
+ for (;;) {
+ bucket++;
+ n = 1;
+ *_pos = (bucket << shift) | n;
+
+ if (bucket >= HASH_SIZE(rxnet->peer_hash)) {
+ *_pos = UINT_MAX;
+ return NULL;
+ }
+ if (n == 0) {
+ *_pos += 1;
+ n++;
+ }
+
+ p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1);
+ if (p)
+ return p;
+ }
+}
+
+static void rxrpc_peer_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+
+const struct seq_operations rxrpc_peer_seq_ops = {
+ .start = rxrpc_peer_seq_start,
+ .next = rxrpc_peer_seq_next,
+ .stop = rxrpc_peer_seq_stop,
+ .show = rxrpc_peer_seq_show,
+};
+
+/*
+ * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals
+ */
+static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_local *local;
+ char lbuff[50];
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq,
+ "Proto Local "
+ " Use Act RxQ\n");
+ return 0;
+ }
+
+ local = hlist_entry(v, struct rxrpc_local, link);
+
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
+
+ seq_printf(seq,
+ "UDP %-47.47s %3u %3u %3u\n",
+ lbuff,
+ refcount_read(&local->ref),
+ atomic_read(&local->active_users),
+ local->rx_queue.qlen);
+
+ return 0;
+}
+
+static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rcu)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ unsigned int n;
+
+ rcu_read_lock();
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ n = *_pos;
+ if (n == 0)
+ return SEQ_START_TOKEN;
+
+ return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1);
+}
+
+static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ if (*_pos >= UINT_MAX)
+ return NULL;
+
+ return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos);
+}
+
+static void rxrpc_local_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+const struct seq_operations rxrpc_local_seq_ops = {
+ .start = rxrpc_local_seq_start,
+ .next = rxrpc_local_seq_next,
+ .stop = rxrpc_local_seq_stop,
+ .show = rxrpc_local_seq_show,
+};
+
+/*
+ * Display stats in /proc/net/rxrpc/stats
+ */
+int rxrpc_stats_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
+
+ seq_printf(seq,
+ "Data : send=%u sendf=%u fail=%u\n",
+ atomic_read(&rxnet->stat_tx_data_send),
+ atomic_read(&rxnet->stat_tx_data_send_frag),
+ atomic_read(&rxnet->stat_tx_data_send_fail));
+ seq_printf(seq,
+ "Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n",
+ atomic_read(&rxnet->stat_tx_data),
+ atomic_read(&rxnet->stat_tx_data_retrans),
+ atomic_read(&rxnet->stat_tx_data_underflow),
+ atomic_read(&rxnet->stat_tx_data_cwnd_reset));
+ seq_printf(seq,
+ "Data-Rx : nr=%u reqack=%u jumbo=%u\n",
+ atomic_read(&rxnet->stat_rx_data),
+ atomic_read(&rxnet->stat_rx_data_reqack),
+ atomic_read(&rxnet->stat_rx_data_jumbo));
+ seq_printf(seq,
+ "Ack : fill=%u send=%u skip=%u\n",
+ atomic_read(&rxnet->stat_tx_ack_fill),
+ atomic_read(&rxnet->stat_tx_ack_send),
+ atomic_read(&rxnet->stat_tx_ack_skip));
+ seq_printf(seq,
+ "Ack-Tx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_REQUESTED]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DUPLICATE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_EXCEEDS_WINDOW]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_NOSPACE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING_RESPONSE]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DELAY]),
+ atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_IDLE]));
+ seq_printf(seq,
+ "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_REQUESTED]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DUPLICATE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_EXCEEDS_WINDOW]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_NOSPACE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING_RESPONSE]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]),
+ atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]));
+ seq_printf(seq,
+ "Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n",
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]));
+ seq_printf(seq,
+ "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n",
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_retrans]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_slow_start]),
+ atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_small_txwin]));
+ seq_printf(seq,
+ "Buffers : txb=%u rxb=%u\n",
+ atomic_read(&rxrpc_nr_txbuf),
+ atomic_read(&rxrpc_n_rx_skbs));
+ seq_printf(seq,
+ "IO-thread: loops=%u\n",
+ atomic_read(&rxnet->stat_io_loop));
+ return 0;
+}
+
+/*
+ * Clear stats if /proc/net/rxrpc/stats is written to.
+ */
+int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
+{
+ struct seq_file *m = file->private_data;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(m));
+
+ if (size > 1 || (size == 1 && buf[0] != '\n'))
+ return -EINVAL;
+
+ atomic_set(&rxnet->stat_tx_data, 0);
+ atomic_set(&rxnet->stat_tx_data_retrans, 0);
+ atomic_set(&rxnet->stat_tx_data_underflow, 0);
+ atomic_set(&rxnet->stat_tx_data_cwnd_reset, 0);
+ atomic_set(&rxnet->stat_tx_data_send, 0);
+ atomic_set(&rxnet->stat_tx_data_send_frag, 0);
+ atomic_set(&rxnet->stat_tx_data_send_fail, 0);
+ atomic_set(&rxnet->stat_rx_data, 0);
+ atomic_set(&rxnet->stat_rx_data_reqack, 0);
+ atomic_set(&rxnet->stat_rx_data_jumbo, 0);
+
+ atomic_set(&rxnet->stat_tx_ack_fill, 0);
+ atomic_set(&rxnet->stat_tx_ack_send, 0);
+ atomic_set(&rxnet->stat_tx_ack_skip, 0);
+ memset(&rxnet->stat_tx_acks, 0, sizeof(rxnet->stat_tx_acks));
+ memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks));
+
+ memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack));
+
+ atomic_set(&rxnet->stat_io_loop, 0);
+ return size;
+}
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
new file mode 100644
index 0000000000..e8ee4af43c
--- /dev/null
+++ b/net/rxrpc/protocol.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* protocol.h: Rx packet layout and definitions
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_RXRPC_PACKET_H
+#define _LINUX_RXRPC_PACKET_H
+
+typedef u32 rxrpc_seq_t; /* Rx message sequence number */
+typedef u32 rxrpc_serial_t; /* Rx message serial number */
+typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */
+typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
+
+/*****************************************************************************/
+/*
+ * on-the-wire Rx packet header
+ * - all multibyte fields should be in network byte order
+ */
+struct rxrpc_wire_header {
+ __be32 epoch; /* client boot timestamp */
+#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */
+
+ __be32 cid; /* connection and channel ID */
+#define RXRPC_MAXCALLS 4 /* max active calls per conn */
+#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */
+#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */
+#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */
+#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */
+
+ __be32 callNumber; /* call ID (0 for connection-level packets) */
+ __be32 seq; /* sequence number of pkt in call stream */
+ __be32 serial; /* serial number of pkt sent to network */
+
+ uint8_t type; /* packet type */
+#define RXRPC_PACKET_TYPE_DATA 1 /* data */
+#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */
+#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */
+#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */
+#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */
+#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */
+#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection security response (CLNT->SRVR) */
+#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */
+#define RXRPC_PACKET_TYPE_PARAMS 9 /* Parameter negotiation (unspec'd, ignore) */
+#define RXRPC_PACKET_TYPE_10 10 /* Ignored */
+#define RXRPC_PACKET_TYPE_11 11 /* Ignored */
+#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */
+
+ uint8_t flags; /* packet flags */
+#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */
+#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */
+#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */
+#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */
+#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */
+#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */
+
+ uint8_t userStatus; /* app-layer defined status */
+#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */
+
+ uint8_t securityIndex; /* security protocol ID */
+ union {
+ __be16 _rsvd; /* reserved */
+ __be16 cksum; /* kerberos security checksum */
+ };
+ __be16 serviceId; /* service ID */
+
+} __packed;
+
+/*****************************************************************************/
+/*
+ * jumbo packet secondary header
+ * - can be mapped to a wire header for the next subpacket by:
+ * - new_serial = serial + 1
+ * - new_seq = seq + 1
+ * - new_flags = j_flags
+ * - new__rsvd = j__rsvd
+ * - duplicating all other fields
+ */
+struct rxrpc_jumbo_header {
+ uint8_t flags; /* packet flags (as per rxrpc_header) */
+ uint8_t pad;
+ union {
+ __be16 _rsvd; /* reserved */
+ __be16 cksum; /* kerberos security checksum */
+ };
+} __packed;
+
+#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */
+#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
+
+/*
+ * The maximum number of subpackets that can possibly fit in a UDP packet is:
+ *
+ * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1
+ * = ((65535 - 28 - 28) / 1416) + 1
+ * = 46 non-terminal packets and 1 terminal packet.
+ */
+#define RXRPC_MAX_NR_JUMBO 47
+
+/*****************************************************************************/
+/*
+ * on-the-wire Rx ACK packet data payload
+ * - all multibyte fields should be in network byte order
+ */
+struct rxrpc_ackpacket {
+ __be16 bufferSpace; /* number of packet buffers available */
+ __be16 maxSkew; /* diff between serno being ACK'd and highest serial no
+ * received */
+ __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */
+ __be32 previousPacket; /* sequence no of previous packet received */
+ __be32 serial; /* serial no of packet that prompted this ACK */
+
+ uint8_t reason; /* reason for ACK */
+#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */
+#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */
+#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */
+#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */
+#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */
+#define RXRPC_ACK_PING 6 /* keep alive ACK */
+#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */
+#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */
+#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */
+#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */
+
+ uint8_t nAcks; /* number of ACKs */
+#define RXRPC_MAXACKS 255
+
+ uint8_t acks[]; /* list of ACK/NAKs */
+#define RXRPC_ACK_TYPE_NACK 0
+#define RXRPC_ACK_TYPE_ACK 1
+
+} __packed;
+
+/*
+ * ACK packets can have a further piece of information tagged on the end
+ */
+struct rxrpc_ackinfo {
+ __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */
+ __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */
+ __be32 rwind; /* Rx window size (packets) [AFS 3.4] */
+ __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */
+};
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 challenge packet
+ */
+struct rxkad_challenge {
+ __be32 version; /* version of this challenge type */
+ __be32 nonce; /* encrypted random number */
+ __be32 min_level; /* minimum security level */
+ __be32 __padding; /* padding to 8-byte boundary */
+} __packed;
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 response packet
+ */
+struct rxkad_response {
+ __be32 version; /* version of this response type */
+ __be32 __pad;
+
+ /* encrypted bit of the response */
+ struct {
+ __be32 epoch; /* current epoch */
+ __be32 cid; /* parent connection ID */
+ __be32 checksum; /* checksum */
+ __be32 securityIndex; /* security type */
+ __be32 call_id[4]; /* encrypted call IDs */
+ __be32 inc_nonce; /* challenge nonce + 1 */
+ __be32 level; /* desired level */
+ } encrypted;
+
+ __be32 kvno; /* Kerberos key version number */
+ __be32 ticket_len; /* Kerberos ticket length */
+} __packed;
+
+#endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
new file mode 100644
index 0000000000..a482f88c5f
--- /dev/null
+++ b/net/rxrpc/recvmsg.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC recvmsg() implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <linux/sched/signal.h>
+
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * Post a call for attention by the socket or kernel service. Further
+ * notifications are suppressed by putting recvmsg_link on a dummy queue.
+ */
+void rxrpc_notify_socket(struct rxrpc_call *call)
+{
+ struct rxrpc_sock *rx;
+ struct sock *sk;
+
+ _enter("%d", call->debug_id);
+
+ if (!list_empty(&call->recvmsg_link))
+ return;
+
+ rcu_read_lock();
+
+ rx = rcu_dereference(call->socket);
+ sk = &rx->sk;
+ if (rx && sk->sk_state < RXRPC_CLOSE) {
+ if (call->notify_rx) {
+ spin_lock(&call->notify_lock);
+ call->notify_rx(sk, call, call->user_call_ID);
+ spin_unlock(&call->notify_lock);
+ } else {
+ spin_lock(&rx->recvmsg_lock);
+ if (list_empty(&call->recvmsg_link)) {
+ rxrpc_get_call(call, rxrpc_call_get_notify_socket);
+ list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
+ }
+ spin_unlock(&rx->recvmsg_lock);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ _debug("call %ps", sk->sk_data_ready);
+ sk->sk_data_ready(sk);
+ }
+ }
+ }
+
+ rcu_read_unlock();
+ _leave("");
+}
+
+/*
+ * Pass a call terminating message to userspace.
+ */
+static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
+{
+ u32 tmp = 0;
+ int ret;
+
+ switch (call->completion) {
+ case RXRPC_CALL_SUCCEEDED:
+ ret = 0;
+ if (rxrpc_is_service_call(call))
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp);
+ break;
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ tmp = call->abort_code;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+ break;
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ tmp = call->abort_code;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+ break;
+ case RXRPC_CALL_NETWORK_ERROR:
+ tmp = -call->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
+ break;
+ case RXRPC_CALL_LOCAL_ERROR:
+ tmp = -call->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
+ break;
+ default:
+ pr_err("Invalid terminal call state %u\n", call->completion);
+ BUG();
+ break;
+ }
+
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
+ call->ackr_window - 1,
+ call->rx_pkt_offset, call->rx_pkt_len, ret);
+ return ret;
+}
+
+/*
+ * Discard a packet we've used up and advance the Rx window by one.
+ */
+static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ rxrpc_serial_t serial;
+ rxrpc_seq_t old_consumed = call->rx_consumed, tseq;
+ bool last;
+ int acked;
+
+ _enter("%d", call->debug_id);
+
+ skb = skb_dequeue(&call->recvmsg_queue);
+ rxrpc_see_skb(skb, rxrpc_skb_see_rotate);
+
+ sp = rxrpc_skb(skb);
+ tseq = sp->hdr.seq;
+ serial = sp->hdr.serial;
+ last = sp->hdr.flags & RXRPC_LAST_PACKET;
+
+ /* Barrier against rxrpc_input_data(). */
+ if (after(tseq, call->rx_consumed))
+ smp_store_release(&call->rx_consumed, tseq);
+
+ rxrpc_free_skb(skb, rxrpc_skb_put_rotate);
+
+ trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
+ serial, call->rx_consumed);
+
+ if (last)
+ set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags);
+
+ /* Check to see if there's an ACK that needs sending. */
+ acked = atomic_add_return(call->rx_consumed - old_consumed,
+ &call->ackr_nr_consumed);
+ if (acked > 8 &&
+ !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
+ rxrpc_poke_call(call, rxrpc_call_poke_idle);
+}
+
+/*
+ * Decrypt and verify a DATA packet.
+ */
+static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (sp->flags & RXRPC_RX_VERIFIED)
+ return 0;
+ return call->security->verify_packet(call, skb);
+}
+
+/*
+ * Deliver messages to a call. This keeps processing packets until the buffer
+ * is filled and we find either more DATA (returns 0) or the end of the DATA
+ * (returns 1). If more packets are required, it returns -EAGAIN and if the
+ * call has failed it returns -EIO.
+ */
+static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
+ struct msghdr *msg, struct iov_iter *iter,
+ size_t len, int flags, size_t *_offset)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ rxrpc_seq_t seq = 0;
+ size_t remain;
+ unsigned int rx_pkt_offset, rx_pkt_len;
+ int copy, ret = -EAGAIN, ret2;
+
+ rx_pkt_offset = call->rx_pkt_offset;
+ rx_pkt_len = call->rx_pkt_len;
+
+ if (rxrpc_call_has_failed(call)) {
+ seq = call->ackr_window - 1;
+ ret = -EIO;
+ goto done;
+ }
+
+ if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
+ seq = call->ackr_window - 1;
+ ret = 1;
+ goto done;
+ }
+
+ /* No one else can be removing stuff from the queue, so we shouldn't
+ * need the Rx lock to walk it.
+ */
+ skb = skb_peek(&call->recvmsg_queue);
+ while (skb) {
+ rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg);
+ sp = rxrpc_skb(skb);
+ seq = sp->hdr.seq;
+
+ if (!(flags & MSG_PEEK))
+ trace_rxrpc_receive(call, rxrpc_receive_front,
+ sp->hdr.serial, seq);
+
+ if (msg)
+ sock_recv_timestamp(msg, sock->sk, skb);
+
+ if (rx_pkt_offset == 0) {
+ ret2 = rxrpc_verify_data(call, skb);
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
+ sp->offset, sp->len, ret2);
+ if (ret2 < 0) {
+ kdebug("verify = %d", ret2);
+ ret = ret2;
+ goto out;
+ }
+ rx_pkt_offset = sp->offset;
+ rx_pkt_len = sp->len;
+ } else {
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
+ }
+
+ /* We have to handle short, empty and used-up DATA packets. */
+ remain = len - *_offset;
+ copy = rx_pkt_len;
+ if (copy > remain)
+ copy = remain;
+ if (copy > 0) {
+ ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
+ copy);
+ if (ret2 < 0) {
+ ret = ret2;
+ goto out;
+ }
+
+ /* handle piecemeal consumption of data packets */
+ rx_pkt_offset += copy;
+ rx_pkt_len -= copy;
+ *_offset += copy;
+ }
+
+ if (rx_pkt_len > 0) {
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_full, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
+ ASSERTCMP(*_offset, ==, len);
+ ret = 0;
+ break;
+ }
+
+ /* The whole packet has been transferred. */
+ if (sp->hdr.flags & RXRPC_LAST_PACKET)
+ ret = 1;
+ rx_pkt_offset = 0;
+ rx_pkt_len = 0;
+
+ skb = skb_peek_next(skb, &call->recvmsg_queue);
+
+ if (!(flags & MSG_PEEK))
+ rxrpc_rotate_rx_window(call);
+ }
+
+out:
+ if (!(flags & MSG_PEEK)) {
+ call->rx_pkt_offset = rx_pkt_offset;
+ call->rx_pkt_len = rx_pkt_len;
+ }
+done:
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq,
+ rx_pkt_offset, rx_pkt_len, ret);
+ if (ret == -EAGAIN)
+ set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
+ return ret;
+}
+
+/*
+ * Receive a message from an RxRPC socket
+ * - we need to be careful about two or more threads calling recvmsg
+ * simultaneously
+ */
+int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+ struct rxrpc_call *call;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct list_head *l;
+ unsigned int call_debug_id = 0;
+ size_t copied = 0;
+ long timeo;
+ int ret;
+
+ DEFINE_WAIT(wait);
+
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0);
+
+ if (flags & (MSG_OOB | MSG_TRUNC))
+ return -EOPNOTSUPP;
+
+ timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+
+try_again:
+ lock_sock(&rx->sk);
+
+ /* Return immediately if a client socket has no outstanding calls */
+ if (RB_EMPTY_ROOT(&rx->calls) &&
+ list_empty(&rx->recvmsg_q) &&
+ rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+ release_sock(&rx->sk);
+ return -EAGAIN;
+ }
+
+ if (list_empty(&rx->recvmsg_q)) {
+ ret = -EWOULDBLOCK;
+ if (timeo == 0) {
+ call = NULL;
+ goto error_no_call;
+ }
+
+ release_sock(&rx->sk);
+
+ /* Wait for something to happen */
+ prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
+ TASK_INTERRUPTIBLE);
+ ret = sock_error(&rx->sk);
+ if (ret)
+ goto wait_error;
+
+ if (list_empty(&rx->recvmsg_q)) {
+ if (signal_pending(current))
+ goto wait_interrupted;
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
+ timeo = schedule_timeout(timeo);
+ }
+ finish_wait(sk_sleep(&rx->sk), &wait);
+ goto try_again;
+ }
+
+ /* Find the next call and dequeue it if we're not just peeking. If we
+ * do dequeue it, that comes with a ref that we will need to release.
+ * We also want to weed out calls that got requeued whilst we were
+ * shovelling data out.
+ */
+ spin_lock(&rx->recvmsg_lock);
+ l = rx->recvmsg_q.next;
+ call = list_entry(l, struct rxrpc_call, recvmsg_link);
+
+ if (!rxrpc_call_is_complete(call) &&
+ skb_queue_empty(&call->recvmsg_queue)) {
+ list_del_init(&call->recvmsg_link);
+ spin_unlock(&rx->recvmsg_lock);
+ release_sock(&rx->sk);
+ trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
+
+ if (!(flags & MSG_PEEK))
+ list_del_init(&call->recvmsg_link);
+ else
+ rxrpc_get_call(call, rxrpc_call_get_recvmsg);
+ spin_unlock(&rx->recvmsg_lock);
+
+ call_debug_id = call->debug_id;
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
+
+ /* We're going to drop the socket lock, so we need to lock the call
+ * against interference by sendmsg.
+ */
+ if (!mutex_trylock(&call->user_mutex)) {
+ ret = -EWOULDBLOCK;
+ if (flags & MSG_DONTWAIT)
+ goto error_requeue_call;
+ ret = -ERESTARTSYS;
+ if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ goto error_requeue_call;
+ }
+
+ release_sock(&rx->sk);
+
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
+ BUG();
+
+ if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ if (flags & MSG_CMSG_COMPAT) {
+ unsigned int id32 = call->user_call_ID;
+
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ sizeof(unsigned int), &id32);
+ } else {
+ unsigned long idl = call->user_call_ID;
+
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ sizeof(unsigned long), &idl);
+ }
+ if (ret < 0)
+ goto error_unlock_call;
+ }
+
+ if (msg->msg_name && call->peer) {
+ size_t len = sizeof(call->dest_srx);
+
+ memcpy(msg->msg_name, &call->dest_srx, len);
+ msg->msg_namelen = len;
+ }
+
+ ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+ flags, &copied);
+ if (ret == -EAGAIN)
+ ret = 0;
+ if (ret == -EIO)
+ goto call_failed;
+ if (ret < 0)
+ goto error_unlock_call;
+
+ if (rxrpc_call_is_complete(call) &&
+ skb_queue_empty(&call->recvmsg_queue))
+ goto call_complete;
+ if (rxrpc_call_has_failed(call))
+ goto call_failed;
+
+ if (!skb_queue_empty(&call->recvmsg_queue))
+ rxrpc_notify_socket(call);
+ goto not_yet_complete;
+
+call_failed:
+ rxrpc_purge_queue(&call->recvmsg_queue);
+call_complete:
+ ret = rxrpc_recvmsg_term(call, msg);
+ if (ret < 0)
+ goto error_unlock_call;
+ if (!(flags & MSG_PEEK))
+ rxrpc_release_call(rx, call);
+ msg->msg_flags |= MSG_EOR;
+ ret = 1;
+
+not_yet_complete:
+ if (ret == 0)
+ msg->msg_flags |= MSG_MORE;
+ else
+ msg->msg_flags &= ~MSG_MORE;
+ ret = copied;
+
+error_unlock_call:
+ mutex_unlock(&call->user_mutex);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
+ return ret;
+
+error_requeue_call:
+ if (!(flags & MSG_PEEK)) {
+ spin_lock(&rx->recvmsg_lock);
+ list_add(&call->recvmsg_link, &rx->recvmsg_q);
+ spin_unlock(&rx->recvmsg_lock);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
+ } else {
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ }
+error_no_call:
+ release_sock(&rx->sk);
+error_trace:
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
+ return ret;
+
+wait_interrupted:
+ ret = sock_intr_errno(timeo);
+wait_error:
+ finish_wait(sk_sleep(&rx->sk), &wait);
+ call = NULL;
+ goto error_trace;
+}
+
+/**
+ * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
+ * @sock: The socket that the call exists on
+ * @call: The call to receive data from
+ * @iter: The buffer to receive into
+ * @_len: The amount of data we want to receive (decreased on return)
+ * @want_more: True if more data is expected to be read
+ * @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
+ *
+ * Allow a kernel service to receive data and pick up information about the
+ * state of a call. Returns 0 if got what was asked for and there's more
+ * available, 1 if we got what was asked for and we're at the end of the data
+ * and -EAGAIN if we need more data.
+ *
+ * Note that we may return -EAGAIN to drain empty packets at the end of the
+ * data, even if we've already copied over the requested data.
+ *
+ * *_abort should also be initialised to 0.
+ */
+int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
+ struct iov_iter *iter, size_t *_len,
+ bool want_more, u32 *_abort, u16 *_service)
+{
+ size_t offset = 0;
+ int ret;
+
+ _enter("{%d},%zu,%d", call->debug_id, *_len, want_more);
+
+ mutex_lock(&call->user_mutex);
+
+ ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset);
+ *_len -= offset;
+ if (ret == -EIO)
+ goto call_failed;
+ if (ret < 0)
+ goto out;
+
+ /* We can only reach here with a partially full buffer if we have
+ * reached the end of the data. We must otherwise have a full buffer
+ * or have been given -EAGAIN.
+ */
+ if (ret == 1) {
+ if (iov_iter_count(iter) > 0)
+ goto short_data;
+ if (!want_more)
+ goto read_phase_complete;
+ ret = 0;
+ goto out;
+ }
+
+ if (!want_more)
+ goto excess_data;
+ goto out;
+
+read_phase_complete:
+ ret = 1;
+out:
+ if (_service)
+ *_service = call->dest_srx.srx_service;
+ mutex_unlock(&call->user_mutex);
+ _leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
+ return ret;
+
+short_data:
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EBADMSG);
+ ret = -EBADMSG;
+ goto out;
+excess_data:
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EMSGSIZE);
+ ret = -EMSGSIZE;
+ goto out;
+call_failed:
+ *_abort = call->abort_code;
+ ret = call->error;
+ if (call->completion == RXRPC_CALL_SUCCEEDED) {
+ ret = 1;
+ if (iov_iter_count(iter) > 0)
+ ret = -ECONNRESET;
+ }
+ goto out;
+}
+EXPORT_SYMBOL(rxrpc_kernel_recv_data);
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
new file mode 100644
index 0000000000..be61d6f5be
--- /dev/null
+++ b/net/rxrpc/rtt.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* RTT/RTO calculation.
+ *
+ * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/rfc6298
+ * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
+ * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
+ */
+
+#include <linux/net.h>
+#include "ar-internal.h"
+
+#define RXRPC_RTO_MAX ((unsigned)(120 * HZ))
+#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */
+#define rxrpc_jiffies32 ((u32)jiffies)	/* As tcp_jiffies32 */
+
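+/*
+ * The floor, in microseconds, applied to the initial RTT variance estimate.
+ */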
+static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer)
+{
+ return 200;
+}
+
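+/*
+ * Compute the RTO from the smoothed RTT state.  peer->srtt_us holds eight
+ * times the smoothed RTT and peer->rttvar_us roughly four times the mean
+ * deviation, so srtt_us/8 + rttvar_us approximates the RFC 6298 value of
+ * SRTT + 4 * RTTVAR, converted here to jiffies.
+ */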
+static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer)
+{
+ return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us);
+}
+
+static u32 rxrpc_bound_rto(u32 rto)
+{
+ return min(rto, RXRPC_RTO_MAX);
+}
+
+/*
+ * Called to compute a smoothed rtt estimate. The data fed to this
+ * routine either comes from timestamps, or from segments that were
+ * known _not_ to have been retransmitted [see Karn/Partridge
+ * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
+ * piece by Van Jacobson.
+ * NOTE: the next three routines used to be one big routine.
+ * To save cycles in the RFC 1323 implementation it was better to break
+ * it up into three procedures. -- erics
+ */
+static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us)
+{
+ long m = sample_rtt_us; /* RTT */
+ u32 srtt = peer->srtt_us;
+
+	/* The following amusing code comes from Jacobson's
+	 * article in SIGCOMM '88.  Note that rtt and mdev
+	 * are scaled versions of the rtt and the mean deviation.
+	 * This is designed to be as fast as possible;
+	 * m stands for "measurement".
+	 *
+	 * In a 1990 paper the rto value was changed to:
+	 * RTO = rtt + 4 * mdev
+	 *
+	 * Funny.  This algorithm seems to be very broken.
+	 * These formulae increase RTO when it should be decreased, increase it
+	 * too slowly when it should be increased quickly, decrease it too
+	 * quickly, etc.  I guess in BSD RTO takes ONE value, so it absolutely
+	 * does not matter how it is _calculated_.  It seems to have been a
+	 * trap that VJ failed to avoid. 8)
+	 */
+ if (srtt != 0) {
+ m -= (srtt >> 3); /* m is now error in rtt est */
+ srtt += m; /* rtt = 7/8 rtt + 1/8 new */
+ if (m < 0) {
+ m = -m; /* m is now abs(error) */
+ m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ /* This is similar to one of Eifel findings.
+ * Eifel blocks mdev updates when rtt decreases.
+ * This solution is a bit different: we use finer gain
+ * for mdev in this case (alpha*beta).
+ * Like Eifel it also prevents growth of rto,
+ * but also it limits too fast rto decreases,
+ * happening in pure Eifel.
+ */
+ if (m > 0)
+ m >>= 3;
+ } else {
+ m -= (peer->mdev_us >> 2); /* similar update on mdev */
+ }
+
+ peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
+ if (peer->mdev_us > peer->mdev_max_us) {
+ peer->mdev_max_us = peer->mdev_us;
+ if (peer->mdev_max_us > peer->rttvar_us)
+ peer->rttvar_us = peer->mdev_max_us;
+ }
+ } else {
+ /* no previous measure. */
+ srtt = m << 3; /* take the measured time to be rtt */
+ peer->mdev_us = m << 1; /* make sure rto = 3*rtt */
+ peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer));
+ peer->mdev_max_us = peer->rttvar_us;
+ }
+
+ peer->srtt_us = max(1U, srtt);
+}
+
+/*
+ * Calculate rto without backoff. This is the second half of Van Jacobson's
+ * routine referred to above.
+ */
+static void rxrpc_set_rto(struct rxrpc_peer *peer)
+{
+ u32 rto;
+
+	/* 1. If the rtt variance happened to be less than 50 msec, it is a
+	 *    hallucination.  It cannot be less, due to the utterly erratic
+	 *    ACK generation by at least Solaris and FreeBSD.  "Erratic ACKs"
+	 *    have _nothing_ to do with delayed ACKs, because at cwnd>2 the
+	 *    true delack timeout is invisible.  Actually, Linux-2.4 also
+	 *    generates erratic ACKs in some circumstances.
+	 */
+ rto = __rxrpc_set_rto(peer);
+
+	/* 2. Fixups made earlier cannot be right.
+	 *    If we do not estimate RTO correctly without them, the whole
+	 *    algorithm is pure shit and should be replaced with a correct
+	 *    one.  That is exactly what we pretend to do here.
+	 */
+
+ /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
+ * guarantees that rto is higher.
+ */
+ peer->rto_j = rxrpc_bound_rto(rto);
+}
+
+static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us)
+{
+ if (rtt_us < 0)
+ return;
+
+ //rxrpc_update_rtt_min(peer, rtt_us);
+ rxrpc_rtt_estimator(peer, rtt_us);
+ rxrpc_set_rto(peer);
+
+ /* RFC6298: only reset backoff on valid RTT measurement. */
+ peer->backoff = 0;
+}
+
+/*
+ * Add RTT information to cache. This is called in softirq mode and has
+ * exclusive access to the peer RTT data.
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+ int rtt_slot,
+ rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+ ktime_t send_time, ktime_t resp_time)
+{
+ struct rxrpc_peer *peer = call->peer;
+ s64 rtt_us;
+
+ rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
+ if (rtt_us < 0)
+ return;
+
+ spin_lock(&peer->rtt_input_lock);
+ rxrpc_ack_update_rtt(peer, rtt_us);
+ if (peer->rtt_count < 3)
+ peer->rtt_count++;
+ spin_unlock(&peer->rtt_input_lock);
+
+ trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
+ peer->srtt_us >> 3, peer->rto_j);
+}
+
+/*
+ * Get the retransmission timeout to set in jiffies, backing it off each time
+ * we retransmit.
+ */
+unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans)
+{
+ u64 timo_j;
+ u8 backoff = READ_ONCE(peer->backoff);
+
+ timo_j = peer->rto_j;
+ timo_j <<= backoff;
+ if (retrans && timo_j * 2 <= RXRPC_RTO_MAX)
+ WRITE_ONCE(peer->backoff, backoff + 1);
+
+ if (timo_j < 1)
+ timo_j = 1;
+
+ return timo_j;
+}
+
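+/*
+ * Initialise a peer's RTT state, seeding the RTO with the RFC 6298 initial
+ * value and deriving the initial deviation estimate from it.
+ */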
+void rxrpc_peer_init_rtt(struct rxrpc_peer *peer)
+{
+ peer->rto_j = RXRPC_TIMEOUT_INIT;
+ peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT);
+ peer->backoff = 0;
+ //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U);
+}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
new file mode 100644
index 0000000000..b52dedcebc
--- /dev/null
+++ b/net/rxrpc/rxkad.c
@@ -0,0 +1,1267 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Kerberos-based RxRPC security
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/skcipher.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/scatterlist.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/key-type.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include "ar-internal.h"
+
+#define RXKAD_VERSION 2
+#define MAXKRB5TICKETLEN 1024
+#define RXKAD_TKT_TYPE_KERBEROS_V5 256
+#define ANAME_SZ 40 /* size of authentication name */
+#define INST_SZ 40 /* size of principal's instance */
+#define REALM_SZ 40 /* size of principal's auth domain */
+#define SNAME_SZ 40 /* size of service name */
+#define RXKAD_ALIGN 8
+
+struct rxkad_level1_hdr {
+ __be32 data_size; /* true data size (excluding padding) */
+};
+
+struct rxkad_level2_hdr {
+ __be32 data_size; /* true data size (excluding padding) */
+ __be32 checksum; /* decrypted data checksum */
+};
+
+static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
+ struct crypto_sync_skcipher *ci);
+
+/*
+ * this holds a pinned cipher so that keventd doesn't get called by the cipher
+ * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
+ * packets
+ */
+static struct crypto_sync_skcipher *rxkad_ci;
+static struct skcipher_request *rxkad_ci_req;
+static DEFINE_MUTEX(rxkad_ci_mutex);
+
+/*
+ * Parse the information from a server key
+ *
+ * The data should be the 8-byte secret key.
+ */
+static int rxkad_preparse_server_key(struct key_preparsed_payload *prep)
+{
+ struct crypto_skcipher *ci;
+
+ if (prep->datalen != 8)
+ return -EINVAL;
+
+ memcpy(&prep->payload.data[2], prep->data, 8);
+
+ ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ci)) {
+ _leave(" = %ld", PTR_ERR(ci));
+ return PTR_ERR(ci);
+ }
+
+ if (crypto_skcipher_setkey(ci, prep->data, 8) < 0)
+ BUG();
+
+ prep->payload.data[0] = ci;
+ _leave(" = 0");
+ return 0;
+}
+
+static void rxkad_free_preparse_server_key(struct key_preparsed_payload *prep)
+{
+ if (prep->payload.data[0])
+ crypto_free_skcipher(prep->payload.data[0]);
+}
+
+static void rxkad_destroy_server_key(struct key *key)
+{
+ if (key->payload.data[0]) {
+ crypto_free_skcipher(key->payload.data[0]);
+ key->payload.data[0] = NULL;
+ }
+}
+
+/*
+ * initialise connection security
+ */
+static int rxkad_init_connection_security(struct rxrpc_connection *conn,
+ struct rxrpc_key_token *token)
+{
+ struct crypto_sync_skcipher *ci;
+ int ret;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
+
+ conn->security_ix = token->security_index;
+
+ ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
+ if (IS_ERR(ci)) {
+ _debug("no cipher");
+ ret = PTR_ERR(ci);
+ goto error;
+ }
+
+ if (crypto_sync_skcipher_setkey(ci, token->kad->session_key,
+ sizeof(token->kad->session_key)) < 0)
+ BUG();
+
+ switch (conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ case RXRPC_SECURITY_AUTH:
+ case RXRPC_SECURITY_ENCRYPT:
+ break;
+ default:
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+
+ ret = rxkad_prime_packet_security(conn, ci);
+ if (ret < 0)
+ goto error_ci;
+
+ conn->rxkad.cipher = ci;
+ return 0;
+
+error_ci:
+ crypto_free_sync_skcipher(ci);
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Work out how much data we can put in a packet.
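+ * *_buf_size is set to the size of buffer that needs to be allocated,
+ * *_data_size to the amount of caller data that will fit into it and
+ * *_offset to the point at which the data should be inserted (after the
+ * security header, if any).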
+ */
+static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain,
+ size_t *_buf_size, size_t *_data_size, size_t *_offset)
+{
+ size_t shdr, buf_size, chunk;
+
+ switch (call->conn->security_level) {
+ default:
+ buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN);
+ shdr = 0;
+ goto out;
+ case RXRPC_SECURITY_AUTH:
+ shdr = sizeof(struct rxkad_level1_hdr);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ shdr = sizeof(struct rxkad_level2_hdr);
+ break;
+ }
+
+ buf_size = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN);
+
+ chunk = buf_size - shdr;
+ if (remain < chunk)
+ buf_size = round_up(shdr + remain, RXKAD_ALIGN);
+
+out:
+ *_buf_size = buf_size;
+ *_data_size = chunk;
+ *_offset = shdr;
+ return 0;
+}
+
+/*
+ * prime the encryption state with the invariant parts of a connection's
+ * description
+ */
+static int rxkad_prime_packet_security(struct rxrpc_connection *conn,
+ struct crypto_sync_skcipher *ci)
+{
+ struct skcipher_request *req;
+ struct rxrpc_key_token *token;
+ struct scatterlist sg;
+ struct rxrpc_crypt iv;
+ __be32 *tmpbuf;
+ size_t tmpsize = 4 * sizeof(__be32);
+
+ _enter("");
+
+ if (!conn->key)
+ return 0;
+
+ tmpbuf = kmalloc(tmpsize, GFP_KERNEL);
+ if (!tmpbuf)
+ return -ENOMEM;
+
+ req = skcipher_request_alloc(&ci->base, GFP_NOFS);
+ if (!req) {
+ kfree(tmpbuf);
+ return -ENOMEM;
+ }
+
+ token = conn->key->payload.data[0];
+ memcpy(&iv, token->kad->session_key, sizeof(iv));
+
+ tmpbuf[0] = htonl(conn->proto.epoch);
+ tmpbuf[1] = htonl(conn->proto.cid);
+ tmpbuf[2] = 0;
+ tmpbuf[3] = htonl(conn->security_ix);
+
+ sg_init_one(&sg, tmpbuf, tmpsize);
+ skcipher_request_set_sync_tfm(req, ci);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
+ crypto_skcipher_encrypt(req);
+ skcipher_request_free(req);
+
+ memcpy(&conn->rxkad.csum_iv, tmpbuf + 2, sizeof(conn->rxkad.csum_iv));
+ kfree(tmpbuf);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Allocate and prepare the crypto request on a call. For any particular call,
+ * this is called serially for the packets, so no lock should be necessary.
+ */
+static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call)
+{
+ struct crypto_skcipher *tfm = &call->conn->rxkad.cipher->base;
+
+ return skcipher_request_alloc(tfm, GFP_NOFS);
+}
+
+/*
+ * Clean up the crypto on a call.
+ */
+static void rxkad_free_call_crypto(struct rxrpc_call *call)
+{
+}
+
+/*
+ * partially encrypt a packet (level 1 security)
+ */
+static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ struct skcipher_request *req)
+{
+ struct rxkad_level1_hdr *hdr = (void *)txb->data;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg;
+ size_t pad;
+ u16 check;
+
+ _enter("");
+
+ check = txb->seq ^ ntohl(txb->wire.callNumber);
+ hdr->data_size = htonl((u32)check << 16 | txb->len);
+
+ txb->len += sizeof(struct rxkad_level1_hdr);
+ pad = txb->len;
+ pad = RXKAD_ALIGN - pad;
+ pad &= RXKAD_ALIGN - 1;
+ if (pad) {
+ memset(txb->data + txb->offset, 0, pad);
+ txb->len += pad;
+ }
+
+ /* start the encryption afresh */
+ memset(&iv, 0, sizeof(iv));
+
+ sg_init_one(&sg, txb->data, 8);
+ skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * wholly encrypt a packet (level 2 security)
+ */
+static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ struct skcipher_request *req)
+{
+ const struct rxrpc_key_token *token;
+ struct rxkad_level2_hdr *rxkhdr = (void *)txb->data;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg;
+ size_t pad;
+ u16 check;
+ int ret;
+
+ _enter("");
+
+ check = txb->seq ^ ntohl(txb->wire.callNumber);
+
+ rxkhdr->data_size = htonl(txb->len | (u32)check << 16);
+ rxkhdr->checksum = 0;
+
+ txb->len += sizeof(struct rxkad_level2_hdr);
+ pad = txb->len;
+ pad = RXKAD_ALIGN - pad;
+ pad &= RXKAD_ALIGN - 1;
+ if (pad) {
+ memset(txb->data + txb->offset, 0, pad);
+ txb->len += pad;
+ }
+
+ /* encrypt from the session key */
+ token = call->conn->key->payload.data[0];
+ memcpy(&iv, token->kad->session_key, sizeof(iv));
+
+ sg_init_one(&sg, txb->data, txb->len);
+ skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x);
+ ret = crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+ return ret;
+}
+
+/*
+ * Checksum an outgoing packet's header and then secure the packet according
+ * to the connection's security level.
+ */
+static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ struct skcipher_request *req;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg;
+ union {
+ __be32 buf[2];
+ } crypto __aligned(8);
+ u32 x, y;
+ int ret;
+
+ _enter("{%d{%x}},{#%u},%u,",
+ call->debug_id, key_serial(call->conn->key),
+ txb->seq, txb->len);
+
+ if (!call->conn->rxkad.cipher)
+ return 0;
+
+ ret = key_validate(call->conn->key);
+ if (ret < 0)
+ return ret;
+
+ req = rxkad_get_call_crypto(call);
+ if (!req)
+ return -ENOMEM;
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv));
+
+	/* Calculate the security checksum: encrypt the call number and a word
+	 * combining the channel number (top two bits) with the packet sequence
+	 * number, using the connection's checksum IV, and take the upper 16
+	 * bits of the second encrypted word (zero is remapped to 1 below since
+	 * zero checksums are not permitted).
+	 */
+ x = (ntohl(txb->wire.cid) & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x |= txb->seq & 0x3fffffff;
+ crypto.buf[0] = txb->wire.callNumber;
+ crypto.buf[1] = htonl(x);
+
+ sg_init_one(&sg, crypto.buf, 8);
+ skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+
+ y = ntohl(crypto.buf[1]);
+ y = (y >> 16) & 0xffff;
+ if (y == 0)
+ y = 1; /* zero checksums are not permitted */
+ txb->wire.cksum = htons(y);
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ ret = 0;
+ break;
+ case RXRPC_SECURITY_AUTH:
+ ret = rxkad_secure_packet_auth(call, txb, req);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ ret = rxkad_secure_packet_encrypt(call, txb, req);
+ break;
+ default:
+ ret = -EPERM;
+ break;
+ }
+
+ skcipher_request_free(req);
+ _leave(" = %d [set %x]", ret, y);
+ return ret;
+}
+
+/*
+ * decrypt partial encryption on a packet (level 1 security)
+ */
+static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ struct skcipher_request *req)
+{
+ struct rxkad_level1_hdr sechdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[16];
+ u32 data_size, buf;
+ u16 check;
+ int ret;
+
+ _enter("");
+
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_header);
+
+ /* Decrypt the skbuff in-place. TODO: We really want to decrypt
+ * directly into the target buffer.
+ */
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ ret = skb_to_sgvec(skb, sg, sp->offset, 8);
+ if (unlikely(ret < 0))
+ return ret;
+
+ /* start the decryption afresh */
+ memset(&iv, 0, sizeof(iv));
+
+ skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, 8, iv.x);
+ crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+
+ /* Extract the decrypted packet length */
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_encdata);
+ sp->offset += sizeof(sechdr);
+ sp->len -= sizeof(sechdr);
+
+ buf = ntohl(sechdr.data_size);
+ data_size = buf & 0xffff;
+
+ check = buf >> 16;
+ check ^= seq ^ call->call_id;
+ check &= 0xffff;
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_check);
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_data);
+ sp->len = data_size;
+
+ _leave(" = 0 [dlen=%x]", data_size);
+ return 0;
+}
+
+/*
+ * wholly decrypt a packet (level 2 security)
+ */
+static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ struct skcipher_request *req)
+{
+ const struct rxrpc_key_token *token;
+ struct rxkad_level2_hdr sechdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_crypt iv;
+ struct scatterlist _sg[4], *sg;
+ u32 data_size, buf;
+ u16 check;
+ int nsg, ret;
+
+ _enter(",{%d}", sp->len);
+
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_header);
+
+ /* Decrypt the skbuff in-place. TODO: We really want to decrypt
+ * directly into the target buffer.
+ */
+ sg = _sg;
+ nsg = skb_shinfo(skb)->nr_frags + 1;
+ if (nsg <= 4) {
+ nsg = 4;
+ } else {
+ sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
+ if (!sg)
+ return -ENOMEM;
+ }
+
+ sg_init_table(sg, nsg);
+ ret = skb_to_sgvec(skb, sg, sp->offset, sp->len);
+ if (unlikely(ret < 0)) {
+ if (sg != _sg)
+ kfree(sg);
+ return ret;
+ }
+
+ /* decrypt from the session key */
+ token = call->conn->key->payload.data[0];
+ memcpy(&iv, token->kad->session_key, sizeof(iv));
+
+ skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, sp->len, iv.x);
+ crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+ if (sg != _sg)
+ kfree(sg);
+
+ /* Extract the decrypted packet length */
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_len);
+ sp->offset += sizeof(sechdr);
+ sp->len -= sizeof(sechdr);
+
+ buf = ntohl(sechdr.data_size);
+ data_size = buf & 0xffff;
+
+ check = buf >> 16;
+ check ^= seq ^ call->call_id;
+ check &= 0xffff;
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_check);
+
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_data);
+
+ sp->len = data_size;
+ _leave(" = 0 [dlen=%x]", data_size);
+ return 0;
+}
+
+/*
+ * Verify the security on a received packet and the subpackets therein.
+ */
+static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct skcipher_request *req;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg;
+ union {
+ __be32 buf[2];
+ } crypto __aligned(8);
+ rxrpc_seq_t seq = sp->hdr.seq;
+ int ret;
+ u16 cksum;
+ u32 x, y;
+
+ _enter("{%d{%x}},{#%u}",
+ call->debug_id, key_serial(call->conn->key), seq);
+
+ if (!call->conn->rxkad.cipher)
+ return 0;
+
+ req = rxkad_get_call_crypto(call);
+ if (!req)
+ return -ENOMEM;
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, call->conn->rxkad.csum_iv.x, sizeof(iv));
+
+ /* validate the security checksum */
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x |= seq & 0x3fffffff;
+ crypto.buf[0] = htonl(call->call_id);
+ crypto.buf[1] = htonl(x);
+
+ sg_init_one(&sg, crypto.buf, 8);
+ skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x);
+ crypto_skcipher_encrypt(req);
+ skcipher_request_zero(req);
+
+ y = ntohl(crypto.buf[1]);
+ cksum = (y >> 16) & 0xffff;
+ if (cksum == 0)
+ cksum = 1; /* zero checksums are not permitted */
+
+ if (cksum != sp->hdr.cksum) {
+ ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_bad_checksum);
+ goto out;
+ }
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ ret = 0;
+ break;
+ case RXRPC_SECURITY_AUTH:
+ ret = rxkad_verify_packet_1(call, skb, seq, req);
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ ret = rxkad_verify_packet_2(call, skb, seq, req);
+ break;
+ default:
+ ret = -ENOANO;
+ break;
+ }
+
+out:
+ skcipher_request_free(req);
+ return ret;
+}
+
+/*
+ * issue a challenge
+ */
+static int rxkad_issue_challenge(struct rxrpc_connection *conn)
+{
+ struct rxkad_challenge challenge;
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ u32 serial;
+ int ret;
+
+ _enter("{%d}", conn->debug_id);
+
+ get_random_bytes(&conn->rxkad.nonce, sizeof(conn->rxkad.nonce));
+
+ challenge.version = htonl(2);
+ challenge.nonce = htonl(conn->rxkad.nonce);
+ challenge.min_level = htonl(0);
+ challenge.__padding = 0;
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.type = RXRPC_PACKET_TYPE_CHALLENGE;
+ whdr.flags = conn->out_clientflag;
+ whdr.userStatus = 0;
+ whdr.securityIndex = conn->security_ix;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(conn->service_id);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &challenge;
+ iov[1].iov_len = sizeof(challenge);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ whdr.serial = htonl(serial);
+
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_rxkad_challenge);
+ return -EAGAIN;
+ }
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+ trace_rxrpc_tx_packet(conn->debug_id, &whdr,
+ rxrpc_tx_point_rxkad_challenge);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * send a Kerberos security response
+ */
+static int rxkad_send_response(struct rxrpc_connection *conn,
+ struct rxrpc_host_header *hdr,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[3];
+ size_t len;
+ u32 serial;
+ int ret;
+
+ _enter("");
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ memset(&whdr, 0, sizeof(whdr));
+ whdr.epoch = htonl(hdr->epoch);
+ whdr.cid = htonl(hdr->cid);
+ whdr.type = RXRPC_PACKET_TYPE_RESPONSE;
+ whdr.flags = conn->out_clientflag;
+ whdr.securityIndex = hdr->securityIndex;
+ whdr.serviceId = htons(hdr->serviceId);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = resp;
+ iov[1].iov_len = sizeof(*resp);
+ iov[2].iov_base = (void *)s2->ticket;
+ iov[2].iov_len = s2->ticket_len;
+
+ len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ whdr.serial = htonl(serial);
+
+ rxrpc_local_dont_fragment(conn->local, false);
+ ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
+ rxrpc_local_dont_fragment(conn->local, true);
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_rxkad_response);
+ return -EAGAIN;
+ }
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * calculate the response checksum
+ */
+static void rxkad_calc_response_checksum(struct rxkad_response *response)
+{
+ u32 csum = 1000003;
+ int loop;
+ u8 *p = (u8 *) response;
+
+ for (loop = sizeof(*response); loop > 0; loop--)
+ csum = csum * 0x10204081 + *p++;
+
+ response->encrypted.checksum = htonl(csum);
+}
+
+/*
+ * encrypt the response packet
+ */
+static int rxkad_encrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxkad_key *s2)
+{
+ struct skcipher_request *req;
+ struct rxrpc_crypt iv;
+ struct scatterlist sg[1];
+
+ req = skcipher_request_alloc(&conn->rxkad.cipher->base, GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ /* continue encrypting from where we left off */
+ memcpy(&iv, s2->session_key, sizeof(iv));
+
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
+ skcipher_request_set_sync_tfm(req, conn->rxkad.cipher);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
+ crypto_skcipher_encrypt(req);
+ skcipher_request_free(req);
+ return 0;
+}
+
+/*
+ * respond to a challenge packet
+ */
+static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ const struct rxrpc_key_token *token;
+ struct rxkad_challenge challenge;
+ struct rxkad_response *resp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ u32 version, nonce, min_level;
+ int ret = -EPROTO;
+
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+ if (!conn->key)
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxkad_abort_chall_no_key);
+
+ ret = key_validate(conn->key);
+ if (ret < 0)
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_chall_key_expired);
+
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &challenge, sizeof(challenge)) < 0)
+ return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_chall_short);
+
+ version = ntohl(challenge.version);
+ nonce = ntohl(challenge.nonce);
+ min_level = ntohl(challenge.min_level);
+
+ trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
+
+ if (version != RXKAD_VERSION)
+ return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_chall_version);
+
+ if (conn->security_level < min_level)
+ return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
+ rxkad_abort_chall_level);
+
+ token = conn->key->payload.data[0];
+
+ /* build the response packet */
+ resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
+ if (!resp)
+ return -ENOMEM;
+
+ resp->version = htonl(RXKAD_VERSION);
+ resp->encrypted.epoch = htonl(conn->proto.epoch);
+ resp->encrypted.cid = htonl(conn->proto.cid);
+ resp->encrypted.securityIndex = htonl(conn->security_ix);
+ resp->encrypted.inc_nonce = htonl(nonce + 1);
+ resp->encrypted.level = htonl(conn->security_level);
+ resp->kvno = htonl(token->kad->kvno);
+ resp->ticket_len = htonl(token->kad->ticket_len);
+ resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+ resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+ resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+ resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
+
+ /* calculate the response checksum and then do the encryption */
+ rxkad_calc_response_checksum(resp);
+ ret = rxkad_encrypt_response(conn, resp, token->kad);
+ if (ret == 0)
+ ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
+ kfree(resp);
+ return ret;
+}
+
+/*
+ * decrypt the kerberos IV ticket in the response
+ */
+static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+ struct key *server_key,
+ struct sk_buff *skb,
+ void *ticket, size_t ticket_len,
+ struct rxrpc_crypt *_session_key,
+ time64_t *_expiry)
+{
+ struct skcipher_request *req;
+ struct rxrpc_crypt iv, key;
+ struct scatterlist sg[1];
+ struct in_addr addr;
+ unsigned int life;
+ time64_t issue, now;
+ bool little_endian;
+ u8 *p, *q, *name, *end;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(server_key));
+
+ *_expiry = 0;
+
+ ASSERT(server_key->payload.data[0] != NULL);
+ ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
+
+ memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
+
+ req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS);
+ if (!req)
+ return -ENOMEM;
+
+ sg_init_one(&sg[0], ticket, ticket_len);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x);
+ crypto_skcipher_decrypt(req);
+ skcipher_request_free(req);
+
+ p = ticket;
+ end = p + ticket_len;
+
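+/* Extract a NUL-terminated, printable string field of at most field##_SZ
+ * bytes from the decrypted ticket, aborting the connection with
+ * RXKADBADTICKET if the field is overlong or contains a non-printable
+ * character; p is left pointing just past the terminating NUL.
+ */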
+#define Z(field, fieldl) \
+ ({ \
+ u8 *__str = p; \
+ q = memchr(p, 0, end - p); \
+ if (!q || q - p > field##_SZ) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ for (; p < q; p++) \
+ if (!isprint(*p)) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ p++; \
+ __str; \
+ })
+
+ /* extract the ticket flags */
+ _debug("KIV FLAGS: %x", *p);
+ little_endian = *p & 1;
+ p++;
+
+ /* extract the authentication name */
+ name = Z(ANAME, aname);
+ _debug("KIV ANAME: %s", name);
+
+ /* extract the principal's instance */
+ name = Z(INST, inst);
+ _debug("KIV INST : %s", name);
+
+ /* extract the principal's authentication domain */
+ name = Z(REALM, realm);
+ _debug("KIV REALM: %s", name);
+
+ if (end - p < 4 + 8 + 4 + 2)
+ return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO,
+ rxkad_abort_resp_tkt_short);
+
+ /* get the IPv4 address of the entity that requested the ticket */
+ memcpy(&addr, p, sizeof(addr));
+ p += 4;
+ _debug("KIV ADDR : %pI4", &addr);
+
+ /* get the session key from the ticket */
+ memcpy(&key, p, sizeof(key));
+ p += 8;
+ _debug("KIV KEY : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1]));
+ memcpy(_session_key, &key, sizeof(key));
+
+ /* get the ticket's lifetime */
+ life = *p++ * 5 * 60;
+ _debug("KIV LIFE : %u", life);
+
+ /* get the issue time of the ticket */
+ if (little_endian) {
+ __le32 stamp;
+ memcpy(&stamp, p, 4);
+ issue = rxrpc_u32_to_time64(le32_to_cpu(stamp));
+ } else {
+ __be32 stamp;
+ memcpy(&stamp, p, 4);
+ issue = rxrpc_u32_to_time64(be32_to_cpu(stamp));
+ }
+ p += 4;
+ now = ktime_get_real_seconds();
+ _debug("KIV ISSUE: %llx [%llx]", issue, now);
+
+ /* check the ticket is in date */
+ if (issue > now)
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED,
+ rxkad_abort_resp_tkt_future);
+ if (issue < now - life)
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED,
+ rxkad_abort_resp_tkt_expired);
+
+ *_expiry = issue + life;
+
+ /* get the service name */
+ name = Z(SNAME, sname);
+ _debug("KIV SNAME: %s", name);
+
+ /* get the service instance name */
+ name = Z(INST, sinst);
+ _debug("KIV SINST: %s", name);
+ return 0;
+}
+
+/*
+ * decrypt the response packet
+ */
+static void rxkad_decrypt_response(struct rxrpc_connection *conn,
+ struct rxkad_response *resp,
+ const struct rxrpc_crypt *session_key)
+{
+ struct skcipher_request *req = rxkad_ci_req;
+ struct scatterlist sg[1];
+ struct rxrpc_crypt iv;
+
+ _enter(",,%08x%08x",
+ ntohl(session_key->n[0]), ntohl(session_key->n[1]));
+
+ mutex_lock(&rxkad_ci_mutex);
+ if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x,
+ sizeof(*session_key)) < 0)
+ BUG();
+
+ memcpy(&iv, session_key, sizeof(iv));
+
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
+ skcipher_request_set_sync_tfm(req, rxkad_ci);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
+ crypto_skcipher_decrypt(req);
+ skcipher_request_zero(req);
+
+ mutex_unlock(&rxkad_ci_mutex);
+
+ _leave("");
+}
+
+/*
+ * verify a response
+ */
+static int rxkad_verify_response(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxkad_response *response;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_crypt session_key;
+ struct key *server_key;
+ time64_t expiry;
+ void *ticket;
+ u32 version, kvno, ticket_len, level;
+ __be32 csum;
+ int ret, i;
+
+ _enter("{%d}", conn->debug_id);
+
+ server_key = rxrpc_look_up_server_security(conn, skb, 0, 0);
+ if (IS_ERR(server_key)) {
+ ret = PTR_ERR(server_key);
+ switch (ret) {
+ case -ENOKEY:
+ return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret,
+ rxkad_abort_resp_nokey);
+ case -EKEYEXPIRED:
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_resp_key_expired);
+ default:
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret,
+ rxkad_abort_resp_key_rejected);
+ }
+ }
+
+ ret = -ENOMEM;
+ response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
+ if (!response)
+ goto temporary_error;
+
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ response, sizeof(*response)) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short);
+ goto protocol_error;
+ }
+
+ version = ntohl(response->version);
+ ticket_len = ntohl(response->ticket_len);
+ kvno = ntohl(response->kvno);
+
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
+
+ if (version != RXKAD_VERSION) {
+ rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_resp_version);
+ goto protocol_error;
+ }
+
+ if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) {
+ rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO,
+ rxkad_abort_resp_tkt_len);
+ goto protocol_error;
+ }
+
+ if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) {
+ rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO,
+ rxkad_abort_resp_unknown_tkt);
+ goto protocol_error;
+ }
+
+ /* extract the kerberos ticket and decrypt and decode it */
+ ret = -ENOMEM;
+ ticket = kmalloc(ticket_len, GFP_NOFS);
+ if (!ticket)
+ goto temporary_error_free_resp;
+
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
+ ticket, ticket_len) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short_tkt);
+		goto protocol_error_free;
+ }
+
+ ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
+ &session_key, &expiry);
+ if (ret < 0)
+ goto temporary_error_free_ticket;
+
+ /* use the session key from inside the ticket to decrypt the
+ * response */
+ rxkad_decrypt_response(conn, response, &session_key);
+
+ if (ntohl(response->encrypted.epoch) != conn->proto.epoch ||
+ ntohl(response->encrypted.cid) != conn->proto.cid ||
+ ntohl(response->encrypted.securityIndex) != conn->security_ix) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_param);
+ goto protocol_error_free;
+ }
+
+ csum = response->encrypted.checksum;
+ response->encrypted.checksum = 0;
+ rxkad_calc_response_checksum(response);
+ if (response->encrypted.checksum != csum) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_checksum);
+ goto protocol_error_free;
+ }
+
+ for (i = 0; i < RXRPC_MAXCALLS; i++) {
+ u32 call_id = ntohl(response->encrypted.call_id[i]);
+ u32 counter = READ_ONCE(conn->channels[i].call_counter);
+
+ if (call_id > INT_MAX) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_callid);
+ goto protocol_error_free;
+ }
+
+ if (call_id < counter) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_ctr);
+ goto protocol_error_free;
+ }
+
+ if (call_id > counter) {
+ if (conn->channels[i].call) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_state);
+ goto protocol_error_free;
+ }
+ conn->channels[i].call_counter = call_id;
+ }
+ }
+
+ if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) {
+ rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO,
+ rxkad_abort_resp_ooseq);
+ goto protocol_error_free;
+ }
+
+ level = ntohl(response->encrypted.level);
+ if (level > RXRPC_SECURITY_ENCRYPT) {
+ rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO,
+ rxkad_abort_resp_level);
+ goto protocol_error_free;
+ }
+ conn->security_level = level;
+
+ /* create a key to hold the security data and expiration time - after
+ * this the connection security can be handled in exactly the same way
+ * as for a client connection */
+ ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
+ if (ret < 0)
+ goto temporary_error_free_ticket;
+
+ kfree(ticket);
+ kfree(response);
+ _leave(" = 0");
+ return 0;
+
+protocol_error_free:
+ kfree(ticket);
+protocol_error:
+ kfree(response);
+ key_put(server_key);
+ return -EPROTO;
+
+temporary_error_free_ticket:
+ kfree(ticket);
+temporary_error_free_resp:
+ kfree(response);
+temporary_error:
+ /* Ignore the response packet if we got a temporary error such as
+ * ENOMEM. We just want to send the challenge again. Note that we
+ * also come out this way if the ticket decryption fails.
+ */
+ key_put(server_key);
+ return ret;
+}
+
+/*
+ * clear the connection security
+ */
+static void rxkad_clear(struct rxrpc_connection *conn)
+{
+ _enter("");
+
+ if (conn->rxkad.cipher)
+ crypto_free_sync_skcipher(conn->rxkad.cipher);
+}
+
+/*
+ * Initialise the rxkad security service.
+ */
+static int rxkad_init(void)
+{
+ struct crypto_sync_skcipher *tfm;
+ struct skcipher_request *req;
+
+ /* pin the cipher we need so that the crypto layer doesn't invoke
+ * keventd to go get it */
+ tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ req = skcipher_request_alloc(&tfm->base, GFP_KERNEL);
+ if (!req)
+ goto nomem_tfm;
+
+ rxkad_ci_req = req;
+ rxkad_ci = tfm;
+ return 0;
+
+nomem_tfm:
+ crypto_free_sync_skcipher(tfm);
+ return -ENOMEM;
+}
+
+/*
+ * Clean up the rxkad security service.
+ */
+static void rxkad_exit(void)
+{
+ crypto_free_sync_skcipher(rxkad_ci);
+ skcipher_request_free(rxkad_ci_req);
+}
+
+/*
+ * RxRPC Kerberos-based security
+ */
+const struct rxrpc_security rxkad = {
+ .name = "rxkad",
+ .security_index = RXRPC_SECURITY_RXKAD,
+ .no_key_abort = RXKADUNKNOWNKEY,
+ .init = rxkad_init,
+ .exit = rxkad_exit,
+ .preparse_server_key = rxkad_preparse_server_key,
+ .free_preparse_server_key = rxkad_free_preparse_server_key,
+ .destroy_server_key = rxkad_destroy_server_key,
+ .init_connection_security = rxkad_init_connection_security,
+ .how_much_data = rxkad_how_much_data,
+ .secure_packet = rxkad_secure_packet,
+ .verify_packet = rxkad_verify_packet,
+ .free_call_crypto = rxkad_free_call_crypto,
+ .issue_challenge = rxkad_issue_challenge,
+ .respond_to_challenge = rxkad_respond_to_challenge,
+ .verify_response = rxkad_verify_response,
+ .clear = rxkad_clear,
+};
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
new file mode 100644
index 0000000000..085e7892d3
--- /dev/null
+++ b/net/rxrpc/rxperf.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* In-kernel rxperf server for testing purposes.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) "rxperf: " fmt
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+MODULE_DESCRIPTION("rxperf test server (afs)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+#define RXPERF_PORT 7009
+#define RX_PERF_SERVICE 147
+#define RX_PERF_VERSION 3
+#define RX_PERF_SEND 0
+#define RX_PERF_RECV 1
+#define RX_PERF_RPC 3
+#define RX_PERF_FILE 4
+#define RX_PERF_MAGIC_COOKIE 0x4711
+
+struct rxperf_proto_params {
+ __be32 version;
+ __be32 type;
+ __be32 rsize;
+ __be32 wsize;
+} __packed;
+
+static const u8 rxperf_magic_cookie[] = { 0x00, 0x00, 0x47, 0x11 };
+static const u8 secret[8] = { 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+
+enum rxperf_call_state {
+ RXPERF_CALL_SV_AWAIT_PARAMS, /* Server: Awaiting parameter block */
+ RXPERF_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */
+ RXPERF_CALL_SV_REPLYING, /* Server: Replying */
+ RXPERF_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */
+ RXPERF_CALL_COMPLETE, /* Completed or failed */
+};
+
+struct rxperf_call {
+ struct rxrpc_call *rxcall;
+ struct iov_iter iter;
+ struct kvec kvec[1];
+ struct work_struct work;
+ const char *type;
+ size_t iov_len;
+ size_t req_len; /* Size of request blob */
+ size_t reply_len; /* Size of reply blob */
+ unsigned int debug_id;
+ unsigned int operation_id;
+ struct rxperf_proto_params params;
+ __be32 tmp[2];
+ s32 abort_code;
+ enum rxperf_call_state state;
+ short error;
+ unsigned short unmarshal;
+ u16 service_id;
+ int (*deliver)(struct rxperf_call *call);
+ void (*processor)(struct work_struct *work);
+};
+
+static struct socket *rxperf_socket;
+static struct key *rxperf_sec_keyring; /* Ring of security/crypto keys */
+static struct workqueue_struct *rxperf_workqueue;
+
+static void rxperf_deliver_to_call(struct work_struct *work);
+static int rxperf_deliver_param_block(struct rxperf_call *call);
+static int rxperf_deliver_request(struct rxperf_call *call);
+static int rxperf_process_call(struct rxperf_call *call);
+static void rxperf_charge_preallocation(struct work_struct *work);
+
+static DECLARE_WORK(rxperf_charge_preallocation_work,
+ rxperf_charge_preallocation);
+
+static inline void rxperf_set_call_state(struct rxperf_call *call,
+ enum rxperf_call_state to)
+{
+ call->state = to;
+}
+
+static inline void rxperf_set_call_complete(struct rxperf_call *call,
+ int error, s32 remote_abort)
+{
+ if (call->state != RXPERF_CALL_COMPLETE) {
+ call->abort_code = remote_abort;
+ call->error = error;
+ call->state = RXPERF_CALL_COMPLETE;
+ }
+}
+
+static void rxperf_rx_discard_new_call(struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ kfree((struct rxperf_call *)user_call_ID);
+}
+
+static void rxperf_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ queue_work(rxperf_workqueue, &rxperf_charge_preallocation_work);
+}
+
+static void rxperf_queue_call_work(struct rxperf_call *call)
+{
+ queue_work(rxperf_workqueue, &call->work);
+}
+
+static void rxperf_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ struct rxperf_call *call = (struct rxperf_call *)call_user_ID;
+
+ if (call->state != RXPERF_CALL_COMPLETE)
+ rxperf_queue_call_work(call);
+}
+
+static void rxperf_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
+{
+ struct rxperf_call *call = (struct rxperf_call *)user_call_ID;
+
+ call->rxcall = rxcall;
+}
+
+static void rxperf_notify_end_reply_tx(struct sock *sock,
+ struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
+{
+ rxperf_set_call_state((struct rxperf_call *)call_user_ID,
+ RXPERF_CALL_SV_AWAIT_ACK);
+}
+
+/*
+ * Charge the incoming call preallocation.
+ */
+static void rxperf_charge_preallocation(struct work_struct *work)
+{
+ struct rxperf_call *call;
+
+ for (;;) {
+ call = kzalloc(sizeof(*call), GFP_KERNEL);
+ if (!call)
+ break;
+
+ call->type = "unset";
+ call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+ call->deliver = rxperf_deliver_param_block;
+ call->state = RXPERF_CALL_SV_AWAIT_PARAMS;
+ call->service_id = RX_PERF_SERVICE;
+ call->iov_len = sizeof(call->params);
+ call->kvec[0].iov_len = sizeof(call->params);
+ call->kvec[0].iov_base = &call->params;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ INIT_WORK(&call->work, rxperf_deliver_to_call);
+
+ if (rxrpc_kernel_charge_accept(rxperf_socket,
+ rxperf_notify_rx,
+ rxperf_rx_attach,
+ (unsigned long)call,
+ GFP_KERNEL,
+ call->debug_id) < 0)
+ break;
+ call = NULL;
+ }
+
+ kfree(call);
+}
+
+/*
+ * Open an rxrpc socket and bind it to be the rxperf test server
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ */
+static int rxperf_open_socket(void)
+{
+ struct sockaddr_rxrpc srx;
+ struct socket *socket;
+ int ret;
+
+ ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6,
+ &socket);
+ if (ret < 0)
+ goto error_1;
+
+ socket->sk->sk_allocation = GFP_NOFS;
+
+	/* bind the rxperf service address to make this a server socket */
+ memset(&srx, 0, sizeof(srx));
+ srx.srx_family = AF_RXRPC;
+ srx.srx_service = RX_PERF_SERVICE;
+ srx.transport_type = SOCK_DGRAM;
+ srx.transport_len = sizeof(srx.transport.sin6);
+ srx.transport.sin6.sin6_family = AF_INET6;
+ srx.transport.sin6.sin6_port = htons(RXPERF_PORT);
+
+ ret = rxrpc_sock_set_min_security_level(socket->sk,
+ RXRPC_SECURITY_ENCRYPT);
+ if (ret < 0)
+ goto error_2;
+
+	ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring);
+	if (ret < 0)
+		goto error_2;
+
+ ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx));
+ if (ret < 0)
+ goto error_2;
+
+ rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call,
+ rxperf_rx_discard_new_call);
+
+ ret = kernel_listen(socket, INT_MAX);
+ if (ret < 0)
+ goto error_2;
+
+ rxperf_socket = socket;
+ rxperf_charge_preallocation(&rxperf_charge_preallocation_work);
+ return 0;
+
+error_2:
+ sock_release(socket);
+error_1:
+ pr_err("Can't set up rxperf socket: %d\n", ret);
+ return ret;
+}
+
+/*
+ * close the rxrpc socket rxperf was using
+ */
+static void rxperf_close_socket(void)
+{
+ kernel_listen(rxperf_socket, 0);
+ kernel_sock_shutdown(rxperf_socket, SHUT_RDWR);
+ flush_workqueue(rxperf_workqueue);
+ sock_release(rxperf_socket);
+}
+
+/*
+ * Log remote abort codes that indicate that we have a protocol disagreement
+ * with the peer.
+ */
+static void rxperf_log_error(struct rxperf_call *call, s32 remote_abort)
+{
+ static int max = 0;
+ const char *msg;
+ int m;
+
+ switch (remote_abort) {
+ case RX_EOF: msg = "unexpected EOF"; break;
+ case RXGEN_CC_MARSHAL: msg = "client marshalling"; break;
+ case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break;
+ case RXGEN_SS_MARSHAL: msg = "server marshalling"; break;
+ case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break;
+ case RXGEN_DECODE: msg = "opcode decode"; break;
+ case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break;
+ case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break;
+ case -32: msg = "insufficient data"; break;
+ default:
+ return;
+ }
+
+ m = max;
+ if (m < 3) {
+ max = m + 1;
+ pr_info("Peer reported %s failure on %s\n", msg, call->type);
+ }
+}
+
+/*
+ * deliver messages to a call
+ */
+static void rxperf_deliver_to_call(struct work_struct *work)
+{
+ struct rxperf_call *call = container_of(work, struct rxperf_call, work);
+ enum rxperf_call_state state;
+ u32 abort_code, remote_abort = 0;
+ int ret = 0;
+
+ if (call->state == RXPERF_CALL_COMPLETE)
+ return;
+
+ while (state = call->state,
+ state == RXPERF_CALL_SV_AWAIT_PARAMS ||
+ state == RXPERF_CALL_SV_AWAIT_REQUEST ||
+ state == RXPERF_CALL_SV_AWAIT_ACK
+ ) {
+ if (state == RXPERF_CALL_SV_AWAIT_ACK) {
+ if (!rxrpc_kernel_check_life(rxperf_socket, call->rxcall))
+ goto call_complete;
+ return;
+ }
+
+ ret = call->deliver(call);
+ if (ret == 0)
+ ret = rxperf_process_call(call);
+
+ switch (ret) {
+ case 0:
+ continue;
+ case -EINPROGRESS:
+ case -EAGAIN:
+ return;
+ case -ECONNABORTED:
+ rxperf_log_error(call, call->abort_code);
+ goto call_complete;
+ case -EOPNOTSUPP:
+ abort_code = RXGEN_OPCODE;
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
+ goto call_complete;
+ case -ENOTSUPP:
+ abort_code = RX_USER_ABORT;
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
+ goto call_complete;
+ case -EIO:
+ pr_err("Call %u in bad state %u\n",
+ call->debug_id, call->state);
+ fallthrough;
+ case -ENODATA:
+ case -EBADMSG:
+ case -EMSGSIZE:
+ case -ENOMEM:
+ case -EFAULT:
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ RXGEN_SS_UNMARSHAL, ret,
+ rxperf_abort_unmarshal_error);
+ goto call_complete;
+ default:
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ RX_CALL_DEAD, ret,
+ rxperf_abort_general_error);
+ goto call_complete;
+ }
+ }
+
+call_complete:
+ rxperf_set_call_complete(call, ret, remote_abort);
+ /* The call may have been requeued */
+ rxrpc_kernel_shutdown_call(rxperf_socket, call->rxcall);
+ rxrpc_kernel_put_call(rxperf_socket, call->rxcall);
+ cancel_work(&call->work);
+ kfree(call);
+}
+
+/*
+ * Extract a piece of data from the received data socket buffers.
+ */
+static int rxperf_extract_data(struct rxperf_call *call, bool want_more)
+{
+ u32 remote_abort = 0;
+ int ret;
+
+ ret = rxrpc_kernel_recv_data(rxperf_socket, call->rxcall, &call->iter,
+ &call->iov_len, want_more, &remote_abort,
+ &call->service_id);
+ pr_debug("Extract i=%zu l=%zu m=%u ret=%d\n",
+ iov_iter_count(&call->iter), call->iov_len, want_more, ret);
+ if (ret == 0 || ret == -EAGAIN)
+ return ret;
+
+ if (ret == 1) {
+ switch (call->state) {
+ case RXPERF_CALL_SV_AWAIT_REQUEST:
+ rxperf_set_call_state(call, RXPERF_CALL_SV_REPLYING);
+ break;
+ case RXPERF_CALL_COMPLETE:
+ pr_debug("premature completion %d", call->error);
+ return call->error;
+ default:
+ break;
+ }
+ return 0;
+ }
+
+ rxperf_set_call_complete(call, ret, remote_abort);
+ return ret;
+}
+
+/*
+ * Extract the parameter block from an incoming call and get the operation ID.
+ */
+static int rxperf_deliver_param_block(struct rxperf_call *call)
+{
+ u32 version;
+ int ret;
+
+ /* Extract the parameter block */
+ ret = rxperf_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ version = ntohl(call->params.version);
+ call->operation_id = ntohl(call->params.type);
+ call->deliver = rxperf_deliver_request;
+
+ if (version != RX_PERF_VERSION) {
+ pr_info("Version mismatch %x\n", version);
+ return -ENOTSUPP;
+ }
+
+ switch (call->operation_id) {
+ case RX_PERF_SEND:
+ call->type = "send";
+ call->reply_len = 0;
+ call->iov_len = 4; /* Expect req size */
+ break;
+ case RX_PERF_RECV:
+ call->type = "recv";
+ call->req_len = 0;
+ call->iov_len = 4; /* Expect reply size */
+ break;
+ case RX_PERF_RPC:
+ call->type = "rpc";
+ call->iov_len = 8; /* Expect req size and reply size */
+ break;
+ case RX_PERF_FILE:
+ call->type = "file";
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ rxperf_set_call_state(call, RXPERF_CALL_SV_AWAIT_REQUEST);
+ return call->deliver(call);
+}
+
+/*
+ * Deliver the request data.
+ */
+static int rxperf_deliver_request(struct rxperf_call *call)
+{
+ int ret;
+
+ switch (call->unmarshal) {
+ case 0:
+ call->kvec[0].iov_len = call->iov_len;
+ call->kvec[0].iov_base = call->tmp;
+ iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len);
+ call->unmarshal++;
+ fallthrough;
+ case 1:
+ ret = rxperf_extract_data(call, true);
+ if (ret < 0)
+ return ret;
+
+ switch (call->operation_id) {
+ case RX_PERF_SEND:
+ call->type = "send";
+ call->req_len = ntohl(call->tmp[0]);
+ call->reply_len = 0;
+ break;
+ case RX_PERF_RECV:
+ call->type = "recv";
+ call->req_len = 0;
+ call->reply_len = ntohl(call->tmp[0]);
+ break;
+ case RX_PERF_RPC:
+ call->type = "rpc";
+ call->req_len = ntohl(call->tmp[0]);
+ call->reply_len = ntohl(call->tmp[1]);
+ break;
+ default:
+ pr_info("Can't parse extra params\n");
+ return -EIO;
+ }
+
+ pr_debug("CALL op=%s rq=%zx rp=%zx\n",
+ call->type, call->req_len, call->reply_len);
+
+ call->iov_len = call->req_len;
+ iov_iter_discard(&call->iter, READ, call->req_len);
+ call->unmarshal++;
+ fallthrough;
+ case 2:
+ ret = rxperf_extract_data(call, false);
+ if (ret < 0)
+ return ret;
+ call->unmarshal++;
+ fallthrough;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Process a call for which we've received the request.
+ */
+static int rxperf_process_call(struct rxperf_call *call)
+{
+ struct msghdr msg = {};
+ struct bio_vec bv;
+ struct kvec iov[1];
+ ssize_t n;
+ size_t reply_len = call->reply_len, len;
+
+ rxrpc_kernel_set_tx_length(rxperf_socket, call->rxcall,
+ reply_len + sizeof(rxperf_magic_cookie));
+
+ while (reply_len > 0) {
+ len = min_t(size_t, reply_len, PAGE_SIZE);
+ bvec_set_page(&bv, ZERO_PAGE(0), len, 0);
+ iov_iter_bvec(&msg.msg_iter, WRITE, &bv, 1, len);
+ msg.msg_flags = MSG_MORE;
+ n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg,
+ len, rxperf_notify_end_reply_tx);
+ if (n < 0)
+ return n;
+ if (n == 0)
+ return -EIO;
+ reply_len -= n;
+ }
+
+ len = sizeof(rxperf_magic_cookie);
+ iov[0].iov_base = (void *)rxperf_magic_cookie;
+ iov[0].iov_len = len;
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
+ msg.msg_flags = 0;
+ n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, len,
+ rxperf_notify_end_reply_tx);
+ if (n >= 0)
+ return 0; /* Success */
+
+ if (n == -ENOMEM)
+ rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ rxperf_abort_oom);
+ return n;
+}
+
+/*
+ * Add a key to the security keyring.
+ */
+static int rxperf_add_key(struct key *keyring)
+{
+ key_ref_t kref;
+ int ret;
+
+ kref = key_create_or_update(make_key_ref(keyring, true),
+ "rxrpc_s",
+ __stringify(RX_PERF_SERVICE) ":2",
+ secret,
+ sizeof(secret),
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH
+ | KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA);
+
+ if (IS_ERR(kref)) {
+ pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref));
+ return PTR_ERR(kref);
+ }
+
+ ret = key_link(keyring, key_ref_to_ptr(kref));
+ if (ret < 0)
+ pr_err("Can't link rxperf server key: %d\n", ret);
+ key_ref_put(kref);
+ return ret;
+}
+
+/*
+ * Initialise the rxperf server.
+ */
+static int __init rxperf_init(void)
+{
+ struct key *keyring;
+ int ret = -ENOMEM;
+
+ pr_info("Server registering\n");
+
+ rxperf_workqueue = alloc_workqueue("rxperf", 0, 0);
+ if (!rxperf_workqueue)
+ goto error_workqueue;
+
+ keyring = keyring_alloc("rxperf_server",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+ KEY_POS_WRITE |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
+ KEY_USR_WRITE |
+ KEY_OTH_VIEW | KEY_OTH_READ | KEY_OTH_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(keyring)) {
+ pr_err("Can't allocate rxperf server keyring: %ld\n",
+ PTR_ERR(keyring));
+ goto error_keyring;
+ }
+ rxperf_sec_keyring = keyring;
+ ret = rxperf_add_key(keyring);
+ if (ret < 0)
+ goto error_key;
+
+ ret = rxperf_open_socket();
+ if (ret < 0)
+ goto error_socket;
+ return 0;
+
+error_socket:
+error_key:
+ key_put(rxperf_sec_keyring);
+error_keyring:
+ destroy_workqueue(rxperf_workqueue);
+ rcu_barrier();
+error_workqueue:
+ pr_err("Failed to register: %d\n", ret);
+ return ret;
+}
+late_initcall(rxperf_init); /* Must be called after net/ to create socket */
+
+static void __exit rxperf_exit(void)
+{
+ pr_info("Server unregistering.\n");
+
+ rxperf_close_socket();
+ key_put(rxperf_sec_keyring);
+ destroy_workqueue(rxperf_workqueue);
+ rcu_barrier();
+}
+module_exit(rxperf_exit);
+
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
new file mode 100644
index 0000000000..cb8dd1d3b1
--- /dev/null
+++ b/net/rxrpc/security.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include "ar-internal.h"
+
+static const struct rxrpc_security *rxrpc_security_types[] = {
+ [RXRPC_SECURITY_NONE] = &rxrpc_no_security,
+#ifdef CONFIG_RXKAD
+ [RXRPC_SECURITY_RXKAD] = &rxkad,
+#endif
+};
+
+int __init rxrpc_init_security(void)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) {
+ if (rxrpc_security_types[i]) {
+ ret = rxrpc_security_types[i]->init();
+ if (ret < 0)
+ goto failed;
+ }
+ }
+
+ return 0;
+
+failed:
+ for (i--; i >= 0; i--)
+ if (rxrpc_security_types[i])
+ rxrpc_security_types[i]->exit();
+ return ret;
+}
+
+void rxrpc_exit_security(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++)
+ if (rxrpc_security_types[i])
+ rxrpc_security_types[i]->exit();
+}
+
+/*
+ * look up an rxrpc security module
+ */
+const struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
+{
+ if (security_index >= ARRAY_SIZE(rxrpc_security_types))
+ return NULL;
+ return rxrpc_security_types[security_index];
+}
+
+/*
+ * Initialise the security on a client call.
+ */
+int rxrpc_init_client_call_security(struct rxrpc_call *call)
+{
+ const struct rxrpc_security *sec = &rxrpc_no_security;
+ struct rxrpc_key_token *token;
+ struct key *key = call->key;
+ int ret;
+
+ if (!key)
+ goto found;
+
+ ret = key_validate(key);
+ if (ret < 0)
+ return ret;
+
+ for (token = key->payload.data[0]; token; token = token->next) {
+ sec = rxrpc_security_lookup(token->security_index);
+ if (sec)
+ goto found;
+ }
+ return -EKEYREJECTED;
+
+found:
+ call->security = sec;
+ call->security_ix = sec->security_index;
+ return 0;
+}
+
+/*
+ * initialise the security on a client connection
+ */
+int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
+{
+ struct rxrpc_key_token *token;
+ struct key *key = conn->key;
+ int ret = 0;
+
+ _enter("{%d},{%x}", conn->debug_id, key_serial(key));
+
+ for (token = key->payload.data[0]; token; token = token->next) {
+ if (token->security_index == conn->security->security_index)
+ goto found;
+ }
+ return -EKEYREJECTED;
+
+found:
+ mutex_lock(&conn->security_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = conn->security->init_connection_security(conn, token);
+ if (ret == 0) {
+ spin_lock(&conn->state_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
+ conn->state = RXRPC_CONN_CLIENT;
+ spin_unlock(&conn->state_lock);
+ }
+ }
+ mutex_unlock(&conn->security_lock);
+ return ret;
+}
+
+/*
+ * Get the security ops for an incoming call on a server connection.
+ */
+const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
+ struct sk_buff *skb)
+{
+ const struct rxrpc_security *sec;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ _enter("");
+
+ sec = rxrpc_security_lookup(sp->hdr.securityIndex);
+ if (!sec) {
+ rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
+ return NULL;
+ }
+
+ if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
+ !rx->securities) {
+ rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
+ sec->no_key_abort, -EKEYREJECTED);
+ return NULL;
+ }
+
+ return sec;
+}
+
+/*
+ * Find the security key for a server connection.
+ */
+struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ u32 kvno, u32 enctype)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_sock *rx;
+ struct key *key = ERR_PTR(-EKEYREJECTED);
+ key_ref_t kref = NULL;
+ char kdesc[5 + 1 + 3 + 1 + 12 + 1 + 12 + 1];
+ int ret;
+
+ _enter("");
+
+ if (enctype)
+ sprintf(kdesc, "%u:%u:%u:%u",
+ sp->hdr.serviceId, sp->hdr.securityIndex, kvno, enctype);
+ else if (kvno)
+ sprintf(kdesc, "%u:%u:%u",
+ sp->hdr.serviceId, sp->hdr.securityIndex, kvno);
+ else
+ sprintf(kdesc, "%u:%u",
+ sp->hdr.serviceId, sp->hdr.securityIndex);
+
+ read_lock(&conn->local->services_lock);
+
+ rx = conn->local->service;
+ if (!rx)
+ goto out;
+
+ /* look through the service's keyring */
+ kref = keyring_search(make_key_ref(rx->securities, 1UL),
+ &key_type_rxrpc_s, kdesc, true);
+ if (IS_ERR(kref)) {
+ key = ERR_CAST(kref);
+ goto out;
+ }
+
+ key = key_ref_to_ptr(kref);
+
+ ret = key_validate(key);
+ if (ret < 0) {
+ key_put(key);
+ key = ERR_PTR(ret);
+ goto out;
+ }
+
+out:
+ read_unlock(&conn->local->services_lock);
+ return key;
+}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
new file mode 100644
index 0000000000..8e0b94714e
--- /dev/null
+++ b/net/rxrpc/sendmsg.c
@@ -0,0 +1,824 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* AF_RXRPC sendmsg() implementation.
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <linux/sched/signal.h>
+
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * Propose an abort to be made in the I/O thread.
+ */
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why)
+{
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
+
+ if (!call->send_abort && !rxrpc_call_is_complete(call)) {
+ call->send_abort_why = why;
+ call->send_abort_err = error;
+ call->send_abort_seq = 0;
+ /* Request abort locklessly vs rxrpc_input_call_event(). */
+ smp_store_release(&call->send_abort, abort_code);
+ rxrpc_poke_call(call, rxrpc_call_poke_abort);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Wait for a call to become connected. Interruption here doesn't cause the
+ * call to be aborted.
+ */
+static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret = 0;
+
+ _enter("%d", call->debug_id);
+
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
+ goto no_wait;
+
+ add_wait_queue_exclusive(&call->waitq, &myself);
+
+ for (;;) {
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
+ set_current_state(TASK_INTERRUPTIBLE);
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
+
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
+ break;
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
+ signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+ *timeo = schedule_timeout(*timeo);
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
+
+no_wait:
+ if (ret == 0 && rxrpc_call_is_complete(call))
+ ret = call->error;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Return true if there's sufficient Tx queue space.
+ */
+static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
+{
+ if (_tx_win)
+ *_tx_win = call->tx_bottom;
+ return call->tx_prepared - call->tx_bottom < 256;
+}
+
+/*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (rxrpc_check_tx_space(call, NULL))
+ return 0;
+
+ if (rxrpc_call_is_complete(call))
+ return call->error;
+
+ if (signal_pending(current))
+ return sock_intr_errno(*timeo);
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
+ *timeo = schedule_timeout(*timeo);
+ }
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
+ struct rxrpc_call *call)
+{
+ rxrpc_seq_t tx_start, tx_win;
+ signed long rtt, timeout;
+
+ rtt = READ_ONCE(call->peer->srtt_us) >> 3;
+ rtt = usecs_to_jiffies(rtt) * 2;
+ if (rtt < 2)
+ rtt = 2;
+
+ timeout = rtt;
+ tx_start = smp_load_acquire(&call->acks_hard_ack);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ if (rxrpc_check_tx_space(call, &tx_win))
+ return 0;
+
+ if (rxrpc_call_is_complete(call))
+ return call->error;
+
+ if (timeout == 0 &&
+ tx_win == tx_start && signal_pending(current))
+ return -EINTR;
+
+ if (tx_win != tx_start) {
+ timeout = rtt;
+ tx_start = tx_win;
+ }
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
+ timeout = schedule_timeout(timeout);
+ }
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (rxrpc_check_tx_space(call, NULL))
+ return 0;
+
+ if (rxrpc_call_is_complete(call))
+ return call->error;
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
+ *timeo = schedule_timeout(*timeo);
+ }
+}
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo,
+ bool waitall)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret;
+
+ _enter(",{%u,%u,%u,%u}",
+ call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize);
+
+ add_wait_queue(&call->waitq, &myself);
+
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_waitall(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ break;
+ case RXRPC_PREINTERRUPTIBLE:
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo);
+ break;
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ set_current_state(TASK_RUNNING);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Notify the owner of the call that the transmit phase is ended and the last
+ * packet has been queued.
+ */
+static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ rxrpc_notify_end_tx_t notify_end_tx)
+{
+ if (notify_end_tx)
+ notify_end_tx(&rx->sk, call, call->user_call_ID);
+}
+
+/*
+ * Queue a DATA packet for transmission and, if the send queue was empty,
+ * poke the I/O thread to transmit it immediately.
+ */
+static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
+ struct rxrpc_txbuf *txb,
+ rxrpc_notify_end_tx_t notify_end_tx)
+{
+ rxrpc_seq_t seq = txb->seq;
+ bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
+
+ rxrpc_inc_stat(call->rxnet, stat_tx_data);
+
+ ASSERTCMP(txb->seq, ==, call->tx_prepared + 1);
+
+ /* We have to set the timestamp before queueing as the retransmit
+ * algorithm can see the packet as soon as we queue it.
+ */
+ txb->last_sent = ktime_get_real();
+
+ if (last)
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last);
+ else
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_queue);
+
+ /* Add the packet to the call's output buffer */
+ spin_lock(&call->tx_lock);
+ poke = list_empty(&call->tx_sendmsg);
+ list_add_tail(&txb->call_link, &call->tx_sendmsg);
+ call->tx_prepared = seq;
+ if (last)
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
+ spin_unlock(&call->tx_lock);
+
+ if (poke)
+ rxrpc_poke_call(call, rxrpc_call_poke_start);
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - The caller holds the call user access mutex, but not the socket lock.
+ */
+static int rxrpc_send_data(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
+{
+ struct rxrpc_txbuf *txb;
+ struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
+ long timeo;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;
+
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ ret = rxrpc_wait_to_be_connected(call, &timeo);
+ if (ret < 0)
+ return ret;
+
+ if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = rxrpc_init_client_conn_security(call->conn);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* this should be in poll */
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+reload:
+ ret = -EPIPE;
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ goto maybe_error;
+ state = rxrpc_call_state(call);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Request phase complete for this client call */
+ trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EPROTO);
+ goto maybe_error;
+ }
+
+ ret = -EMSGSIZE;
+ if (call->tx_total_len != -1) {
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
+ }
+
+ txb = call->tx_pending;
+ call->tx_pending = NULL;
+ if (txb)
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more);
+
+ do {
+ if (!txb) {
+ size_t remain, bufsize, chunk, offset;
+
+ _debug("alloc");
+
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;
+
+ /* Work out the maximum size of a packet. Assume that
+ * the security header is going to be in the padded
+ * region (enc blocksize), but the trailer is not.
+ */
+ remain = more ? INT_MAX : msg_data_left(msg);
+ ret = call->conn->security->how_much_data(call, remain,
+ &bufsize, &chunk, &offset);
+ if (ret < 0)
+ goto maybe_error;
+
+ _debug("SIZE: %zu/%zu @%zu", chunk, bufsize, offset);
+
+ /* create a buffer that we can retain until it's ACK'd */
+ ret = -ENOMEM;
+ txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_DATA,
+ GFP_KERNEL);
+ if (!txb)
+ goto maybe_error;
+
+ txb->offset = offset;
+ txb->space -= offset;
+ txb->space = min_t(size_t, chunk, txb->space);
+ }
+
+ _debug("append");
+
+ /* append next segment of data to the current buffer */
+ if (msg_data_left(msg) > 0) {
+ size_t copy = min_t(size_t, txb->space, msg_data_left(msg));
+
+ _debug("add %zu", copy);
+ if (!copy_from_iter_full(txb->data + txb->offset, copy,
+ &msg->msg_iter))
+ goto efault;
+ _debug("added");
+ txb->space -= copy;
+ txb->len += copy;
+ txb->offset += copy;
+ copied += copy;
+ if (call->tx_total_len != -1)
+ call->tx_total_len -= copy;
+ }
+
+ /* check for the far side aborting the call or a network error
+ * occurring */
+ if (rxrpc_call_is_complete(call))
+ goto call_terminated;
+
+ /* add the packet to the send queue if it's now full */
+ if (!txb->space ||
+ (msg_data_left(msg) == 0 && !more)) {
+ if (msg_data_left(msg) == 0 && !more) {
+ txb->wire.flags |= RXRPC_LAST_PACKET;
+ __set_bit(RXRPC_TXBUF_LAST, &txb->flags);
+ } else if (call->tx_top - call->acks_hard_ack <
+ call->tx_winsize)
+ txb->wire.flags |= RXRPC_MORE_PACKETS;
+
+ ret = call->security->secure_packet(call, txb);
+ if (ret < 0)
+ goto out;
+
+ rxrpc_queue_packet(rx, call, txb, notify_end_tx);
+ txb = NULL;
+ }
+ } while (msg_data_left(msg) > 0);
+
+success:
+ ret = copied;
+ if (rxrpc_call_is_complete(call) &&
+ call->error < 0)
+ ret = call->error;
+out:
+ call->tx_pending = txb;
+ _leave(" = %d", ret);
+ return ret;
+
+call_terminated:
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_send_aborted);
+ _leave(" = %d", call->error);
+ return call->error;
+
+maybe_error:
+ if (copied)
+ goto success;
+ goto out;
+
+efault:
+ ret = -EFAULT;
+ goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
+}
+
+/*
+ * extract control messages from the sendmsg() control buffer
+ */
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
+{
+ struct cmsghdr *cmsg;
+ bool got_user_ID = false;
+ int len;
+
+ if (msg->msg_controllen == 0)
+ return -EINVAL;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ len = cmsg->cmsg_len - sizeof(struct cmsghdr);
+ _debug("CMSG %d, %d, %d",
+ cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+ if (cmsg->cmsg_level != SOL_RXRPC)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case RXRPC_USER_CALL_ID:
+ if (msg->msg_flags & MSG_CMSG_COMPAT) {
+ if (len != sizeof(u32))
+ return -EINVAL;
+ p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg);
+ } else {
+ if (len != sizeof(unsigned long))
+ return -EINVAL;
+ p->call.user_call_ID = *(unsigned long *)
+ CMSG_DATA(cmsg);
+ }
+ got_user_ID = true;
+ break;
+
+ case RXRPC_ABORT:
+ if (p->command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ p->command = RXRPC_CMD_SEND_ABORT;
+ if (len != sizeof(p->abort_code))
+ return -EINVAL;
+ p->abort_code = *(unsigned int *)CMSG_DATA(cmsg);
+ if (p->abort_code == 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_CHARGE_ACCEPT:
+ if (p->command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ p->command = RXRPC_CMD_CHARGE_ACCEPT;
+ if (len != 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_EXCLUSIVE_CALL:
+ p->exclusive = true;
+ if (len != 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_UPGRADE_SERVICE:
+ p->upgrade = true;
+ if (len != 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_TX_LENGTH:
+ if (p->call.tx_total_len != -1 || len != sizeof(__s64))
+ return -EINVAL;
+ p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg);
+ if (p->call.tx_total_len < 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_SET_CALL_TIMEOUT:
+ if (len & 3 || len < 4 || len > 12)
+ return -EINVAL;
+ memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len);
+ p->call.nr_timeouts = len / 4;
+ if (p->call.timeouts.hard > INT_MAX / HZ)
+ return -ERANGE;
+ if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000)
+ return -ERANGE;
+ if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000)
+ return -ERANGE;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (!got_user_ID)
+ return -EINVAL;
+ if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ _leave(" = 0");
+ return 0;
+}
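
The control messages parsed above are what a userspace caller attaches to sendmsg() to drive a call. A minimal sketch of such a caller follows; it is illustrative only, assumes the AF_RXRPC socket has already been set up as described in Documentation/networking/rxrpc.rst, and defines SOL_RXRPC locally in case the C library headers do not provide it.

    /* Illustrative userspace sketch (not part of this file): start a client
     * call by tagging the first sendmsg() with an RXRPC_USER_CALL_ID control
     * message.  Socket setup and error handling are assumed elsewhere.
     */
    #include <string.h>
    #include <sys/socket.h>
    #include <linux/rxrpc.h>

    #ifndef SOL_RXRPC
    #define SOL_RXRPC 272   /* matches include/linux/socket.h */
    #endif

    static ssize_t send_request(int fd, struct sockaddr_rxrpc *srx,
                                const void *req, size_t req_len)
    {
            unsigned long call_id = 1;      /* caller-chosen user call ID */
            char control[CMSG_SPACE(sizeof(call_id))] = { 0 };
            struct iovec iov = { .iov_base = (void *)req, .iov_len = req_len };
            struct msghdr msg = {
                    .msg_name       = srx,
                    .msg_namelen    = sizeof(*srx),
                    .msg_iov        = &iov,
                    .msg_iovlen     = 1,
                    .msg_control    = control,
                    .msg_controllen = sizeof(control),
            };
            struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

            cmsg->cmsg_level = SOL_RXRPC;
            cmsg->cmsg_type  = RXRPC_USER_CALL_ID;
            cmsg->cmsg_len   = CMSG_LEN(sizeof(call_id));
            memcpy(CMSG_DATA(cmsg), &call_id, sizeof(call_id));

            /* No MSG_MORE: this single sendmsg() carries the whole request. */
            return sendmsg(fd, &msg, 0);
    }
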
+
+/*
+ * Create a new client call for sendmsg().
+ * - Called with the socket lock held, which it must release.
+ * - If it returns a call, the call's lock will need releasing by the caller.
+ */
+static struct rxrpc_call *
+rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+ struct rxrpc_send_params *p)
+ __releases(&rx->sk.sk_lock.slock)
+ __acquires(&call->user_mutex)
+{
+ struct rxrpc_conn_parameters cp;
+ struct rxrpc_call *call;
+ struct key *key;
+
+ DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+
+ _enter("");
+
+ if (!msg->msg_name) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-EDESTADDRREQ);
+ }
+
+ key = rx->key;
+ if (key && !rx->key->payload.data[0])
+ key = NULL;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = rx->key;
+ cp.security_level = rx->min_sec_level;
+ cp.exclusive = rx->exclusive | p->exclusive;
+ cp.upgrade = p->upgrade;
+ cp.service_id = srx->srx_service;
+ call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL,
+ atomic_inc_return(&rxrpc_debug_id));
+ /* The socket is now unlocked */
+
+ _leave(" = %p\n", call);
+ return call;
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ __releases(&rx->sk.sk_lock.slock)
+{
+ struct rxrpc_call *call;
+ unsigned long now, j;
+ bool dropped_lock = false;
+ int ret;
+
+ struct rxrpc_send_params p = {
+ .call.tx_total_len = -1,
+ .call.user_call_ID = 0,
+ .call.nr_timeouts = 0,
+ .call.interruptibility = RXRPC_INTERRUPTIBLE,
+ .abort_code = 0,
+ .command = RXRPC_CMD_SEND_DATA,
+ .exclusive = false,
+ .upgrade = false,
+ };
+
+ _enter("");
+
+ ret = rxrpc_sendmsg_cmsg(msg, &p);
+ if (ret < 0)
+ goto error_release_sock;
+
+ if (p.command == RXRPC_CMD_CHARGE_ACCEPT) {
+ ret = -EINVAL;
+ if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
+ goto error_release_sock;
+ ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID);
+ goto error_release_sock;
+ }
+
+ call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID);
+ if (!call) {
+ ret = -EBADSLT;
+ if (p.command != RXRPC_CMD_SEND_DATA)
+ goto error_release_sock;
+ call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p);
+ /* The socket is now unlocked... */
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ /* ... and we have the call lock. */
+ p.call.nr_timeouts = 0;
+ ret = 0;
+ if (rxrpc_call_is_complete(call))
+ goto out_put_unlock;
+ } else {
+ switch (rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_AWAIT_CONN:
+ case RXRPC_CALL_SERVER_SECURING:
+ if (p.command == RXRPC_CMD_SEND_ABORT)
+ break;
+ fallthrough;
+ case RXRPC_CALL_UNINITIALISED:
+ case RXRPC_CALL_SERVER_PREALLOC:
+ rxrpc_put_call(call, rxrpc_call_put_sendmsg);
+ ret = -EBUSY;
+ goto error_release_sock;
+ default:
+ break;
+ }
+
+ ret = mutex_lock_interruptible(&call->user_mutex);
+ release_sock(&rx->sk);
+ if (ret < 0) {
+ ret = -ERESTARTSYS;
+ goto error_put;
+ }
+
+ if (p.call.tx_total_len != -1) {
+ ret = -EINVAL;
+ if (call->tx_total_len != -1 ||
+ call->tx_pending ||
+ call->tx_top != 0)
+ goto out_put_unlock;
+ call->tx_total_len = p.call.tx_total_len;
+ }
+ }
+
+ switch (p.call.nr_timeouts) {
+ case 3:
+ j = msecs_to_jiffies(p.call.timeouts.normal);
+ if (p.call.timeouts.normal > 0 && j == 0)
+ j = 1;
+ WRITE_ONCE(call->next_rx_timo, j);
+ fallthrough;
+ case 2:
+ j = msecs_to_jiffies(p.call.timeouts.idle);
+ if (p.call.timeouts.idle > 0 && j == 0)
+ j = 1;
+ WRITE_ONCE(call->next_req_timo, j);
+ fallthrough;
+ case 1:
+ if (p.call.timeouts.hard > 0) {
+ j = p.call.timeouts.hard * HZ;
+ now = jiffies;
+ j += now;
+ WRITE_ONCE(call->expect_term_by, j);
+ rxrpc_reduce_call_timer(call, j, now,
+ rxrpc_timer_set_for_hard);
+ }
+ break;
+ }
+
+ if (rxrpc_call_is_complete(call)) {
+ /* it's too late for this call */
+ ret = -ESHUTDOWN;
+ } else if (p.command == RXRPC_CMD_SEND_ABORT) {
+ rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED,
+ rxrpc_abort_call_sendmsg);
+ ret = 0;
+ } else if (p.command != RXRPC_CMD_SEND_DATA) {
+ ret = -EINVAL;
+ } else {
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
+ }
+
+out_put_unlock:
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
+error_put:
+ rxrpc_put_call(call, rxrpc_call_put_sendmsg);
+ _leave(" = %d", ret);
+ return ret;
+
+error_release_sock:
+ release_sock(&rx->sk);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @sock: The socket the call is on
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ * @notify_end_tx: Notification that the last packet is queued.
+ *
+ * Allow a kernel service to send data on a call. The call must be in a state
+ * appropriate to sending data. No control data should be supplied in @msg,
+ * nor should an address be supplied. MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+ struct msghdr *msg, size_t len,
+ rxrpc_notify_end_tx_t notify_end_tx)
+{
+ bool dropped_lock = false;
+ int ret;
+
+ _enter("{%d},", call->debug_id);
+
+ ASSERTCMP(msg->msg_name, ==, NULL);
+ ASSERTCMP(msg->msg_control, ==, NULL);
+
+ mutex_lock(&call->user_mutex);
+
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+ notify_end_tx, &dropped_lock);
+ if (ret == -ESHUTDOWN)
+ ret = call->error;
+
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
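
As the comment above notes, MSG_MORE keeps the transmit phase open. A usage sketch for a hypothetical kernel service streaming a request in two chunks follows; the call is assumed to have been established elsewhere (for example with rxrpc_kernel_begin_call()) and the function name and buffers are illustrative.

    /* Illustrative in-kernel sketch (not part of this file): send a request
     * body in two chunks.  Both buffers are assumed valid and the call is
     * assumed to be in the request-sending phase.
     */
    #include <linux/uio.h>
    #include <net/af_rxrpc.h>

    static int example_send_request(struct socket *sock, struct rxrpc_call *call,
                                    void *hdr, size_t hdr_len,
                                    void *body, size_t body_len)
    {
            struct msghdr msg = {};
            struct kvec iov;
            int ret;

            /* First chunk: MSG_MORE keeps the transmission phase open. */
            iov.iov_base = hdr;
            iov.iov_len = hdr_len;
            iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, hdr_len);
            msg.msg_flags = MSG_MORE;
            ret = rxrpc_kernel_send_data(sock, call, &msg, hdr_len, NULL);
            if (ret < 0)
                    return ret;

            /* Final chunk: no MSG_MORE, so this ends the transmit phase. */
            iov.iov_base = body;
            iov.iov_len = body_len;
            iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, body_len);
            msg.msg_flags = 0;
            ret = rxrpc_kernel_send_data(sock, call, &msg, body_len, NULL);
            return ret < 0 ? ret : 0;
    }
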
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @sock: The socket the call is on
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ * @error: Local error value
+ * @why: Indication as to why.
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state
+ * and return true if the call was aborted, false if it was already complete.
+ */
+bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
+{
+ bool aborted;
+
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
+
+ mutex_lock(&call->user_mutex);
+ aborted = rxrpc_propose_abort(call, abort_code, error, why);
+ mutex_unlock(&call->user_mutex);
+ return aborted;
+}
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
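
A corresponding sketch for aborting a call from a kernel service is below; RX_PROTOCOL_ERROR and -EBADMSG are example values, and the abort reason is left to the caller since the enum rxrpc_abort_reason values in <trace/events/rxrpc.h> are defined per service.

    /* Illustrative in-kernel sketch (not part of this file): abort a call
     * when reply decoding fails.  The abort code and error are examples;
     * @why should be one of the caller's enum rxrpc_abort_reason values.
     */
    #include <linux/errno.h>
    #include <linux/printk.h>
    #include <linux/rxrpc.h>
    #include <net/af_rxrpc.h>

    static void example_abort_on_bad_reply(struct socket *sock,
                                           struct rxrpc_call *call,
                                           enum rxrpc_abort_reason why)
    {
            if (!rxrpc_kernel_abort_call(sock, call, RX_PROTOCOL_ERROR,
                                         -EBADMSG, why))
                    pr_debug("call was already complete, no abort sent\n");
    }
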
+
+/**
+ * rxrpc_kernel_set_tx_length - Set the total Tx length on a call
+ * @sock: The socket the call is on
+ * @call: The call to be informed
+ * @tx_total_len: The amount of data to be transmitted for this call
+ *
+ * Allow a kernel service to set the total transmit length on a call. This
+ * allows buffer-to-packet encrypt-and-copy to be performed.
+ *
+ * This function is primarily for use for setting the reply length since the
+ * request length can be set when beginning the call.
+ */
+void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call,
+ s64 tx_total_len)
+{
+ WARN_ON(call->tx_total_len != -1);
+ call->tx_total_len = tx_total_len;
+}
+EXPORT_SYMBOL(rxrpc_kernel_set_tx_length);
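
rxperf_process_call() earlier in this patch is the in-tree example of this pattern: it sets the reply length before streaming the reply. A condensed sketch for a service that has its whole reply in one buffer might look like the following; the function name and parameters are illustrative.

    /* Illustrative in-kernel sketch (not part of this file): declare the
     * reply size up front so data can be wrapped into packets as it is
     * copied, then send it in one go.
     */
    #include <linux/uio.h>
    #include <net/af_rxrpc.h>

    static int example_send_sized_reply(struct socket *sock,
                                        struct rxrpc_call *call,
                                        void *reply, size_t reply_len)
    {
            struct msghdr msg = {};
            struct kvec iov = { .iov_base = reply, .iov_len = reply_len };
            int n;

            rxrpc_kernel_set_tx_length(sock, call, reply_len);
            iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, reply_len);
            msg.msg_flags = 0;
            n = rxrpc_kernel_send_data(sock, call, &msg, reply_len, NULL);
            return n < 0 ? n : 0;
    }
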
diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
new file mode 100644
index 0000000000..e51940589e
--- /dev/null
+++ b/net/rxrpc/server_key.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC key management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * RxRPC keys should have a description describing their purpose:
+ * "afs@CAMBRIDGE.REDHAT.COM"
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/skcipher.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/key-type.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include <keys/user-type.h>
+#include "ar-internal.h"
+
+static int rxrpc_vet_description_s(const char *);
+static int rxrpc_preparse_s(struct key_preparsed_payload *);
+static void rxrpc_free_preparse_s(struct key_preparsed_payload *);
+static void rxrpc_destroy_s(struct key *);
+static void rxrpc_describe_s(const struct key *, struct seq_file *);
+
+/*
+ * rxrpc server keys take "<serviceId>:<securityIndex>[:<sec-specific>]" as the
+ * description and the key material as the payload.
+ */
+struct key_type key_type_rxrpc_s = {
+ .name = "rxrpc_s",
+ .flags = KEY_TYPE_NET_DOMAIN,
+ .vet_description = rxrpc_vet_description_s,
+ .preparse = rxrpc_preparse_s,
+ .free_preparse = rxrpc_free_preparse_s,
+ .instantiate = generic_key_instantiate,
+ .destroy = rxrpc_destroy_s,
+ .describe = rxrpc_describe_s,
+};
+
+/*
+ * Vet the description for an RxRPC server key.
+ */
+static int rxrpc_vet_description_s(const char *desc)
+{
+ unsigned long service, sec_class;
+ char *p;
+
+ service = simple_strtoul(desc, &p, 10);
+ if (*p != ':' || service > 65535)
+ return -EINVAL;
+ sec_class = simple_strtoul(p + 1, &p, 10);
+ if ((*p && *p != ':') || sec_class < 1 || sec_class > 255)
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Preparse a server secret key.
+ */
+static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
+{
+ const struct rxrpc_security *sec;
+ unsigned int service, sec_class;
+ int n;
+
+ _enter("%zu", prep->datalen);
+
+ if (!prep->orig_description)
+ return -EINVAL;
+
+ if (sscanf(prep->orig_description, "%u:%u%n", &service, &sec_class, &n) != 2)
+ return -EINVAL;
+
+ sec = rxrpc_security_lookup(sec_class);
+ if (!sec)
+ return -ENOPKG;
+
+ prep->payload.data[1] = (struct rxrpc_security *)sec;
+
+ if (!sec->preparse_server_key)
+ return -EINVAL;
+
+ return sec->preparse_server_key(prep);
+}
+
+static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
+{
+ const struct rxrpc_security *sec = prep->payload.data[1];
+
+ if (sec && sec->free_preparse_server_key)
+ sec->free_preparse_server_key(prep);
+}
+
+static void rxrpc_destroy_s(struct key *key)
+{
+ const struct rxrpc_security *sec = key->payload.data[1];
+
+ if (sec && sec->destroy_server_key)
+ sec->destroy_server_key(key);
+}
+
+static void rxrpc_describe_s(const struct key *key, struct seq_file *m)
+{
+ const struct rxrpc_security *sec = key->payload.data[1];
+
+ seq_puts(m, key->description);
+ if (sec && sec->describe_server_key)
+ sec->describe_server_key(key, m);
+}
+
+/*
+ * grab the security keyring for a server socket
+ */
+int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
+{
+ struct key *key;
+ char *description;
+
+ _enter("");
+
+ if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+ return -EINVAL;
+
+ description = memdup_sockptr_nul(optval, optlen);
+ if (IS_ERR(description))
+ return PTR_ERR(description);
+
+ key = request_key(&key_type_keyring, description, NULL);
+ if (IS_ERR(key)) {
+ kfree(description);
+ _leave(" = %ld", PTR_ERR(key));
+ return PTR_ERR(key);
+ }
+
+ rx->securities = key;
+ kfree(description);
+ _leave(" = 0 [key %x]", key->serial);
+ return 0;
+}
+
+/**
+ * rxrpc_sock_set_security_keyring - Set the security keyring for a kernel service
+ * @sk: The socket to set the keyring on
+ * @keyring: The keyring to set
+ *
+ * Set the server security keyring on an rxrpc socket. This is used to provide
+ * the encryption keys for a kernel service.
+ */
+int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret = 0;
+
+ lock_sock(sk);
+ if (rx->securities)
+ ret = -EINVAL;
+ else if (rx->sk.sk_state != RXRPC_UNBOUND)
+ ret = -EISCONN;
+ else
+ rx->securities = key_get(keyring);
+ release_sock(sk);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_security_keyring);
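
rxperf_init() and rxperf_open_socket() earlier in this patch show the full sequence; a condensed sketch of a kernel service securing its server socket before binding is below, with the keyring assumed to already hold an "rxrpc_s" key for the service (cf. rxperf_add_key()).

    /* Illustrative in-kernel sketch (not part of this file): require
     * encryption and attach the service's keyring before the socket is
     * bound and set listening.
     */
    #include <linux/key.h>
    #include <linux/rxrpc.h>
    #include <net/af_rxrpc.h>

    static int example_secure_server_socket(struct socket *sock,
                                            struct key *keyring)
    {
            int ret;

            ret = rxrpc_sock_set_min_security_level(sock->sk,
                                                    RXRPC_SECURITY_ENCRYPT);
            if (ret < 0)
                    return ret;

            return rxrpc_sock_set_security_keyring(sock->sk, keyring);
    }
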
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
new file mode 100644
index 0000000000..3bcd6ee803
--- /dev/null
+++ b/net/rxrpc/skbuff.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Socket buffer accounting
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+#define select_skb_count(skb) (&rxrpc_n_rx_skbs)
+
+/*
+ * Note the allocation or reception of a socket buffer.
+ */
+void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
+{
+ int n = atomic_inc_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
+}
+
+/*
+ * Note the re-emergence of a socket buffer from a queue or buffer.
+ */
+void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
+{
+ if (skb) {
+ int n = atomic_read(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
+ }
+}
+
+/*
+ * Note the addition of a ref on a socket buffer.
+ */
+void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
+{
+ int n = atomic_inc_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
+ skb_get(skb);
+}
+
+/*
+ * Note the dropping of a ref on a socket buffer by the core.
+ */
+void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
+{
+ int n = atomic_inc_return(&rxrpc_n_rx_skbs);
+ trace_rxrpc_skb(skb, 0, n, why);
+}
+
+/*
+ * Note the destruction of a socket buffer.
+ */
+void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
+{
+ if (skb) {
+ int n = atomic_dec_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
+ consume_skb(skb);
+ }
+}
+
+/*
+ * Clear a queue of socket buffers.
+ */
+void rxrpc_purge_queue(struct sk_buff_head *list)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue((list))) != NULL) {
+ int n = atomic_dec_return(select_skb_count(skb));
+ trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
+ rxrpc_skb_put_purge);
+ consume_skb(skb);
+ }
+}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
new file mode 100644
index 0000000000..ecaeb4ecfb
--- /dev/null
+++ b/net/rxrpc/sysctl.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* sysctls for configuring RxRPC operating parameters
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/sysctl.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static struct ctl_table_header *rxrpc_sysctl_reg_table;
+static const unsigned int four = 4;
+static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
+static const unsigned int n_65535 = 65535;
+static const unsigned int n_max_acks = 255;
+static const unsigned long one_jiffy = 1;
+static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+static const unsigned long max_500 = 500;
+#endif
+
+/*
+ * RxRPC operating parameters.
+ *
+ * See Documentation/networking/rxrpc.rst and the variable definitions for more
+ * information on the individual parameters.
+ */
+static struct ctl_table rxrpc_sysctl_table[] = {
+ /* Values measured in milliseconds but used in jiffies */
+ {
+ .procname = "soft_ack_delay",
+ .data = &rxrpc_soft_ack_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
+ },
+ {
+ .procname = "idle_ack_delay",
+ .data = &rxrpc_idle_ack_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
+ },
+ {
+ .procname = "idle_conn_expiry",
+ .data = &rxrpc_conn_idle_client_expiry,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
+ },
+ {
+ .procname = "idle_conn_fast_expiry",
+ .data = &rxrpc_conn_idle_client_fast_expiry,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_ms_jiffies_minmax,
+ .extra1 = (void *)&one_jiffy,
+ .extra2 = (void *)&max_jiffies,
+ },
+
+ /* Values used in milliseconds */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ {
+ .procname = "inject_rx_delay",
+ .data = &rxrpc_inject_rx_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)SYSCTL_LONG_ZERO,
+ .extra2 = (void *)&max_500,
+ },
+#endif
+
+ /* Non-time values */
+ {
+ .procname = "reap_client_conns",
+ .data = &rxrpc_reap_client_connections,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)SYSCTL_ONE,
+ .extra2 = (void *)&n_65535,
+ },
+ {
+ .procname = "max_backlog",
+ .data = &rxrpc_max_backlog,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&four,
+ .extra2 = (void *)&max_backlog,
+ },
+ {
+ .procname = "rx_window_size",
+ .data = &rxrpc_rx_window_size,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)SYSCTL_ONE,
+ .extra2 = (void *)&n_max_acks,
+ },
+ {
+ .procname = "rx_mtu",
+ .data = &rxrpc_rx_mtu,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)SYSCTL_ONE,
+ .extra2 = (void *)&n_65535,
+ },
+ {
+ .procname = "rx_jumbo_max",
+ .data = &rxrpc_rx_jumbo_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)SYSCTL_ONE,
+ .extra2 = (void *)&four,
+ },
+ { }
+};
+
+int __init rxrpc_sysctl_init(void)
+{
+ rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc",
+ rxrpc_sysctl_table);
+ if (!rxrpc_sysctl_reg_table)
+ return -ENOMEM;
+ return 0;
+}
+
+void rxrpc_sysctl_exit(void)
+{
+ if (rxrpc_sysctl_reg_table)
+ unregister_net_sysctl_table(rxrpc_sysctl_reg_table);
+}
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
new file mode 100644
index 0000000000..d43be85123
--- /dev/null
+++ b/net/rxrpc/txbuf.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxRPC Tx data buffering.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include "ar-internal.h"
+
+static atomic_t rxrpc_txbuf_debug_ids;
+atomic_t rxrpc_nr_txbuf;
+
+/*
+ * Allocate and partially initialise an I/O request structure.
+ */
+struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type,
+ gfp_t gfp)
+{
+ struct rxrpc_txbuf *txb;
+
+ txb = kmalloc(sizeof(*txb), gfp);
+ if (txb) {
+ INIT_LIST_HEAD(&txb->call_link);
+ INIT_LIST_HEAD(&txb->tx_link);
+ refcount_set(&txb->ref, 1);
+ txb->call_debug_id = call->debug_id;
+ txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids);
+ txb->space = sizeof(txb->data);
+ txb->len = 0;
+ txb->offset = 0;
+ txb->flags = 0;
+ txb->ack_why = 0;
+ txb->seq = call->tx_prepared + 1;
+ txb->wire.epoch = htonl(call->conn->proto.epoch);
+ txb->wire.cid = htonl(call->cid);
+ txb->wire.callNumber = htonl(call->call_id);
+ txb->wire.seq = htonl(txb->seq);
+ txb->wire.type = packet_type;
+ txb->wire.flags = call->conn->out_clientflag;
+ txb->wire.userStatus = 0;
+ txb->wire.securityIndex = call->security_ix;
+ txb->wire._rsvd = 0;
+ txb->wire.serviceId = htons(call->dest_srx.srx_service);
+
+ trace_rxrpc_txbuf(txb->debug_id,
+ txb->call_debug_id, txb->seq, 1,
+ packet_type == RXRPC_PACKET_TYPE_DATA ?
+ rxrpc_txbuf_alloc_data :
+ rxrpc_txbuf_alloc_ack);
+ atomic_inc(&rxrpc_nr_txbuf);
+ }
+
+ return txb;
+}
+
+void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
+{
+ int r;
+
+ __refcount_inc(&txb->ref, &r);
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r + 1, what);
+}
+
+void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
+{
+ int r = refcount_read(&txb->ref);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, r, what);
+}
+
+static void rxrpc_free_txbuf(struct rcu_head *rcu)
+{
+ struct rxrpc_txbuf *txb = container_of(rcu, struct rxrpc_txbuf, rcu);
+
+ trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0,
+ rxrpc_txbuf_free);
+ kfree(txb);
+ atomic_dec(&rxrpc_nr_txbuf);
+}
+
+void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what)
+{
+ unsigned int debug_id, call_debug_id;
+ rxrpc_seq_t seq;
+ bool dead;
+ int r;
+
+ if (txb) {
+ debug_id = txb->debug_id;
+ call_debug_id = txb->call_debug_id;
+ seq = txb->seq;
+ dead = __refcount_dec_and_test(&txb->ref, &r);
+ trace_rxrpc_txbuf(debug_id, call_debug_id, seq, r - 1, what);
+ if (dead)
+ call_rcu(&txb->rcu, rxrpc_free_txbuf);
+ }
+}
+
+/*
+ * Shrink the transmit buffer.
+ */
+void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
+{
+ struct rxrpc_txbuf *txb;
+ rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack);
+ bool wake = false;
+
+ _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
+
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
+ hard_ack = smp_load_acquire(&call->acks_hard_ack);
+ if (before(hard_ack, txb->seq))
+ break;
+
+ if (txb->seq != call->tx_bottom + 1)
+ rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step);
+ ASSERTCMP(txb->seq, ==, call->tx_bottom + 1);
+ smp_store_release(&call->tx_bottom, call->tx_bottom + 1);
+ list_del_rcu(&txb->call_link);
+
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
+
+ rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
+ if (after(call->acks_hard_ack, call->tx_bottom + 128))
+ wake = true;
+ }
+
+ if (wake)
+ wake_up(&call->waitq);
+}
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
new file mode 100644
index 0000000000..2e4b9d86e8
--- /dev/null
+++ b/net/rxrpc/utils.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Utility routines
+ *
+ * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include "ar-internal.h"
+
+/*
+ * Fill out a peer address from a socket buffer containing a packet.
+ */
+int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
+{
+ memset(srx, 0, sizeof(*srx));
+
+ switch (ntohs(skb->protocol)) {
+ case ETH_P_IP:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin);
+ srx->transport.sin.sin_family = AF_INET;
+ srx->transport.sin.sin_port = udp_hdr(skb)->source;
+ srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ return 0;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case ETH_P_IPV6:
+ srx->transport_type = SOCK_DGRAM;
+ srx->transport_len = sizeof(srx->transport.sin6);
+ srx->transport.sin6.sin6_family = AF_INET6;
+ srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
+ srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+ return 0;
+#endif
+
+ default:
+ pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
+ ntohs(skb->protocol));
+ return -EAFNOSUPPORT;
+ }
+}