diff options
Diffstat (limited to '')
31 files changed, 15919 insertions, 0 deletions
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig new file mode 100644 index 000000000..d706bb408 --- /dev/null +++ b/net/rxrpc/Kconfig @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# RxRPC session sockets +# + +config AF_RXRPC + tristate "RxRPC session sockets" + depends on INET + select CRYPTO + select KEYS + help + Say Y or M here to include support for RxRPC session sockets (just + the transport part, not the presentation part: (un)marshalling is + left to the application). + + These are used for AFS kernel filesystem and userspace utilities. + + This module at the moment only supports client operations and is + currently incomplete. + + See Documentation/networking/rxrpc.rst. + +config AF_RXRPC_IPV6 + bool "IPv6 support for RxRPC" + depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + help + Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as + its network transport. + +config AF_RXRPC_INJECT_LOSS + bool "Inject packet loss into RxRPC packet stream" + depends on AF_RXRPC + help + Say Y here to inject packet loss by discarding some received and some + transmitted packets. + + +config AF_RXRPC_DEBUG + bool "RxRPC dynamic debugging" + depends on AF_RXRPC + help + Say Y here to make runtime controllable debugging messages appear. + + See Documentation/networking/rxrpc.rst. + + +config RXKAD + bool "RxRPC Kerberos security" + depends on AF_RXRPC + select CRYPTO + select CRYPTO_MANAGER + select CRYPTO_SKCIPHER + select CRYPTO_PCBC + select CRYPTO_FCRYPT + help + Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC + through the use of the key retention service. + + See Documentation/networking/rxrpc.rst. diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile new file mode 100644 index 000000000..ddd0f9571 --- /dev/null +++ b/net/rxrpc/Makefile @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Linux kernel RxRPC +# + +obj-$(CONFIG_AF_RXRPC) += rxrpc.o + +rxrpc-y := \ + af_rxrpc.o \ + call_accept.o \ + call_event.o \ + call_object.o \ + conn_client.o \ + conn_event.o \ + conn_object.o \ + conn_service.o \ + input.o \ + insecure.o \ + key.o \ + local_event.o \ + local_object.o \ + misc.o \ + net_ns.o \ + output.o \ + peer_event.o \ + peer_object.o \ + recvmsg.o \ + rtt.o \ + security.o \ + sendmsg.o \ + skbuff.o \ + utils.o + +rxrpc-$(CONFIG_PROC_FS) += proc.o +rxrpc-$(CONFIG_RXKAD) += rxkad.o +rxrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c new file mode 100644 index 000000000..0354f90dc --- /dev/null +++ b/net/rxrpc/af_rxrpc.c @@ -0,0 +1,1077 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AF_RXRPC implementation + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/slab.h> +#include <linux/skbuff.h> +#include <linux/random.h> +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <linux/key-type.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#define CREATE_TRACE_POINTS +#include "ar-internal.h" + +MODULE_DESCRIPTION("RxRPC network protocol"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_RXRPC); + +unsigned int rxrpc_debug; // = RXRPC_DEBUG_KPROTO; +module_param_named(debug, rxrpc_debug, uint, 0644); +MODULE_PARM_DESC(debug, "RxRPC debugging mask"); + +static struct proto rxrpc_proto; +static const struct proto_ops rxrpc_rpc_ops; + +/* current debugging ID */ +atomic_t rxrpc_debug_id; +EXPORT_SYMBOL(rxrpc_debug_id); + +/* count of skbs currently in use */ +atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; + +struct workqueue_struct *rxrpc_workqueue; + +static void rxrpc_sock_destructor(struct sock *); + +/* + * see if an RxRPC socket is currently writable + */ +static inline int rxrpc_writable(struct sock *sk) +{ + return refcount_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf; +} + +/* + * wait for write bufferage to become available + */ +static void rxrpc_write_space(struct sock *sk) +{ + _enter("%p", sk); + rcu_read_lock(); + if (rxrpc_writable(sk)) { + struct socket_wq *wq = rcu_dereference(sk->sk_wq); + + if (skwq_has_sleeper(wq)) + wake_up_interruptible(&wq->wait); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + } + rcu_read_unlock(); +} + +/* + * validate an RxRPC address + */ +static int rxrpc_validate_address(struct rxrpc_sock *rx, + struct sockaddr_rxrpc *srx, + int len) +{ + unsigned int tail; + + if (len < sizeof(struct sockaddr_rxrpc)) + return -EINVAL; + + if (srx->srx_family != AF_RXRPC) + return -EAFNOSUPPORT; + + if (srx->transport_type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + len -= offsetof(struct sockaddr_rxrpc, transport); + if (srx->transport_len < sizeof(sa_family_t) || + srx->transport_len > len) + return -EINVAL; + + if (srx->transport.family != rx->family && + srx->transport.family == AF_INET && rx->family != AF_INET6) + return -EAFNOSUPPORT; + + switch (srx->transport.family) { + case AF_INET: + if (srx->transport_len < sizeof(struct sockaddr_in)) + return -EINVAL; + tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + if (srx->transport_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + tail = offsetof(struct sockaddr_rxrpc, transport) + + sizeof(struct sockaddr_in6); + break; +#endif + + default: + return -EAFNOSUPPORT; + } + + if (tail < len) + memset((void *)srx + tail, 0, len - tail); + _debug("INET: %pISp", &srx->transport); + return 0; +} + +/* + * bind a local address to an RxRPC socket + */ +static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) +{ + struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; + struct rxrpc_local *local; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + u16 service_id; + int ret; + + _enter("%p,%p,%d", rx, saddr, len); + + ret = rxrpc_validate_address(rx, srx, len); + if (ret < 0) + goto error; + service_id = srx->srx_service; + + lock_sock(&rx->sk); + + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: + rx->srx = *srx; + local = rxrpc_lookup_local(sock_net(&rx->sk), &rx->srx); + if (IS_ERR(local)) { + ret = PTR_ERR(local); + goto error_unlock; + } + + if (service_id) { + write_lock(&local->services_lock); + if (rcu_access_pointer(local->service)) + goto service_in_use; + rx->local = local; + rcu_assign_pointer(local->service, rx); + write_unlock(&local->services_lock); + + rx->sk.sk_state = RXRPC_SERVER_BOUND; + } else { + rx->local = local; + rx->sk.sk_state = RXRPC_CLIENT_BOUND; + } + break; + + case RXRPC_SERVER_BOUND: + ret = -EINVAL; + if (service_id == 0) + goto error_unlock; + ret = -EADDRINUSE; + if (service_id == rx->srx.srx_service) + goto error_unlock; + ret = -EINVAL; + srx->srx_service = rx->srx.srx_service; + if (memcmp(srx, &rx->srx, sizeof(*srx)) != 0) + goto error_unlock; + rx->second_service = service_id; + rx->sk.sk_state = RXRPC_SERVER_BOUND2; + break; + + default: + ret = -EINVAL; + goto error_unlock; + } + + release_sock(&rx->sk); + _leave(" = 0"); + return 0; + +service_in_use: + write_unlock(&local->services_lock); + rxrpc_unuse_local(local); + rxrpc_put_local(local); + ret = -EADDRINUSE; +error_unlock: + release_sock(&rx->sk); +error: + _leave(" = %d", ret); + return ret; +} + +/* + * set the number of pending calls permitted on a listening socket + */ +static int rxrpc_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + unsigned int max, old; + int ret; + + _enter("%p,%d", rx, backlog); + + lock_sock(&rx->sk); + + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: + ret = -EADDRNOTAVAIL; + break; + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + ASSERT(rx->local != NULL); + max = READ_ONCE(rxrpc_max_backlog); + ret = -EINVAL; + if (backlog == INT_MAX) + backlog = max; + else if (backlog < 0 || backlog > max) + break; + old = sk->sk_max_ack_backlog; + sk->sk_max_ack_backlog = backlog; + ret = rxrpc_service_prealloc(rx, GFP_KERNEL); + if (ret == 0) + rx->sk.sk_state = RXRPC_SERVER_LISTENING; + else + sk->sk_max_ack_backlog = old; + break; + case RXRPC_SERVER_LISTENING: + if (backlog == 0) { + rx->sk.sk_state = RXRPC_SERVER_LISTEN_DISABLED; + sk->sk_max_ack_backlog = 0; + rxrpc_discard_prealloc(rx); + ret = 0; + break; + } + fallthrough; + default: + ret = -EBUSY; + break; + } + + release_sock(&rx->sk); + _leave(" = %d", ret); + return ret; +} + +/** + * rxrpc_kernel_begin_call - Allow a kernel service to begin a call + * @sock: The socket on which to make the call + * @srx: The address of the peer to contact + * @key: The security context to use (defaults to socket setting) + * @user_call_ID: The ID to use + * @tx_total_len: Total length of data to transmit during the call (or -1) + * @gfp: The allocation constraints + * @notify_rx: Where to send notifications instead of socket queue + * @upgrade: Request service upgrade for call + * @interruptibility: The call is interruptible, or can be canceled. + * @debug_id: The debug ID for tracing to be assigned to the call + * + * Allow a kernel service to begin a call on the nominated socket. This just + * sets up all the internal tracking structures and allocates connection and + * call IDs as appropriate. The call to be used is returned. + * + * The default socket destination address and security may be overridden by + * supplying @srx and @key. + */ +struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, + struct sockaddr_rxrpc *srx, + struct key *key, + unsigned long user_call_ID, + s64 tx_total_len, + gfp_t gfp, + rxrpc_notify_rx_t notify_rx, + bool upgrade, + enum rxrpc_interruptibility interruptibility, + unsigned int debug_id) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_call_params p; + struct rxrpc_call *call; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + _enter(",,%x,%lx", key_serial(key), user_call_ID); + + ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); + if (ret < 0) + return ERR_PTR(ret); + + lock_sock(&rx->sk); + + if (!key) + key = rx->key; + if (key && !key->payload.data[0]) + key = NULL; /* a no-security key */ + + memset(&p, 0, sizeof(p)); + p.user_call_ID = user_call_ID; + p.tx_total_len = tx_total_len; + p.interruptibility = interruptibility; + p.kernel = true; + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = key; + cp.security_level = rx->min_sec_level; + cp.exclusive = false; + cp.upgrade = upgrade; + cp.service_id = srx->srx_service; + call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id); + /* The socket has been unlocked. */ + if (!IS_ERR(call)) { + call->notify_rx = notify_rx; + mutex_unlock(&call->user_mutex); + } + + rxrpc_put_peer(cp.peer); + _leave(" = %p", call); + return call; +} +EXPORT_SYMBOL(rxrpc_kernel_begin_call); + +/* + * Dummy function used to stop the notifier talking to recvmsg(). + */ +static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ +} + +/** + * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using + * @sock: The socket the call is on + * @call: The call to end + * + * Allow a kernel service to end a call it was using. The call must be + * complete before this is called (the call should be aborted if necessary). + */ +void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) +{ + _enter("%d{%d}", call->debug_id, refcount_read(&call->ref)); + + mutex_lock(&call->user_mutex); + rxrpc_release_call(rxrpc_sk(sock->sk), call); + + /* Make sure we're not going to call back into a kernel service */ + if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); + call->notify_rx = rxrpc_dummy_notify_rx; + spin_unlock_bh(&call->notify_lock); + } + + mutex_unlock(&call->user_mutex); + rxrpc_put_call(call, rxrpc_call_put_kernel); +} +EXPORT_SYMBOL(rxrpc_kernel_end_call); + +/** + * rxrpc_kernel_check_life - Check to see whether a call is still alive + * @sock: The socket the call is on + * @call: The call to check + * + * Allow a kernel service to find out whether a call is still alive - + * ie. whether it has completed. + */ +bool rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call) +{ + return call->state != RXRPC_CALL_COMPLETE; +} +EXPORT_SYMBOL(rxrpc_kernel_check_life); + +/** + * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. + * @sock: The socket the call is on + * @call: The call to query + * + * Allow a kernel service to retrieve the epoch value from a service call to + * see if the client at the other end rebooted. + */ +u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) +{ + return call->conn->proto.epoch; +} +EXPORT_SYMBOL(rxrpc_kernel_get_epoch); + +/** + * rxrpc_kernel_new_call_notification - Get notifications of new calls + * @sock: The socket to intercept received messages on + * @notify_new_call: Function to be called when new calls appear + * @discard_new_call: Function to discard preallocated calls + * + * Allow a kernel service to be given notifications about new calls. + */ +void rxrpc_kernel_new_call_notification( + struct socket *sock, + rxrpc_notify_new_call_t notify_new_call, + rxrpc_discard_new_call_t discard_new_call) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + + rx->notify_new_call = notify_new_call; + rx->discard_new_call = discard_new_call; +} +EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); + +/** + * rxrpc_kernel_set_max_life - Set maximum lifespan on a call + * @sock: The socket the call is on + * @call: The call to configure + * @hard_timeout: The maximum lifespan of the call in jiffies + * + * Set the maximum lifespan of a call. The call will end with ETIME or + * ETIMEDOUT if it takes longer than this. + */ +void rxrpc_kernel_set_max_life(struct socket *sock, struct rxrpc_call *call, + unsigned long hard_timeout) +{ + unsigned long now; + + mutex_lock(&call->user_mutex); + + now = jiffies; + hard_timeout += now; + WRITE_ONCE(call->expect_term_by, hard_timeout); + rxrpc_reduce_call_timer(call, hard_timeout, now, rxrpc_timer_set_for_hard); + + mutex_unlock(&call->user_mutex); +} +EXPORT_SYMBOL(rxrpc_kernel_set_max_life); + +/* + * connect an RxRPC socket + * - this just targets it at a specific destination; no actual connection + * negotiation takes place + */ +static int rxrpc_connect(struct socket *sock, struct sockaddr *addr, + int addr_len, int flags) +{ + struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)addr; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + _enter("%p,%p,%d,%d", rx, addr, addr_len, flags); + + ret = rxrpc_validate_address(rx, srx, addr_len); + if (ret < 0) { + _leave(" = %d [bad addr]", ret); + return ret; + } + + lock_sock(&rx->sk); + + ret = -EISCONN; + if (test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) + goto error; + + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: + rx->sk.sk_state = RXRPC_CLIENT_UNBOUND; + case RXRPC_CLIENT_UNBOUND: + case RXRPC_CLIENT_BOUND: + break; + default: + ret = -EBUSY; + goto error; + } + + rx->connect_srx = *srx; + set_bit(RXRPC_SOCK_CONNECTED, &rx->flags); + ret = 0; + +error: + release_sock(&rx->sk); + return ret; +} + +/* + * send a message through an RxRPC socket + * - in a client this does a number of things: + * - finds/sets up a connection for the security specified (if any) + * - initiates a call (ID in control data) + * - ends the request phase of a call (if MSG_MORE is not set) + * - sends a call data packet + * - may send an abort (abort code in control data) + */ +static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) +{ + struct rxrpc_local *local; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + _enter(",{%d},,%zu", rx->sk.sk_state, len); + + if (m->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (m->msg_name) { + ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen); + if (ret < 0) { + _leave(" = %d [bad addr]", ret); + return ret; + } + } + + lock_sock(&rx->sk); + + switch (rx->sk.sk_state) { + case RXRPC_UNBOUND: + case RXRPC_CLIENT_UNBOUND: + rx->srx.srx_family = AF_RXRPC; + rx->srx.srx_service = 0; + rx->srx.transport_type = SOCK_DGRAM; + rx->srx.transport.family = rx->family; + switch (rx->family) { + case AF_INET: + rx->srx.transport_len = sizeof(struct sockaddr_in); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + rx->srx.transport_len = sizeof(struct sockaddr_in6); + break; +#endif + default: + ret = -EAFNOSUPPORT; + goto error_unlock; + } + local = rxrpc_lookup_local(sock_net(sock->sk), &rx->srx); + if (IS_ERR(local)) { + ret = PTR_ERR(local); + goto error_unlock; + } + + rx->local = local; + rx->sk.sk_state = RXRPC_CLIENT_BOUND; + fallthrough; + + case RXRPC_CLIENT_BOUND: + if (!m->msg_name && + test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) { + m->msg_name = &rx->connect_srx; + m->msg_namelen = sizeof(rx->connect_srx); + } + fallthrough; + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_LISTENING: + ret = rxrpc_do_sendmsg(rx, m, len); + /* The socket has been unlocked */ + goto out; + default: + ret = -EINVAL; + goto error_unlock; + } + +error_unlock: + release_sock(&rx->sk); +out: + _leave(" = %d", ret); + return ret; +} + +int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val) +{ + if (sk->sk_state != RXRPC_UNBOUND) + return -EISCONN; + if (val > RXRPC_SECURITY_MAX) + return -EINVAL; + lock_sock(sk); + rxrpc_sk(sk)->min_sec_level = val; + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(rxrpc_sock_set_min_security_level); + +/* + * set RxRPC socket options + */ +static int rxrpc_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + unsigned int min_sec_level; + u16 service_upgrade[2]; + int ret; + + _enter(",%d,%d,,%d", level, optname, optlen); + + lock_sock(&rx->sk); + ret = -EOPNOTSUPP; + + if (level == SOL_RXRPC) { + switch (optname) { + case RXRPC_EXCLUSIVE_CONNECTION: + ret = -EINVAL; + if (optlen != 0) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_UNBOUND) + goto error; + rx->exclusive = true; + goto success; + + case RXRPC_SECURITY_KEY: + ret = -EINVAL; + if (rx->key) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_UNBOUND) + goto error; + ret = rxrpc_request_key(rx, optval, optlen); + goto error; + + case RXRPC_SECURITY_KEYRING: + ret = -EINVAL; + if (rx->key) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_UNBOUND) + goto error; + ret = rxrpc_server_keyring(rx, optval, optlen); + goto error; + + case RXRPC_MIN_SECURITY_LEVEL: + ret = -EINVAL; + if (optlen != sizeof(unsigned int)) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_UNBOUND) + goto error; + ret = copy_from_sockptr(&min_sec_level, optval, + sizeof(unsigned int)); + if (ret < 0) + goto error; + ret = -EINVAL; + if (min_sec_level > RXRPC_SECURITY_MAX) + goto error; + rx->min_sec_level = min_sec_level; + goto success; + + case RXRPC_UPGRADEABLE_SERVICE: + ret = -EINVAL; + if (optlen != sizeof(service_upgrade) || + rx->service_upgrade.from != 0) + goto error; + ret = -EISCONN; + if (rx->sk.sk_state != RXRPC_SERVER_BOUND2) + goto error; + ret = -EFAULT; + if (copy_from_sockptr(service_upgrade, optval, + sizeof(service_upgrade)) != 0) + goto error; + ret = -EINVAL; + if ((service_upgrade[0] != rx->srx.srx_service || + service_upgrade[1] != rx->second_service) && + (service_upgrade[0] != rx->second_service || + service_upgrade[1] != rx->srx.srx_service)) + goto error; + rx->service_upgrade.from = service_upgrade[0]; + rx->service_upgrade.to = service_upgrade[1]; + goto success; + + default: + break; + } + } + +success: + ret = 0; +error: + release_sock(&rx->sk); + return ret; +} + +/* + * Get socket options. + */ +static int rxrpc_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *_optlen) +{ + int optlen; + + if (level != SOL_RXRPC) + return -EOPNOTSUPP; + + if (get_user(optlen, _optlen)) + return -EFAULT; + + switch (optname) { + case RXRPC_SUPPORTED_CMSG: + if (optlen < sizeof(int)) + return -ETOOSMALL; + if (put_user(RXRPC__SUPPORTED - 1, (int __user *)optval) || + put_user(sizeof(int), _optlen)) + return -EFAULT; + return 0; + + default: + return -EOPNOTSUPP; + } +} + +/* + * permit an RxRPC socket to be polled + */ +static __poll_t rxrpc_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + __poll_t mask; + + sock_poll_wait(file, sock, wait); + mask = 0; + + /* the socket is readable if there are any messages waiting on the Rx + * queue */ + if (!list_empty(&rx->recvmsg_q)) + mask |= EPOLLIN | EPOLLRDNORM; + + /* the socket is writable if there is space to add new data to the + * socket; there is no guarantee that any particular call in progress + * on the socket may have space in the Tx ACK window */ + if (rxrpc_writable(sk)) + mask |= EPOLLOUT | EPOLLWRNORM; + + return mask; +} + +/* + * create an RxRPC socket + */ +static int rxrpc_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct rxrpc_net *rxnet; + struct rxrpc_sock *rx; + struct sock *sk; + + _enter("%p,%d", sock, protocol); + + /* we support transport protocol UDP/UDP6 only */ + if (protocol != PF_INET && + IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) + return -EPROTONOSUPPORT; + + if (sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; + + sock->ops = &rxrpc_rpc_ops; + sock->state = SS_UNCONNECTED; + + sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern); + if (!sk) + return -ENOMEM; + + sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); + sk->sk_state = RXRPC_UNBOUND; + sk->sk_write_space = rxrpc_write_space; + sk->sk_max_ack_backlog = 0; + sk->sk_destruct = rxrpc_sock_destructor; + + rx = rxrpc_sk(sk); + rx->family = protocol; + rx->calls = RB_ROOT; + + spin_lock_init(&rx->incoming_lock); + INIT_LIST_HEAD(&rx->sock_calls); + INIT_LIST_HEAD(&rx->to_be_accepted); + INIT_LIST_HEAD(&rx->recvmsg_q); + rwlock_init(&rx->recvmsg_lock); + rwlock_init(&rx->call_lock); + memset(&rx->srx, 0, sizeof(rx->srx)); + + rxnet = rxrpc_net(sock_net(&rx->sk)); + timer_reduce(&rxnet->peer_keepalive_timer, jiffies + 1); + + _leave(" = 0 [%p]", rx); + return 0; +} + +/* + * Kill all the calls on a socket and shut it down. + */ +static int rxrpc_shutdown(struct socket *sock, int flags) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + _enter("%p,%d", sk, flags); + + if (flags != SHUT_RDWR) + return -EOPNOTSUPP; + if (sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + lock_sock(sk); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (sk->sk_state < RXRPC_CLOSE) { + sk->sk_state = RXRPC_CLOSE; + sk->sk_shutdown = SHUTDOWN_MASK; + } else { + ret = -ESHUTDOWN; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + rxrpc_discard_prealloc(rx); + + release_sock(sk); + return ret; +} + +/* + * RxRPC socket destructor + */ +static void rxrpc_sock_destructor(struct sock *sk) +{ + _enter("%p", sk); + + rxrpc_purge_queue(&sk->sk_receive_queue); + + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); + WARN_ON(!sk_unhashed(sk)); + WARN_ON(sk->sk_socket); + + if (!sock_flag(sk, SOCK_DEAD)) { + printk("Attempt to release alive rxrpc socket: %p\n", sk); + return; + } +} + +/* + * release an RxRPC socket + */ +static int rxrpc_release_sock(struct sock *sk) +{ + struct rxrpc_sock *rx = rxrpc_sk(sk); + + _enter("%p{%d,%d}", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); + + /* declare the socket closed for business */ + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; + + /* We want to kill off all connections from a service socket + * as fast as possible because we can't share these; client + * sockets, on the other hand, can share an endpoint. + */ + switch (sk->sk_state) { + case RXRPC_SERVER_BOUND: + case RXRPC_SERVER_BOUND2: + case RXRPC_SERVER_LISTENING: + case RXRPC_SERVER_LISTEN_DISABLED: + rx->local->service_closed = true; + break; + } + + spin_lock_bh(&sk->sk_receive_queue.lock); + sk->sk_state = RXRPC_CLOSE; + spin_unlock_bh(&sk->sk_receive_queue.lock); + + if (rx->local && rcu_access_pointer(rx->local->service) == rx) { + write_lock(&rx->local->services_lock); + rcu_assign_pointer(rx->local->service, NULL); + write_unlock(&rx->local->services_lock); + } + + /* try to flush out this socket */ + rxrpc_discard_prealloc(rx); + rxrpc_release_calls_on_socket(rx); + flush_workqueue(rxrpc_workqueue); + rxrpc_purge_queue(&sk->sk_receive_queue); + + rxrpc_unuse_local(rx->local); + rxrpc_put_local(rx->local); + rx->local = NULL; + key_put(rx->key); + rx->key = NULL; + key_put(rx->securities); + rx->securities = NULL; + sock_put(sk); + + _leave(" = 0"); + return 0; +} + +/* + * release an RxRPC BSD socket on close() or equivalent + */ +static int rxrpc_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + _enter("%p{%p}", sock, sk); + + if (!sk) + return 0; + + sock->sk = NULL; + + return rxrpc_release_sock(sk); +} + +/* + * RxRPC network protocol + */ +static const struct proto_ops rxrpc_rpc_ops = { + .family = PF_RXRPC, + .owner = THIS_MODULE, + .release = rxrpc_release, + .bind = rxrpc_bind, + .connect = rxrpc_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = sock_no_getname, + .poll = rxrpc_poll, + .ioctl = sock_no_ioctl, + .listen = rxrpc_listen, + .shutdown = rxrpc_shutdown, + .setsockopt = rxrpc_setsockopt, + .getsockopt = rxrpc_getsockopt, + .sendmsg = rxrpc_sendmsg, + .recvmsg = rxrpc_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static struct proto rxrpc_proto = { + .name = "RXRPC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct rxrpc_sock), + .max_header = sizeof(struct rxrpc_wire_header), +}; + +static const struct net_proto_family rxrpc_family_ops = { + .family = PF_RXRPC, + .create = rxrpc_create, + .owner = THIS_MODULE, +}; + +/* + * initialise and register the RxRPC protocol + */ +static int __init af_rxrpc_init(void) +{ + int ret = -1; + unsigned int tmp; + + BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb)); + + get_random_bytes(&tmp, sizeof(tmp)); + tmp &= 0x3fffffff; + if (tmp == 0) + tmp = 1; + idr_set_cursor(&rxrpc_client_conn_ids, tmp); + + ret = -ENOMEM; + rxrpc_call_jar = kmem_cache_create( + "rxrpc_call_jar", sizeof(struct rxrpc_call), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!rxrpc_call_jar) { + pr_notice("Failed to allocate call jar\n"); + goto error_call_jar; + } + + rxrpc_workqueue = alloc_workqueue("krxrpcd", 0, 1); + if (!rxrpc_workqueue) { + pr_notice("Failed to allocate work queue\n"); + goto error_work_queue; + } + + ret = rxrpc_init_security(); + if (ret < 0) { + pr_crit("Cannot initialise security\n"); + goto error_security; + } + + ret = register_pernet_device(&rxrpc_net_ops); + if (ret) + goto error_pernet; + + ret = proto_register(&rxrpc_proto, 1); + if (ret < 0) { + pr_crit("Cannot register protocol\n"); + goto error_proto; + } + + ret = sock_register(&rxrpc_family_ops); + if (ret < 0) { + pr_crit("Cannot register socket family\n"); + goto error_sock; + } + + ret = register_key_type(&key_type_rxrpc); + if (ret < 0) { + pr_crit("Cannot register client key type\n"); + goto error_key_type; + } + + ret = register_key_type(&key_type_rxrpc_s); + if (ret < 0) { + pr_crit("Cannot register server key type\n"); + goto error_key_type_s; + } + + ret = rxrpc_sysctl_init(); + if (ret < 0) { + pr_crit("Cannot register sysctls\n"); + goto error_sysctls; + } + + return 0; + +error_sysctls: + unregister_key_type(&key_type_rxrpc_s); +error_key_type_s: + unregister_key_type(&key_type_rxrpc); +error_key_type: + sock_unregister(PF_RXRPC); +error_sock: + proto_unregister(&rxrpc_proto); +error_proto: + unregister_pernet_device(&rxrpc_net_ops); +error_pernet: + rxrpc_exit_security(); +error_security: + destroy_workqueue(rxrpc_workqueue); +error_work_queue: + kmem_cache_destroy(rxrpc_call_jar); +error_call_jar: + return ret; +} + +/* + * unregister the RxRPC protocol + */ +static void __exit af_rxrpc_exit(void) +{ + _enter(""); + rxrpc_sysctl_exit(); + unregister_key_type(&key_type_rxrpc_s); + unregister_key_type(&key_type_rxrpc); + sock_unregister(PF_RXRPC); + proto_unregister(&rxrpc_proto); + unregister_pernet_device(&rxrpc_net_ops); + ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); + + /* Make sure the local and peer records pinned by any dying connections + * are released. + */ + rcu_barrier(); + rxrpc_destroy_client_conn_ids(); + + destroy_workqueue(rxrpc_workqueue); + rxrpc_exit_security(); + kmem_cache_destroy(rxrpc_call_jar); + _leave(""); +} + +module_init(af_rxrpc_init); +module_exit(af_rxrpc_exit); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h new file mode 100644 index 000000000..d86894a1c --- /dev/null +++ b/net/rxrpc/ar-internal.h @@ -0,0 +1,1235 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* AF_RXRPC internal definitions + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/atomic.h> +#include <linux/seqlock.h> +#include <linux/win_minmax.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "protocol.h" + +#define FCRYPT_BSIZE 8 +struct rxrpc_crypt { + union { + u8 x[FCRYPT_BSIZE]; + __be32 n[2]; + }; +} __attribute__((aligned(8))); + +#define rxrpc_queue_work(WS) queue_work(rxrpc_workqueue, (WS)) +#define rxrpc_queue_delayed_work(WS,D) \ + queue_delayed_work(rxrpc_workqueue, (WS), (D)) + +struct rxrpc_connection; + +/* + * Mark applied to socket buffers in skb->mark. skb->priority is used + * to pass supplementary information. + */ +enum rxrpc_skb_mark { + RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ + RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ +}; + +/* + * sk_state for RxRPC sockets + */ +enum { + RXRPC_UNBOUND = 0, + RXRPC_CLIENT_UNBOUND, /* Unbound socket used as client */ + RXRPC_CLIENT_BOUND, /* client local address bound */ + RXRPC_SERVER_BOUND, /* server local address bound */ + RXRPC_SERVER_BOUND2, /* second server local address bound */ + RXRPC_SERVER_LISTENING, /* server listening for connections */ + RXRPC_SERVER_LISTEN_DISABLED, /* server listening disabled */ + RXRPC_CLOSE, /* socket is being closed */ +}; + +/* + * Per-network namespace data. + */ +struct rxrpc_net { + struct proc_dir_entry *proc_net; /* Subdir in /proc/net */ + u32 epoch; /* Local epoch for detecting local-end reset */ + struct list_head calls; /* List of calls active in this namespace */ + rwlock_t call_lock; /* Lock for ->calls */ + atomic_t nr_calls; /* Count of allocated calls */ + + atomic_t nr_conns; + struct list_head conn_proc_list; /* List of conns in this namespace for proc */ + struct list_head service_conns; /* Service conns in this namespace */ + rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */ + struct work_struct service_conn_reaper; + struct timer_list service_conn_reap_timer; + + bool live; + + bool kill_all_client_conns; + atomic_t nr_client_conns; + spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ + spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ + struct list_head idle_client_conns; + struct work_struct client_conn_reaper; + struct timer_list client_conn_reap_timer; + + struct hlist_head local_endpoints; + struct mutex local_mutex; /* Lock for ->local_endpoints */ + + DECLARE_HASHTABLE (peer_hash, 10); + spinlock_t peer_hash_lock; /* Lock for ->peer_hash */ + +#define RXRPC_KEEPALIVE_TIME 20 /* NAT keepalive time in seconds */ + u8 peer_keepalive_cursor; + time64_t peer_keepalive_base; + struct list_head peer_keepalive[32]; + struct list_head peer_keepalive_new; + struct timer_list peer_keepalive_timer; + struct work_struct peer_keepalive_work; +}; + +/* + * Service backlog preallocation. + * + * This contains circular buffers of preallocated peers, connections and calls + * for incoming service calls and their head and tail pointers. This allows + * calls to be set up in the data_ready handler, thereby avoiding the need to + * shuffle packets around so much. + */ +struct rxrpc_backlog { + unsigned short peer_backlog_head; + unsigned short peer_backlog_tail; + unsigned short conn_backlog_head; + unsigned short conn_backlog_tail; + unsigned short call_backlog_head; + unsigned short call_backlog_tail; +#define RXRPC_BACKLOG_MAX 32 + struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX]; +}; + +/* + * RxRPC socket definition + */ +struct rxrpc_sock { + /* WARNING: sk has to be the first member */ + struct sock sk; + rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ + rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ + struct rxrpc_local *local; /* local endpoint */ + struct rxrpc_backlog *backlog; /* Preallocation for services */ + spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ + struct list_head sock_calls; /* List of calls owned by this socket */ + struct list_head to_be_accepted; /* calls awaiting acceptance */ + struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ + rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ + struct key *key; /* security for this socket */ + struct key *securities; /* list of server security descriptors */ + struct rb_root calls; /* User ID -> call mapping */ + unsigned long flags; +#define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ + rwlock_t call_lock; /* lock for calls */ + u32 min_sec_level; /* minimum security level */ +#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT + bool exclusive; /* Exclusive connection for a client socket */ + u16 second_service; /* Additional service bound to the endpoint */ + struct { + /* Service upgrade information */ + u16 from; /* Service ID to upgrade (if not 0) */ + u16 to; /* service ID to upgrade to */ + } service_upgrade; + sa_family_t family; /* Protocol family created with */ + struct sockaddr_rxrpc srx; /* Primary Service/local addresses */ + struct sockaddr_rxrpc connect_srx; /* Default client address from connect() */ +}; + +#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk) + +/* + * CPU-byteorder normalised Rx packet header. + */ +struct rxrpc_host_header { + u32 epoch; /* client boot timestamp */ + u32 cid; /* connection and channel ID */ + u32 callNumber; /* call ID (0 for connection-level packets) */ + u32 seq; /* sequence number of pkt in call stream */ + u32 serial; /* serial number of pkt sent to network */ + u8 type; /* packet type */ + u8 flags; /* packet flags */ + u8 userStatus; /* app-layer defined status */ + u8 securityIndex; /* security protocol ID */ + union { + u16 _rsvd; /* reserved */ + u16 cksum; /* kerberos security checksum */ + }; + u16 serviceId; /* service ID */ +} __packed; + +/* + * RxRPC socket buffer private variables + * - max 48 bytes (struct sk_buff::cb) + */ +struct rxrpc_skb_priv { + atomic_t nr_ring_pins; /* Number of rxtx ring pins */ + u8 nr_subpackets; /* Number of subpackets */ + u8 rx_flags; /* Received packet flags */ +#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ +#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */ + union { + int remain; /* amount of space remaining for next write */ + + /* List of requested ACKs on subpackets */ + unsigned long rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) / + BITS_PER_LONG]; + }; + + struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ +}; + +#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) + +/* + * RxRPC security module interface + */ +struct rxrpc_security { + const char *name; /* name of this service */ + u8 security_index; /* security type provided */ + u32 no_key_abort; /* Abort code indicating no key */ + + /* Initialise a security service */ + int (*init)(void); + + /* Clean up a security service */ + void (*exit)(void); + + /* initialise a connection's security */ + int (*init_connection_security)(struct rxrpc_connection *); + + /* prime a connection's packet security */ + int (*prime_packet_security)(struct rxrpc_connection *); + + /* impose security on a packet */ + int (*secure_packet)(struct rxrpc_call *, + struct sk_buff *, + size_t, + void *); + + /* verify the security on a received packet */ + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, + unsigned int, unsigned int, rxrpc_seq_t, u16); + + /* Free crypto request on a call */ + void (*free_call_crypto)(struct rxrpc_call *); + + /* Locate the data in a received packet that has been verified. */ + void (*locate_data)(struct rxrpc_call *, struct sk_buff *, + unsigned int *, unsigned int *); + + /* issue a challenge */ + int (*issue_challenge)(struct rxrpc_connection *); + + /* respond to a challenge */ + int (*respond_to_challenge)(struct rxrpc_connection *, + struct sk_buff *, + u32 *); + + /* verify a response */ + int (*verify_response)(struct rxrpc_connection *, + struct sk_buff *, + u32 *); + + /* clear connection security */ + void (*clear)(struct rxrpc_connection *); +}; + +/* + * RxRPC local transport endpoint description + * - owned by a single AF_RXRPC socket + * - pointed to by transport socket struct sk_user_data + */ +struct rxrpc_local { + struct rcu_head rcu; + atomic_t active_users; /* Number of users of the local endpoint */ + refcount_t ref; /* Number of references to the structure */ + struct rxrpc_net *rxnet; /* The network ns in which this resides */ + struct hlist_node link; + struct socket *socket; /* my UDP socket */ + struct work_struct processor; + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ + struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ + struct sk_buff_head reject_queue; /* packets awaiting rejection */ + struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ + struct rb_root client_bundles; /* Client connection bundles by socket params */ + spinlock_t client_bundles_lock; /* Lock for client_bundles */ + spinlock_t lock; /* access lock */ + rwlock_t services_lock; /* lock for services list */ + int debug_id; /* debug ID for printks */ + bool dead; + bool service_closed; /* Service socket closed */ + struct sockaddr_rxrpc srx; /* local address */ +}; + +/* + * RxRPC remote transport endpoint definition + * - matched by local endpoint, remote port, address and protocol type + */ +struct rxrpc_peer { + struct rcu_head rcu; /* This must be first */ + refcount_t ref; + unsigned long hash_key; + struct hlist_node hash_link; + struct rxrpc_local *local; + struct hlist_head error_targets; /* targets for net error distribution */ + struct rb_root service_conns; /* Service connections */ + struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ + time64_t last_tx_at; /* Last time packet sent here */ + seqlock_t service_conn_lock; + spinlock_t lock; /* access lock */ + unsigned int if_mtu; /* interface MTU for this peer */ + unsigned int mtu; /* network MTU for this peer */ + unsigned int maxdata; /* data size (MTU - hdrsize) */ + unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ + int debug_id; /* debug ID for printks */ + struct sockaddr_rxrpc srx; /* remote address */ + + /* calculated RTT cache */ +#define RXRPC_RTT_CACHE_SIZE 32 + spinlock_t rtt_input_lock; /* RTT lock for input routine */ + ktime_t rtt_last_req; /* Time of last RTT request */ + unsigned int rtt_count; /* Number of samples we've got */ + + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_j; /* Retransmission timeout in jiffies */ + u8 backoff; /* Backoff timeout */ + + u8 cong_cwnd; /* Congestion window size */ +}; + +/* + * Keys for matching a connection. + */ +struct rxrpc_conn_proto { + union { + struct { + u32 epoch; /* epoch of this connection */ + u32 cid; /* connection ID */ + }; + u64 index_key; + }; +}; + +struct rxrpc_conn_parameters { + struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Remote endpoint */ + struct key *key; /* Security details */ + bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ + u16 service_id; /* Service ID for this connection */ + u32 security_level; /* Security level selected */ +}; + +/* + * Bits in the connection flags. + */ +enum rxrpc_conn_flag { + RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ + RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ + RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ + RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */ + RXRPC_CONN_FINAL_ACK_0, /* Need final ACK for channel 0 */ + RXRPC_CONN_FINAL_ACK_1, /* Need final ACK for channel 1 */ + RXRPC_CONN_FINAL_ACK_2, /* Need final ACK for channel 2 */ + RXRPC_CONN_FINAL_ACK_3, /* Need final ACK for channel 3 */ +}; + +#define RXRPC_CONN_FINAL_ACK_MASK ((1UL << RXRPC_CONN_FINAL_ACK_0) | \ + (1UL << RXRPC_CONN_FINAL_ACK_1) | \ + (1UL << RXRPC_CONN_FINAL_ACK_2) | \ + (1UL << RXRPC_CONN_FINAL_ACK_3)) + +/* + * Events that can be raised upon a connection. + */ +enum rxrpc_conn_event { + RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */ +}; + +/* + * The connection protocol state. + */ +enum rxrpc_conn_proto_state { + RXRPC_CONN_UNUSED, /* Connection not yet attempted */ + RXRPC_CONN_CLIENT, /* Client connection */ + RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ + RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ + RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ + RXRPC_CONN_SERVICE, /* Service secured connection */ + RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ + RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ + RXRPC_CONN__NR_STATES +}; + +/* + * RxRPC client connection bundle. + */ +struct rxrpc_bundle { + struct rxrpc_conn_parameters params; + refcount_t ref; + atomic_t active; /* Number of active users */ + unsigned int debug_id; + bool try_upgrade; /* True if the bundle is attempting upgrade */ + bool alloc_conn; /* True if someone's getting a conn */ + short alloc_error; /* Error from last conn allocation */ + spinlock_t channel_lock; + struct rb_node local_node; /* Node in local->client_conns */ + struct list_head waiting_calls; /* Calls waiting for channels */ + unsigned long avail_chans; /* Mask of available channels */ + struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */ +}; + +/* + * RxRPC connection definition + * - matched by { local, peer, epoch, conn_id, direction } + * - each connection can only handle four simultaneous calls + */ +struct rxrpc_connection { + struct rxrpc_conn_proto proto; + struct rxrpc_conn_parameters params; + + refcount_t ref; + struct rcu_head rcu; + struct list_head cache_link; + + unsigned char act_chans; /* Mask of active channels */ + struct rxrpc_channel { + unsigned long final_ack_at; /* Time at which to issue final ACK */ + struct rxrpc_call __rcu *call; /* Active call */ + unsigned int call_debug_id; /* call->debug_id */ + u32 call_id; /* ID of current call */ + u32 call_counter; /* Call ID counter */ + u32 last_call; /* ID of last call */ + u8 last_type; /* Type of last packet */ + union { + u32 last_seq; + u32 last_abort; + }; + } channels[RXRPC_MAXCALLS]; + + struct timer_list timer; /* Conn event timer */ + struct work_struct processor; /* connection event processor */ + struct rxrpc_bundle *bundle; /* Client connection bundle */ + struct rb_node service_node; /* Node in peer->service_conns */ + struct list_head proc_link; /* link in procfs list */ + struct list_head link; /* link in master connection list */ + struct sk_buff_head rx_queue; /* received conn-level packets */ + const struct rxrpc_security *security; /* applied security module */ + struct key *server_key; /* security for this service */ + struct crypto_sync_skcipher *cipher; /* encryption handle */ + struct rxrpc_crypt csum_iv; /* packet checksum base */ + unsigned long flags; + unsigned long events; + unsigned long idle_timestamp; /* Time at which last became idle */ + spinlock_t state_lock; /* state-change lock */ + enum rxrpc_conn_proto_state state; /* current state of connection */ + u32 abort_code; /* Abort code of connection abort */ + int debug_id; /* debug ID for printks */ + atomic_t serial; /* packet serial number counter */ + unsigned int hi_serial; /* highest serial number received */ + u32 security_nonce; /* response re-use preventer */ + u32 service_id; /* Service ID, possibly upgraded */ + u8 size_align; /* data size alignment (for security) */ + u8 security_size; /* security header size */ + u8 security_ix; /* security type */ + u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + u8 bundle_shift; /* Index into bundle->avail_chans */ + short error; /* Local error code */ +}; + +static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) +{ + return sp->hdr.flags & RXRPC_CLIENT_INITIATED; +} + +static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp) +{ + return !rxrpc_to_server(sp); +} + +/* + * Flags in call->flags. + */ +enum rxrpc_call_flag { + RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */ + RXRPC_CALL_HAS_USERID, /* has a user ID attached */ + RXRPC_CALL_IS_SERVICE, /* Call is service call */ + RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ + RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ + RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ + RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ + RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ + RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ + RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ + RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ + RXRPC_CALL_KERNEL, /* The call was made by the kernel */ + RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ +}; + +/* + * Events that can be raised on a call. + */ +enum rxrpc_call_event { + RXRPC_CALL_EV_ACK, /* need to generate ACK */ + RXRPC_CALL_EV_ABORT, /* need to generate abort */ + RXRPC_CALL_EV_RESEND, /* Tx resend required */ + RXRPC_CALL_EV_PING, /* Ping send required */ + RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ + RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ +}; + +/* + * The states that a call can be in. + */ +enum rxrpc_call_state { + RXRPC_CALL_UNINITIALISED, + RXRPC_CALL_CLIENT_AWAIT_CONN, /* - client waiting for connection to become available */ + RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ + RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ + RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ + RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ + RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ + RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ + RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ + RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ + RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ + RXRPC_CALL_COMPLETE, /* - call complete */ + NR__RXRPC_CALL_STATES +}; + +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + +/* + * Call Tx congestion management modes. + */ +enum rxrpc_congest_mode { + RXRPC_CALL_SLOW_START, + RXRPC_CALL_CONGEST_AVOIDANCE, + RXRPC_CALL_PACKET_LOSS, + RXRPC_CALL_FAST_RETRANSMIT, + NR__RXRPC_CONGEST_MODES +}; + +/* + * RxRPC call definition + * - matched by { connection, call_id } + */ +struct rxrpc_call { + struct rcu_head rcu; + struct rxrpc_connection *conn; /* connection carrying call */ + struct rxrpc_peer *peer; /* Peer record for remote address */ + struct rxrpc_sock __rcu *socket; /* socket responsible */ + struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ + const struct rxrpc_security *security; /* applied security module */ + struct mutex user_mutex; /* User access mutex */ + unsigned long ack_at; /* When deferred ACK needs to happen */ + unsigned long ack_lost_at; /* When ACK is figured as lost */ + unsigned long resend_at; /* When next resend needs to happen */ + unsigned long ping_at; /* When next to send a ping */ + unsigned long keepalive_at; /* When next to send a keepalive ping */ + unsigned long expect_rx_by; /* When we expect to get a packet by */ + unsigned long expect_req_by; /* When we expect to get a request DATA packet by */ + unsigned long expect_term_by; /* When we expect call termination by */ + u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ + u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + struct skcipher_request *cipher_req; /* Packet cipher request buffer */ + struct timer_list timer; /* Combined event timer */ + struct work_struct processor; /* Event processor */ + rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ + struct list_head link; /* link in master call list */ + struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */ + struct hlist_node error_link; /* link in error distribution list */ + struct list_head accept_link; /* Link in rx->acceptq */ + struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ + struct list_head sock_link; /* Link in rx->sock_calls */ + struct rb_node sock_node; /* Node in rx->calls */ + struct sk_buff *tx_pending; /* Tx socket buffer being filled */ + wait_queue_head_t waitq; /* Wait queue for channel or Tx */ + s64 tx_total_len; /* Total length left to be transmitted (or -1) */ + __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ + unsigned long user_call_ID; /* user-defined call ID */ + unsigned long flags; + unsigned long events; + spinlock_t lock; + spinlock_t notify_lock; /* Kernel notification lock */ + rwlock_t state_lock; /* lock for state transition */ + u32 abort_code; /* Local/remote abort code */ + int error; /* Local error incurred */ + enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_completion completion; /* Call completion condition */ + refcount_t ref; + u16 service_id; /* service ID */ + u8 security_ix; /* Security type */ + enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ + u32 call_id; /* call ID on connection */ + u32 cid; /* connection ID plus channel index */ + int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ + bool rx_pkt_last; /* Current recvmsg packet is last */ + + /* Rx/Tx circular buffer, depending on phase. + * + * In the Rx phase, packets are annotated with 0 or the number of the + * segment of a jumbo packet each buffer refers to. There can be up to + * 47 segments in a maximum-size UDP packet. + * + * In the Tx phase, packets are annotated with which buffers have been + * acked. + */ +#define RXRPC_RXTX_BUFF_SIZE 64 +#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) +#define RXRPC_INIT_RX_WINDOW_SIZE 63 + struct sk_buff **rxtx_buffer; + u8 *rxtx_annotations; +#define RXRPC_TX_ANNO_ACK 0 +#define RXRPC_TX_ANNO_UNACK 1 +#define RXRPC_TX_ANNO_NAK 2 +#define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_TX_ANNO_MASK 0x03 +#define RXRPC_TX_ANNO_LAST 0x04 +#define RXRPC_TX_ANNO_RESENT 0x08 + +#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */ +#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ + rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but + * not hard-ACK'd packet follows this. + */ + rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + u16 tx_backoff; /* Delay to insert due to Tx failure */ + + /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS + * is fixed, we keep these numbers in terms of segments (ie. DATA + * packets) rather than bytes. + */ +#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN + u8 cong_cwnd; /* Congestion window size */ + u8 cong_extra; /* Extra to send for congestion management */ + u8 cong_ssthresh; /* Slow-start threshold */ + enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ + + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not + * consumed packet follows this. + */ + rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ + rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + rxrpc_serial_t rx_serial; /* Highest serial received for this call */ + u8 rx_winsize; /* Size of Rx window */ + u8 tx_winsize; /* Maximum size of Tx window */ + bool tx_phase; /* T if transmission phase, F if receive phase */ + u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ + + spinlock_t input_lock; /* Lock for packet input to this call */ + + /* Receive-phase ACK management (ACKs we send). */ + u8 ackr_reason; /* reason to ACK */ + rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_seq_t ackr_highest_seq; /* Higest sequence number received */ + atomic_t ackr_nr_unacked; /* Number of unacked packets */ + atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ + + /* RTT management */ + rxrpc_serial_t rtt_serial[4]; /* Serial number of DATA or PING sent */ + ktime_t rtt_sent_at[4]; /* Time packet sent */ + unsigned long rtt_avail; /* Mask of available slots in bits 0-3, + * Mask of pending samples in 8-11 */ +#define RXRPC_CALL_RTT_AVAIL_MASK 0xf +#define RXRPC_CALL_RTT_PEND_SHIFT 8 + + /* Transmission-phase ACK management (ACKs we've received). */ + ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ + rxrpc_seq_t acks_first_seq; /* first sequence number received */ + rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ + rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ + rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ + rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ +}; + +/* + * Summary of a new ACK and the changes it made to the Tx buffer packet states. + */ +struct rxrpc_ack_summary { + u8 ack_reason; + u8 nr_acks; /* Number of ACKs in packet */ + u8 nr_nacks; /* Number of NACKs in packet */ + u8 nr_new_acks; /* Number of new ACKs in packet */ + u8 nr_new_nacks; /* Number of new NACKs in packet */ + u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool new_low_nack; /* T if new low NACK found */ + bool retrans_timeo; /* T if reTx due to timeout happened */ + u8 flight_size; /* Number of unreceived transmissions */ + /* Place to stash values for tracing */ + enum rxrpc_congest_mode mode:8; + u8 cwnd; + u8 ssthresh; + u8 dup_acks; + u8 cumulative_acks; +}; + +/* + * sendmsg() cmsg-specified parameters. + */ +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ + RXRPC_CMD_CHARGE_ACCEPT, /* [server] charge accept preallocation */ +}; + +struct rxrpc_call_params { + s64 tx_total_len; /* Total Tx data length (if send data) */ + unsigned long user_call_ID; /* User's call ID */ + struct { + u32 hard; /* Maximum lifetime (sec) */ + u32 idle; /* Max time since last data packet (msec) */ + u32 normal; /* Max time since last call packet (msec) */ + } timeouts; + u8 nr_timeouts; /* Number of timeouts specified */ + bool kernel; /* T if kernel is making the call */ + enum rxrpc_interruptibility interruptibility; /* How is interruptible is the call? */ +}; + +struct rxrpc_send_params { + struct rxrpc_call_params call; + u32 abort_code; /* Abort code to Tx (if abort) */ + enum rxrpc_command command : 8; /* The command to implement */ + bool exclusive; /* Shared or exclusive call */ + bool upgrade; /* If the connection is upgradeable */ +}; + +#include <trace/events/rxrpc.h> + +/* + * af_rxrpc.c + */ +extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; +extern struct workqueue_struct *rxrpc_workqueue; + +/* + * call_accept.c + */ +int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); +void rxrpc_discard_prealloc(struct rxrpc_sock *); +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_sock *, + struct sk_buff *); +void rxrpc_accept_incoming_calls(struct rxrpc_local *); +int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); + +/* + * call_event.c + */ +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, + enum rxrpc_propose_ack_trace); +void rxrpc_process_call(struct work_struct *); + +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why); + +void rxrpc_delete_call_timer(struct rxrpc_call *call); + +/* + * call_object.c + */ +extern const char *const rxrpc_call_states[]; +extern const char *const rxrpc_call_completions[]; +extern unsigned int rxrpc_max_call_lifetime; +extern struct kmem_cache *rxrpc_call_jar; + +struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); +struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, + struct rxrpc_conn_parameters *, + struct sockaddr_rxrpc *, + struct rxrpc_call_params *, gfp_t, + unsigned int); +void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, + struct sk_buff *); +void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); +void rxrpc_release_calls_on_socket(struct rxrpc_sock *); +bool __rxrpc_queue_call(struct rxrpc_call *); +bool rxrpc_queue_call(struct rxrpc_call *); +void rxrpc_see_call(struct rxrpc_call *); +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); +void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_cleanup_call(struct rxrpc_call *); +void rxrpc_destroy_all_calls(struct rxrpc_net *); + +static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) +{ + return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags); +} + +static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) +{ + return !rxrpc_is_service_call(call); +} + +/* + * conn_client.c + */ +extern unsigned int rxrpc_reap_client_connections; +extern unsigned long rxrpc_conn_idle_client_expiry; +extern unsigned long rxrpc_conn_idle_client_fast_expiry; +extern struct idr rxrpc_client_conn_ids; + +void rxrpc_destroy_client_conn_ids(void); +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *); +void rxrpc_put_bundle(struct rxrpc_bundle *); +int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, + struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, + gfp_t); +void rxrpc_expose_client_call(struct rxrpc_call *); +void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); +void rxrpc_put_client_conn(struct rxrpc_connection *); +void rxrpc_discard_expired_client_conns(struct work_struct *); +void rxrpc_destroy_all_client_connections(struct rxrpc_net *); +void rxrpc_clean_up_local_conns(struct rxrpc_local *); + +/* + * conn_event.c + */ +void rxrpc_process_connection(struct work_struct *); +void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); + +/* + * conn_object.c + */ +extern unsigned int rxrpc_connection_expiry; +extern unsigned int rxrpc_closed_conn_expiry; + +struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); +struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, + struct sk_buff *, + struct rxrpc_peer **); +void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); +void rxrpc_disconnect_call(struct rxrpc_call *); +void rxrpc_kill_connection(struct rxrpc_connection *); +bool rxrpc_queue_conn(struct rxrpc_connection *); +void rxrpc_see_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); +void rxrpc_put_service_conn(struct rxrpc_connection *); +void rxrpc_service_connection_reaper(struct work_struct *); +void rxrpc_destroy_all_connections(struct rxrpc_net *); + +static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) +{ + return conn->out_clientflag; +} + +static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) +{ + return !rxrpc_conn_is_client(conn); +} + +static inline void rxrpc_put_connection(struct rxrpc_connection *conn) +{ + if (!conn) + return; + + if (rxrpc_conn_is_client(conn)) + rxrpc_put_client_conn(conn); + else + rxrpc_put_service_conn(conn); +} + +static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, + unsigned long expire_at) +{ + timer_reduce(&conn->timer, expire_at); +} + +/* + * conn_service.c + */ +struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, + struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *, + const struct rxrpc_security *, struct key *, + struct sk_buff *); +void rxrpc_unpublish_service_conn(struct rxrpc_connection *); + +/* + * input.c + */ +int rxrpc_input_packet(struct sock *, struct sk_buff *); + +/* + * insecure.c + */ +extern const struct rxrpc_security rxrpc_no_security; + +/* + * key.c + */ +extern struct key_type key_type_rxrpc; +extern struct key_type key_type_rxrpc_s; + +int rxrpc_request_key(struct rxrpc_sock *, sockptr_t , int); +int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); +int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, + u32); + +/* + * local_event.c + */ +extern void rxrpc_process_local_events(struct rxrpc_local *); + +/* + * local_object.c + */ +struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); +struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); +void rxrpc_put_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); +void rxrpc_unuse_local(struct rxrpc_local *); +void rxrpc_queue_local(struct rxrpc_local *); +void rxrpc_destroy_all_locals(struct rxrpc_net *); + +static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) +{ + return atomic_dec_return(&local->active_users) == 0; +} + +static inline bool __rxrpc_use_local(struct rxrpc_local *local) +{ + return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; +} + +/* + * misc.c + */ +extern unsigned int rxrpc_max_backlog __read_mostly; +extern unsigned long rxrpc_requested_ack_delay; +extern unsigned long rxrpc_soft_ack_delay; +extern unsigned long rxrpc_idle_ack_delay; +extern unsigned int rxrpc_rx_window_size; +extern unsigned int rxrpc_rx_mtu; +extern unsigned int rxrpc_rx_jumbo_max; + +extern const s8 rxrpc_ack_priority[]; + +/* + * net_ns.c + */ +extern unsigned int rxrpc_net_id; +extern struct pernet_operations rxrpc_net_ops; + +static inline struct rxrpc_net *rxrpc_net(struct net *net) +{ + return net_generic(net, rxrpc_net_id); +} + +/* + * output.c + */ +int rxrpc_send_ack_packet(struct rxrpc_call *, bool, rxrpc_serial_t *); +int rxrpc_send_abort_packet(struct rxrpc_call *); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); +void rxrpc_reject_packets(struct rxrpc_local *); +void rxrpc_send_keepalive(struct rxrpc_peer *); + +/* + * peer_event.c + */ +void rxrpc_error_report(struct sock *); +void rxrpc_peer_keepalive_worker(struct work_struct *); + +/* + * peer_object.c + */ +struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, + const struct sockaddr_rxrpc *); +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, + struct sockaddr_rxrpc *, gfp_t); +struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, + struct rxrpc_peer *); +void rxrpc_destroy_all_peers(struct rxrpc_net *); +struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); +struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); +void rxrpc_put_peer(struct rxrpc_peer *); +void rxrpc_put_peer_locked(struct rxrpc_peer *); + +/* + * proc.c + */ +extern const struct seq_operations rxrpc_call_seq_ops; +extern const struct seq_operations rxrpc_connection_seq_ops; +extern const struct seq_operations rxrpc_peer_seq_ops; +extern const struct seq_operations rxrpc_local_seq_ops; + +/* + * recvmsg.c + */ +void rxrpc_notify_socket(struct rxrpc_call *); +bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int); +bool __rxrpc_call_completed(struct rxrpc_call *); +bool rxrpc_call_completed(struct rxrpc_call *); +bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); +bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int); +int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); + +/* + * Abort a call due to a protocol error. + */ +static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call, + struct sk_buff *skb, + const char *eproto_why, + const char *why, + u32 abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why); + return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO); +} + +#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \ + __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \ + (abort_why), (abort_code)) + +/* + * rtt.c + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool); +void rxrpc_peer_init_rtt(struct rxrpc_peer *); + +/* + * rxkad.c + */ +#ifdef CONFIG_RXKAD +extern const struct rxrpc_security rxkad; +#endif + +/* + * security.c + */ +int __init rxrpc_init_security(void); +void rxrpc_exit_security(void); +int rxrpc_init_client_conn_security(struct rxrpc_connection *); +bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *, + const struct rxrpc_security **, struct key **, + struct sk_buff *); + +/* + * sendmsg.c + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); + +/* + * skbuff.c + */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); +void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_purge_queue(struct sk_buff_head *); + +/* + * sysctl.c + */ +#ifdef CONFIG_SYSCTL +extern int __init rxrpc_sysctl_init(void); +extern void rxrpc_sysctl_exit(void); +#else +static inline int __init rxrpc_sysctl_init(void) { return 0; } +static inline void rxrpc_sysctl_exit(void) {} +#endif + +/* + * utils.c + */ +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); + +static inline bool before(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) < 0; +} +static inline bool before_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) <= 0; +} +static inline bool after(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) > 0; +} +static inline bool after_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +/* + * debug tracing + */ +extern unsigned int rxrpc_debug; + +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n", current->comm ,##__VA_ARGS__) + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) +#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__) +#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__) + + +#if defined(__KDEBUG) +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) +#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__) +#define _net(FMT,...) knet(FMT,##__VA_ARGS__) + +#elif defined(CONFIG_AF_RXRPC_DEBUG) +#define RXRPC_DEBUG_KENTER 0x01 +#define RXRPC_DEBUG_KLEAVE 0x02 +#define RXRPC_DEBUG_KDEBUG 0x04 +#define RXRPC_DEBUG_KPROTO 0x08 +#define RXRPC_DEBUG_KNET 0x10 + +#define _enter(FMT,...) \ +do { \ + if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER)) \ + kenter(FMT,##__VA_ARGS__); \ +} while (0) + +#define _leave(FMT,...) \ +do { \ + if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE)) \ + kleave(FMT,##__VA_ARGS__); \ +} while (0) + +#define _debug(FMT,...) \ +do { \ + if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG)) \ + kdebug(FMT,##__VA_ARGS__); \ +} while (0) + +#define _proto(FMT,...) \ +do { \ + if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \ + kproto(FMT,##__VA_ARGS__); \ +} while (0) + +#define _net(FMT,...) \ +do { \ + if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \ + knet(FMT,##__VA_ARGS__); \ +} while (0) + +#else +#define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) +#define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) +#define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) +#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__) +#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__) +#endif + +/* + * debug assertion checking + */ +#if 1 // defined(__KDEBUGALL) + +#define ASSERT(X) \ +do { \ + if (unlikely(!(X))) { \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ + if (unlikely(!(_x OP _y))) { \ + pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ + if (unlikely((C) && !(X))) { \ + pr_err("Assertion failed\n"); \ + BUG(); \ + } \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ + if (unlikely((C) && !(_x OP _y))) { \ + pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ + BUG(); \ + } \ +} while (0) + +#else + +#define ASSERT(X) \ +do { \ +} while (0) + +#define ASSERTCMP(X, OP, Y) \ +do { \ +} while (0) + +#define ASSERTIF(C, X) \ +do { \ +} while (0) + +#define ASSERTIFCMP(C, X, OP, Y) \ +do { \ +} while (0) + +#endif /* __KDEBUGALL */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c new file mode 100644 index 000000000..2a14d69b1 --- /dev/null +++ b/net/rxrpc/call_accept.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* incoming call handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/errqueue.h> +#include <linux/udp.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/icmp.h> +#include <linux/gfp.h> +#include <linux/circ_buf.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <net/ip.h> +#include "ar-internal.h" + +static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, + unsigned long user_call_ID) +{ +} + +/* + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. + */ +static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + struct rxrpc_backlog *b, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_call *call, *xcall; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); + struct rb_node *parent, **pp; + int max, tmp; + unsigned int size = RXRPC_BACKLOG_MAX; + unsigned int head, tail, call_head, call_tail; + + max = rx->sk.sk_max_ack_backlog; + tmp = rx->sk.sk_ack_backlog; + if (tmp >= max) { + _leave(" = -ENOBUFS [full %u]", max); + return -ENOBUFS; + } + max -= tmp; + + /* We don't need more conns and peers than we have calls, but on the + * other hand, we shouldn't ever use more peers than conns or conns + * than calls. + */ + call_head = b->call_backlog_head; + call_tail = READ_ONCE(b->call_backlog_tail); + tmp = CIRC_CNT(call_head, call_tail, size); + if (tmp >= max) { + _leave(" = -ENOBUFS [enough %u]", tmp); + return -ENOBUFS; + } + max = tmp + 1; + + head = b->peer_backlog_head; + tail = READ_ONCE(b->peer_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + if (!peer) + return -ENOMEM; + b->peer_backlog[head] = peer; + smp_store_release(&b->peer_backlog_head, + (head + 1) & (size - 1)); + } + + head = b->conn_backlog_head; + tail = READ_ONCE(b->conn_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_connection *conn; + + conn = rxrpc_prealloc_service_connection(rxnet, gfp); + if (!conn) + return -ENOMEM; + b->conn_backlog[head] = conn; + smp_store_release(&b->conn_backlog_head, + (head + 1) & (size - 1)); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, + refcount_read(&conn->ref), here); + } + + /* Now it gets complicated, because calls get registered with the + * socket here, with a user ID preassigned by the user. + */ + call = rxrpc_alloc_call(rx, gfp, debug_id); + if (!call) + return -ENOMEM; + call->flags |= (1 << RXRPC_CALL_IS_SERVICE); + call->state = RXRPC_CALL_SERVER_PREALLOC; + + trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, + refcount_read(&call->ref), + here, (const void *)user_call_ID); + + write_lock(&rx->call_lock); + + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < xcall->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > xcall->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + if (user_attach_call) { + rxrpc_get_call(call, rxrpc_call_got_kernel); + user_attach_call(call, user_call_ID); + } + + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + + list_add(&call->sock_link, &rx->sock_calls); + + write_unlock(&rx->call_lock); + + rxnet = call->rxnet; + write_lock(&rxnet->call_lock); + list_add_tail(&call->link, &rxnet->calls); + write_unlock(&rxnet->call_lock); + + b->call_backlog[call_head] = call; + smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); + _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); + return 0; + +id_in_use: + write_unlock(&rx->call_lock); + rxrpc_cleanup_call(call); + _leave(" = -EBADSLT"); + return -EBADSLT; +} + +/* + * Allocate the preallocation buffers for incoming service calls. These must + * be charged manually. + */ +int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) +{ + struct rxrpc_backlog *b = rx->backlog; + + if (!b) { + b = kzalloc(sizeof(struct rxrpc_backlog), gfp); + if (!b) + return -ENOMEM; + rx->backlog = b; + } + + return 0; +} + +/* + * Discard the preallocation on a service. + */ +void rxrpc_discard_prealloc(struct rxrpc_sock *rx) +{ + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); + unsigned int size = RXRPC_BACKLOG_MAX, head, tail; + + if (!b) + return; + rx->backlog = NULL; + + /* Make sure that there aren't any incoming calls in progress before we + * clear the preallocation buffers. + */ + spin_lock_bh(&rx->incoming_lock); + spin_unlock_bh(&rx->incoming_lock); + + head = b->peer_backlog_head; + tail = b->peer_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_peer *peer = b->peer_backlog[tail]; + rxrpc_put_local(peer->local); + kfree(peer); + tail = (tail + 1) & (size - 1); + } + + head = b->conn_backlog_head; + tail = b->conn_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_connection *conn = b->conn_backlog[tail]; + write_lock(&rxnet->conn_lock); + list_del(&conn->link); + list_del(&conn->proc_link); + write_unlock(&rxnet->conn_lock); + kfree(conn); + if (atomic_dec_and_test(&rxnet->nr_conns)) + wake_up_var(&rxnet->nr_conns); + tail = (tail + 1) & (size - 1); + } + + head = b->call_backlog_head; + tail = b->call_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_call *call = b->call_backlog[tail]; + rcu_assign_pointer(call->socket, rx); + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); + if (call->notify_rx) + call->notify_rx = rxrpc_dummy_notify; + rxrpc_put_call(call, rxrpc_call_put_kernel); + } + rxrpc_call_completed(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + tail = (tail + 1) & (size - 1); + } + + kfree(b); +} + +/* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. + */ +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; + + if (call->peer->rtt_count < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, + true, true, + rxrpc_propose_ack_ping_for_params); +} + +/* + * Allocate a new incoming call from the prealloc pool, along with a connection + * and a peer as necessary. + */ +static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + const struct rxrpc_security *sec, + struct key *key, + struct sk_buff *skb) +{ + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_call *call; + unsigned short call_head, conn_head, peer_head; + unsigned short call_tail, conn_tail, peer_tail; + unsigned short call_count, conn_count; + + /* #calls >= #conns >= #peers must hold true. */ + call_head = smp_load_acquire(&b->call_backlog_head); + call_tail = b->call_backlog_tail; + call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); + conn_head = smp_load_acquire(&b->conn_backlog_head); + conn_tail = b->conn_backlog_tail; + conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); + ASSERTCMP(conn_count, >=, call_count); + peer_head = smp_load_acquire(&b->peer_backlog_head); + peer_tail = b->peer_backlog_tail; + ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, + conn_count); + + if (call_count == 0) + return NULL; + + if (!conn) { + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0) + return NULL; + b->peer_backlog[peer_tail] = NULL; + smp_store_release(&b->peer_backlog_tail, + (peer_tail + 1) & + (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_new_incoming_peer(rx, local, peer); + } + + /* Now allocate and set up the connection */ + conn = b->conn_backlog[conn_tail]; + b->conn_backlog[conn_tail] = NULL; + smp_store_release(&b->conn_backlog_tail, + (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + conn->params.local = rxrpc_get_local(local); + conn->params.peer = peer; + rxrpc_see_connection(conn); + rxrpc_new_incoming_connection(rx, conn, sec, key, skb); + } else { + rxrpc_get_connection(conn); + } + + /* And now we can allocate and set up a new call */ + call = b->call_backlog[call_tail]; + b->call_backlog[call_tail] = NULL; + smp_store_release(&b->call_backlog_tail, + (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_see_call(call); + call->conn = conn; + call->security = conn->security; + call->security_ix = conn->security_ix; + call->peer = rxrpc_get_peer(conn->params.peer); + call->cong_cwnd = call->peer->cong_cwnd; + return call; +} + +/* + * Set up a new incoming call. Called in BH context with the RCU read lock + * held. + * + * If this is for a kernel service, when we allocate the call, it will have + * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the + * retainer ref obtained from the backlog buffer. Prealloc calls for userspace + * services only have the ref from the backlog buffer. We want to pass this + * ref to non-BH context to dispose of. + * + * If we want to report an error, we mark the skb with the packet type and + * abort code and return NULL. + * + * The call is returned with the user access mutex held. + */ +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_sock *rx, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + const struct rxrpc_security *sec = NULL; + struct rxrpc_connection *conn; + struct rxrpc_peer *peer = NULL; + struct rxrpc_call *call = NULL; + struct key *key = NULL; + + _enter(""); + + spin_lock(&rx->incoming_lock); + if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || + rx->sk.sk_state == RXRPC_CLOSE) { + trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = RX_INVALID_OPERATION; + goto no_call; + } + + /* The peer, connection and call may all have sprung into existence due + * to a duplicate packet being handled on another CPU in parallel, so + * we have to recheck the routing. However, we're now holding + * rx->incoming_lock, so the values should remain stable. + */ + conn = rxrpc_find_connection_rcu(local, skb, &peer); + + if (!conn && !rxrpc_look_up_server_security(local, rx, &sec, &key, skb)) + goto no_call; + + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, key, skb); + key_put(key); + if (!call) { + skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; + goto no_call; + } + + trace_rxrpc_receive(call, rxrpc_receive_incoming, + sp->hdr.serial, sp->hdr.seq); + + /* Make the call live. */ + rxrpc_incoming_call(rx, call, skb); + conn = call->conn; + + if (rx->notify_new_call) + rx->notify_new_call(&rx->sk, call, call->user_call_ID); + + spin_lock(&conn->state_lock); + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + conn->state = RXRPC_CONN_SERVICE_CHALLENGING; + set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); + rxrpc_queue_conn(call->conn); + break; + + case RXRPC_CONN_SERVICE: + write_lock(&call->state_lock); + if (call->state < RXRPC_CALL_COMPLETE) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + write_unlock(&call->state_lock); + break; + + case RXRPC_CONN_REMOTELY_ABORTED: + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->abort_code, conn->error); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + rxrpc_abort_call("CON", call, sp->hdr.seq, + conn->abort_code, conn->error); + break; + default: + BUG(); + } + spin_unlock(&conn->state_lock); + spin_unlock(&rx->incoming_lock); + + rxrpc_send_ping(call, skb); + + /* We have to discard the prealloc queue's ref here and rely on a + * combination of the RCU read lock and refs held either by the socket + * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel + * service to prevent the call from being deallocated too early. + */ + rxrpc_put_call(call, rxrpc_call_put); + + _leave(" = %p{%d}", call, call->debug_id); + return call; + +no_call: + spin_unlock(&rx->incoming_lock); + _leave(" = NULL [%u]", skb->mark); + return NULL; +} + +/* + * Charge up socket with preallocated calls, attaching user call IDs. + */ +int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) +{ + struct rxrpc_backlog *b = rx->backlog; + + if (rx->sk.sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID, + GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); +} + +/* + * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls + * @sock: The socket on which to preallocate + * @notify_rx: Event notification function for the call + * @user_attach_call: Func to attach call to user_call_ID + * @user_call_ID: The tag to attach to the preallocated call + * @gfp: The allocation conditions. + * @debug_id: The tracing debug ID. + * + * Charge up the socket with preallocated calls, each with a user ID. A + * function should be provided to effect the attachment from the user's side. + * The user is given a ref to hold on the call. + * + * Note that the call may be come connected before this function returns. + */ +int rxrpc_kernel_charge_accept(struct socket *sock, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct rxrpc_backlog *b = rx->backlog; + + if (sock->sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, notify_rx, + user_attach_call, user_call_ID, + gfp, debug_id); +} +EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c new file mode 100644 index 000000000..2a93e7b5f --- /dev/null +++ b/net/rxrpc/call_event.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/circ_buf.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/udp.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +/* + * Propose a PING ACK be sent. + */ +static void rxrpc_propose_ping(struct rxrpc_call *call, + bool immediate, bool background) +{ + if (immediate) { + if (background && + !test_and_set_bit(RXRPC_CALL_EV_PING, &call->events)) + rxrpc_queue_call(call); + } else { + unsigned long now = jiffies; + unsigned long ping_at = now + rxrpc_idle_ack_delay; + + if (time_before(ping_at, call->ping_at)) { + WRITE_ONCE(call->ping_at, ping_at); + rxrpc_reduce_call_timer(call, ping_at, now, + rxrpc_timer_set_for_ping); + } + } +} + +/* + * propose an ACK be sent + */ +static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) +{ + enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; + unsigned long expiry = rxrpc_soft_ack_delay; + s8 prior = rxrpc_ack_priority[ack_reason]; + + /* Pings are handled specially because we don't want to accidentally + * lose a ping response by subsuming it into a ping. + */ + if (ack_reason == RXRPC_ACK_PING) { + rxrpc_propose_ping(call, immediate, background); + goto trace; + } + + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial + * numbers, but we don't alter the timeout. + */ + _debug("prior %u %u vs %u %u", + ack_reason, prior, + call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); + if (ack_reason == call->ackr_reason) { + if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + outcome = rxrpc_propose_ack_update; + call->ackr_serial = serial; + } + if (!immediate) + goto trace; + } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { + call->ackr_reason = ack_reason; + call->ackr_serial = serial; + } else { + outcome = rxrpc_propose_ack_subsume; + } + + switch (ack_reason) { + case RXRPC_ACK_REQUESTED: + if (rxrpc_requested_ack_delay < expiry) + expiry = rxrpc_requested_ack_delay; + if (serial == 1) + immediate = false; + break; + + case RXRPC_ACK_DELAY: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + break; + + case RXRPC_ACK_IDLE: + if (rxrpc_idle_ack_delay < expiry) + expiry = rxrpc_idle_ack_delay; + break; + + default: + immediate = true; + break; + } + + if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { + _debug("already scheduled"); + } else if (immediate || expiry == 0) { + _debug("immediate ACK %lx", call->events); + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && + background) + rxrpc_queue_call(call); + } else { + unsigned long now = jiffies, ack_at; + + if (call->peer->srtt_us != 0) + ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); + else + ack_at = expiry; + + ack_at += READ_ONCE(call->tx_backoff); + ack_at += now; + if (time_before(ack_at, call->ack_at)) { + WRITE_ONCE(call->ack_at, ack_at); + rxrpc_reduce_call_timer(call, ack_at, now, + rxrpc_timer_set_for_ack); + } + } + +trace: + trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, + background, outcome); +} + +/* + * propose an ACK be sent, locking the call structure + */ +void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) +{ + spin_lock_bh(&call->lock); + __rxrpc_propose_ACK(call, ack_reason, serial, + immediate, background, why); + spin_unlock_bh(&call->lock); +} + +/* + * Handle congestion being detected by the retransmit timeout. + */ +static void rxrpc_congestion_timeout(struct rxrpc_call *call) +{ + set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); +} + +/* + * Perform retransmission of NAK'd and unack'd packets. + */ +static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) +{ + struct sk_buff *skb; + unsigned long resend_at; + rxrpc_seq_t cursor, seq, top; + ktime_t now, max_age, oldest, ack_ts; + int ix; + u8 annotation, anno_type, retrans = 0, unacked = 0; + + _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); + + now = ktime_get_real(); + max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j)); + + spin_lock_bh(&call->lock); + + cursor = call->tx_hard_ack; + top = call->tx_top; + ASSERT(before_eq(cursor, top)); + if (cursor == top) + goto out_unlock; + + /* Scan the packet list without dropping the lock and decide which of + * the packets in the Tx buffer we're going to resend and what the new + * resend timeout will be. + */ + trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK); + oldest = now; + for (seq = cursor + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb, rxrpc_skb_seen); + + if (anno_type == RXRPC_TX_ANNO_UNACK) { + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; + continue; + } + if (!(annotation & RXRPC_TX_ANNO_RESENT)) + unacked++; + } + + /* Okay, we need to retransmit a packet. */ + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + retrans++; + trace_rxrpc_retransmit(call, seq, annotation | anno_type, + ktime_to_ns(ktime_sub(skb->tstamp, max_age))); + } + + resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); + WRITE_ONCE(call->resend_at, resend_at); + + if (unacked) + rxrpc_congestion_timeout(call); + + /* If there was nothing that needed retransmission then it's likely + * that an ACK got lost somewhere. Send a ping to find out instead of + * retransmitting data. + */ + if (!retrans) { + rxrpc_reduce_call_timer(call, resend_at, now_j, + rxrpc_timer_set_for_resend); + spin_unlock_bh(&call->lock); + ack_ts = ktime_sub(now, call->acks_latest_ts); + if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) + goto out; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_send_ack_packet(call, true, NULL); + goto out; + } + + /* Now go through the Tx window and perform the retransmissions. We + * have to drop the lock for each send. If an ACK comes in whilst the + * lock is dropped, it may clear some of the retransmission markers for + * packets that it soft-ACKs. + */ + for (seq = cursor + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type != RXRPC_TX_ANNO_RETRANS) + continue; + + /* We need to reset the retransmission state, but we need to do + * so before we drop the lock as a new ACK/NAK may come in and + * confuse things + */ + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK | RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + rxrpc_get_skb(skb, rxrpc_skb_got); + spin_unlock_bh(&call->lock); + + if (rxrpc_send_data_packet(call, skb, true) < 0) { + rxrpc_free_skb(skb, rxrpc_skb_freed); + return; + } + + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + + rxrpc_free_skb(skb, rxrpc_skb_freed); + spin_lock_bh(&call->lock); + if (after(call->tx_hard_ack, seq)) + seq = call->tx_hard_ack; + } + +out_unlock: + spin_unlock_bh(&call->lock); +out: + _leave(""); +} + +/* + * Handle retransmission and deferred ACK/abort generation. + */ +void rxrpc_process_call(struct work_struct *work) +{ + struct rxrpc_call *call = + container_of(work, struct rxrpc_call, processor); + rxrpc_serial_t *send_ack; + unsigned long now, next, t; + unsigned int iterations = 0; + + rxrpc_see_call(call); + + //printk("\n--------------------\n"); + _enter("{%d,%s,%lx}", + call->debug_id, rxrpc_call_states[call->state], call->events); + +recheck_state: + /* Limit the number of times we do this before returning to the manager */ + iterations++; + if (iterations > 5) + goto requeue; + + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { + rxrpc_send_abort_packet(call); + goto recheck_state; + } + + if (call->state == RXRPC_CALL_COMPLETE) { + rxrpc_delete_call_timer(call); + goto out_put; + } + + /* Work out if any timeouts tripped */ + now = jiffies; + t = READ_ONCE(call->expect_rx_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_req_by); + if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && + time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->expect_term_by); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); + set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + } + + t = READ_ONCE(call->ack_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); + cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK, &call->events); + } + + t = READ_ONCE(call->ack_lost_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_lost_ack, now); + cmpxchg(&call->ack_lost_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); + } + + t = READ_ONCE(call->keepalive_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_keepalive, now); + cmpxchg(&call->keepalive_at, t, now + MAX_JIFFY_OFFSET); + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, true, + rxrpc_propose_ack_ping_for_keepalive); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + + t = READ_ONCE(call->ping_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ping, now); + cmpxchg(&call->ping_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_PING, &call->events); + } + + t = READ_ONCE(call->resend_at); + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); + cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); + set_bit(RXRPC_CALL_EV_RESEND, &call->events); + } + + /* Process events */ + if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { + if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && + (int)call->conn->hi_serial - (int)call->rx_serial > 0) { + trace_rxrpc_call_reset(call); + rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET); + } else { + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + } + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + goto recheck_state; + } + + send_ack = NULL; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { + call->acks_lost_top = call->tx_top; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + send_ack = &call->acks_lost_ping; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events) || + send_ack) { + if (call->ackr_reason) { + rxrpc_send_ack_packet(call, false, send_ack); + goto recheck_state; + } + } + + if (test_and_clear_bit(RXRPC_CALL_EV_PING, &call->events)) { + rxrpc_send_ack_packet(call, true, NULL); + goto recheck_state; + } + + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) && + call->state != RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_resend(call, now); + goto recheck_state; + } + + /* Make sure the timer is restarted */ + next = call->expect_rx_by; + +#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } + + set(call->expect_req_by); + set(call->expect_term_by); + set(call->ack_at); + set(call->ack_lost_at); + set(call->resend_at); + set(call->keepalive_at); + set(call->ping_at); + + now = jiffies; + if (time_after_eq(now, next)) + goto recheck_state; + + rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); + + /* other events may have been raised since we started checking */ + if (call->events && call->state < RXRPC_CALL_COMPLETE) + goto requeue; + +out_put: + rxrpc_put_call(call, rxrpc_call_put); +out: + _leave(""); + return; + +requeue: + __rxrpc_queue_call(call); + goto out; +} diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c new file mode 100644 index 000000000..10dad2834 --- /dev/null +++ b/net/rxrpc/call_object.c @@ -0,0 +1,737 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC individual remote procedure call handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/circ_buf.h> +#include <linux/spinlock_types.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { + [RXRPC_CALL_UNINITIALISED] = "Uninit ", + [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", + [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", + [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", + [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", + [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", + [RXRPC_CALL_SERVER_SECURING] = "SvSecure", + [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", + [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", + [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", + [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", + [RXRPC_CALL_COMPLETE] = "Complete", +}; + +const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { + [RXRPC_CALL_SUCCEEDED] = "Complete", + [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", + [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CALL_LOCAL_ERROR] = "LocError", + [RXRPC_CALL_NETWORK_ERROR] = "NetError", +}; + +struct kmem_cache *rxrpc_call_jar; + +static struct semaphore rxrpc_call_limiter = + __SEMAPHORE_INITIALIZER(rxrpc_call_limiter, 1000); +static struct semaphore rxrpc_kernel_call_limiter = + __SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000); + +static void rxrpc_call_timer_expired(struct timer_list *t) +{ + struct rxrpc_call *call = from_timer(call, t, timer); + + _enter("%d", call->debug_id); + + if (call->state < RXRPC_CALL_COMPLETE) { + trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies); + __rxrpc_queue_call(call); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } +} + +void rxrpc_reduce_call_timer(struct rxrpc_call *call, + unsigned long expire_at, + unsigned long now, + enum rxrpc_timer_trace why) +{ + if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { + trace_rxrpc_timer(call, why, now); + if (timer_reduce(&call->timer, expire_at)) + rxrpc_put_call(call, rxrpc_call_put_notimer); + } +} + +void rxrpc_delete_call_timer(struct rxrpc_call *call) +{ + if (del_timer_sync(&call->timer)) + rxrpc_put_call(call, rxrpc_call_put_timer); +} + +static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; + +/* + * find an extant server call + * - called in process context with IRQs enabled + */ +struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, + unsigned long user_call_ID) +{ + struct rxrpc_call *call; + struct rb_node *p; + + _enter("%p,%lx", rx, user_call_ID); + + read_lock(&rx->call_lock); + + p = rx->calls.rb_node; + while (p) { + call = rb_entry(p, struct rxrpc_call, sock_node); + + if (user_call_ID < call->user_call_ID) + p = p->rb_left; + else if (user_call_ID > call->user_call_ID) + p = p->rb_right; + else + goto found_extant_call; + } + + read_unlock(&rx->call_lock); + _leave(" = NULL"); + return NULL; + +found_extant_call: + rxrpc_get_call(call, rxrpc_call_got); + read_unlock(&rx->call_lock); + _leave(" = %p [%d]", call, refcount_read(&call->ref)); + return call; +} + +/* + * allocate a new call + */ +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, + unsigned int debug_id) +{ + struct rxrpc_call *call; + struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); + + call = kmem_cache_zalloc(rxrpc_call_jar, gfp); + if (!call) + return NULL; + + call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, + sizeof(struct sk_buff *), + gfp); + if (!call->rxtx_buffer) + goto nomem; + + call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); + if (!call->rxtx_annotations) + goto nomem_2; + + mutex_init(&call->user_mutex); + + /* Prevent lockdep reporting a deadlock false positive between the afs + * filesystem and sys_sendmsg() via the mmap sem. + */ + if (rx->sk.sk_kern_sock) + lockdep_set_class(&call->user_mutex, + &rxrpc_call_user_mutex_lock_class_key); + + timer_setup(&call->timer, rxrpc_call_timer_expired, 0); + INIT_WORK(&call->processor, &rxrpc_process_call); + INIT_LIST_HEAD(&call->link); + INIT_LIST_HEAD(&call->chan_wait_link); + INIT_LIST_HEAD(&call->accept_link); + INIT_LIST_HEAD(&call->recvmsg_link); + INIT_LIST_HEAD(&call->sock_link); + init_waitqueue_head(&call->waitq); + spin_lock_init(&call->lock); + spin_lock_init(&call->notify_lock); + spin_lock_init(&call->input_lock); + rwlock_init(&call->state_lock); + refcount_set(&call->ref, 1); + call->debug_id = debug_id; + call->tx_total_len = -1; + call->next_rx_timo = 20 * HZ; + call->next_req_timo = 1 * HZ; + + memset(&call->sock_node, 0xed, sizeof(call->sock_node)); + + /* Leave space in the ring to handle a maxed-out jumbo packet */ + call->rx_winsize = rxrpc_rx_window_size; + call->tx_winsize = 16; + call->rx_expect_next = 1; + + call->cong_cwnd = 2; + call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; + + call->rxnet = rxnet; + call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK; + atomic_inc(&rxnet->nr_calls); + return call; + +nomem_2: + kfree(call->rxtx_buffer); +nomem: + kmem_cache_free(rxrpc_call_jar, call); + return NULL; +} + +/* + * Allocate a new client call. + */ +static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, + struct sockaddr_rxrpc *srx, + gfp_t gfp, + unsigned int debug_id) +{ + struct rxrpc_call *call; + ktime_t now; + + _enter(""); + + call = rxrpc_alloc_call(rx, gfp, debug_id); + if (!call) + return ERR_PTR(-ENOMEM); + call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; + call->service_id = srx->srx_service; + call->tx_phase = true; + now = ktime_get_real(); + call->acks_latest_ts = now; + call->cong_tstamp = now; + + _leave(" = %p", call); + return call; +} + +/* + * Initiate the call ack/resend/expiry timer. + */ +static void rxrpc_start_call_timer(struct rxrpc_call *call) +{ + unsigned long now = jiffies; + unsigned long j = now + MAX_JIFFY_OFFSET; + + call->ack_at = j; + call->ack_lost_at = j; + call->resend_at = j; + call->ping_at = j; + call->expect_rx_by = j; + call->expect_req_by = j; + call->expect_term_by = j; + call->timer.expires = now; +} + +/* + * Wait for a call slot to become available. + */ +static struct semaphore *rxrpc_get_call_slot(struct rxrpc_call_params *p, gfp_t gfp) +{ + struct semaphore *limiter = &rxrpc_call_limiter; + + if (p->kernel) + limiter = &rxrpc_kernel_call_limiter; + if (p->interruptibility == RXRPC_UNINTERRUPTIBLE) { + down(limiter); + return limiter; + } + return down_interruptible(limiter) < 0 ? NULL : limiter; +} + +/* + * Release a call slot. + */ +static void rxrpc_put_call_slot(struct rxrpc_call *call) +{ + struct semaphore *limiter = &rxrpc_call_limiter; + + if (test_bit(RXRPC_CALL_KERNEL, &call->flags)) + limiter = &rxrpc_kernel_call_limiter; + up(limiter); +} + +/* + * Set up a call for the given parameters. + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. + */ +struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + struct rxrpc_call_params *p, + gfp_t gfp, + unsigned int debug_id) + __releases(&rx->sk.sk_lock.slock) + __acquires(&call->user_mutex) +{ + struct rxrpc_call *call, *xcall; + struct rxrpc_net *rxnet; + struct semaphore *limiter; + struct rb_node *parent, **pp; + const void *here = __builtin_return_address(0); + int ret; + + _enter("%p,%lx", rx, p->user_call_ID); + + limiter = rxrpc_get_call_slot(p, gfp); + if (!limiter) { + release_sock(&rx->sk); + return ERR_PTR(-ERESTARTSYS); + } + + call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); + if (IS_ERR(call)) { + release_sock(&rx->sk); + up(limiter); + _leave(" = %ld", PTR_ERR(call)); + return call; + } + + call->interruptibility = p->interruptibility; + call->tx_total_len = p->tx_total_len; + trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, + refcount_read(&call->ref), + here, (const void *)p->user_call_ID); + if (p->kernel) + __set_bit(RXRPC_CALL_KERNEL, &call->flags); + + /* We need to protect a partially set up call against the user as we + * will be acting outside the socket lock. + */ + mutex_lock(&call->user_mutex); + + /* Publish the call, even though it is incompletely set up as yet */ + write_lock(&rx->call_lock); + + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + + if (p->user_call_ID < xcall->user_call_ID) + pp = &(*pp)->rb_left; + else if (p->user_call_ID > xcall->user_call_ID) + pp = &(*pp)->rb_right; + else + goto error_dup_user_ID; + } + + rcu_assign_pointer(call->socket, rx); + call->user_call_ID = p->user_call_ID; + __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + list_add(&call->sock_link, &rx->sock_calls); + + write_unlock(&rx->call_lock); + + rxnet = call->rxnet; + write_lock(&rxnet->call_lock); + list_add_tail(&call->link, &rxnet->calls); + write_unlock(&rxnet->call_lock); + + /* From this point on, the call is protected by its own lock. */ + release_sock(&rx->sk); + + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. + */ + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); + if (ret < 0) + goto error_attached_to_socket; + + trace_rxrpc_call(call->debug_id, rxrpc_call_connected, + refcount_read(&call->ref), here, NULL); + + rxrpc_start_call_timer(call); + + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); + + _leave(" = %p [new]", call); + return call; + + /* We unexpectedly found the user ID in the list after taking + * the call_lock. This shouldn't happen unless the user races + * with itself and tries to add the same user ID twice at the + * same time in different threads. + */ +error_dup_user_ID: + write_unlock(&rx->call_lock); + release_sock(&rx->sk); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, -EEXIST); + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); + rxrpc_release_call(rx, call); + mutex_unlock(&call->user_mutex); + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = -EEXIST"); + return ERR_PTR(-EEXIST); + + /* We got an error, but the call is attached to the socket and is in + * need of release. However, we might now race with recvmsg() when + * completing the call queues it. Return 0 from sys_sendmsg() and + * leave the error to recvmsg() to deal with. + */ +error_attached_to_socket: + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + refcount_read(&call->ref), here, ERR_PTR(ret)); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + _leave(" = c=%08x [err]", call->debug_id); + return call; +} + +/* + * Set up an incoming call. call->conn points to the connection. + * This is called in BH context and isn't allowed to fail. + */ +void rxrpc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct sk_buff *skb) +{ + struct rxrpc_connection *conn = call->conn; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + u32 chan; + + _enter(",%d", call->conn->debug_id); + + rcu_assign_pointer(call->socket, rx); + call->call_id = sp->hdr.callNumber; + call->service_id = sp->hdr.serviceId; + call->cid = sp->hdr.cid; + call->state = RXRPC_CALL_SERVER_SECURING; + call->cong_tstamp = skb->tstamp; + + /* Set the channel for this call. We don't get channel_lock as we're + * only defending against the data_ready handler (which we're called + * from) and the RESPONSE packet parser (which is only really + * interested in call_counter and can cope with a disagreement with the + * call pointer). + */ + chan = sp->hdr.cid & RXRPC_CHANNELMASK; + conn->channels[chan].call_counter = call->call_id; + conn->channels[chan].call_id = call->call_id; + rcu_assign_pointer(conn->channels[chan].call, call); + + spin_lock(&conn->params.peer->lock); + hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); + spin_unlock(&conn->params.peer->lock); + + _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); + + rxrpc_start_call_timer(call); + _leave(""); +} + +/* + * Queue a call's work processor, getting a ref to pass to the work queue. + */ +bool rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n; + + if (!__refcount_inc_not_zero(&call->ref, &n)) + return false; + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, + here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + +/* + * Queue a call's work processor, passing the callers ref to the work queue. + */ +bool __rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = refcount_read(&call->ref); + ASSERTCMP(n, >=, 1); + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, + here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + +/* + * Note the re-emergence of a call. + */ +void rxrpc_see_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + if (call) { + int n = refcount_read(&call->ref); + + trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, + here, NULL); + } +} + +bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); + int n; + + if (!__refcount_inc_not_zero(&call->ref, &n)) + return false; + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); + return true; +} + +/* + * Note the addition of a ref on a call. + */ +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); + int n; + + __refcount_inc(&call->ref, &n); + trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); +} + +/* + * Clean up the RxTx skb ring. + */ +static void rxrpc_cleanup_ring(struct rxrpc_call *call) +{ + int i; + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned); + call->rxtx_buffer[i] = NULL; + } +} + +/* + * Detach a call from its owning socket. + */ +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_connection *conn = call->conn; + bool put = false; + + _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); + + trace_rxrpc_call(call->debug_id, rxrpc_call_release, + refcount_read(&call->ref), + here, (const void *)call->flags); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + + spin_lock_bh(&call->lock); + if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) + BUG(); + spin_unlock_bh(&call->lock); + + rxrpc_put_call_slot(call); + rxrpc_delete_call_timer(call); + + /* Make sure we don't get any more notifications */ + write_lock_bh(&rx->recvmsg_lock); + + if (!list_empty(&call->recvmsg_link)) { + _debug("unlinking once-pending call %p { e=%lx f=%lx }", + call, call->events, call->flags); + list_del(&call->recvmsg_link); + put = true; + } + + /* list_empty() must return false in rxrpc_notify_socket() */ + call->recvmsg_link.next = NULL; + call->recvmsg_link.prev = NULL; + + write_unlock_bh(&rx->recvmsg_lock); + if (put) + rxrpc_put_call(call, rxrpc_call_put); + + write_lock(&rx->call_lock); + + if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + rb_erase(&call->sock_node, &rx->calls); + memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); + rxrpc_put_call(call, rxrpc_call_put_userid); + } + + list_del(&call->sock_link); + write_unlock(&rx->call_lock); + + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + + if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + rxrpc_disconnect_call(call); + if (call->security) + call->security->free_call_crypto(call); + _leave(""); +} + +/* + * release all the calls associated with a socket + */ +void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) +{ + struct rxrpc_call *call; + + _enter("%p", rx); + + while (!list_empty(&rx->to_be_accepted)) { + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); + list_del(&call->accept_link); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_put_call(call, rxrpc_call_put); + } + + while (!list_empty(&rx->sock_calls)) { + call = list_entry(rx->sock_calls.next, + struct rxrpc_call, sock_link); + rxrpc_get_call(call, rxrpc_call_got); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET); + rxrpc_send_abort_packet(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + + _leave(""); +} + +/* + * release a call + */ +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + struct rxrpc_net *rxnet = call->rxnet; + const void *here = __builtin_return_address(0); + unsigned int debug_id = call->debug_id; + bool dead; + int n; + + ASSERT(call != NULL); + + dead = __refcount_dec_and_test(&call->ref, &n); + trace_rxrpc_call(debug_id, op, n, here, NULL); + if (dead) { + _debug("call %d dead", call->debug_id); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + + if (!list_empty(&call->link)) { + write_lock(&rxnet->call_lock); + list_del_init(&call->link); + write_unlock(&rxnet->call_lock); + } + + rxrpc_cleanup_call(call); + } +} + +/* + * Final call destruction - but must be done in process context. + */ +static void rxrpc_destroy_call(struct work_struct *work) +{ + struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); + struct rxrpc_net *rxnet = call->rxnet; + + rxrpc_delete_call_timer(call); + + rxrpc_put_connection(call->conn); + rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); + kmem_cache_free(rxrpc_call_jar, call); + if (atomic_dec_and_test(&rxnet->nr_calls)) + wake_up_var(&rxnet->nr_calls); +} + +/* + * Final call destruction under RCU. + */ +static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) +{ + struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + + if (in_softirq()) { + INIT_WORK(&call->processor, rxrpc_destroy_call); + if (!rxrpc_queue_work(&call->processor)) + BUG(); + } else { + rxrpc_destroy_call(&call->processor); + } +} + +/* + * clean up a call + */ +void rxrpc_cleanup_call(struct rxrpc_call *call) +{ + _net("DESTROY CALL %d", call->debug_id); + + memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); + + rxrpc_cleanup_ring(call); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); + + call_rcu(&call->rcu, rxrpc_rcu_destroy_call); +} + +/* + * Make sure that all calls are gone from a network namespace. To reach this + * point, any open UDP sockets in that namespace must have been closed, so any + * outstanding calls cannot be doing I/O. + */ +void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) +{ + struct rxrpc_call *call; + + _enter(""); + + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); + + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); + + rxrpc_see_call(call); + list_del_init(&call->link); + + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, refcount_read(&call->ref), + rxrpc_call_states[call->state], + call->flags, call->events); + + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } + + write_unlock(&rxnet->call_lock); + } + + atomic_dec(&rxnet->nr_calls); + wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); +} diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c new file mode 100644 index 000000000..f5fa5f308 --- /dev/null +++ b/net/rxrpc/conn_client.c @@ -0,0 +1,1137 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Client connection-specific management code. + * + * Copyright (C) 2016, 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * Client connections need to be cached for a little while after they've made a + * call so as to handle retransmitted DATA packets in case the server didn't + * receive the final ACK or terminating ABORT we sent it. + * + * There are flags of relevance to the cache: + * + * (2) DONT_REUSE - The connection should be discarded as soon as possible and + * should not be reused. This is set when an exclusive connection is used + * or a call ID counter overflows. + * + * The caching state may only be changed if the cache lock is held. + * + * There are two idle client connection expiry durations. If the total number + * of connections is below the reap threshold, we use the normal duration; if + * it's above, we use the fast duration. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> +#include <linux/idr.h> +#include <linux/timer.h> +#include <linux/sched/signal.h> + +#include "ar-internal.h" + +__read_mostly unsigned int rxrpc_reap_client_connections = 900; +__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; + +/* + * We use machine-unique IDs for our client connections. + */ +DEFINE_IDR(rxrpc_client_conn_ids); +static DEFINE_SPINLOCK(rxrpc_conn_id_lock); + +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); + +/* + * Get a connection ID and epoch for a client connection from the global pool. + * The connection struct pointer is then recorded in the idr radix tree. The + * epoch doesn't change until the client is rebooted (or, at least, unless the + * module is unloaded). + */ +static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, + gfp_t gfp) +{ + struct rxrpc_net *rxnet = conn->params.local->rxnet; + int id; + + _enter(""); + + idr_preload(gfp); + spin_lock(&rxrpc_conn_id_lock); + + id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, + 1, 0x40000000, GFP_NOWAIT); + if (id < 0) + goto error; + + spin_unlock(&rxrpc_conn_id_lock); + idr_preload_end(); + + conn->proto.epoch = rxnet->epoch; + conn->proto.cid = id << RXRPC_CIDSHIFT; + set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); + _leave(" [CID %x]", conn->proto.cid); + return 0; + +error: + spin_unlock(&rxrpc_conn_id_lock); + idr_preload_end(); + _leave(" = %d", id); + return id; +} + +/* + * Release a connection ID for a client connection from the global pool. + */ +static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn) +{ + if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) { + spin_lock(&rxrpc_conn_id_lock); + idr_remove(&rxrpc_client_conn_ids, + conn->proto.cid >> RXRPC_CIDSHIFT); + spin_unlock(&rxrpc_conn_id_lock); + } +} + +/* + * Destroy the client connection ID tree. + */ +void rxrpc_destroy_client_conn_ids(void) +{ + struct rxrpc_connection *conn; + int id; + + if (!idr_is_empty(&rxrpc_client_conn_ids)) { + idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) { + pr_err("AF_RXRPC: Leaked client conn %p {%d}\n", + conn, refcount_read(&conn->ref)); + } + BUG(); + } + + idr_destroy(&rxrpc_client_conn_ids); +} + +/* + * Allocate a connection bundle. + */ +static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, + gfp_t gfp) +{ + struct rxrpc_bundle *bundle; + + bundle = kzalloc(sizeof(*bundle), gfp); + if (bundle) { + bundle->params = *cp; + rxrpc_get_peer(bundle->params.peer); + refcount_set(&bundle->ref, 1); + atomic_set(&bundle->active, 1); + spin_lock_init(&bundle->channel_lock); + INIT_LIST_HEAD(&bundle->waiting_calls); + } + return bundle; +} + +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) +{ + refcount_inc(&bundle->ref); + return bundle; +} + +static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) +{ + rxrpc_put_peer(bundle->params.peer); + kfree(bundle); +} + +void rxrpc_put_bundle(struct rxrpc_bundle *bundle) +{ + unsigned int d = bundle->debug_id; + bool dead; + int r; + + dead = __refcount_dec_and_test(&bundle->ref, &r); + + _debug("PUT B=%x %d", d, r - 1); + if (dead) + rxrpc_free_bundle(bundle); +} + +/* + * Allocate a client connection. + */ +static struct rxrpc_connection * +rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) +{ + struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = bundle->params.local->rxnet; + int ret; + + _enter(""); + + conn = rxrpc_alloc_connection(gfp); + if (!conn) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } + + refcount_set(&conn->ref, 1); + conn->bundle = bundle; + conn->params = bundle->params; + conn->out_clientflag = RXRPC_CLIENT_INITIATED; + conn->state = RXRPC_CONN_CLIENT; + conn->service_id = conn->params.service_id; + + ret = rxrpc_get_client_connection_id(conn, gfp); + if (ret < 0) + goto error_0; + + ret = rxrpc_init_client_conn_security(conn); + if (ret < 0) + goto error_1; + + ret = conn->security->prime_packet_security(conn); + if (ret < 0) + goto error_2; + + atomic_inc(&rxnet->nr_conns); + write_lock(&rxnet->conn_lock); + list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); + write_unlock(&rxnet->conn_lock); + + rxrpc_get_bundle(bundle); + rxrpc_get_peer(conn->params.peer); + rxrpc_get_local(conn->params.local); + key_get(conn->params.key); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, + refcount_read(&conn->ref), + __builtin_return_address(0)); + + atomic_inc(&rxnet->nr_client_conns); + trace_rxrpc_client(conn, -1, rxrpc_client_alloc); + _leave(" = %p", conn); + return conn; + +error_2: + conn->security->clear(conn); +error_1: + rxrpc_put_client_connection_id(conn); +error_0: + kfree(conn); + _leave(" = %d", ret); + return ERR_PTR(ret); +} + +/* + * Determine if a connection may be reused. + */ +static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_net *rxnet; + int id_cursor, id, distance, limit; + + if (!conn) + goto dont_reuse; + + rxnet = conn->params.local->rxnet; + if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) + goto dont_reuse; + + if (conn->state != RXRPC_CONN_CLIENT || + conn->proto.epoch != rxnet->epoch) + goto mark_dont_reuse; + + /* The IDR tree gets very expensive on memory if the connection IDs are + * widely scattered throughout the number space, so we shall want to + * kill off connections that, say, have an ID more than about four + * times the maximum number of client conns away from the current + * allocation point to try and keep the IDs concentrated. + */ + id_cursor = idr_get_cursor(&rxrpc_client_conn_ids); + id = conn->proto.cid >> RXRPC_CIDSHIFT; + distance = id - id_cursor; + if (distance < 0) + distance = -distance; + limit = max_t(unsigned long, atomic_read(&rxnet->nr_conns) * 4, 1024); + if (distance > limit) + goto mark_dont_reuse; + + return true; + +mark_dont_reuse: + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); +dont_reuse: + return false; +} + +/* + * Look up the conn bundle that matches the connection parameters, adding it if + * it doesn't yet exist. + */ +static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp, + gfp_t gfp) +{ + static atomic_t rxrpc_bundle_id; + struct rxrpc_bundle *bundle, *candidate; + struct rxrpc_local *local = cp->local; + struct rb_node *p, **pp, *parent; + long diff; + + _enter("{%px,%x,%u,%u}", + cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade); + + if (cp->exclusive) + return rxrpc_alloc_bundle(cp, gfp); + + /* First, see if the bundle is already there. */ + _debug("search 1"); + spin_lock(&local->client_bundles_lock); + p = local->client_bundles.rb_node; + while (p) { + bundle = rb_entry(p, struct rxrpc_bundle, local_node); + +#define cmp(X) ((long)bundle->params.X - (long)cp->X) + diff = (cmp(peer) ?: + cmp(key) ?: + cmp(security_level) ?: + cmp(upgrade)); +#undef cmp + if (diff < 0) + p = p->rb_left; + else if (diff > 0) + p = p->rb_right; + else + goto found_bundle; + } + spin_unlock(&local->client_bundles_lock); + _debug("not found"); + + /* It wasn't. We need to add one. */ + candidate = rxrpc_alloc_bundle(cp, gfp); + if (!candidate) + return NULL; + + _debug("search 2"); + spin_lock(&local->client_bundles_lock); + pp = &local->client_bundles.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + bundle = rb_entry(parent, struct rxrpc_bundle, local_node); + +#define cmp(X) ((long)bundle->params.X - (long)cp->X) + diff = (cmp(peer) ?: + cmp(key) ?: + cmp(security_level) ?: + cmp(upgrade)); +#undef cmp + if (diff < 0) + pp = &(*pp)->rb_left; + else if (diff > 0) + pp = &(*pp)->rb_right; + else + goto found_bundle_free; + } + + _debug("new bundle"); + candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); + rb_link_node(&candidate->local_node, parent, pp); + rb_insert_color(&candidate->local_node, &local->client_bundles); + rxrpc_get_bundle(candidate); + spin_unlock(&local->client_bundles_lock); + _leave(" = %u [new]", candidate->debug_id); + return candidate; + +found_bundle_free: + rxrpc_free_bundle(candidate); +found_bundle: + rxrpc_get_bundle(bundle); + atomic_inc(&bundle->active); + spin_unlock(&local->client_bundles_lock); + _leave(" = %u [found]", bundle->debug_id); + return bundle; +} + +/* + * Create or find a client bundle to use for a call. + * + * If we return with a connection, the call will be on its waiting list. It's + * left to the caller to assign a channel and wake up the call. + */ +static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + struct rxrpc_bundle *bundle; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp); + if (!cp->peer) + goto error; + + call->cong_cwnd = cp->peer->cong_cwnd; + if (call->cong_cwnd >= call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + else + call->cong_mode = RXRPC_CALL_SLOW_START; + if (cp->upgrade) + __set_bit(RXRPC_CALL_UPGRADE, &call->flags); + + /* Find the client connection bundle. */ + bundle = rxrpc_look_up_bundle(cp, gfp); + if (!bundle) + goto error; + + /* Get this call queued. Someone else may activate it whilst we're + * lining up a new connection, but that's fine. + */ + spin_lock(&bundle->channel_lock); + list_add_tail(&call->chan_wait_link, &bundle->waiting_calls); + spin_unlock(&bundle->channel_lock); + + _leave(" = [B=%x]", bundle->debug_id); + return bundle; + +error: + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); +} + +/* + * Allocate a new connection and add it into a bundle. + */ +static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) + __releases(bundle->channel_lock) +{ + struct rxrpc_connection *candidate = NULL, *old = NULL; + bool conflict; + int i; + + _enter(""); + + conflict = bundle->alloc_conn; + if (!conflict) + bundle->alloc_conn = true; + spin_unlock(&bundle->channel_lock); + if (conflict) { + _leave(" [conf]"); + return; + } + + candidate = rxrpc_alloc_client_connection(bundle, gfp); + + spin_lock(&bundle->channel_lock); + bundle->alloc_conn = false; + + if (IS_ERR(candidate)) { + bundle->alloc_error = PTR_ERR(candidate); + spin_unlock(&bundle->channel_lock); + _leave(" [err %ld]", PTR_ERR(candidate)); + return; + } + + bundle->alloc_error = 0; + + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) { + unsigned int shift = i * RXRPC_MAXCALLS; + int j; + + old = bundle->conns[i]; + if (!rxrpc_may_reuse_conn(old)) { + if (old) + trace_rxrpc_client(old, -1, rxrpc_client_replace); + candidate->bundle_shift = shift; + atomic_inc(&bundle->active); + bundle->conns[i] = candidate; + for (j = 0; j < RXRPC_MAXCALLS; j++) + set_bit(shift + j, &bundle->avail_chans); + candidate = NULL; + break; + } + + old = NULL; + } + + spin_unlock(&bundle->channel_lock); + + if (candidate) { + _debug("discard C=%x", candidate->debug_id); + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + } + + rxrpc_put_connection(old); + _leave(""); +} + +/* + * Add a connection to a bundle if there are no usable connections or we have + * connections waiting for extra capacity. + */ +static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp) +{ + struct rxrpc_call *call; + int i, usable; + + _enter(""); + + spin_lock(&bundle->channel_lock); + + /* See if there are any usable connections. */ + usable = 0; + for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) + if (rxrpc_may_reuse_conn(bundle->conns[i])) + usable++; + + if (!usable && !list_empty(&bundle->waiting_calls)) { + call = list_first_entry(&bundle->waiting_calls, + struct rxrpc_call, chan_wait_link); + if (test_bit(RXRPC_CALL_UPGRADE, &call->flags)) + bundle->try_upgrade = true; + } + + if (!usable) + goto alloc_conn; + + if (!bundle->avail_chans && + !bundle->try_upgrade && + !list_empty(&bundle->waiting_calls) && + usable < ARRAY_SIZE(bundle->conns)) + goto alloc_conn; + + spin_unlock(&bundle->channel_lock); + _leave(""); + return; + +alloc_conn: + return rxrpc_add_conn_to_bundle(bundle, gfp); +} + +/* + * Assign a channel to the call at the front of the queue and wake the call up. + * We don't increment the callNumber counter until this number has been exposed + * to the world. + */ +static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_bundle *bundle = conn->bundle; + struct rxrpc_call *call = list_entry(bundle->waiting_calls.next, + struct rxrpc_call, chan_wait_link); + u32 call_id = chan->call_counter + 1; + + _enter("C=%x,%u", conn->debug_id, channel); + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + + /* Cancel the final ACK on the previous call if it hasn't been sent yet + * as the DATA packet will implicitly ACK it. + */ + clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); + + rxrpc_see_call(call); + list_del_init(&call->chan_wait_link); + call->peer = rxrpc_get_peer(conn->params.peer); + call->conn = rxrpc_get_connection(conn); + call->cid = conn->proto.cid | channel; + call->call_id = call_id; + call->security = conn->security; + call->security_ix = conn->security_ix; + call->service_id = conn->service_id; + + trace_rxrpc_connect_call(call); + _net("CONNECT call %08x:%08x as call %d on conn %d", + call->cid, call->call_id, call->debug_id, conn->debug_id); + + write_lock_bh(&call->state_lock); + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + write_unlock_bh(&call->state_lock); + + /* Paired with the read barrier in rxrpc_connect_call(). This orders + * cid and epoch in the connection wrt to call_id without the need to + * take the channel_lock. + * + * We provisionally assign a callNumber at this point, but we don't + * confirm it until the call is about to be exposed. + * + * TODO: Pair with a barrier in the data_ready handler when that looks + * at the call ID through a connection channel. + */ + smp_wmb(); + + chan->call_id = call_id; + chan->call_debug_id = call->debug_id; + rcu_assign_pointer(chan->call, call); + wake_up(&call->waitq); +} + +/* + * Remove a connection from the idle list if it's on it. + */ +static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) +{ + struct rxrpc_net *rxnet = bundle->params.local->rxnet; + bool drop_ref; + + if (!list_empty(&conn->cache_link)) { + drop_ref = false; + spin_lock(&rxnet->client_conn_cache_lock); + if (!list_empty(&conn->cache_link)) { + list_del_init(&conn->cache_link); + drop_ref = true; + } + spin_unlock(&rxnet->client_conn_cache_lock); + if (drop_ref) + rxrpc_put_connection(conn); + } +} + +/* + * Assign channels and callNumbers to waiting calls with channel_lock + * held by caller. + */ +static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle) +{ + struct rxrpc_connection *conn; + unsigned long avail, mask; + unsigned int channel, slot; + + if (bundle->try_upgrade) + mask = 1; + else + mask = ULONG_MAX; + + while (!list_empty(&bundle->waiting_calls)) { + avail = bundle->avail_chans & mask; + if (!avail) + break; + channel = __ffs(avail); + clear_bit(channel, &bundle->avail_chans); + + slot = channel / RXRPC_MAXCALLS; + conn = bundle->conns[slot]; + if (!conn) + break; + + if (bundle->try_upgrade) + set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags); + rxrpc_unidle_conn(bundle, conn); + + channel &= (RXRPC_MAXCALLS - 1); + conn->act_chans |= 1 << channel; + rxrpc_activate_one_channel(conn, channel); + } +} + +/* + * Assign channels and callNumbers to waiting calls. + */ +static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) +{ + _enter("B=%x", bundle->debug_id); + + trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans); + + if (!bundle->avail_chans) + return; + + spin_lock(&bundle->channel_lock); + rxrpc_activate_channels_locked(bundle); + spin_unlock(&bundle->channel_lock); + _leave(""); +} + +/* + * Wait for a callNumber and a channel to be granted to a call. + */ +static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle, + struct rxrpc_call *call, gfp_t gfp) +{ + DECLARE_WAITQUEUE(myself, current); + int ret = 0; + + _enter("%d", call->debug_id); + + if (!gfpflags_allow_blocking(gfp)) { + rxrpc_maybe_add_conn(bundle, gfp); + rxrpc_activate_channels(bundle); + ret = bundle->alloc_error ?: -EAGAIN; + goto out; + } + + add_wait_queue_exclusive(&call->waitq, &myself); + for (;;) { + rxrpc_maybe_add_conn(bundle, gfp); + rxrpc_activate_channels(bundle); + ret = bundle->alloc_error; + if (ret < 0) + break; + + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + case RXRPC_PREINTERRUPTIBLE: + set_current_state(TASK_INTERRUPTIBLE); + break; + case RXRPC_UNINTERRUPTIBLE: + default: + set_current_state(TASK_UNINTERRUPTIBLE); + break; + } + if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN) + break; + if ((call->interruptibility == RXRPC_INTERRUPTIBLE || + call->interruptibility == RXRPC_PREINTERRUPTIBLE) && + signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + schedule(); + } + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + +out: + _leave(" = %d", ret); + return ret; +} + +/* + * find a connection for a call + * - called in process context with IRQs enabled + */ +int rxrpc_connect_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + struct rxrpc_bundle *bundle; + struct rxrpc_net *rxnet = cp->local->rxnet; + int ret = 0; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper); + + bundle = rxrpc_prep_call(rx, call, cp, srx, gfp); + if (IS_ERR(bundle)) { + ret = PTR_ERR(bundle); + goto out; + } + + if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = rxrpc_wait_for_channel(bundle, call, gfp); + if (ret < 0) + goto wait_failed; + } + +granted_channel: + /* Paired with the write barrier in rxrpc_activate_one_channel(). */ + smp_rmb(); + +out_put_bundle: + rxrpc_deactivate_bundle(bundle); + rxrpc_put_bundle(bundle); +out: + _leave(" = %d", ret); + return ret; + +wait_failed: + spin_lock(&bundle->channel_lock); + list_del_init(&call->chan_wait_link); + spin_unlock(&bundle->channel_lock); + + if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) { + ret = 0; + goto granted_channel; + } + + trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed); + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_disconnect_client_call(bundle, call); + goto out_put_bundle; +} + +/* + * Note that a call, and thus a connection, is about to be exposed to the + * world. + */ +void rxrpc_expose_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + /* Mark the call ID as being used. If the callNumber counter + * exceeds ~2 billion, we kill the connection after its + * outstanding calls have finished so that the counter doesn't + * wrap. + */ + chan->call_counter++; + if (chan->call_counter >= INT_MAX) + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); + } +} + +/* + * Set the reap timer. + */ +static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet) +{ + if (!rxnet->kill_all_client_conns) { + unsigned long now = jiffies; + unsigned long reap_at = now + rxrpc_conn_idle_client_expiry; + + if (rxnet->live) + timer_reduce(&rxnet->client_conn_reap_timer, reap_at); + } +} + +/* + * Disconnect a client call. + */ +void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call *call) +{ + struct rxrpc_connection *conn; + struct rxrpc_channel *chan = NULL; + struct rxrpc_net *rxnet = bundle->params.local->rxnet; + unsigned int channel; + bool may_reuse; + u32 cid; + + _enter("c=%x", call->debug_id); + + spin_lock(&bundle->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + + /* Calls that have never actually been assigned a channel can simply be + * discarded. + */ + conn = call->conn; + if (!conn) { + _debug("call is waiting"); + ASSERTCMP(call->call_id, ==, 0); + ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + list_del_init(&call->chan_wait_link); + goto out; + } + + cid = call->cid; + channel = cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); + + if (rcu_access_pointer(chan->call) != call) { + spin_unlock(&bundle->channel_lock); + BUG(); + } + + may_reuse = rxrpc_may_reuse_conn(conn); + + /* If a client call was exposed to the world, we save the result for + * retransmission. + * + * We use a barrier here so that the call number and abort code can be + * read without needing to take a lock. + * + * TODO: Make the incoming packet handler check this and handle + * terminal retransmission without requiring access to the call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + _debug("exposed %u,%u", call->call_id, call->abort_code); + __rxrpc_disconnect_call(conn, call); + + if (test_and_clear_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_to_active); + bundle->try_upgrade = false; + if (may_reuse) + rxrpc_activate_channels_locked(bundle); + } + + } + + /* See if we can pass the channel directly to another call. */ + if (may_reuse && !list_empty(&bundle->waiting_calls)) { + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); + rxrpc_activate_one_channel(conn, channel); + goto out; + } + + /* Schedule the final ACK to be transmitted in a short while so that it + * can be skipped if we find a follow-on call. The first DATA packet + * of the follow on call will implicitly ACK this call. + */ + if (call->completion == RXRPC_CALL_SUCCEEDED && + test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + unsigned long final_ack_at = jiffies + 2; + + WRITE_ONCE(chan->final_ack_at, final_ack_at); + smp_wmb(); /* vs rxrpc_process_delayed_final_acks() */ + set_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); + rxrpc_reduce_conn_timer(conn, final_ack_at); + } + + /* Deactivate the channel. */ + rcu_assign_pointer(chan->call, NULL); + set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans); + conn->act_chans &= ~(1 << channel); + + /* If no channels remain active, then put the connection on the idle + * list for a short while. Give it a ref to stop it going away if it + * becomes unbundled. + */ + if (!conn->act_chans) { + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); + conn->idle_timestamp = jiffies; + + rxrpc_get_connection(conn); + spin_lock(&rxnet->client_conn_cache_lock); + list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); + spin_unlock(&rxnet->client_conn_cache_lock); + + rxrpc_set_client_reap_timer(rxnet); + } + +out: + spin_unlock(&bundle->channel_lock); + _leave(""); + return; +} + +/* + * Remove a connection from a bundle. + */ +static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_bundle *bundle = conn->bundle; + unsigned int bindex; + bool need_drop = false; + int i; + + _enter("C=%x", conn->debug_id); + + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn, true); + + spin_lock(&bundle->channel_lock); + bindex = conn->bundle_shift / RXRPC_MAXCALLS; + if (bundle->conns[bindex] == conn) { + _debug("clear slot %u", bindex); + bundle->conns[bindex] = NULL; + for (i = 0; i < RXRPC_MAXCALLS; i++) + clear_bit(conn->bundle_shift + i, &bundle->avail_chans); + need_drop = true; + } + spin_unlock(&bundle->channel_lock); + + if (need_drop) { + rxrpc_deactivate_bundle(bundle); + rxrpc_put_connection(conn); + } +} + +/* + * Drop the active count on a bundle. + */ +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +{ + struct rxrpc_local *local = bundle->params.local; + bool need_put = false; + + if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { + if (!bundle->params.exclusive) { + _debug("erase bundle"); + rb_erase(&bundle->local_node, &local->client_bundles); + need_put = true; + } + + spin_unlock(&local->client_bundles_lock); + if (need_put) + rxrpc_put_bundle(bundle); + } +} + +/* + * Clean up a dead client connection. + */ +static void rxrpc_kill_client_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_local *local = conn->params.local; + struct rxrpc_net *rxnet = local->rxnet; + + _enter("C=%x", conn->debug_id); + + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + atomic_dec(&rxnet->nr_client_conns); + + rxrpc_put_client_connection_id(conn); + rxrpc_kill_connection(conn); +} + +/* + * Clean up a dead client connections. + */ +void rxrpc_put_client_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; + bool dead; + int r; + + dead = __refcount_dec_and_test(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); + if (dead) + rxrpc_kill_client_conn(conn); +} + +/* + * Discard expired client connections from the idle list. Each conn in the + * idle list has been exposed and holds an extra ref because of that. + * + * This may be called from conn setup or from a work item so cannot be + * considered non-reentrant. + */ +void rxrpc_discard_expired_client_conns(struct work_struct *work) +{ + struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = + container_of(work, struct rxrpc_net, client_conn_reaper); + unsigned long expiry, conn_expires_at, now; + unsigned int nr_conns; + + _enter(""); + + if (list_empty(&rxnet->idle_client_conns)) { + _leave(" [empty]"); + return; + } + + /* Don't double up on the discarding */ + if (!spin_trylock(&rxnet->client_conn_discard_lock)) { + _leave(" [already]"); + return; + } + + /* We keep an estimate of what the number of conns ought to be after + * we've discarded some so that we don't overdo the discarding. + */ + nr_conns = atomic_read(&rxnet->nr_client_conns); + +next: + spin_lock(&rxnet->client_conn_cache_lock); + + if (list_empty(&rxnet->idle_client_conns)) + goto out; + + conn = list_entry(rxnet->idle_client_conns.next, + struct rxrpc_connection, cache_link); + + if (!rxnet->kill_all_client_conns) { + /* If the number of connections is over the reap limit, we + * expedite discard by reducing the expiry timeout. We must, + * however, have at least a short grace period to be able to do + * final-ACK or ABORT retransmission. + */ + expiry = rxrpc_conn_idle_client_expiry; + if (nr_conns > rxrpc_reap_client_connections) + expiry = rxrpc_conn_idle_client_fast_expiry; + if (conn->params.local->service_closed) + expiry = rxrpc_closed_conn_expiry * HZ; + + conn_expires_at = conn->idle_timestamp + expiry; + + now = READ_ONCE(jiffies); + if (time_after(conn_expires_at, now)) + goto not_yet_expired; + } + + trace_rxrpc_client(conn, -1, rxrpc_client_discard); + list_del_init(&conn->cache_link); + + spin_unlock(&rxnet->client_conn_cache_lock); + + rxrpc_unbundle_conn(conn); + rxrpc_put_connection(conn); /* Drop the ->cache_link ref */ + + nr_conns--; + goto next; + +not_yet_expired: + /* The connection at the front of the queue hasn't yet expired, so + * schedule the work item for that point if we discarded something. + * + * We don't worry if the work item is already scheduled - it can look + * after rescheduling itself at a later time. We could cancel it, but + * then things get messier. + */ + _debug("not yet"); + if (!rxnet->kill_all_client_conns) + timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at); + +out: + spin_unlock(&rxnet->client_conn_cache_lock); + spin_unlock(&rxnet->client_conn_discard_lock); + _leave(""); +} + +/* + * Preemptively destroy all the client connection records rather than waiting + * for them to time out + */ +void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet) +{ + _enter(""); + + spin_lock(&rxnet->client_conn_cache_lock); + rxnet->kill_all_client_conns = true; + spin_unlock(&rxnet->client_conn_cache_lock); + + del_timer_sync(&rxnet->client_conn_reap_timer); + + if (!rxrpc_queue_work(&rxnet->client_conn_reaper)) + _debug("destroy: queue failed"); + + _leave(""); +} + +/* + * Clean up the client connections on a local endpoint. + */ +void rxrpc_clean_up_local_conns(struct rxrpc_local *local) +{ + struct rxrpc_connection *conn, *tmp; + struct rxrpc_net *rxnet = local->rxnet; + LIST_HEAD(graveyard); + + _enter(""); + + spin_lock(&rxnet->client_conn_cache_lock); + + list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, + cache_link) { + if (conn->params.local == local) { + trace_rxrpc_client(conn, -1, rxrpc_client_discard); + list_move(&conn->cache_link, &graveyard); + } + } + + spin_unlock(&rxnet->client_conn_cache_lock); + + while (!list_empty(&graveyard)) { + conn = list_entry(graveyard.next, + struct rxrpc_connection, cache_link); + list_del_init(&conn->cache_link); + rxrpc_unbundle_conn(conn); + rxrpc_put_connection(conn); + } + + _leave(" [culled]"); +} diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c new file mode 100644 index 000000000..aff184145 --- /dev/null +++ b/net/rxrpc/conn_event.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* connection-level event handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/errqueue.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <net/ip.h> +#include "ar-internal.h" + +/* + * Retransmit terminal ACK or ABORT of the previous call. + */ +static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb, + unsigned int channel) +{ + struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL; + struct rxrpc_channel *chan; + struct msghdr msg; + struct kvec iov[3]; + struct { + struct rxrpc_wire_header whdr; + union { + __be32 abort_code; + struct rxrpc_ackpacket ack; + }; + } __attribute__((packed)) pkt; + struct rxrpc_ackinfo ack_info; + size_t len; + int ret, ioc; + u32 serial, mtu, call_id, padding; + + _enter("%d", conn->debug_id); + + chan = &conn->channels[channel]; + + /* If the last call got moved on whilst we were waiting to run, just + * ignore this packet. + */ + call_id = READ_ONCE(chan->last_call); + /* Sync with __rxrpc_disconnect_call() */ + smp_rmb(); + if (skb && call_id != sp->hdr.callNumber) + return; + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt.whdr); + iov[1].iov_base = &padding; + iov[1].iov_len = 3; + iov[2].iov_base = &ack_info; + iov[2].iov_len = sizeof(ack_info); + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(conn->proto.cid | channel); + pkt.whdr.callNumber = htonl(call_id); + pkt.whdr.seq = 0; + pkt.whdr.type = chan->last_type; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = conn->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(conn->service_id); + + len = sizeof(pkt.whdr); + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + pkt.abort_code = htonl(chan->last_abort); + iov[0].iov_len += sizeof(pkt.abort_code); + len += sizeof(pkt.abort_code); + ioc = 1; + break; + + case RXRPC_PACKET_TYPE_ACK: + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; + pkt.ack.bufferSpace = 0; + pkt.ack.maxSkew = htons(skb ? skb->priority : 0); + pkt.ack.firstPacket = htonl(chan->last_seq + 1); + pkt.ack.previousPacket = htonl(chan->last_seq); + pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); + pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; + pkt.ack.nAcks = 0; + ack_info.rxMTU = htonl(rxrpc_rx_mtu); + ack_info.maxMTU = htonl(mtu); + ack_info.rwind = htonl(rxrpc_rx_window_size); + ack_info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + pkt.whdr.flags |= RXRPC_SLOW_START_OK; + padding = 0; + iov[0].iov_len += sizeof(pkt.ack); + len += sizeof(pkt.ack) + 3 + sizeof(ack_info); + ioc = 3; + break; + + default: + return; + } + + /* Resync with __rxrpc_disconnect_call() and check that the last call + * didn't get advanced whilst we were filling out the packets. + */ + smp_rmb(); + if (READ_ONCE(chan->last_call) != call_id) + return; + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); + break; + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(chan->call_debug_id, serial, + ntohl(pkt.ack.firstPacket), + ntohl(pkt.ack.serial), + pkt.ack.reason, 0); + _proto("Tx ACK %%%u [re]", serial); + break; + } + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + conn->params.peer->last_tx_at = ktime_get_seconds(); + if (ret < 0) + trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret, + rxrpc_tx_point_call_final_resend); + else + trace_rxrpc_tx_packet(chan->call_debug_id, &pkt.whdr, + rxrpc_tx_point_call_final_resend); + + _leave(""); +} + +/* + * pass a connection-level abort onto all calls on that connection + */ +static void rxrpc_abort_calls(struct rxrpc_connection *conn, + enum rxrpc_call_completion compl, + rxrpc_serial_t serial) +{ + struct rxrpc_call *call; + int i; + + _enter("{%d},%x", conn->debug_id, conn->abort_code); + + spin_lock(&conn->bundle->channel_lock); + + for (i = 0; i < RXRPC_MAXCALLS; i++) { + call = rcu_dereference_protected( + conn->channels[i].call, + lockdep_is_held(&conn->bundle->channel_lock)); + if (call) { + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + trace_rxrpc_abort(call->debug_id, + "CON", call->cid, + call->call_id, 0, + conn->abort_code, + conn->error); + else + trace_rxrpc_rx_abort(call, serial, + conn->abort_code); + rxrpc_set_call_completion(call, compl, + conn->abort_code, + conn->error); + } + } + + spin_unlock(&conn->bundle->channel_lock); + _leave(""); +} + +/* + * generate a connection-level abort + */ +static int rxrpc_abort_connection(struct rxrpc_connection *conn, + int error, u32 abort_code) +{ + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[2]; + __be32 word; + size_t len; + u32 serial; + int ret; + + _enter("%d,,%u,%u", conn->debug_id, error, abort_code); + + /* generate a connection-level abort */ + spin_lock_bh(&conn->state_lock); + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { + spin_unlock_bh(&conn->state_lock); + _leave(" = 0 [already dead]"); + return 0; + } + + conn->error = error; + conn->abort_code = abort_code; + conn->state = RXRPC_CONN_LOCALLY_ABORTED; + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + spin_unlock_bh(&conn->state_lock); + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(conn->proto.cid); + whdr.callNumber = 0; + whdr.seq = 0; + whdr.type = RXRPC_PACKET_TYPE_ABORT; + whdr.flags = conn->out_clientflag; + whdr.userStatus = 0; + whdr.securityIndex = conn->security_ix; + whdr._rsvd = 0; + whdr.serviceId = htons(conn->service_id); + + word = htonl(conn->abort_code); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &word; + iov[1].iov_len = sizeof(word); + + len = iov[0].iov_len + iov[1].iov_len; + + serial = atomic_inc_return(&conn->serial); + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); + whdr.serial = htonl(serial); + _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_conn_abort); + _debug("sendmsg failed: %d", ret); + return -EAGAIN; + } + + trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); + + conn->params.peer->last_tx_at = ktime_get_seconds(); + + _leave(" = 0"); + return 0; +} + +/* + * mark a call as being on a now-secured channel + * - must be called with BH's disabled. + */ +static void rxrpc_call_is_secure(struct rxrpc_call *call) +{ + _enter("%p", call); + if (call) { + write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_SERVER_SECURING) { + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + rxrpc_notify_socket(call); + } + write_unlock_bh(&call->state_lock); + } +} + +/* + * connection-level Rx packet processor + */ +static int rxrpc_process_event(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + __be32 wtmp; + u32 abort_code; + int loop, ret; + + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { + _leave(" = -ECONNABORTED [%u]", conn->state); + return -ECONNABORTED; + } + + _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit_call(conn, skb, + sp->hdr.cid & RXRPC_CHANNELMASK); + return 0; + + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. */ + return 0; + + case RXRPC_PACKET_TYPE_ABORT: + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_abort")); + return -EPROTO; + } + abort_code = ntohl(wtmp); + _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); + + conn->error = -ECONNABORTED; + conn->abort_code = abort_code; + conn->state = RXRPC_CONN_REMOTELY_ABORTED; + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); + return -ECONNABORTED; + + case RXRPC_PACKET_TYPE_CHALLENGE: + return conn->security->respond_to_challenge(conn, skb, + _abort_code); + + case RXRPC_PACKET_TYPE_RESPONSE: + ret = conn->security->verify_response(conn, skb, _abort_code); + if (ret < 0) + return ret; + + ret = conn->security->init_connection_security(conn); + if (ret < 0) + return ret; + + ret = conn->security->prime_packet_security(conn); + if (ret < 0) + return ret; + + spin_lock(&conn->bundle->channel_lock); + spin_lock_bh(&conn->state_lock); + + if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { + conn->state = RXRPC_CONN_SERVICE; + spin_unlock_bh(&conn->state_lock); + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure( + rcu_dereference_protected( + conn->channels[loop].call, + lockdep_is_held(&conn->bundle->channel_lock))); + } else { + spin_unlock_bh(&conn->state_lock); + } + + spin_unlock(&conn->bundle->channel_lock); + return 0; + + default: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_conn_pkt")); + return -EPROTO; + } +} + +/* + * set up security and issue a challenge + */ +static void rxrpc_secure_connection(struct rxrpc_connection *conn) +{ + u32 abort_code; + int ret; + + _enter("{%d}", conn->debug_id); + + ASSERT(conn->security_ix != 0); + ASSERT(conn->server_key); + + if (conn->security->issue_challenge(conn) < 0) { + abort_code = RX_CALL_DEAD; + ret = -ENOMEM; + goto abort; + } + + _leave(""); + return; + +abort: + _debug("abort %d, %d", ret, abort_code); + rxrpc_abort_connection(conn, ret, abort_code); + _leave(" [aborted]"); +} + +/* + * Process delayed final ACKs that we haven't subsumed into a subsequent call. + */ +void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn, bool force) +{ + unsigned long j = jiffies, next_j; + unsigned int channel; + bool set; + +again: + next_j = j + LONG_MAX; + set = false; + for (channel = 0; channel < RXRPC_MAXCALLS; channel++) { + struct rxrpc_channel *chan = &conn->channels[channel]; + unsigned long ack_at; + + if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags)) + continue; + + smp_rmb(); /* vs rxrpc_disconnect_client_call */ + ack_at = READ_ONCE(chan->final_ack_at); + + if (time_before(j, ack_at) && !force) { + if (time_before(ack_at, next_j)) { + next_j = ack_at; + set = true; + } + continue; + } + + if (test_and_clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, + &conn->flags)) + rxrpc_conn_retransmit_call(conn, NULL, channel); + } + + j = jiffies; + if (time_before_eq(next_j, j)) + goto again; + if (set) + rxrpc_reduce_conn_timer(conn, next_j); +} + +/* + * connection-level event processor + */ +static void rxrpc_do_process_connection(struct rxrpc_connection *conn) +{ + struct sk_buff *skb; + u32 abort_code = RX_PROTOCOL_ERROR; + int ret; + + if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) + rxrpc_secure_connection(conn); + + /* Process delayed ACKs whose time has come. */ + if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK) + rxrpc_process_delayed_final_acks(conn, false); + + /* go through the conn-level event packets, releasing the ref on this + * connection that each one has when we've finished with it */ + while ((skb = skb_dequeue(&conn->rx_queue))) { + rxrpc_see_skb(skb, rxrpc_skb_seen); + ret = rxrpc_process_event(conn, skb, &abort_code); + switch (ret) { + case -EPROTO: + case -EKEYEXPIRED: + case -EKEYREJECTED: + goto protocol_error; + case -ENOMEM: + case -EAGAIN: + goto requeue_and_leave; + case -ECONNABORTED: + default: + rxrpc_free_skb(skb, rxrpc_skb_freed); + break; + } + } + + return; + +requeue_and_leave: + skb_queue_head(&conn->rx_queue, skb); + return; + +protocol_error: + if (rxrpc_abort_connection(conn, ret, abort_code) < 0) + goto requeue_and_leave; + rxrpc_free_skb(skb, rxrpc_skb_freed); + return; +} + +void rxrpc_process_connection(struct work_struct *work) +{ + struct rxrpc_connection *conn = + container_of(work, struct rxrpc_connection, processor); + + rxrpc_see_connection(conn); + + if (__rxrpc_use_local(conn->params.local)) { + rxrpc_do_process_connection(conn); + rxrpc_unuse_local(conn->params.local); + } + + rxrpc_put_connection(conn); + _leave(""); + return; +} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c new file mode 100644 index 000000000..d829b9755 --- /dev/null +++ b/net/rxrpc/conn_object.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC virtual connection handler, common bits. + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include "ar-internal.h" + +/* + * Time till a connection expires after last use (in seconds). + */ +unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; +unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; + +static void rxrpc_destroy_connection(struct rcu_head *); + +static void rxrpc_connection_timer(struct timer_list *timer) +{ + struct rxrpc_connection *conn = + container_of(timer, struct rxrpc_connection, timer); + + rxrpc_queue_conn(conn); +} + +/* + * allocate a new connection + */ +struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) +{ + struct rxrpc_connection *conn; + + _enter(""); + + conn = kzalloc(sizeof(struct rxrpc_connection), gfp); + if (conn) { + INIT_LIST_HEAD(&conn->cache_link); + timer_setup(&conn->timer, &rxrpc_connection_timer, 0); + INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_LIST_HEAD(&conn->proc_link); + INIT_LIST_HEAD(&conn->link); + skb_queue_head_init(&conn->rx_queue); + conn->security = &rxrpc_no_security; + spin_lock_init(&conn->state_lock); + conn->debug_id = atomic_inc_return(&rxrpc_debug_id); + conn->size_align = 4; + conn->idle_timestamp = jiffies; + } + + _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); + return conn; +} + +/* + * Look up a connection in the cache by protocol parameters. + * + * If successful, a pointer to the connection is returned, but no ref is taken. + * NULL is returned if there is no match. + * + * When searching for a service call, if we find a peer but no connection, we + * return that through *_peer in case we need to create a new service call. + * + * The caller must be holding the RCU read lock. + */ +struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, + struct sk_buff *skb, + struct rxrpc_peer **_peer) +{ + struct rxrpc_connection *conn; + struct rxrpc_conn_proto k; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct sockaddr_rxrpc srx; + struct rxrpc_peer *peer; + + _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); + + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + goto not_found; + + if (srx.transport.family != local->srx.transport.family && + (srx.transport.family == AF_INET && + local->srx.transport.family != AF_INET6)) { + pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", + srx.transport.family, + local->srx.transport.family); + goto not_found; + } + + k.epoch = sp->hdr.epoch; + k.cid = sp->hdr.cid & RXRPC_CIDMASK; + + if (rxrpc_to_server(sp)) { + /* We need to look up service connections by the full protocol + * parameter set. We look up the peer first as an intermediate + * step and then the connection from the peer's tree. + */ + peer = rxrpc_lookup_peer_rcu(local, &srx); + if (!peer) + goto not_found; + *_peer = peer; + conn = rxrpc_find_service_conn_rcu(peer, skb); + if (!conn || refcount_read(&conn->ref) == 0) + goto not_found; + _leave(" = %p", conn); + return conn; + } else { + /* Look up client connections by connection ID alone as their + * IDs are unique for this machine. + */ + conn = idr_find(&rxrpc_client_conn_ids, + sp->hdr.cid >> RXRPC_CIDSHIFT); + if (!conn || refcount_read(&conn->ref) == 0) { + _debug("no conn"); + goto not_found; + } + + if (conn->proto.epoch != k.epoch || + conn->params.local != local) + goto not_found; + + peer = conn->params.peer; + switch (srx.transport.family) { + case AF_INET: + if (peer->srx.transport.sin.sin_port != + srx.transport.sin.sin_port || + peer->srx.transport.sin.sin_addr.s_addr != + srx.transport.sin.sin_addr.s_addr) + goto not_found; + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx.transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; +#endif + default: + BUG(); + } + + _leave(" = %p", conn); + return conn; + } + +not_found: + _leave(" = NULL"); + return NULL; +} + +/* + * Disconnect a call and clear any channel it occupies when that call + * terminates. The caller must hold the channel_lock and must release the + * call's ref on the connection. + */ +void __rxrpc_disconnect_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) +{ + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; + + _enter("%d,%x", conn->debug_id, call->cid); + + if (rcu_access_pointer(chan->call) == call) { + /* Save the result of the call so that we can repeat it if necessary + * through the channel, whilst disposing of the actual call record. + */ + trace_rxrpc_disconnect_call(call); + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + chan->last_seq = call->rx_hard_ack; + chan->last_type = RXRPC_PACKET_TYPE_ACK; + break; + case RXRPC_CALL_LOCALLY_ABORTED: + chan->last_abort = call->abort_code; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + break; + default: + chan->last_abort = RX_CALL_DEAD; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + break; + } + + /* Sync with rxrpc_conn_retransmit(). */ + smp_wmb(); + chan->last_call = chan->call_id; + chan->call_id = chan->call_counter; + + rcu_assign_pointer(chan->call, NULL); + } + + _leave(""); +} + +/* + * Disconnect a call and clear any channel it occupies when that call + * terminates. + */ +void rxrpc_disconnect_call(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn = call->conn; + + call->peer->cong_cwnd = call->cong_cwnd; + + if (!hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_del_rcu(&call->error_link); + spin_unlock_bh(&call->peer->lock); + } + + if (rxrpc_is_client_call(call)) + return rxrpc_disconnect_client_call(conn->bundle, call); + + spin_lock(&conn->bundle->channel_lock); + __rxrpc_disconnect_call(conn, call); + spin_unlock(&conn->bundle->channel_lock); + + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + conn->idle_timestamp = jiffies; +} + +/* + * Kill off a connection. + */ +void rxrpc_kill_connection(struct rxrpc_connection *conn) +{ + struct rxrpc_net *rxnet = conn->params.local->rxnet; + + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); + + write_lock(&rxnet->conn_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxnet->conn_lock); + + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); + + /* Leave final destruction to RCU. The connection processor work item + * must carry a ref on the connection to prevent us getting here whilst + * it is queued or running. + */ + call_rcu(&conn->rcu, rxrpc_destroy_connection); +} + +/* + * Queue a connection's work processor, getting a ref to pass to the work + * queue. + */ +bool rxrpc_queue_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int r; + + if (!__refcount_inc_not_zero(&conn->ref, &r)) + return false; + if (rxrpc_queue_work(&conn->processor)) + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); + else + rxrpc_put_connection(conn); + return true; +} + +/* + * Note the re-emergence of a connection. + */ +void rxrpc_see_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + if (conn) { + int n = refcount_read(&conn->ref); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); + } +} + +/* + * Get a ref on a connection. + */ +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int r; + + __refcount_inc(&conn->ref, &r); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); + return conn; +} + +/* + * Try to get a ref on a connection. + */ +struct rxrpc_connection * +rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int r; + + if (conn) { + if (__refcount_inc_not_zero(&conn->ref, &r)) + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); + else + conn = NULL; + } + return conn; +} + +/* + * Set the service connection reap timer. + */ +static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, + unsigned long reap_at) +{ + if (rxnet->live) + timer_reduce(&rxnet->service_conn_reap_timer, reap_at); +} + +/* + * Release a service connection + */ +void rxrpc_put_service_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; + int r; + + __refcount_dec(&conn->ref, &r); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); + if (r - 1 == 1) + rxrpc_set_service_reap_timer(conn->params.local->rxnet, + jiffies + rxrpc_connection_expiry); +} + +/* + * destroy a virtual connection + */ +static void rxrpc_destroy_connection(struct rcu_head *rcu) +{ + struct rxrpc_connection *conn = + container_of(rcu, struct rxrpc_connection, rcu); + + _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); + + ASSERTCMP(refcount_read(&conn->ref), ==, 0); + + _net("DESTROY CONN %d", conn->debug_id); + + del_timer_sync(&conn->timer); + rxrpc_purge_queue(&conn->rx_queue); + + conn->security->clear(conn); + key_put(conn->params.key); + key_put(conn->server_key); + rxrpc_put_bundle(conn->bundle); + rxrpc_put_peer(conn->params.peer); + + if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns)) + wake_up_var(&conn->params.local->rxnet->nr_conns); + rxrpc_put_local(conn->params.local); + + kfree(conn); + _leave(""); +} + +/* + * reap dead service connections + */ +void rxrpc_service_connection_reaper(struct work_struct *work) +{ + struct rxrpc_connection *conn, *_p; + struct rxrpc_net *rxnet = + container_of(work, struct rxrpc_net, service_conn_reaper); + unsigned long expire_at, earliest, idle_timestamp, now; + + LIST_HEAD(graveyard); + + _enter(""); + + now = jiffies; + earliest = now + MAX_JIFFY_OFFSET; + + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { + ASSERTCMP(refcount_read(&conn->ref), >, 0); + if (likely(refcount_read(&conn->ref) > 1)) + continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; + + if (rxnet->live && !conn->params.local->dead) { + idle_timestamp = READ_ONCE(conn->idle_timestamp); + expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; + if (conn->params.local->service_closed) + expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; + + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, refcount_read(&conn->ref), + (long)expire_at - (long)now); + + if (time_before(now, expire_at)) { + if (time_before(expire_at, earliest)) + earliest = expire_at; + continue; + } + } + + /* The usage count sits at 1 whilst the object is unused on the + * list; we reduce that to 0 to make the object unavailable. + */ + if (!refcount_dec_if_one(&conn->ref)) + continue; + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); + + if (rxrpc_conn_is_client(conn)) + BUG(); + else + rxrpc_unpublish_service_conn(conn); + + list_move_tail(&conn->link, &graveyard); + } + write_unlock(&rxnet->conn_lock); + + if (earliest != now + MAX_JIFFY_OFFSET) { + _debug("reschedule reaper %ld", (long)earliest - (long)now); + ASSERT(time_after(earliest, now)); + rxrpc_set_service_reap_timer(rxnet, earliest); + } + + while (!list_empty(&graveyard)) { + conn = list_entry(graveyard.next, struct rxrpc_connection, + link); + list_del_init(&conn->link); + + ASSERTCMP(refcount_read(&conn->ref), ==, 0); + rxrpc_kill_connection(conn); + } + + _leave(""); +} + +/* + * preemptively destroy all the service connection records rather than + * waiting for them to time out + */ +void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) +{ + struct rxrpc_connection *conn, *_p; + bool leak = false; + + _enter(""); + + atomic_dec(&rxnet->nr_conns); + rxrpc_destroy_all_client_connections(rxnet); + + del_timer_sync(&rxnet->service_conn_reap_timer); + rxrpc_queue_work(&rxnet->service_conn_reaper); + flush_workqueue(rxrpc_workqueue); + + write_lock(&rxnet->conn_lock); + list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { + pr_err("AF_RXRPC: Leaked conn %p {%d}\n", + conn, refcount_read(&conn->ref)); + leak = true; + } + write_unlock(&rxnet->conn_lock); + BUG_ON(leak); + + ASSERT(list_empty(&rxnet->conn_proc_list)); + + /* We need to wait for the connections to be destroyed by RCU as they + * pin things that we still need to get rid of. + */ + wait_var_event(&rxnet->nr_conns, !atomic_read(&rxnet->nr_conns)); + _leave(""); +} diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c new file mode 100644 index 000000000..68508166b --- /dev/null +++ b/net/rxrpc/conn_service.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Service connection management + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/slab.h> +#include "ar-internal.h" + +static struct rxrpc_bundle rxrpc_service_dummy_bundle = { + .ref = REFCOUNT_INIT(1), + .debug_id = UINT_MAX, + .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock), +}; + +/* + * Find a service connection under RCU conditions. + * + * We could use a hash table, but that is subject to bucket stuffing by an + * attacker as the client gets to pick the epoch and cid values and would know + * the hash function. So, instead, we use a hash table for the peer and from + * that an rbtree to find the service connection. Under ordinary circumstances + * it might be slower than a large hash table, but it is at least limited in + * depth. + */ +struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, + struct sk_buff *skb) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_conn_proto k; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rb_node *p; + unsigned int seq = 0; + + k.epoch = sp->hdr.epoch; + k.cid = sp->hdr.cid & RXRPC_CIDMASK; + + do { + /* Unfortunately, rbtree walking doesn't give reliable results + * under just the RCU read lock, so we have to check for + * changes. + */ + read_seqbegin_or_lock(&peer->service_conn_lock, &seq); + + p = rcu_dereference_raw(peer->service_conns.rb_node); + while (p) { + conn = rb_entry(p, struct rxrpc_connection, service_node); + + if (conn->proto.index_key < k.index_key) + p = rcu_dereference_raw(p->rb_left); + else if (conn->proto.index_key > k.index_key) + p = rcu_dereference_raw(p->rb_right); + else + break; + conn = NULL; + } + } while (need_seqretry(&peer->service_conn_lock, seq)); + + done_seqretry(&peer->service_conn_lock, seq); + _leave(" = %d", conn ? conn->debug_id : -1); + return conn; +} + +/* + * Insert a service connection into a peer's tree, thereby making it a target + * for incoming packets. + */ +static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, + struct rxrpc_connection *conn) +{ + struct rxrpc_connection *cursor = NULL; + struct rxrpc_conn_proto k = conn->proto; + struct rb_node **pp, *parent; + + write_seqlock_bh(&peer->service_conn_lock); + + pp = &peer->service_conns.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + cursor = rb_entry(parent, + struct rxrpc_connection, service_node); + + if (cursor->proto.index_key < k.index_key) + pp = &(*pp)->rb_left; + else if (cursor->proto.index_key > k.index_key) + pp = &(*pp)->rb_right; + else + goto found_extant_conn; + } + + rb_link_node_rcu(&conn->service_node, parent, pp); + rb_insert_color(&conn->service_node, &peer->service_conns); +conn_published: + set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); + write_sequnlock_bh(&peer->service_conn_lock); + _leave(" = %d [new]", conn->debug_id); + return; + +found_extant_conn: + if (refcount_read(&cursor->ref) == 0) + goto replace_old_connection; + write_sequnlock_bh(&peer->service_conn_lock); + /* We should not be able to get here. rxrpc_incoming_connection() is + * called in a non-reentrant context, so there can't be a race to + * insert a new connection. + */ + BUG(); + +replace_old_connection: + /* The old connection is from an outdated epoch. */ + _debug("replace conn"); + rb_replace_node_rcu(&cursor->service_node, + &conn->service_node, + &peer->service_conns); + clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &cursor->flags); + goto conn_published; +} + +/* + * Preallocate a service connection. The connection is placed on the proc and + * reap lists so that we don't have to get the lock from BH context. + */ +struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet, + gfp_t gfp) +{ + struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + + if (conn) { + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; + refcount_set(&conn->ref, 2); + conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); + + atomic_inc(&rxnet->nr_conns); + write_lock(&rxnet->conn_lock); + list_add_tail(&conn->link, &rxnet->service_conns); + list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); + write_unlock(&rxnet->conn_lock); + + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, + refcount_read(&conn->ref), + __builtin_return_address(0)); + } + + return conn; +} + +/* + * Set up an incoming connection. This is called in BH context with the RCU + * read lock held. + */ +void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, + const struct rxrpc_security *sec, + struct key *key, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _enter(""); + + conn->proto.epoch = sp->hdr.epoch; + conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; + conn->params.service_id = sp->hdr.serviceId; + conn->service_id = sp->hdr.serviceId; + conn->security_ix = sp->hdr.securityIndex; + conn->out_clientflag = 0; + conn->security = sec; + conn->server_key = key_get(key); + if (conn->security_ix) + conn->state = RXRPC_CONN_SERVICE_UNSECURED; + else + conn->state = RXRPC_CONN_SERVICE; + + /* See if we should upgrade the service. This can only happen on the + * first packet on a new connection. Once done, it applies to all + * subsequent calls on that connection. + */ + if (sp->hdr.userStatus == RXRPC_USERSTATUS_SERVICE_UPGRADE && + conn->service_id == rx->service_upgrade.from) + conn->service_id = rx->service_upgrade.to; + + /* Make the connection a target for incoming packets. */ + rxrpc_publish_service_conn(conn->params.peer, conn); + + _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); +} + +/* + * Remove the service connection from the peer's tree, thereby removing it as a + * target for incoming packets. + */ +void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_peer *peer = conn->params.peer; + + write_seqlock_bh(&peer->service_conn_lock); + if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags)) + rb_erase(&conn->service_node, &peer->service_conns); + write_sequnlock_bh(&peer->service_conn_lock); +} diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c new file mode 100644 index 000000000..e9178115a --- /dev/null +++ b/net/rxrpc/input.c @@ -0,0 +1,1475 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC packet reception + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/errqueue.h> +#include <linux/udp.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/icmp.h> +#include <linux/gfp.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <net/ip.h> +#include <net/udp.h> +#include <net/net_namespace.h> +#include "ar-internal.h" + +static void rxrpc_proto_abort(const char *why, + struct rxrpc_call *call, rxrpc_seq_t seq) +{ + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } +} + +/* + * Do TCP-style congestion management [RFC 5681]. + */ +static void rxrpc_congestion_management(struct rxrpc_call *call, + struct sk_buff *skb, + struct rxrpc_ack_summary *summary, + rxrpc_serial_t acked_serial) +{ + enum rxrpc_congest_change change = rxrpc_cong_no_change; + unsigned int cumulative_acks = call->cong_cumul_acks; + unsigned int cwnd = call->cong_cwnd; + bool resend = false; + + summary->flight_size = + (call->tx_top - call->tx_hard_ack) - summary->nr_acks; + + if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { + summary->retrans_timeo = true; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = 1; + if (cwnd >= call->cong_ssthresh && + call->cong_mode == RXRPC_CALL_SLOW_START) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + cumulative_acks = 0; + } + } + + cumulative_acks += summary->nr_new_acks; + cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; + + summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; + summary->dup_acks = call->cong_dup_acks; + + switch (call->cong_mode) { + case RXRPC_CALL_SLOW_START: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + if (summary->cumulative_acks > 0) + cwnd += 1; + if (cwnd >= call->cong_ssthresh) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + } + goto out; + + case RXRPC_CALL_CONGEST_AVOIDANCE: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + + /* We analyse the number of packets that get ACK'd per RTT + * period and increase the window if we managed to fill it. + */ + if (call->peer->rtt_count == 0) + goto out; + if (ktime_before(skb->tstamp, + ktime_add_us(call->cong_tstamp, + call->peer->srtt_us >> 3))) + goto out_no_clear_ca; + change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = skb->tstamp; + if (cumulative_acks >= cwnd) + cwnd++; + goto out; + + case RXRPC_CALL_PACKET_LOSS: + if (summary->nr_nacks == 0) + goto resume_normality; + + if (summary->new_low_nack) { + change = rxrpc_cong_new_low_nack; + call->cong_dup_acks = 1; + if (call->cong_extra > 1) + call->cong_extra = 1; + goto send_extra_data; + } + + call->cong_dup_acks++; + if (call->cong_dup_acks < 3) + goto send_extra_data; + + change = rxrpc_cong_begin_retransmission; + call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = call->cong_ssthresh + 3; + call->cong_extra = 0; + call->cong_dup_acks = 0; + resend = true; + goto out; + + case RXRPC_CALL_FAST_RETRANSMIT: + if (!summary->new_low_nack) { + if (summary->nr_new_acks == 0) + cwnd += 1; + call->cong_dup_acks++; + if (call->cong_dup_acks == 2) { + change = rxrpc_cong_retransmit_again; + call->cong_dup_acks = 0; + resend = true; + } + } else { + change = rxrpc_cong_progress; + cwnd = call->cong_ssthresh; + if (summary->nr_nacks == 0) + goto resume_normality; + } + goto out; + + default: + BUG(); + goto out; + } + +resume_normality: + change = rxrpc_cong_cleared_nacks; + call->cong_dup_acks = 0; + call->cong_extra = 0; + call->cong_tstamp = skb->tstamp; + if (cwnd < call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_SLOW_START; + else + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; +out: + cumulative_acks = 0; +out_no_clear_ca: + if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) + cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; + trace_rxrpc_congest(call, summary, acked_serial, change); + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + return; + +packet_loss_detected: + change = rxrpc_cong_saw_nack; + call->cong_mode = RXRPC_CALL_PACKET_LOSS; + call->cong_dup_acks = 0; + goto send_extra_data; + +send_extra_data: + /* Send some previously unsent DATA if we have some to advance the ACK + * state. + */ + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST || + summary->nr_acks != call->tx_top - call->tx_hard_ack) { + call->cong_extra++; + wake_up(&call->waitq); + } + goto out_no_clear_ca; +} + +/* + * Apply a hard ACK by advancing the Tx window. + */ +static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + struct rxrpc_ack_summary *summary) +{ + struct sk_buff *skb, *list = NULL; + bool rot_last = false; + int ix; + u8 annotation; + + if (call->acks_lowest_nak == call->tx_hard_ack) { + call->acks_lowest_nak = to; + } else if (before_eq(call->acks_lowest_nak, to)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; + } + + spin_lock(&call->lock); + + while (before(call->tx_hard_ack, to)) { + call->tx_hard_ack++; + ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + annotation = call->rxtx_annotations[ix]; + rxrpc_see_skb(skb, rxrpc_skb_rotated); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + skb->next = list; + list = skb; + + if (annotation & RXRPC_TX_ANNO_LAST) { + set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; + } + if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) + summary->nr_rot_new_acks++; + } + + spin_unlock(&call->lock); + + trace_rxrpc_transmit(call, (rot_last ? + rxrpc_transmit_rotate_last : + rxrpc_transmit_rotate)); + wake_up(&call->waitq); + + while (list) { + skb = list; + list = skb->next; + skb_mark_not_on_list(skb); + rxrpc_free_skb(skb, rxrpc_skb_freed); + } + + return rot_last; +} + +/* + * End the transmission phase of a call. + * + * This occurs when we get an ACKALL packet, the first DATA packet of a reply, + * or a final ACK packet. + */ +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + const char *abort_why) +{ + unsigned int state; + + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); + + write_lock(&call->state_lock); + + state = call->state; + switch (state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + if (reply_begun) + call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY; + else + call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; + + case RXRPC_CALL_SERVER_AWAIT_ACK: + __rxrpc_call_completed(call); + state = call->state; + break; + + default: + goto bad_state; + } + + write_unlock(&call->state_lock); + if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY) + trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + else + trace_rxrpc_transmit(call, rxrpc_transmit_end); + _leave(" = ok"); + return true; + +bad_state: + write_unlock(&call->state_lock); + kdebug("end_tx %s", rxrpc_call_states[call->state]); + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; +} + +/* + * Begin the reply reception phase of a call. + */ +static bool rxrpc_receiving_reply(struct rxrpc_call *call) +{ + struct rxrpc_ack_summary summary = { 0 }; + unsigned long now, timo; + rxrpc_seq_t top = READ_ONCE(call->tx_top); + + if (call->ackr_reason) { + spin_lock_bh(&call->lock); + call->ackr_reason = 0; + spin_unlock_bh(&call->lock); + now = jiffies; + timo = now + MAX_JIFFY_OFFSET; + WRITE_ONCE(call->resend_at, timo); + WRITE_ONCE(call->ack_at, timo); + trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); + } + + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + if (!rxrpc_rotate_tx_window(call, top, &summary)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } + } + if (!rxrpc_end_tx_phase(call, true, "ETD")) + return false; + call->tx_phase = false; + return true; +} + +/* + * Scan a data packet to validate its structure and to work out how many + * subpackets it contains. + * + * A jumbo packet is a collection of consecutive packets glued together with + * little headers between that indicate how to change the initial header for + * each subpacket. + * + * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but + * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any + * size. + */ +static bool rxrpc_validate_data(struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = skb->len; + u8 flags = sp->hdr.flags; + + for (;;) { + if (flags & RXRPC_REQUEST_ACK) + __set_bit(sp->nr_subpackets, sp->rx_req_ack); + sp->nr_subpackets++; + + if (!(flags & RXRPC_JUMBO_PACKET)) + break; + + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (flags & RXRPC_LAST_PACKET) + goto protocol_error; + offset += RXRPC_JUMBO_DATALEN; + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + goto protocol_error; + offset += sizeof(struct rxrpc_jumbo_header); + } + + if (flags & RXRPC_LAST_PACKET) + sp->rx_flags |= RXRPC_SKB_INCL_LAST; + return true; + +protocol_error: + return false; +} + +/* + * Handle reception of a duplicate packet. + * + * We have to take care to avoid an attack here whereby we're given a series of + * jumbograms, each with a sequence number one before the preceding one and + * filled up to maximum UDP size. If they never send us the first packet in + * the sequence, they can cause us to have to hold on to around 2MiB of kernel + * space until the call times out. + * + * We limit the space usage by only accepting three duplicate jumbo packets per + * call. After that, we tell the other side we're no longer accepting jumbos + * (that information is encoded in the ACK packet). + */ +static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, + bool is_jumbo, bool *_jumbo_bad) +{ + /* Discard normal packets that are duplicates. */ + if (is_jumbo) + return; + + /* Skip jumbo subpackets that are duplicates. When we've had three or + * more partially duplicate jumbo packets, we refuse to take any more + * jumbos for this call. + */ + if (!*_jumbo_bad) { + call->nr_jumbo_bad++; + *_jumbo_bad = true; + } +} + +/* + * Process a DATA packet, adding the packet to the Rx ring. The caller's + * packet ref must be passed on or discarded. + */ +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + enum rxrpc_call_state state; + unsigned int j, nr_subpackets, nr_unacked = 0; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = serial; + rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_bad = false; + u8 ack = 0; + + _enter("{%u,%u},{%u,%u}", + call->rx_hard_ack, call->rx_top, skb->len, seq0); + + _proto("Rx DATA %%%u { #%u f=%02x n=%u }", + sp->hdr.serial, seq0, sp->hdr.flags, sp->nr_subpackets); + + state = READ_ONCE(call->state); + if (state >= RXRPC_CALL_COMPLETE) { + rxrpc_free_skb(skb, rxrpc_skb_freed); + return; + } + + if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { + unsigned long timo = READ_ONCE(call->next_req_timo); + unsigned long now, expect_req_by; + + if (timo) { + now = jiffies; + expect_req_by = now + timo; + WRITE_ONCE(call->expect_req_by, expect_req_by); + rxrpc_reduce_call_timer(call, expect_req_by, now, + rxrpc_timer_set_for_idle); + } + } + + spin_lock(&call->input_lock); + + /* Received data implicitly ACKs all of the request packets we sent + * when we're acting as a client. + */ + if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || + state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + !rxrpc_receiving_reply(call)) + goto unlock; + + hard_ack = READ_ONCE(call->rx_hard_ack); + + nr_subpackets = sp->nr_subpackets; + if (nr_subpackets > 1) { + if (call->nr_jumbo_bad > 3) { + ack = RXRPC_ACK_NOSPACE; + ack_serial = serial; + goto ack; + } + } + + for (j = 0; j < nr_subpackets; j++) { + rxrpc_serial_t serial = sp->hdr.serial + j; + rxrpc_seq_t seq = seq0 + j; + unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK; + bool terminal = (j == nr_subpackets - 1); + bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST); + u8 flags, annotation = j; + + _proto("Rx DATA+%u %%%u { #%x t=%u l=%u }", + j, serial, seq, terminal, last); + + if (last) { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) { + rxrpc_proto_abort("LSN", call, seq); + goto unlock; + } + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) { + rxrpc_proto_abort("LSA", call, seq); + goto unlock; + } + } + + flags = 0; + if (last) + flags |= RXRPC_LAST_PACKET; + if (!terminal) + flags |= RXRPC_JUMBO_PACKET; + if (test_bit(j, sp->rx_req_ack)) + flags |= RXRPC_REQUEST_ACK; + trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation); + + if (before_eq(seq, hard_ack)) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + continue; + } + + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, nr_subpackets > 1, + &jumbo_bad); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + } + immediate_ack = true; + continue; + } + + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + if (flags & RXRPC_JUMBO_PACKET) { + if (!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } + } + + goto ack; + } + + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + if (after(seq0, call->ackr_highest_seq)) + call->ackr_highest_seq = seq0; + + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + if (!terminal) + rxrpc_get_skb(skb, rxrpc_skb_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) { + smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } + + if (terminal) { + /* From this point on, we're not allowed to touch the + * packet any longer as its ref now belongs to the Rx + * ring. + */ + skb = NULL; + sp = NULL; + } + + nr_unacked++; + + if (last) { + set_bit(RXRPC_CALL_RX_LAST, &call->flags); + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; + } + if (!ack) + ack_serial = serial; + } + +ack: + if (atomic_add_return(nr_unacked, &call->ackr_nr_unacked) > 2 && !ack) + ack = RXRPC_ACK_IDLE; + + if (ack) + rxrpc_propose_ACK(call, ack, ack_serial, + immediate_ack, true, + rxrpc_propose_ack_input_data); + else + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, + false, true, + rxrpc_propose_ack_input_data); + + trace_rxrpc_notify_socket(call->debug_id, serial); + rxrpc_notify_socket(call); + +unlock: + spin_unlock(&call->input_lock); + rxrpc_free_skb(skb, rxrpc_skb_freed); + _leave(" [queued]"); +} + +/* + * See if there's a cached RTT probe to complete. + */ +static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t acked_serial, + rxrpc_serial_t ack_serial, + enum rxrpc_rtt_rx_trace type) +{ + rxrpc_serial_t orig_serial; + unsigned long avail; + ktime_t sent_at; + bool matched = false; + int i; + + avail = READ_ONCE(call->rtt_avail); + smp_rmb(); /* Read avail bits before accessing data. */ + + for (i = 0; i < ARRAY_SIZE(call->rtt_serial); i++) { + if (!test_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &avail)) + continue; + + sent_at = call->rtt_sent_at[i]; + orig_serial = call->rtt_serial[i]; + + if (orig_serial == acked_serial) { + clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + smp_mb(); /* Read data before setting avail bit */ + set_bit(i, &call->rtt_avail); + if (type != rxrpc_rtt_rx_cancel) + rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + sent_at, resp_time); + else + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_cancel, i, + orig_serial, acked_serial, 0, 0); + matched = true; + } + + /* If a later serial is being acked, then mark this slot as + * being available. + */ + if (after(acked_serial, orig_serial)) { + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_obsolete, i, + orig_serial, acked_serial, 0, 0); + clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + smp_wmb(); + set_bit(i, &call->rtt_avail); + } + } + + if (!matched) + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0); +} + +/* + * Process the response to a ping that we sent to find out if we lost an ACK. + * + * If we got back a ping response that indicates a lower tx_top than what we + * had at the time of the ping transmission, we adjudge all the DATA packets + * sent between the response tx_top and the ping-time tx_top to have been lost. + */ +static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) +{ + rxrpc_seq_t top, bottom, seq; + bool resend = false; + + spin_lock_bh(&call->lock); + + bottom = call->tx_hard_ack + 1; + top = call->acks_lost_top; + if (before(bottom, top)) { + for (seq = bottom; before_eq(seq, top); seq++) { + int ix = seq & RXRPC_RXTX_BUFF_MASK; + u8 annotation = call->rxtx_annotations[ix]; + u8 anno_type = annotation & RXRPC_TX_ANNO_MASK; + + if (anno_type != RXRPC_TX_ANNO_UNACK) + continue; + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_RETRANS; + call->rxtx_annotations[ix] = annotation; + resend = true; + } + } + + spin_unlock_bh(&call->lock); + + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); +} + +/* + * Process a ping response. + */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t acked_serial, + rxrpc_serial_t ack_serial) +{ + if (acked_serial == call->acks_lost_ping) + rxrpc_input_check_for_lost_ack(call); +} + +/* + * Process the extra information that may be appended to an ACK packet + */ +static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_ackinfo *ackinfo) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_peer *peer; + unsigned int mtu; + bool wake = false; + u32 rwind = ntohl(ackinfo->rwind); + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + sp->hdr.serial, + ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), + rwind, ntohl(ackinfo->jumbo_max)); + + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + if (call->tx_winsize != rwind) { + if (rwind > call->tx_winsize) + wake = true; + trace_rxrpc_rx_rwind_change(call, sp->hdr.serial, rwind, wake); + call->tx_winsize = rwind; + } + + if (call->cong_ssthresh > rwind) + call->cong_ssthresh = rwind; + + mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + + peer = call->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } + + if (wake) + wake_up(&call->waitq); +} + +/* + * Process individual soft ACKs. + * + * Each ACK in the array corresponds to one packet and can be either an ACK or + * a NAK. If we get find an explicitly NAK'd packet we resend immediately; + * packets that lie beyond the end of the ACK list are scheduled for resend by + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. + */ +static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, + rxrpc_seq_t seq, int nr_acks, + struct rxrpc_ack_summary *summary) +{ + int ix; + u8 annotation, anno_type; + + for (; nr_acks > 0; nr_acks--, seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + switch (*acks++) { + case RXRPC_ACK_TYPE_ACK: + summary->nr_acks++; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + summary->nr_new_acks++; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; + break; + case RXRPC_ACK_TYPE_NACK: + if (!summary->nr_nacks && + call->acks_lowest_nak != seq) { + call->acks_lowest_nak = seq; + summary->new_low_nack = true; + } + summary->nr_nacks++; + if (anno_type == RXRPC_TX_ANNO_NAK) + continue; + summary->nr_new_nacks++; + if (anno_type == RXRPC_TX_ANNO_RETRANS) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; + break; + default: + return rxrpc_proto_abort("SFT", call, 0); + } + } +} + +/* + * Return true if the ACK is valid - ie. it doesn't appear to have regressed + * with respect to the ack state conveyed by preceding ACKs. + */ +static bool rxrpc_is_ack_valid(struct rxrpc_call *call, + rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) +{ + rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); + + if (after(first_pkt, base)) + return true; /* The window advanced */ + + if (before(first_pkt, base)) + return false; /* firstPacket regressed */ + + if (after_eq(prev_pkt, call->acks_prev_seq)) + return true; /* previousPacket hasn't regressed. */ + + /* Some rx implementations put a serial number in previousPacket. */ + if (after_eq(prev_pkt, base + call->tx_winsize)) + return false; + return true; +} + +/* + * Process an ACK packet. + * + * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet + * in the ACK array. Anything before that is hard-ACK'd and may be discarded. + * + * A hard-ACK means that a packet has been processed and may be discarded; a + * soft-ACK means that the packet may be discarded and retransmission + * requested. A phase is complete when all packets are hard-ACK'd. + */ +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + union { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + u8 acks[RXRPC_MAXACKS]; + } buf; + rxrpc_serial_t ack_serial, acked_serial; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; + int nr_acks, offset, ioffset; + + _enter(""); + + offset = sizeof(struct rxrpc_wire_header); + if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { + _debug("extraction failure"); + return rxrpc_proto_abort("XAK", call, 0); + } + offset += sizeof(buf.ack); + + ack_serial = sp->hdr.serial; + acked_serial = ntohl(buf.ack.serial); + first_soft_ack = ntohl(buf.ack.firstPacket); + prev_pkt = ntohl(buf.ack.previousPacket); + hard_ack = first_soft_ack - 1; + nr_acks = buf.ack.nAcks; + summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); + + trace_rxrpc_rx_ack(call, ack_serial, acked_serial, + first_soft_ack, prev_pkt, + summary.ack_reason, nr_acks); + + switch (buf.ack.reason) { + case RXRPC_ACK_PING_RESPONSE: + rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + ack_serial); + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_ping_response); + break; + case RXRPC_ACK_REQUESTED: + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_requested_ack); + break; + default: + if (acked_serial != 0) + rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, + rxrpc_rtt_rx_cancel); + break; + } + + if (buf.ack.reason == RXRPC_ACK_PING) { + _proto("Rx ACK %%%u PING Request", ack_serial); + rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, + ack_serial, true, true, + rxrpc_propose_ack_respond_to_ping); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, + ack_serial, true, true, + rxrpc_propose_ack_respond_to_ack); + } + + /* Discard any out-of-order or duplicate ACKs (outside lock). */ + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); + return; + } + + buf.info.rxMTU = 0; + ioffset = offset + nr_acks + 3; + if (skb->len >= ioffset + sizeof(buf.info) && + skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + + spin_lock(&call->input_lock); + + /* Discard any out-of-order or duplicate ACKs (inside lock). */ + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, + first_soft_ack, call->acks_first_seq, + prev_pkt, call->acks_prev_seq); + goto out; + } + call->acks_latest_ts = skb->tstamp; + + call->acks_first_seq = first_soft_ack; + call->acks_prev_seq = prev_pkt; + + /* Parse rwind and mtu sizes if provided. */ + if (buf.info.rxMTU) + rxrpc_input_ackinfo(call, skb, &buf.info); + + if (first_soft_ack == 0) { + rxrpc_proto_abort("AK0", call, 0); + goto out; + } + + /* Ignore ACKs unless we are or have just been transmitting. */ + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + goto out; + } + + if (before(hard_ack, call->tx_hard_ack) || + after(hard_ack, call->tx_top)) { + rxrpc_proto_abort("AKW", call, 0); + goto out; + } + if (nr_acks > call->tx_top - hard_ack) { + rxrpc_proto_abort("AKN", call, 0); + goto out; + } + + if (after(hard_ack, call->tx_hard_ack)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + rxrpc_end_tx_phase(call, false, "ETA"); + goto out; + } + } + + if (nr_acks > 0) { + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) { + rxrpc_proto_abort("XSA", call, 0); + goto out; + } + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, + &summary); + } + + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST && + summary.nr_acks == call->tx_top - hard_ack && + rxrpc_is_client_call(call)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, ack_serial, + false, true, + rxrpc_propose_ack_ping_for_lost_reply); + + rxrpc_congestion_management(call, skb, &summary, acked_serial); +out: + spin_unlock(&call->input_lock); +} + +/* + * Process an ACKALL packet. + */ +static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _proto("Rx ACKALL %%%u", sp->hdr.serial); + + spin_lock(&call->input_lock); + + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) + rxrpc_end_tx_phase(call, false, "ETL"); + + spin_unlock(&call->input_lock); +} + +/* + * Process an ABORT packet directed at a call. + */ +static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + __be32 wtmp; + u32 abort_code = RX_CALL_DEAD; + + _enter(""); + + if (skb->len >= 4 && + skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) >= 0) + abort_code = ntohl(wtmp); + + trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code); + + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); + + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, -ECONNABORTED); +} + +/* + * Process an incoming call packet. + */ +static void rxrpc_input_call_packet(struct rxrpc_call *call, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long timo; + + _enter("%p,%p", call, skb); + + timo = READ_ONCE(call->next_rx_timo); + if (timo) { + unsigned long now = jiffies, expect_rx_by; + + expect_rx_by = now + timo; + WRITE_ONCE(call->expect_rx_by, expect_rx_by); + rxrpc_reduce_call_timer(call, expect_rx_by, now, + rxrpc_timer_set_for_normal); + } + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + rxrpc_input_data(call, skb); + goto no_free; + + case RXRPC_PACKET_TYPE_ACK: + rxrpc_input_ack(call, skb); + break; + + case RXRPC_PACKET_TYPE_BUSY: + _proto("Rx BUSY %%%u", sp->hdr.serial); + + /* Just ignore BUSY packets from the server; the retry and + * lifespan timers will take care of business. BUSY packets + * from the client don't make sense. + */ + break; + + case RXRPC_PACKET_TYPE_ABORT: + rxrpc_input_abort(call, skb); + break; + + case RXRPC_PACKET_TYPE_ACKALL: + rxrpc_input_ackall(call, skb); + break; + + default: + break; + } + + rxrpc_free_skb(skb, rxrpc_skb_freed); +no_free: + _leave(""); +} + +/* + * Handle a new service call on a channel implicitly completing the preceding + * call on that channel. This does not apply to client conns. + * + * TODO: If callNumber > call_id + 1, renegotiate security. + */ +static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, + struct rxrpc_connection *conn, + struct rxrpc_call *call) +{ + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_SERVER_AWAIT_ACK: + rxrpc_call_completed(call); + fallthrough; + case RXRPC_CALL_COMPLETE: + break; + default: + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } + trace_rxrpc_improper_term(call); + break; + } + + spin_lock(&rx->incoming_lock); + __rxrpc_disconnect_call(conn, call); + spin_unlock(&rx->incoming_lock); +} + +/* + * post connection-level events to the connection + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. + */ +static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + _enter("%p,%p", conn, skb); + + skb_queue_tail(&conn->rx_queue, skb); + rxrpc_queue_conn(conn); +} + +/* + * post endpoint-level events to the local endpoint + * - this includes debug and version messages + */ +static void rxrpc_post_packet_to_local(struct rxrpc_local *local, + struct sk_buff *skb) +{ + _enter("%p,%p", local, skb); + + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->event_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_freed); + } +} + +/* + * put a packet up for transport-level abort + */ +static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) +{ + if (rxrpc_get_local_maybe(local)) { + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); + } else { + rxrpc_free_skb(skb, rxrpc_skb_freed); + } +} + +/* + * Extract the wire header from a packet and translate the byte order. + */ +static noinline +int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) +{ + struct rxrpc_wire_header whdr; + + /* dig out the RxRPC connection details */ + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_hdr")); + return -EBADMSG; + } + + memset(sp, 0, sizeof(*sp)); + sp->hdr.epoch = ntohl(whdr.epoch); + sp->hdr.cid = ntohl(whdr.cid); + sp->hdr.callNumber = ntohl(whdr.callNumber); + sp->hdr.seq = ntohl(whdr.seq); + sp->hdr.serial = ntohl(whdr.serial); + sp->hdr.flags = whdr.flags; + sp->hdr.type = whdr.type; + sp->hdr.userStatus = whdr.userStatus; + sp->hdr.securityIndex = whdr.securityIndex; + sp->hdr._rsvd = ntohs(whdr._rsvd); + sp->hdr.serviceId = ntohs(whdr.serviceId); + return 0; +} + +/* + * handle data received on the local endpoint + * - may be called in interrupt context + * + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. + * + * Called with the RCU read lock held from the IP layer via UDP. + */ +int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) +{ + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); + struct rxrpc_connection *conn; + struct rxrpc_channel *chan; + struct rxrpc_call *call = NULL; + struct rxrpc_skb_priv *sp; + struct rxrpc_peer *peer = NULL; + struct rxrpc_sock *rx = NULL; + unsigned int channel; + + _enter("%p", udp_sk); + + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + + rxrpc_new_skb(skb, rxrpc_skb_received); + + skb_pull(skb, sizeof(struct udphdr)); + + /* The UDP protocol already released all skb resources; + * we are free to add our own data there. + */ + sp = rxrpc_skb(skb); + + /* dig out the RxRPC connection details */ + if (rxrpc_extract_header(sp, skb) < 0) + goto bad_message; + + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); + rxrpc_free_skb(skb, rxrpc_skb_lost); + return 0; + } + } + + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + trace_rxrpc_rx_packet(sp); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: + if (rxrpc_to_client(sp)) + goto discard; + rxrpc_post_packet_to_local(local, skb); + goto out; + + case RXRPC_PACKET_TYPE_BUSY: + if (rxrpc_to_server(sp)) + goto discard; + fallthrough; + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + fallthrough; + case RXRPC_PACKET_TYPE_ABORT: + break; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) + goto bad_message; + if (!rxrpc_validate_data(skb)) + goto bad_message; + + /* Unshare the packet so that it can be modified for in-place + * decryption. + */ + if (sp->hdr.securityIndex != 0) { + struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC); + if (!nskb) { + rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem); + goto out; + } + + if (nskb != skb) { + rxrpc_eaten_skb(skb, rxrpc_skb_received); + skb = nskb; + rxrpc_new_skb(skb, rxrpc_skb_unshared); + sp = rxrpc_skb(skb); + } + } + break; + + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + goto discard; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + goto discard; + break; + + /* Packet types 9-11 should just be ignored. */ + case RXRPC_PACKET_TYPE_PARAMS: + case RXRPC_PACKET_TYPE_10: + case RXRPC_PACKET_TYPE_11: + goto discard; + + default: + _proto("Rx Bad Packet Type %u", sp->hdr.type); + goto bad_message; + } + + if (sp->hdr.serviceId == 0) + goto bad_message; + + if (rxrpc_to_server(sp)) { + /* Weed out packets to services we're not offering. Packets + * that would begin a call are explicitly rejected and the rest + * are just discarded. + */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service)) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard; + } + } + + conn = rxrpc_find_connection_rcu(local, skb, &peer); + if (conn) { + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; + + if (sp->hdr.serviceId != conn->service_id) { + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->params.service_id, + sp->hdr.serviceId); + + if (old_id != conn->params.service_id && + old_id != sp->hdr.serviceId) + goto reupgrade; + } + + if (sp->hdr.callNumber == 0) { + /* Connection-level packet */ + _debug("CONN %p {%d}", conn, conn->debug_id); + rxrpc_post_packet_to_conn(conn, skb); + goto out; + } + + if ((int)sp->hdr.serial - (int)conn->hi_serial > 0) + conn->hi_serial = sp->hdr.serial; + + /* Call-bound packets are routed by connection channel. */ + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + goto discard; + + if (sp->hdr.callNumber == chan->last_call) { + if (chan->call || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) + goto discard; + + /* For the previous service call, if completed + * successfully, we discard all further packets. + */ + if (rxrpc_conn_is_service(conn) && + chan->last_type == RXRPC_PACKET_TYPE_ACK) + goto discard; + + /* But otherwise we need to retransmit the final packet + * from data cached in the connection record. + */ + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) + trace_rxrpc_rx_data(chan->call_debug_id, + sp->hdr.seq, + sp->hdr.serial, + sp->hdr.flags, 0); + rxrpc_post_packet_to_conn(conn, skb); + goto out; + } + + call = rcu_dereference(chan->call); + + if (sp->hdr.callNumber > chan->call_id) { + if (rxrpc_to_client(sp)) + goto reject_packet; + if (call) + rxrpc_input_implicit_end_call(rx, conn, call); + call = NULL; + } + + if (call) { + if (sp->hdr.serviceId != call->service_id) + call->service_id = sp->hdr.serviceId; + if ((int)sp->hdr.serial - (int)call->rx_serial > 0) + call->rx_serial = sp->hdr.serial; + if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) + set_bit(RXRPC_CALL_RX_HEARD, &call->flags); + } + } + + if (!call || refcount_read(&call->ref) == 0) { + if (rxrpc_to_client(sp) || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message; + if (sp->hdr.seq != 1) + goto discard; + call = rxrpc_new_incoming_call(local, rx, skb); + if (!call) + goto reject_packet; + } + + /* Process a call packet; this either discards or passes on the ref + * elsewhere. + */ + rxrpc_input_call_packet(call, skb); + goto out; + +discard: + rxrpc_free_skb(skb, rxrpc_skb_freed); +out: + trace_rxrpc_rx_done(0, 0); + return 0; + +wrong_security: + trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; + +unsupported_service: + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto post_abort; + +reupgrade: + trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); + goto protocol_error; + +bad_message: + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: + skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; +reject_packet: + trace_rxrpc_rx_done(skb->mark, skb->priority); + rxrpc_reject_packet(local, skb); + _leave(" [badmsg]"); + return 0; +} diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c new file mode 100644 index 000000000..f6c59f5fa --- /dev/null +++ b/net/rxrpc/insecure.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Null security operations. + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static int none_init_connection_security(struct rxrpc_connection *conn) +{ + return 0; +} + +static int none_prime_packet_security(struct rxrpc_connection *conn) +{ + return 0; +} + +static int none_secure_packet(struct rxrpc_call *call, + struct sk_buff *skb, + size_t data_size, + void *sechdr) +{ + return 0; +} + +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) +{ + return 0; +} + +static void none_free_call_crypto(struct rxrpc_call *call) +{ +} + +static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ +} + +static int none_respond_to_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("chall_none")); + return -EPROTO; +} + +static int none_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("resp_none")); + return -EPROTO; +} + +static void none_clear(struct rxrpc_connection *conn) +{ +} + +static int none_init(void) +{ + return 0; +} + +static void none_exit(void) +{ +} + +/* + * RxRPC Kerberos-based security + */ +const struct rxrpc_security rxrpc_no_security = { + .name = "none", + .security_index = RXRPC_SECURITY_NONE, + .init = none_init, + .exit = none_exit, + .init_connection_security = none_init_connection_security, + .prime_packet_security = none_prime_packet_security, + .free_call_crypto = none_free_call_crypto, + .secure_packet = none_secure_packet, + .verify_packet = none_verify_packet, + .locate_data = none_locate_data, + .respond_to_challenge = none_respond_to_challenge, + .verify_response = none_verify_response, + .clear = none_clear, +}; diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c new file mode 100644 index 000000000..979338a64 --- /dev/null +++ b/net/rxrpc/key.c @@ -0,0 +1,1245 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC key management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * RxRPC keys should have a description of describing their purpose: + * "afs@CAMBRIDGE.REDHAT.COM> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/skcipher.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/key-type.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> +#include <keys/user-type.h> +#include "ar-internal.h" + +static int rxrpc_vet_description_s(const char *); +static int rxrpc_preparse(struct key_preparsed_payload *); +static int rxrpc_preparse_s(struct key_preparsed_payload *); +static void rxrpc_free_preparse(struct key_preparsed_payload *); +static void rxrpc_free_preparse_s(struct key_preparsed_payload *); +static void rxrpc_destroy(struct key *); +static void rxrpc_destroy_s(struct key *); +static void rxrpc_describe(const struct key *, struct seq_file *); +static long rxrpc_read(const struct key *, char *, size_t); + +/* + * rxrpc defined keys take an arbitrary string as the description and an + * arbitrary blob of data as the payload + */ +struct key_type key_type_rxrpc = { + .name = "rxrpc", + .flags = KEY_TYPE_NET_DOMAIN, + .preparse = rxrpc_preparse, + .free_preparse = rxrpc_free_preparse, + .instantiate = generic_key_instantiate, + .destroy = rxrpc_destroy, + .describe = rxrpc_describe, + .read = rxrpc_read, +}; +EXPORT_SYMBOL(key_type_rxrpc); + +/* + * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the + * description and an 8-byte decryption key as the payload + */ +struct key_type key_type_rxrpc_s = { + .name = "rxrpc_s", + .flags = KEY_TYPE_NET_DOMAIN, + .vet_description = rxrpc_vet_description_s, + .preparse = rxrpc_preparse_s, + .free_preparse = rxrpc_free_preparse_s, + .instantiate = generic_key_instantiate, + .destroy = rxrpc_destroy_s, + .describe = rxrpc_describe, +}; + +/* + * Vet the description for an RxRPC server key + */ +static int rxrpc_vet_description_s(const char *desc) +{ + unsigned long num; + char *p; + + num = simple_strtoul(desc, &p, 10); + if (*p != ':' || num > 65535) + return -EINVAL; + num = simple_strtoul(p + 1, &p, 10); + if (*p || num < 1 || num > 255) + return -EINVAL; + return 0; +} + +/* + * parse an RxKAD type XDR format token + * - the caller guarantees we have at least 4 words + */ +static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, + size_t datalen, + const __be32 *xdr, unsigned int toklen) +{ + struct rxrpc_key_token *token, **pptoken; + time64_t expiry; + size_t plen; + u32 tktlen; + + _enter(",{%x,%x,%x,%x},%u", + ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), + toklen); + + if (toklen <= 8 * 4) + return -EKEYREJECTED; + tktlen = ntohl(xdr[7]); + _debug("tktlen: %x", tktlen); + if (tktlen > AFSTOKEN_RK_TIX_MAX) + return -EKEYREJECTED; + if (toklen < 8 * 4 + tktlen) + return -EKEYREJECTED; + + plen = sizeof(*token) + sizeof(*token->kad) + tktlen; + prep->quotalen = datalen + plen; + + plen -= sizeof(*token); + token = kzalloc(sizeof(*token), GFP_KERNEL); + if (!token) + return -ENOMEM; + + token->kad = kzalloc(plen, GFP_KERNEL); + if (!token->kad) { + kfree(token); + return -ENOMEM; + } + + token->security_index = RXRPC_SECURITY_RXKAD; + token->kad->ticket_len = tktlen; + token->kad->vice_id = ntohl(xdr[0]); + token->kad->kvno = ntohl(xdr[1]); + token->kad->start = ntohl(xdr[4]); + token->kad->expiry = ntohl(xdr[5]); + token->kad->primary_flag = ntohl(xdr[6]); + memcpy(&token->kad->session_key, &xdr[2], 8); + memcpy(&token->kad->ticket, &xdr[8], tktlen); + + _debug("SCIX: %u", token->security_index); + _debug("TLEN: %u", token->kad->ticket_len); + _debug("EXPY: %x", token->kad->expiry); + _debug("KVNO: %u", token->kad->kvno); + _debug("PRIM: %u", token->kad->primary_flag); + _debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x", + token->kad->session_key[0], token->kad->session_key[1], + token->kad->session_key[2], token->kad->session_key[3], + token->kad->session_key[4], token->kad->session_key[5], + token->kad->session_key[6], token->kad->session_key[7]); + if (token->kad->ticket_len >= 8) + _debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x", + token->kad->ticket[0], token->kad->ticket[1], + token->kad->ticket[2], token->kad->ticket[3], + token->kad->ticket[4], token->kad->ticket[5], + token->kad->ticket[6], token->kad->ticket[7]); + + /* count the number of tokens attached */ + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); + + /* attach the data */ + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; + *pptoken; + pptoken = &(*pptoken)->next) + continue; + *pptoken = token; + expiry = rxrpc_u32_to_time64(token->kad->expiry); + if (expiry < prep->expiry) + prep->expiry = expiry; + + _leave(" = 0"); + return 0; +} + +static void rxrpc_free_krb5_principal(struct krb5_principal *princ) +{ + int loop; + + if (princ->name_parts) { + for (loop = princ->n_name_parts - 1; loop >= 0; loop--) + kfree(princ->name_parts[loop]); + kfree(princ->name_parts); + } + kfree(princ->realm); +} + +static void rxrpc_free_krb5_tagged(struct krb5_tagged_data *td) +{ + kfree(td->data); +} + +/* + * free up an RxK5 token + */ +static void rxrpc_rxk5_free(struct rxk5_key *rxk5) +{ + int loop; + + rxrpc_free_krb5_principal(&rxk5->client); + rxrpc_free_krb5_principal(&rxk5->server); + rxrpc_free_krb5_tagged(&rxk5->session); + + if (rxk5->addresses) { + for (loop = rxk5->n_addresses - 1; loop >= 0; loop--) + rxrpc_free_krb5_tagged(&rxk5->addresses[loop]); + kfree(rxk5->addresses); + } + if (rxk5->authdata) { + for (loop = rxk5->n_authdata - 1; loop >= 0; loop--) + rxrpc_free_krb5_tagged(&rxk5->authdata[loop]); + kfree(rxk5->authdata); + } + + kfree(rxk5->ticket); + kfree(rxk5->ticket2); + kfree(rxk5); +} + +/* + * extract a krb5 principal + */ +static int rxrpc_krb5_decode_principal(struct krb5_principal *princ, + const __be32 **_xdr, + unsigned int *_toklen) +{ + const __be32 *xdr = *_xdr; + unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen; + + /* there must be at least one name, and at least #names+1 length + * words */ + if (toklen <= 12) + return -EINVAL; + + _enter(",{%x,%x,%x},%u", + ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), toklen); + + n_parts = ntohl(*xdr++); + toklen -= 4; + if (n_parts <= 0 || n_parts > AFSTOKEN_K5_COMPONENTS_MAX) + return -EINVAL; + princ->n_name_parts = n_parts; + + if (toklen <= (n_parts + 1) * 4) + return -EINVAL; + + princ->name_parts = kcalloc(n_parts, sizeof(char *), GFP_KERNEL); + if (!princ->name_parts) + return -ENOMEM; + + for (loop = 0; loop < n_parts; loop++) { + if (toklen < 4) + return -EINVAL; + tmp = ntohl(*xdr++); + toklen -= 4; + if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX) + return -EINVAL; + paddedlen = (tmp + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; + princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL); + if (!princ->name_parts[loop]) + return -ENOMEM; + memcpy(princ->name_parts[loop], xdr, tmp); + princ->name_parts[loop][tmp] = 0; + toklen -= paddedlen; + xdr += paddedlen >> 2; + } + + if (toklen < 4) + return -EINVAL; + tmp = ntohl(*xdr++); + toklen -= 4; + if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX) + return -EINVAL; + paddedlen = (tmp + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; + princ->realm = kmalloc(tmp + 1, GFP_KERNEL); + if (!princ->realm) + return -ENOMEM; + memcpy(princ->realm, xdr, tmp); + princ->realm[tmp] = 0; + toklen -= paddedlen; + xdr += paddedlen >> 2; + + _debug("%s/...@%s", princ->name_parts[0], princ->realm); + + *_xdr = xdr; + *_toklen = toklen; + _leave(" = 0 [toklen=%u]", toklen); + return 0; +} + +/* + * extract a piece of krb5 tagged data + */ +static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td, + size_t max_data_size, + const __be32 **_xdr, + unsigned int *_toklen) +{ + const __be32 *xdr = *_xdr; + unsigned int toklen = *_toklen, len, paddedlen; + + /* there must be at least one tag and one length word */ + if (toklen <= 8) + return -EINVAL; + + _enter(",%zu,{%x,%x},%u", + max_data_size, ntohl(xdr[0]), ntohl(xdr[1]), toklen); + + td->tag = ntohl(*xdr++); + len = ntohl(*xdr++); + toklen -= 8; + if (len > max_data_size) + return -EINVAL; + paddedlen = (len + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; + td->data_len = len; + + if (len > 0) { + td->data = kmemdup(xdr, len, GFP_KERNEL); + if (!td->data) + return -ENOMEM; + toklen -= paddedlen; + xdr += paddedlen >> 2; + } + + _debug("tag %x len %x", td->tag, td->data_len); + + *_xdr = xdr; + *_toklen = toklen; + _leave(" = 0 [toklen=%u]", toklen); + return 0; +} + +/* + * extract an array of tagged data + */ +static int rxrpc_krb5_decode_tagged_array(struct krb5_tagged_data **_td, + u8 *_n_elem, + u8 max_n_elem, + size_t max_elem_size, + const __be32 **_xdr, + unsigned int *_toklen) +{ + struct krb5_tagged_data *td; + const __be32 *xdr = *_xdr; + unsigned int toklen = *_toklen, n_elem, loop; + int ret; + + /* there must be at least one count */ + if (toklen < 4) + return -EINVAL; + + _enter(",,%u,%zu,{%x},%u", + max_n_elem, max_elem_size, ntohl(xdr[0]), toklen); + + n_elem = ntohl(*xdr++); + toklen -= 4; + if (n_elem > max_n_elem) + return -EINVAL; + *_n_elem = n_elem; + if (n_elem > 0) { + if (toklen <= (n_elem + 1) * 4) + return -EINVAL; + + _debug("n_elem %d", n_elem); + + td = kcalloc(n_elem, sizeof(struct krb5_tagged_data), + GFP_KERNEL); + if (!td) + return -ENOMEM; + *_td = td; + + for (loop = 0; loop < n_elem; loop++) { + ret = rxrpc_krb5_decode_tagged_data(&td[loop], + max_elem_size, + &xdr, &toklen); + if (ret < 0) + return ret; + } + } + + *_xdr = xdr; + *_toklen = toklen; + _leave(" = 0 [toklen=%u]", toklen); + return 0; +} + +/* + * extract a krb5 ticket + */ +static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen, + const __be32 **_xdr, unsigned int *_toklen) +{ + const __be32 *xdr = *_xdr; + unsigned int toklen = *_toklen, len, paddedlen; + + /* there must be at least one length word */ + if (toklen <= 4) + return -EINVAL; + + _enter(",{%x},%u", ntohl(xdr[0]), toklen); + + len = ntohl(*xdr++); + toklen -= 4; + if (len > AFSTOKEN_K5_TIX_MAX) + return -EINVAL; + paddedlen = (len + 3) & ~3; + if (paddedlen > toklen) + return -EINVAL; + *_tktlen = len; + + _debug("ticket len %u", len); + + if (len > 0) { + *_ticket = kmemdup(xdr, len, GFP_KERNEL); + if (!*_ticket) + return -ENOMEM; + toklen -= paddedlen; + xdr += paddedlen >> 2; + } + + *_xdr = xdr; + *_toklen = toklen; + _leave(" = 0 [toklen=%u]", toklen); + return 0; +} + +/* + * parse an RxK5 type XDR format token + * - the caller guarantees we have at least 4 words + */ +static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, + size_t datalen, + const __be32 *xdr, unsigned int toklen) +{ + struct rxrpc_key_token *token, **pptoken; + struct rxk5_key *rxk5; + const __be32 *end_xdr = xdr + (toklen >> 2); + time64_t expiry; + int ret; + + _enter(",{%x,%x,%x,%x},%u", + ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), + toklen); + + /* reserve some payload space for this subkey - the length of the token + * is a reasonable approximation */ + prep->quotalen = datalen + toklen; + + token = kzalloc(sizeof(*token), GFP_KERNEL); + if (!token) + return -ENOMEM; + + rxk5 = kzalloc(sizeof(*rxk5), GFP_KERNEL); + if (!rxk5) { + kfree(token); + return -ENOMEM; + } + + token->security_index = RXRPC_SECURITY_RXK5; + token->k5 = rxk5; + + /* extract the principals */ + ret = rxrpc_krb5_decode_principal(&rxk5->client, &xdr, &toklen); + if (ret < 0) + goto error; + ret = rxrpc_krb5_decode_principal(&rxk5->server, &xdr, &toklen); + if (ret < 0) + goto error; + + /* extract the session key and the encoding type (the tag field -> + * ENCTYPE_xxx) */ + ret = rxrpc_krb5_decode_tagged_data(&rxk5->session, AFSTOKEN_DATA_MAX, + &xdr, &toklen); + if (ret < 0) + goto error; + + if (toklen < 4 * 8 + 2 * 4) + goto inval; + rxk5->authtime = be64_to_cpup((const __be64 *) xdr); + xdr += 2; + rxk5->starttime = be64_to_cpup((const __be64 *) xdr); + xdr += 2; + rxk5->endtime = be64_to_cpup((const __be64 *) xdr); + xdr += 2; + rxk5->renew_till = be64_to_cpup((const __be64 *) xdr); + xdr += 2; + rxk5->is_skey = ntohl(*xdr++); + rxk5->flags = ntohl(*xdr++); + toklen -= 4 * 8 + 2 * 4; + + _debug("times: a=%llx s=%llx e=%llx rt=%llx", + rxk5->authtime, rxk5->starttime, rxk5->endtime, + rxk5->renew_till); + _debug("is_skey=%x flags=%x", rxk5->is_skey, rxk5->flags); + + /* extract the permitted client addresses */ + ret = rxrpc_krb5_decode_tagged_array(&rxk5->addresses, + &rxk5->n_addresses, + AFSTOKEN_K5_ADDRESSES_MAX, + AFSTOKEN_DATA_MAX, + &xdr, &toklen); + if (ret < 0) + goto error; + + ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); + + /* extract the tickets */ + ret = rxrpc_krb5_decode_ticket(&rxk5->ticket, &rxk5->ticket_len, + &xdr, &toklen); + if (ret < 0) + goto error; + ret = rxrpc_krb5_decode_ticket(&rxk5->ticket2, &rxk5->ticket2_len, + &xdr, &toklen); + if (ret < 0) + goto error; + + ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); + + /* extract the typed auth data */ + ret = rxrpc_krb5_decode_tagged_array(&rxk5->authdata, + &rxk5->n_authdata, + AFSTOKEN_K5_AUTHDATA_MAX, + AFSTOKEN_BDATALN_MAX, + &xdr, &toklen); + if (ret < 0) + goto error; + + ASSERTCMP((end_xdr - xdr) << 2, ==, toklen); + + if (toklen != 0) + goto inval; + + /* attach the payload */ + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; + *pptoken; + pptoken = &(*pptoken)->next) + continue; + *pptoken = token; + expiry = rxrpc_u32_to_time64(token->k5->endtime); + if (expiry < prep->expiry) + prep->expiry = expiry; + + _leave(" = 0"); + return 0; + +inval: + ret = -EINVAL; +error: + rxrpc_rxk5_free(rxk5); + kfree(token); + _leave(" = %d", ret); + return ret; +} + +/* + * attempt to parse the data as the XDR format + * - the caller guarantees we have more than 7 words + */ +static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep) +{ + const __be32 *xdr = prep->data, *token; + const char *cp; + unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix; + size_t datalen = prep->datalen; + int ret; + + _enter(",{%x,%x,%x,%x},%zu", + ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]), + prep->datalen); + + if (datalen > AFSTOKEN_LENGTH_MAX) + goto not_xdr; + + /* XDR is an array of __be32's */ + if (datalen & 3) + goto not_xdr; + + /* the flags should be 0 (the setpag bit must be handled by + * userspace) */ + if (ntohl(*xdr++) != 0) + goto not_xdr; + datalen -= 4; + + /* check the cell name */ + len = ntohl(*xdr++); + if (len < 1 || len > AFSTOKEN_CELL_MAX) + goto not_xdr; + datalen -= 4; + paddedlen = (len + 3) & ~3; + if (paddedlen > datalen) + goto not_xdr; + + cp = (const char *) xdr; + for (loop = 0; loop < len; loop++) + if (!isprint(cp[loop])) + goto not_xdr; + for (; loop < paddedlen; loop++) + if (cp[loop]) + goto not_xdr; + _debug("cellname: [%u/%u] '%*.*s'", + len, paddedlen, len, len, (const char *) xdr); + datalen -= paddedlen; + xdr += paddedlen >> 2; + + /* get the token count */ + if (datalen < 12) + goto not_xdr; + ntoken = ntohl(*xdr++); + datalen -= 4; + _debug("ntoken: %x", ntoken); + if (ntoken < 1 || ntoken > AFSTOKEN_MAX) + goto not_xdr; + + /* check each token wrapper */ + token = xdr; + loop = ntoken; + do { + if (datalen < 8) + goto not_xdr; + toklen = ntohl(*xdr++); + sec_ix = ntohl(*xdr); + datalen -= 4; + _debug("token: [%x/%zx] %x", toklen, datalen, sec_ix); + paddedlen = (toklen + 3) & ~3; + if (toklen < 20 || toklen > datalen || paddedlen > datalen) + goto not_xdr; + datalen -= paddedlen; + xdr += paddedlen >> 2; + + } while (--loop > 0); + + _debug("remainder: %zu", datalen); + if (datalen != 0) + goto not_xdr; + + /* okay: we're going to assume it's valid XDR format + * - we ignore the cellname, relying on the key to be correctly named + */ + do { + xdr = token; + toklen = ntohl(*xdr++); + token = xdr + ((toklen + 3) >> 2); + sec_ix = ntohl(*xdr++); + toklen -= 4; + + _debug("TOKEN type=%u [%p-%p]", sec_ix, xdr, token); + + switch (sec_ix) { + case RXRPC_SECURITY_RXKAD: + ret = rxrpc_preparse_xdr_rxkad(prep, datalen, xdr, toklen); + if (ret != 0) + goto error; + break; + + case RXRPC_SECURITY_RXK5: + ret = rxrpc_preparse_xdr_rxk5(prep, datalen, xdr, toklen); + if (ret != 0) + goto error; + break; + + default: + ret = -EPROTONOSUPPORT; + goto error; + } + + } while (--ntoken > 0); + + _leave(" = 0"); + return 0; + +not_xdr: + _leave(" = -EPROTO"); + return -EPROTO; +error: + _leave(" = %d", ret); + return ret; +} + +/* + * Preparse an rxrpc defined key. + * + * Data should be of the form: + * OFFSET LEN CONTENT + * 0 4 key interface version number + * 4 2 security index (type) + * 6 2 ticket length + * 8 4 key expiry time (time_t) + * 12 4 kvno + * 16 8 session key + * 24 [len] ticket + * + * if no data is provided, then a no-security key is made + */ +static int rxrpc_preparse(struct key_preparsed_payload *prep) +{ + const struct rxrpc_key_data_v1 *v1; + struct rxrpc_key_token *token, **pp; + time64_t expiry; + size_t plen; + u32 kver; + int ret; + + _enter("%zu", prep->datalen); + + /* handle a no-security key */ + if (!prep->data && prep->datalen == 0) + return 0; + + /* determine if the XDR payload format is being used */ + if (prep->datalen > 7 * 4) { + ret = rxrpc_preparse_xdr(prep); + if (ret != -EPROTO) + return ret; + } + + /* get the key interface version number */ + ret = -EINVAL; + if (prep->datalen <= 4 || !prep->data) + goto error; + memcpy(&kver, prep->data, sizeof(kver)); + prep->data += sizeof(kver); + prep->datalen -= sizeof(kver); + + _debug("KEY I/F VERSION: %u", kver); + + ret = -EKEYREJECTED; + if (kver != 1) + goto error; + + /* deal with a version 1 key */ + ret = -EINVAL; + if (prep->datalen < sizeof(*v1)) + goto error; + + v1 = prep->data; + if (prep->datalen != sizeof(*v1) + v1->ticket_length) + goto error; + + _debug("SCIX: %u", v1->security_index); + _debug("TLEN: %u", v1->ticket_length); + _debug("EXPY: %x", v1->expiry); + _debug("KVNO: %u", v1->kvno); + _debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x", + v1->session_key[0], v1->session_key[1], + v1->session_key[2], v1->session_key[3], + v1->session_key[4], v1->session_key[5], + v1->session_key[6], v1->session_key[7]); + if (v1->ticket_length >= 8) + _debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x", + v1->ticket[0], v1->ticket[1], + v1->ticket[2], v1->ticket[3], + v1->ticket[4], v1->ticket[5], + v1->ticket[6], v1->ticket[7]); + + ret = -EPROTONOSUPPORT; + if (v1->security_index != RXRPC_SECURITY_RXKAD) + goto error; + + plen = sizeof(*token->kad) + v1->ticket_length; + prep->quotalen = plen + sizeof(*token); + + ret = -ENOMEM; + token = kzalloc(sizeof(*token), GFP_KERNEL); + if (!token) + goto error; + token->kad = kzalloc(plen, GFP_KERNEL); + if (!token->kad) + goto error_free; + + token->security_index = RXRPC_SECURITY_RXKAD; + token->kad->ticket_len = v1->ticket_length; + token->kad->expiry = v1->expiry; + token->kad->kvno = v1->kvno; + memcpy(&token->kad->session_key, &v1->session_key, 8); + memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length); + + /* count the number of tokens attached */ + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); + + /* attach the data */ + pp = (struct rxrpc_key_token **)&prep->payload.data[0]; + while (*pp) + pp = &(*pp)->next; + *pp = token; + expiry = rxrpc_u32_to_time64(token->kad->expiry); + if (expiry < prep->expiry) + prep->expiry = expiry; + token = NULL; + ret = 0; + +error_free: + kfree(token); +error: + return ret; +} + +/* + * Free token list. + */ +static void rxrpc_free_token_list(struct rxrpc_key_token *token) +{ + struct rxrpc_key_token *next; + + for (; token; token = next) { + next = token->next; + switch (token->security_index) { + case RXRPC_SECURITY_RXKAD: + kfree(token->kad); + break; + case RXRPC_SECURITY_RXK5: + if (token->k5) + rxrpc_rxk5_free(token->k5); + break; + default: + pr_err("Unknown token type %x on rxrpc key\n", + token->security_index); + BUG(); + } + + kfree(token); + } +} + +/* + * Clean up preparse data. + */ +static void rxrpc_free_preparse(struct key_preparsed_payload *prep) +{ + rxrpc_free_token_list(prep->payload.data[0]); +} + +/* + * Preparse a server secret key. + * + * The data should be the 8-byte secret key. + */ +static int rxrpc_preparse_s(struct key_preparsed_payload *prep) +{ + struct crypto_skcipher *ci; + + _enter("%zu", prep->datalen); + + if (prep->datalen != 8) + return -EINVAL; + + memcpy(&prep->payload.data[2], prep->data, 8); + + ci = crypto_alloc_skcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(ci)) { + _leave(" = %ld", PTR_ERR(ci)); + return PTR_ERR(ci); + } + + if (crypto_skcipher_setkey(ci, prep->data, 8) < 0) + BUG(); + + prep->payload.data[0] = ci; + _leave(" = 0"); + return 0; +} + +/* + * Clean up preparse data. + */ +static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) +{ + if (prep->payload.data[0]) + crypto_free_skcipher(prep->payload.data[0]); +} + +/* + * dispose of the data dangling from the corpse of a rxrpc key + */ +static void rxrpc_destroy(struct key *key) +{ + rxrpc_free_token_list(key->payload.data[0]); +} + +/* + * dispose of the data dangling from the corpse of a rxrpc key + */ +static void rxrpc_destroy_s(struct key *key) +{ + if (key->payload.data[0]) { + crypto_free_skcipher(key->payload.data[0]); + key->payload.data[0] = NULL; + } +} + +/* + * describe the rxrpc key + */ +static void rxrpc_describe(const struct key *key, struct seq_file *m) +{ + seq_puts(m, key->description); +} + +/* + * grab the security key for a socket + */ +int rxrpc_request_key(struct rxrpc_sock *rx, sockptr_t optval, int optlen) +{ + struct key *key; + char *description; + + _enter(""); + + if (optlen <= 0 || optlen > PAGE_SIZE - 1 || rx->securities) + return -EINVAL; + + description = memdup_sockptr_nul(optval, optlen); + if (IS_ERR(description)) + return PTR_ERR(description); + + key = request_key_net(&key_type_rxrpc, description, sock_net(&rx->sk), NULL); + if (IS_ERR(key)) { + kfree(description); + _leave(" = %ld", PTR_ERR(key)); + return PTR_ERR(key); + } + + rx->key = key; + kfree(description); + _leave(" = 0 [key %x]", key->serial); + return 0; +} + +/* + * grab the security keyring for a server socket + */ +int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) +{ + struct key *key; + char *description; + + _enter(""); + + if (optlen <= 0 || optlen > PAGE_SIZE - 1) + return -EINVAL; + + description = memdup_sockptr_nul(optval, optlen); + if (IS_ERR(description)) + return PTR_ERR(description); + + key = request_key(&key_type_keyring, description, NULL); + if (IS_ERR(key)) { + kfree(description); + _leave(" = %ld", PTR_ERR(key)); + return PTR_ERR(key); + } + + rx->securities = key; + kfree(description); + _leave(" = 0 [key %x]", key->serial); + return 0; +} + +/* + * generate a server data key + */ +int rxrpc_get_server_data_key(struct rxrpc_connection *conn, + const void *session_key, + time64_t expiry, + u32 kvno) +{ + const struct cred *cred = current_cred(); + struct key *key; + int ret; + + struct { + u32 kver; + struct rxrpc_key_data_v1 v1; + } data; + + _enter(""); + + key = key_alloc(&key_type_rxrpc, "x", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0, + KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(key)) { + _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); + return -ENOMEM; + } + + _debug("key %d", key_serial(key)); + + data.kver = 1; + data.v1.security_index = RXRPC_SECURITY_RXKAD; + data.v1.ticket_length = 0; + data.v1.expiry = rxrpc_time64_to_u32(expiry); + data.v1.kvno = 0; + + memcpy(&data.v1.session_key, session_key, sizeof(data.v1.session_key)); + + ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL); + if (ret < 0) + goto error; + + conn->params.key = key; + _leave(" = 0 [%d]", key_serial(key)); + return 0; + +error: + key_revoke(key); + key_put(key); + _leave(" = -ENOMEM [ins %d]", ret); + return -ENOMEM; +} +EXPORT_SYMBOL(rxrpc_get_server_data_key); + +/** + * rxrpc_get_null_key - Generate a null RxRPC key + * @keyname: The name to give the key. + * + * Generate a null RxRPC key that can be used to indicate anonymous security is + * required for a particular domain. + */ +struct key *rxrpc_get_null_key(const char *keyname) +{ + const struct cred *cred = current_cred(); + struct key *key; + int ret; + + key = key_alloc(&key_type_rxrpc, keyname, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, + KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA, NULL); + if (IS_ERR(key)) + return key; + + ret = key_instantiate_and_link(key, NULL, 0, NULL, NULL); + if (ret < 0) { + key_revoke(key); + key_put(key); + return ERR_PTR(ret); + } + + return key; +} +EXPORT_SYMBOL(rxrpc_get_null_key); + +/* + * read the contents of an rxrpc key + * - this returns the result in XDR form + */ +static long rxrpc_read(const struct key *key, + char *buffer, size_t buflen) +{ + const struct rxrpc_key_token *token; + const struct krb5_principal *princ; + size_t size; + __be32 *xdr, *oldxdr; + u32 cnlen, toksize, ntoks, tok, zero; + u16 toksizes[AFSTOKEN_MAX]; + int loop; + + _enter(""); + + /* we don't know what form we should return non-AFS keys in */ + if (memcmp(key->description, "afs@", 4) != 0) + return -EOPNOTSUPP; + cnlen = strlen(key->description + 4); + +#define RND(X) (((X) + 3) & ~3) + + /* AFS keys we return in XDR form, so we need to work out the size of + * the XDR */ + size = 2 * 4; /* flags, cellname len */ + size += RND(cnlen); /* cellname */ + size += 1 * 4; /* token count */ + + ntoks = 0; + for (token = key->payload.data[0]; token; token = token->next) { + toksize = 4; /* sec index */ + + switch (token->security_index) { + case RXRPC_SECURITY_RXKAD: + toksize += 8 * 4; /* viceid, kvno, key*2, begin, + * end, primary, tktlen */ + toksize += RND(token->kad->ticket_len); + break; + + case RXRPC_SECURITY_RXK5: + princ = &token->k5->client; + toksize += 4 + princ->n_name_parts * 4; + for (loop = 0; loop < princ->n_name_parts; loop++) + toksize += RND(strlen(princ->name_parts[loop])); + toksize += 4 + RND(strlen(princ->realm)); + + princ = &token->k5->server; + toksize += 4 + princ->n_name_parts * 4; + for (loop = 0; loop < princ->n_name_parts; loop++) + toksize += RND(strlen(princ->name_parts[loop])); + toksize += 4 + RND(strlen(princ->realm)); + + toksize += 8 + RND(token->k5->session.data_len); + + toksize += 4 * 8 + 2 * 4; + + toksize += 4 + token->k5->n_addresses * 8; + for (loop = 0; loop < token->k5->n_addresses; loop++) + toksize += RND(token->k5->addresses[loop].data_len); + + toksize += 4 + RND(token->k5->ticket_len); + toksize += 4 + RND(token->k5->ticket2_len); + + toksize += 4 + token->k5->n_authdata * 8; + for (loop = 0; loop < token->k5->n_authdata; loop++) + toksize += RND(token->k5->authdata[loop].data_len); + break; + + default: /* we have a ticket we can't encode */ + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; + } + + _debug("token[%u]: toksize=%u", ntoks, toksize); + ASSERTCMP(toksize, <=, AFSTOKEN_LENGTH_MAX); + + toksizes[ntoks++] = toksize; + size += toksize + 4; /* each token has a length word */ + } + +#undef RND + + if (!buffer || buflen < size) + return size; + + xdr = (__be32 *)buffer; + zero = 0; +#define ENCODE(x) \ + do { \ + *xdr++ = htonl(x); \ + } while(0) +#define ENCODE_DATA(l, s) \ + do { \ + u32 _l = (l); \ + ENCODE(l); \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ + xdr += (_l + 3) >> 2; \ + } while(0) +#define ENCODE_BYTES(l, s) \ + do { \ + u32 _l = (l); \ + memcpy(xdr, (s), _l); \ + if (_l & 3) \ + memcpy((u8 *)xdr + _l, &zero, 4 - (_l & 3)); \ + xdr += (_l + 3) >> 2; \ + } while(0) +#define ENCODE64(x) \ + do { \ + __be64 y = cpu_to_be64(x); \ + memcpy(xdr, &y, 8); \ + xdr += 8 >> 2; \ + } while(0) +#define ENCODE_STR(s) \ + do { \ + const char *_s = (s); \ + ENCODE_DATA(strlen(_s), _s); \ + } while(0) + + ENCODE(0); /* flags */ + ENCODE_DATA(cnlen, key->description + 4); /* cellname */ + ENCODE(ntoks); + + tok = 0; + for (token = key->payload.data[0]; token; token = token->next) { + toksize = toksizes[tok++]; + ENCODE(toksize); + oldxdr = xdr; + ENCODE(token->security_index); + + switch (token->security_index) { + case RXRPC_SECURITY_RXKAD: + ENCODE(token->kad->vice_id); + ENCODE(token->kad->kvno); + ENCODE_BYTES(8, token->kad->session_key); + ENCODE(token->kad->start); + ENCODE(token->kad->expiry); + ENCODE(token->kad->primary_flag); + ENCODE_DATA(token->kad->ticket_len, token->kad->ticket); + break; + + case RXRPC_SECURITY_RXK5: + princ = &token->k5->client; + ENCODE(princ->n_name_parts); + for (loop = 0; loop < princ->n_name_parts; loop++) + ENCODE_STR(princ->name_parts[loop]); + ENCODE_STR(princ->realm); + + princ = &token->k5->server; + ENCODE(princ->n_name_parts); + for (loop = 0; loop < princ->n_name_parts; loop++) + ENCODE_STR(princ->name_parts[loop]); + ENCODE_STR(princ->realm); + + ENCODE(token->k5->session.tag); + ENCODE_DATA(token->k5->session.data_len, + token->k5->session.data); + + ENCODE64(token->k5->authtime); + ENCODE64(token->k5->starttime); + ENCODE64(token->k5->endtime); + ENCODE64(token->k5->renew_till); + ENCODE(token->k5->is_skey); + ENCODE(token->k5->flags); + + ENCODE(token->k5->n_addresses); + for (loop = 0; loop < token->k5->n_addresses; loop++) { + ENCODE(token->k5->addresses[loop].tag); + ENCODE_DATA(token->k5->addresses[loop].data_len, + token->k5->addresses[loop].data); + } + + ENCODE_DATA(token->k5->ticket_len, token->k5->ticket); + ENCODE_DATA(token->k5->ticket2_len, token->k5->ticket2); + + ENCODE(token->k5->n_authdata); + for (loop = 0; loop < token->k5->n_authdata; loop++) { + ENCODE(token->k5->authdata[loop].tag); + ENCODE_DATA(token->k5->authdata[loop].data_len, + token->k5->authdata[loop].data); + } + break; + + default: + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; + } + + ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, + toksize); + } + +#undef ENCODE_STR +#undef ENCODE_DATA +#undef ENCODE64 +#undef ENCODE + + ASSERTCMP(tok, ==, ntoks); + ASSERTCMP((char __user *) xdr - buffer, ==, size); + _leave(" = %zu", size); + return size; +} diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c new file mode 100644 index 000000000..3ce6d628c --- /dev/null +++ b/net/rxrpc/local_event.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AF_RXRPC local endpoint management + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <generated/utsrelease.h> +#include "ar-internal.h" + +static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC"; + +/* + * Reply to a version request + */ +static void rxrpc_send_version_request(struct rxrpc_local *local, + struct rxrpc_host_header *hdr, + struct sk_buff *skb) +{ + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct sockaddr_rxrpc srx; + struct msghdr msg; + struct kvec iov[2]; + size_t len; + int ret; + + _enter(""); + + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + return; + + msg.msg_name = &srx.transport; + msg.msg_namelen = srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.seq = 0; + whdr.serial = 0; + whdr.type = RXRPC_PACKET_TYPE_VERSION; + whdr.flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED); + whdr.userStatus = 0; + whdr.securityIndex = 0; + whdr._rsvd = 0; + whdr.serviceId = htons(sp->hdr.serviceId); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = (char *)rxrpc_version_string; + iov[1].iov_len = sizeof(rxrpc_version_string); + + len = iov[0].iov_len + iov[1].iov_len; + + _proto("Tx VERSION (reply)"); + + ret = kernel_sendmsg(local->socket, &msg, iov, 2, len); + if (ret < 0) + trace_rxrpc_tx_fail(local->debug_id, 0, ret, + rxrpc_tx_point_version_reply); + else + trace_rxrpc_tx_packet(local->debug_id, &whdr, + rxrpc_tx_point_version_reply); + + _leave(""); +} + +/* + * Process event packets targetted at a local endpoint. + */ +void rxrpc_process_local_events(struct rxrpc_local *local) +{ + struct sk_buff *skb; + char v; + + _enter(""); + + skb = skb_dequeue(&local->event_queue); + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + rxrpc_see_skb(skb, rxrpc_skb_seen); + _debug("{%d},{%u}", local->debug_id, sp->hdr.type); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &v, 1) < 0) + return; + _proto("Rx VERSION { %02x }", v); + if (v == 0) + rxrpc_send_version_request(local, &sp->hdr, skb); + break; + + default: + /* Just ignore anything we don't understand */ + break; + } + + rxrpc_free_skb(skb, rxrpc_skb_freed); + } + + _leave(""); +} diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c new file mode 100644 index 000000000..2c66ee981 --- /dev/null +++ b/net/rxrpc/local_object.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Local endpoint object management + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/udp.h> +#include <linux/ip.h> +#include <linux/hashtable.h> +#include <net/sock.h> +#include <net/udp.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static void rxrpc_local_processor(struct work_struct *); +static void rxrpc_local_rcu(struct rcu_head *); + +/* + * Compare a local to an address. Return -ve, 0 or +ve to indicate less than, + * same or greater than. + * + * We explicitly don't compare the RxRPC service ID as we want to reject + * conflicting uses by differing services. Further, we don't want to share + * addresses with different options (IPv6), so we don't compare those bits + * either. + */ +static long rxrpc_local_cmp_key(const struct rxrpc_local *local, + const struct sockaddr_rxrpc *srx) +{ + long diff; + + diff = ((local->srx.transport_type - srx->transport_type) ?: + (local->srx.transport_len - srx->transport_len) ?: + (local->srx.transport.family - srx->transport.family)); + if (diff != 0) + return diff; + + switch (srx->transport.family) { + case AF_INET: + /* If the choice of UDP port is left up to the transport, then + * the endpoint record doesn't match. + */ + return ((u16 __force)local->srx.transport.sin.sin_port - + (u16 __force)srx->transport.sin.sin_port) ?: + memcmp(&local->srx.transport.sin.sin_addr, + &srx->transport.sin.sin_addr, + sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + /* If the choice of UDP6 port is left up to the transport, then + * the endpoint record doesn't match. + */ + return ((u16 __force)local->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&local->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); +#endif + default: + BUG(); + } +} + +/* + * Allocate a new local endpoint. + */ +static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, + const struct sockaddr_rxrpc *srx) +{ + struct rxrpc_local *local; + + local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL); + if (local) { + refcount_set(&local->ref, 1); + atomic_set(&local->active_users, 1); + local->rxnet = rxnet; + INIT_HLIST_NODE(&local->link); + INIT_WORK(&local->processor, rxrpc_local_processor); + init_rwsem(&local->defrag_sem); + skb_queue_head_init(&local->reject_queue); + skb_queue_head_init(&local->event_queue); + local->client_bundles = RB_ROOT; + spin_lock_init(&local->client_bundles_lock); + spin_lock_init(&local->lock); + rwlock_init(&local->services_lock); + local->debug_id = atomic_inc_return(&rxrpc_debug_id); + memcpy(&local->srx, srx, sizeof(*srx)); + local->srx.srx_service = 0; + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL); + } + + _leave(" = %p", local); + return local; +} + +/* + * create the local socket + * - must be called with rxrpc_local_mutex locked + */ +static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) +{ + struct sock *usk; + int ret; + + _enter("%p{%d,%d}", + local, local->srx.transport_type, local->srx.transport.family); + + /* create a socket to represent the local endpoint */ + ret = sock_create_kern(net, local->srx.transport.family, + local->srx.transport_type, 0, &local->socket); + if (ret < 0) { + _leave(" = %d [socket]", ret); + return ret; + } + + /* set the socket up */ + usk = local->socket->sk; + inet_sk(usk)->mc_loop = 0; + + /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ + inet_inc_convert_csum(usk); + + rcu_assign_sk_user_data(usk, local); + + udp_sk(usk)->encap_type = UDP_ENCAP_RXRPC; + udp_sk(usk)->encap_rcv = rxrpc_input_packet; + udp_sk(usk)->encap_destroy = NULL; + udp_sk(usk)->gro_receive = NULL; + udp_sk(usk)->gro_complete = NULL; + + udp_encap_enable(); +#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) + if (local->srx.transport.family == AF_INET6) + udpv6_encap_enable(); +#endif + usk->sk_error_report = rxrpc_error_report; + + /* if a local address was supplied then bind it */ + if (local->srx.transport_len > sizeof(sa_family_t)) { + _debug("bind"); + ret = kernel_bind(local->socket, + (struct sockaddr *)&local->srx.transport, + local->srx.transport_len); + if (ret < 0) { + _debug("bind failed %d", ret); + goto error; + } + } + + switch (local->srx.transport.family) { + case AF_INET6: + /* we want to receive ICMPv6 errors */ + ip6_sock_set_recverr(local->socket->sk); + + /* Fall through and set IPv4 options too otherwise we don't get + * errors from IPv4 packets sent through the IPv6 socket. + */ + fallthrough; + case AF_INET: + /* we want to receive ICMP errors */ + ip_sock_set_recverr(local->socket->sk); + + /* we want to set the don't fragment bit */ + ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO); + + /* We want receive timestamps. */ + sock_enable_timestamps(local->socket->sk); + break; + + default: + BUG(); + } + + _leave(" = 0"); + return 0; + +error: + kernel_sock_shutdown(local->socket, SHUT_RDWR); + local->socket->sk->sk_user_data = NULL; + sock_release(local->socket); + local->socket = NULL; + + _leave(" = %d", ret); + return ret; +} + +/* + * Look up or create a new local endpoint using the specified local address. + */ +struct rxrpc_local *rxrpc_lookup_local(struct net *net, + const struct sockaddr_rxrpc *srx) +{ + struct rxrpc_local *local; + struct rxrpc_net *rxnet = rxrpc_net(net); + struct hlist_node *cursor; + const char *age; + long diff; + int ret; + + _enter("{%d,%d,%pISp}", + srx->transport_type, srx->transport.family, &srx->transport); + + mutex_lock(&rxnet->local_mutex); + + hlist_for_each(cursor, &rxnet->local_endpoints) { + local = hlist_entry(cursor, struct rxrpc_local, link); + + diff = rxrpc_local_cmp_key(local, srx); + if (diff != 0) + continue; + + /* Services aren't allowed to share transport sockets, so + * reject that here. It is possible that the object is dying - + * but it may also still have the local transport address that + * we want bound. + */ + if (srx->srx_service) { + local = NULL; + goto addr_in_use; + } + + /* Found a match. We want to replace a dying object. + * Attempting to bind the transport socket may still fail if + * we're attempting to use a local address that the dying + * object is still using. + */ + if (!rxrpc_use_local(local)) + break; + + age = "old"; + goto found; + } + + local = rxrpc_alloc_local(rxnet, srx); + if (!local) + goto nomem; + + ret = rxrpc_open_socket(local, net); + if (ret < 0) + goto sock_error; + + if (cursor) { + hlist_replace_rcu(cursor, &local->link); + cursor->pprev = NULL; + } else { + hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); + } + age = "new"; + +found: + mutex_unlock(&rxnet->local_mutex); + + _net("LOCAL %s %d {%pISp}", + age, local->debug_id, &local->srx.transport); + + _leave(" = %p", local); + return local; + +nomem: + ret = -ENOMEM; +sock_error: + mutex_unlock(&rxnet->local_mutex); + if (local) + call_rcu(&local->rcu, rxrpc_local_rcu); + _leave(" = %d", ret); + return ERR_PTR(ret); + +addr_in_use: + mutex_unlock(&rxnet->local_mutex); + _leave(" = -EADDRINUSE"); + return ERR_PTR(-EADDRINUSE); +} + +/* + * Get a ref on a local endpoint. + */ +struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) +{ + const void *here = __builtin_return_address(0); + int r; + + __refcount_inc(&local->ref, &r); + trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); + return local; +} + +/* + * Get a ref on a local endpoint unless its usage has already reached 0. + */ +struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) +{ + const void *here = __builtin_return_address(0); + int r; + + if (local) { + if (__refcount_inc_not_zero(&local->ref, &r)) + trace_rxrpc_local(local->debug_id, rxrpc_local_got, + r + 1, here); + else + local = NULL; + } + return local; +} + +/* + * Queue a local endpoint and pass the caller's reference to the work item. + */ +void rxrpc_queue_local(struct rxrpc_local *local) +{ + const void *here = __builtin_return_address(0); + unsigned int debug_id = local->debug_id; + int r = refcount_read(&local->ref); + + if (rxrpc_queue_work(&local->processor)) + trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); + else + rxrpc_put_local(local); +} + +/* + * Drop a ref on a local endpoint. + */ +void rxrpc_put_local(struct rxrpc_local *local) +{ + const void *here = __builtin_return_address(0); + unsigned int debug_id; + bool dead; + int r; + + if (local) { + debug_id = local->debug_id; + + dead = __refcount_dec_and_test(&local->ref, &r); + trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); + + if (dead) + call_rcu(&local->rcu, rxrpc_local_rcu); + } +} + +/* + * Start using a local endpoint. + */ +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) +{ + local = rxrpc_get_local_maybe(local); + if (!local) + return NULL; + + if (!__rxrpc_use_local(local)) { + rxrpc_put_local(local); + return NULL; + } + + return local; +} + +/* + * Cease using a local endpoint. Once the number of active users reaches 0, we + * start the closure of the transport in the work processor. + */ +void rxrpc_unuse_local(struct rxrpc_local *local) +{ + if (local) { + if (__rxrpc_unuse_local(local)) { + rxrpc_get_local(local); + rxrpc_queue_local(local); + } + } +} + +/* + * Destroy a local endpoint's socket and then hand the record to RCU to dispose + * of. + * + * Closing the socket cannot be done from bottom half context or RCU callback + * context because it might sleep. + */ +static void rxrpc_local_destroyer(struct rxrpc_local *local) +{ + struct socket *socket = local->socket; + struct rxrpc_net *rxnet = local->rxnet; + + _enter("%d", local->debug_id); + + local->dead = true; + + mutex_lock(&rxnet->local_mutex); + hlist_del_init_rcu(&local->link); + mutex_unlock(&rxnet->local_mutex); + + rxrpc_clean_up_local_conns(local); + rxrpc_service_connection_reaper(&rxnet->service_conn_reaper); + ASSERT(!local->service); + + if (socket) { + local->socket = NULL; + kernel_sock_shutdown(socket, SHUT_RDWR); + socket->sk->sk_user_data = NULL; + sock_release(socket); + } + + /* At this point, there should be no more packets coming in to the + * local endpoint. + */ + rxrpc_purge_queue(&local->reject_queue); + rxrpc_purge_queue(&local->event_queue); +} + +/* + * Process events on an endpoint. The work item carries a ref which + * we must release. + */ +static void rxrpc_local_processor(struct work_struct *work) +{ + struct rxrpc_local *local = + container_of(work, struct rxrpc_local, processor); + bool again; + + if (local->dead) + return; + + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, + refcount_read(&local->ref), NULL); + + do { + again = false; + if (!__rxrpc_use_local(local)) { + rxrpc_local_destroyer(local); + break; + } + + if (!skb_queue_empty(&local->reject_queue)) { + rxrpc_reject_packets(local); + again = true; + } + + if (!skb_queue_empty(&local->event_queue)) { + rxrpc_process_local_events(local); + again = true; + } + + __rxrpc_unuse_local(local); + } while (again); + + rxrpc_put_local(local); +} + +/* + * Destroy a local endpoint after the RCU grace period expires. + */ +static void rxrpc_local_rcu(struct rcu_head *rcu) +{ + struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu); + + _enter("%d", local->debug_id); + + ASSERT(!work_pending(&local->processor)); + + _net("DESTROY LOCAL %d", local->debug_id); + kfree(local); + _leave(""); +} + +/* + * Verify the local endpoint list is empty by this point. + */ +void rxrpc_destroy_all_locals(struct rxrpc_net *rxnet) +{ + struct rxrpc_local *local; + + _enter(""); + + flush_workqueue(rxrpc_workqueue); + + if (!hlist_empty(&rxnet->local_endpoints)) { + mutex_lock(&rxnet->local_mutex); + hlist_for_each_entry(local, &rxnet->local_endpoints, link) { + pr_err("AF_RXRPC: Leaked local %p {%d}\n", + local, refcount_read(&local->ref)); + } + mutex_unlock(&rxnet->local_mutex); + BUG(); + } +} diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c new file mode 100644 index 000000000..d4144fd86 --- /dev/null +++ b/net/rxrpc/misc.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Miscellaneous bits + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/kernel.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +/* + * The maximum listening backlog queue size that may be set on a socket by + * listen(). + */ +unsigned int rxrpc_max_backlog __read_mostly = 10; + +/* + * How long to wait before scheduling ACK generation after seeing a + * packet with RXRPC_REQUEST_ACK set (in jiffies). + */ +unsigned long rxrpc_requested_ack_delay = 1; + +/* + * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * + * We use this when we've received new data packets. If those packets aren't + * all consumed within this time we will send a DELAY ACK if an ACK was not + * requested to let the sender know it doesn't need to resend. + */ +unsigned long rxrpc_soft_ack_delay = HZ; + +/* + * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * + * We use this when we've consumed some previously soft-ACK'd packets when + * further packets aren't immediately received to decide when to send an IDLE + * ACK let the other end know that it can free up its Tx buffer space. + */ +unsigned long rxrpc_idle_ack_delay = HZ / 2; + +/* + * Receive window size in packets. This indicates the maximum number of + * unconsumed received packets we're willing to retain in memory. Once this + * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further + * packets. + */ +unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE; +#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE +#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE +#endif + +/* + * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet + * made by gluing normal packets together that we're willing to handle. + */ +unsigned int rxrpc_rx_mtu = 5692; + +/* + * The maximum number of fragments in a received jumbo packet that we tell the + * sender that we're willing to handle. + */ +unsigned int rxrpc_rx_jumbo_max = 4; + +const s8 rxrpc_ack_priority[] = { + [0] = 0, + [RXRPC_ACK_DELAY] = 1, + [RXRPC_ACK_REQUESTED] = 2, + [RXRPC_ACK_IDLE] = 3, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, +}; diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c new file mode 100644 index 000000000..34f389975 --- /dev/null +++ b/net/rxrpc/net_ns.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* rxrpc network namespace handling. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/proc_fs.h> +#include "ar-internal.h" + +unsigned int rxrpc_net_id; + +static void rxrpc_client_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, client_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->client_conn_reaper); +} + +static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, service_conn_reap_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->service_conn_reaper); +} + +static void rxrpc_peer_keepalive_timeout(struct timer_list *timer) +{ + struct rxrpc_net *rxnet = + container_of(timer, struct rxrpc_net, peer_keepalive_timer); + + if (rxnet->live) + rxrpc_queue_work(&rxnet->peer_keepalive_work); +} + +/* + * Initialise a per-network namespace record. + */ +static __net_init int rxrpc_init_net(struct net *net) +{ + struct rxrpc_net *rxnet = rxrpc_net(net); + int ret, i; + + rxnet->live = true; + get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch)); + rxnet->epoch |= RXRPC_RANDOM_EPOCH; + + INIT_LIST_HEAD(&rxnet->calls); + rwlock_init(&rxnet->call_lock); + atomic_set(&rxnet->nr_calls, 1); + + atomic_set(&rxnet->nr_conns, 1); + INIT_LIST_HEAD(&rxnet->conn_proc_list); + INIT_LIST_HEAD(&rxnet->service_conns); + rwlock_init(&rxnet->conn_lock); + INIT_WORK(&rxnet->service_conn_reaper, + rxrpc_service_connection_reaper); + timer_setup(&rxnet->service_conn_reap_timer, + rxrpc_service_conn_reap_timeout, 0); + + atomic_set(&rxnet->nr_client_conns, 0); + rxnet->kill_all_client_conns = false; + spin_lock_init(&rxnet->client_conn_cache_lock); + spin_lock_init(&rxnet->client_conn_discard_lock); + INIT_LIST_HEAD(&rxnet->idle_client_conns); + INIT_WORK(&rxnet->client_conn_reaper, + rxrpc_discard_expired_client_conns); + timer_setup(&rxnet->client_conn_reap_timer, + rxrpc_client_conn_reap_timeout, 0); + + INIT_HLIST_HEAD(&rxnet->local_endpoints); + mutex_init(&rxnet->local_mutex); + + hash_init(rxnet->peer_hash); + spin_lock_init(&rxnet->peer_hash_lock); + for (i = 0; i < ARRAY_SIZE(rxnet->peer_keepalive); i++) + INIT_LIST_HEAD(&rxnet->peer_keepalive[i]); + INIT_LIST_HEAD(&rxnet->peer_keepalive_new); + timer_setup(&rxnet->peer_keepalive_timer, + rxrpc_peer_keepalive_timeout, 0); + INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker); + rxnet->peer_keepalive_base = ktime_get_seconds(); + + ret = -ENOMEM; + rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net); + if (!rxnet->proc_net) + goto err_proc; + + proc_create_net("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_ops, + sizeof(struct seq_net_private)); + proc_create_net("conns", 0444, rxnet->proc_net, + &rxrpc_connection_seq_ops, + sizeof(struct seq_net_private)); + proc_create_net("peers", 0444, rxnet->proc_net, + &rxrpc_peer_seq_ops, + sizeof(struct seq_net_private)); + proc_create_net("locals", 0444, rxnet->proc_net, + &rxrpc_local_seq_ops, + sizeof(struct seq_net_private)); + return 0; + +err_proc: + rxnet->live = false; + return ret; +} + +/* + * Clean up a per-network namespace record. + */ +static __net_exit void rxrpc_exit_net(struct net *net) +{ + struct rxrpc_net *rxnet = rxrpc_net(net); + + rxnet->live = false; + del_timer_sync(&rxnet->peer_keepalive_timer); + cancel_work_sync(&rxnet->peer_keepalive_work); + /* Remove the timer again as the worker may have restarted it. */ + del_timer_sync(&rxnet->peer_keepalive_timer); + rxrpc_destroy_all_calls(rxnet); + rxrpc_destroy_all_connections(rxnet); + rxrpc_destroy_all_peers(rxnet); + rxrpc_destroy_all_locals(rxnet); + proc_remove(rxnet->proc_net); +} + +struct pernet_operations rxrpc_net_ops = { + .init = rxrpc_init_net, + .exit = rxrpc_exit_net, + .id = &rxrpc_net_id, + .size = sizeof(struct rxrpc_net), +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c new file mode 100644 index 000000000..08c117bc0 --- /dev/null +++ b/net/rxrpc/output.c @@ -0,0 +1,679 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC packet transmission + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/gfp.h> +#include <linux/skbuff.h> +#include <linux/export.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +struct rxrpc_ack_buffer { + struct rxrpc_wire_header whdr; + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; + struct rxrpc_ackinfo ackinfo; +}; + +struct rxrpc_abort_buffer { + struct rxrpc_wire_header whdr; + __be32 abort_code; +}; + +static const char rxrpc_keepalive_string[] = ""; + +/* + * Increase Tx backoff on transmission failure and clear it on success. + */ +static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret) +{ + if (ret < 0) { + u16 tx_backoff = READ_ONCE(call->tx_backoff); + + if (tx_backoff < HZ) + WRITE_ONCE(call->tx_backoff, tx_backoff + 1); + } else { + WRITE_ONCE(call->tx_backoff, 0); + } +} + +/* + * Arrange for a keepalive ping a certain time after we last transmitted. This + * lets the far side know we're still interested in this call and helps keep + * the route through any intervening firewall open. + * + * Receiving a response to the ping will prevent the ->expect_rx_by timer from + * expiring. + */ +static void rxrpc_set_keepalive(struct rxrpc_call *call) +{ + unsigned long now = jiffies, keepalive_at = call->next_rx_timo / 6; + + keepalive_at += now; + WRITE_ONCE(call->keepalive_at, keepalive_at); + rxrpc_reduce_call_timer(call, keepalive_at, now, + rxrpc_timer_set_for_keepalive); +} + +/* + * Fill out an ACK packet. + */ +static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, + struct rxrpc_call *call, + struct rxrpc_ack_buffer *pkt, + rxrpc_seq_t *_hard_ack, + rxrpc_seq_t *_top, + u8 reason) +{ + rxrpc_serial_t serial; + unsigned int tmp; + rxrpc_seq_t hard_ack, top, seq; + int ix; + u32 mtu, jmax; + u8 *ackp = pkt->acks; + + tmp = atomic_xchg(&call->ackr_nr_unacked, 0); + tmp |= atomic_xchg(&call->ackr_nr_consumed, 0); + if (!tmp && (reason == RXRPC_ACK_DELAY || + reason == RXRPC_ACK_IDLE)) + return 0; + + /* Barrier against rxrpc_input_data(). */ + serial = call->ackr_serial; + hard_ack = READ_ONCE(call->rx_hard_ack); + top = smp_load_acquire(&call->rx_top); + *_hard_ack = hard_ack; + *_top = top; + + pkt->ack.bufferSpace = htons(0); + pkt->ack.maxSkew = htons(0); + pkt->ack.firstPacket = htonl(hard_ack + 1); + pkt->ack.previousPacket = htonl(call->ackr_highest_seq); + pkt->ack.serial = htonl(serial); + pkt->ack.reason = reason; + pkt->ack.nAcks = top - hard_ack; + + if (reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; + + if (after(top, hard_ack)) { + seq = hard_ack + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + if (call->rxtx_buffer[ix]) + *ackp++ = RXRPC_ACK_TYPE_ACK; + else + *ackp++ = RXRPC_ACK_TYPE_NACK; + seq++; + } while (before_eq(seq, top)); + } + + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; + jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; + pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + pkt->ackinfo.maxMTU = htonl(mtu); + pkt->ackinfo.rwind = htonl(call->rx_winsize); + pkt->ackinfo.jumbo_max = htonl(jmax); + + *ackp++ = 0; + *ackp++ = 0; + *ackp++ = 0; + return top - hard_ack + 3; +} + +/* + * Record the beginning of an RTT probe. + */ +static int rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, + enum rxrpc_rtt_tx_trace why) +{ + unsigned long avail = call->rtt_avail; + int rtt_slot = 9; + + if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK)) + goto no_slot; + + rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK); + if (!test_and_clear_bit(rtt_slot, &call->rtt_avail)) + goto no_slot; + + call->rtt_serial[rtt_slot] = serial; + call->rtt_sent_at[rtt_slot] = ktime_get_real(); + smp_wmb(); /* Write data before avail bit */ + set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + + trace_rxrpc_rtt_tx(call, why, rtt_slot, serial); + return rtt_slot; + +no_slot: + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial); + return -1; +} + +/* + * Cancel an RTT probe. + */ +static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, + rxrpc_serial_t serial, int rtt_slot) +{ + if (rtt_slot != -1) { + clear_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + smp_wmb(); /* Clear pending bit before setting slot */ + set_bit(rtt_slot, &call->rtt_avail); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_cancel, rtt_slot, serial); + } +} + +/* + * Send an ACK call packet. + */ +int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, + rxrpc_serial_t *_serial) +{ + struct rxrpc_connection *conn; + struct rxrpc_ack_buffer *pkt; + struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; + size_t len, n; + int ret, rtt_slot = -1; + u8 reason; + + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + return -ECONNRESET; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + conn = call->conn; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt->whdr.epoch = htonl(conn->proto.epoch); + pkt->whdr.cid = htonl(call->cid); + pkt->whdr.callNumber = htonl(call->call_id); + pkt->whdr.seq = 0; + pkt->whdr.type = RXRPC_PACKET_TYPE_ACK; + pkt->whdr.flags = RXRPC_SLOW_START_OK | conn->out_clientflag; + pkt->whdr.userStatus = 0; + pkt->whdr.securityIndex = call->security_ix; + pkt->whdr._rsvd = 0; + pkt->whdr.serviceId = htons(call->service_id); + + spin_lock_bh(&call->lock); + if (ping) { + reason = RXRPC_ACK_PING; + } else { + reason = call->ackr_reason; + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } + call->ackr_reason = 0; + } + n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); + + spin_unlock_bh(&call->lock); + if (n == 0) { + kfree(pkt); + return 0; + } + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr) + sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len = iov[0].iov_len + iov[1].iov_len; + + serial = atomic_inc_return(&conn->serial); + pkt->whdr.serial = htonl(serial); + trace_rxrpc_tx_ack(call->debug_id, serial, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); + if (_serial) + *_serial = serial; + + if (ping) + rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + conn->params.peer->last_tx_at = ktime_get_seconds(); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_point_call_ack); + else + trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr, + rxrpc_tx_point_call_ack); + rxrpc_tx_backoff(call, ret); + + if (call->state < RXRPC_CALL_COMPLETE) { + if (ret < 0) { + rxrpc_cancel_rtt_probe(call, serial, rtt_slot); + rxrpc_propose_ACK(call, pkt->ack.reason, + ntohl(pkt->ack.serial), + false, true, + rxrpc_propose_ack_retry_tx); + } + + rxrpc_set_keepalive(call); + } + +out: + kfree(pkt); + return ret; +} + +/* + * Send an ABORT call packet. + */ +int rxrpc_send_abort_packet(struct rxrpc_call *call) +{ + struct rxrpc_connection *conn; + struct rxrpc_abort_buffer pkt; + struct msghdr msg; + struct kvec iov[1]; + rxrpc_serial_t serial; + int ret; + + /* Don't bother sending aborts for a client call once the server has + * hard-ACK'd all of its request data. After that point, we're not + * going to stop the operation proceeding, and whilst we might limit + * the reply, it's not worth it if we can send a new call on the same + * channel instead, thereby closing off this call. + */ + if (rxrpc_is_client_call(call) && + test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + return 0; + + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) + return -ECONNRESET; + + conn = call->conn; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(call->cid); + pkt.whdr.callNumber = htonl(call->call_id); + pkt.whdr.seq = 0; + pkt.whdr.type = RXRPC_PACKET_TYPE_ABORT; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = call->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(call->service_id); + pkt.abort_code = htonl(call->abort_code); + + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt); + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, 1, sizeof(pkt)); + conn->params.peer->last_tx_at = ktime_get_seconds(); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_point_call_abort); + else + trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, + rxrpc_tx_point_call_abort); + rxrpc_tx_backoff(call, ret); + return ret; +} + +/* + * send a packet through the transport endpoint + */ +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool retrans) +{ + struct rxrpc_connection *conn = call->conn; + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len; + int ret, rtt_slot = -1; + + _enter(",{%d}", skb->len); + + if (hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_add_head_rcu(&call->error_link, &call->peer->error_targets); + spin_unlock_bh(&call->peer->lock); + } + + /* Each transmission of a Tx packet needs a new serial number */ + serial = atomic_inc_return(&conn->serial); + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(call->cid); + whdr.callNumber = htonl(call->call_id); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = htonl(serial); + whdr.type = RXRPC_PACKET_TYPE_DATA; + whdr.flags = sp->hdr.flags; + whdr.userStatus = 0; + whdr.securityIndex = call->security_ix; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(call->service_id); + + if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && + sp->hdr.seq == 1) + whdr.userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = skb->head; + iov[1].iov_len = skb->len; + len = iov[0].iov_len + iov[1].iov_len; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + /* If our RTT cache needs working on, request an ACK. Also request + * ACKs if a DATA packet appears to have been lost. + * + * However, we mustn't request an ACK on the last reply packet of a + * service call, lest OpenAFS incorrectly send us an ACK with some + * soft-ACKs in it and then never follow up with a proper hard ACK. + */ + if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || + rxrpc_to_server(sp) + ) && + (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || + retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || + (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real()))) + whdr.flags |= RXRPC_REQUEST_ACK; + + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + ret = 0; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, retrans, true); + goto done; + } + } + + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, + false); + + /* send the packet with the don't fragment bit set if we currently + * think it's small enough */ + if (iov[1].iov_len >= call->peer->maxdata) + goto send_fragmentable; + + down_read(&conn->params.local->defrag_sem); + + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + if (whdr.flags & RXRPC_REQUEST_ACK) + rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); + + /* send the packet by UDP + * - returns -EMSGSIZE if UDP would have to fragment the packet + * to go out of the interface + * - in which case, we'll have processed the ICMP error + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + conn->params.peer->last_tx_at = ktime_get_seconds(); + + up_read(&conn->params.local->defrag_sem); + if (ret < 0) { + rxrpc_cancel_rtt_probe(call, serial, rtt_slot); + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_point_call_data_nofrag); + } else { + trace_rxrpc_tx_packet(call->debug_id, &whdr, + rxrpc_tx_point_call_data_nofrag); + } + + rxrpc_tx_backoff(call, ret); + if (ret == -EMSGSIZE) + goto send_fragmentable; + +done: + if (ret >= 0) { + if (whdr.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = skb->tstamp; + if (call->peer->rtt_count > 1) { + unsigned long nowj = jiffies, ack_lost_at; + + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); + ack_lost_at += nowj; + WRITE_ONCE(call->ack_lost_at, ack_lost_at); + rxrpc_reduce_call_timer(call, ack_lost_at, nowj, + rxrpc_timer_set_for_lost_ack); + } + } + + if (sp->hdr.seq == 1 && + !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, + &call->flags)) { + unsigned long nowj = jiffies, expect_rx_by; + + expect_rx_by = nowj + call->next_rx_timo; + WRITE_ONCE(call->expect_rx_by, expect_rx_by); + rxrpc_reduce_call_timer(call, expect_rx_by, nowj, + rxrpc_timer_set_for_normal); + } + + rxrpc_set_keepalive(call); + } else { + /* Cancel the call if the initial transmission fails, + * particularly if that's due to network routing issues that + * aren't going away anytime soon. The layer above can arrange + * the retransmission. + */ + if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_USER_ABORT, ret); + } + + _leave(" = %d [%u]", ret, call->peer->maxdata); + return ret; + +send_fragmentable: + /* attempt to send this message with fragmentation enabled */ + _debug("send fragment"); + + down_write(&conn->params.local->defrag_sem); + + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + if (whdr.flags & RXRPC_REQUEST_ACK) + rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); + + switch (conn->params.local->srx.transport.family) { + case AF_INET6: + case AF_INET: + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DONT); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); + conn->params.peer->last_tx_at = ktime_get_seconds(); + + ip_sock_set_mtu_discover(conn->params.local->socket->sk, + IP_PMTUDISC_DO); + break; + + default: + BUG(); + } + + if (ret < 0) { + rxrpc_cancel_rtt_probe(call, serial, rtt_slot); + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_point_call_data_frag); + } else { + trace_rxrpc_tx_packet(call->debug_id, &whdr, + rxrpc_tx_point_call_data_frag); + } + rxrpc_tx_backoff(call, ret); + + up_write(&conn->params.local->defrag_sem); + goto done; +} + +/* + * reject packets through the local endpoint + */ +void rxrpc_reject_packets(struct rxrpc_local *local) +{ + struct sockaddr_rxrpc srx; + struct rxrpc_skb_priv *sp; + struct rxrpc_wire_header whdr; + struct sk_buff *skb; + struct msghdr msg; + struct kvec iov[2]; + size_t size; + __be32 code; + int ret, ioc; + + _enter("%d", local->debug_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &code; + iov[1].iov_len = sizeof(code); + + msg.msg_name = &srx.transport; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + memset(&whdr, 0, sizeof(whdr)); + + while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb, rxrpc_skb_seen); + sp = rxrpc_skb(skb); + + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; + code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + rxrpc_free_skb(skb, rxrpc_skb_freed); + continue; + } + + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; + + ret = kernel_sendmsg(local->socket, &msg, + iov, ioc, size); + if (ret < 0) + trace_rxrpc_tx_fail(local->debug_id, 0, ret, + rxrpc_tx_point_reject); + else + trace_rxrpc_tx_packet(local->debug_id, &whdr, + rxrpc_tx_point_reject); + } + + rxrpc_free_skb(skb, rxrpc_skb_freed); + } + + _leave(""); +} + +/* + * Send a VERSION reply to a peer as a keepalive. + */ +void rxrpc_send_keepalive(struct rxrpc_peer *peer) +{ + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[2]; + size_t len; + int ret; + + _enter(""); + + msg.msg_name = &peer->srx.transport; + msg.msg_namelen = peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(peer->local->rxnet->epoch); + whdr.cid = 0; + whdr.callNumber = 0; + whdr.seq = 0; + whdr.serial = 0; + whdr.type = RXRPC_PACKET_TYPE_VERSION; /* Not client-initiated */ + whdr.flags = RXRPC_LAST_PACKET; + whdr.userStatus = 0; + whdr.securityIndex = 0; + whdr._rsvd = 0; + whdr.serviceId = 0; + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = (char *)rxrpc_keepalive_string; + iov[1].iov_len = sizeof(rxrpc_keepalive_string); + + len = iov[0].iov_len + iov[1].iov_len; + + _proto("Tx VERSION (keepalive)"); + + ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); + if (ret < 0) + trace_rxrpc_tx_fail(peer->debug_id, 0, ret, + rxrpc_tx_point_version_keepalive); + else + trace_rxrpc_tx_packet(peer->debug_id, &whdr, + rxrpc_tx_point_version_keepalive); + + peer->last_tx_at = ktime_get_seconds(); + _leave(""); +} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c new file mode 100644 index 000000000..be032850a --- /dev/null +++ b/net/rxrpc/peer_event.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Peer event handling, typically ICMP messages. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/errqueue.h> +#include <linux/udp.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/icmp.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <net/ip.h> +#include "ar-internal.h" + +static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); +static void rxrpc_distribute_error(struct rxrpc_peer *, int, + enum rxrpc_call_completion); + +/* + * Find the peer associated with an ICMP packet. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, + const struct sk_buff *skb, + struct sockaddr_rxrpc *srx) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + + _enter(""); + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice + * versa? + */ + switch (srx->transport.family) { + case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = serr->port; + switch (serr->ee.ee_origin) { + case SO_EE_ORIGIN_ICMP: + _net("Rx ICMP"); + memcpy(&srx->transport.sin.sin_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in_addr)); + break; + case SO_EE_ORIGIN_ICMP6: + _net("Rx ICMP6 on v4 sock"); + memcpy(&srx->transport.sin.sin_addr, + skb_network_header(skb) + serr->addr_offset + 12, + sizeof(struct in_addr)); + break; + default: + memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr, + sizeof(struct in_addr)); + break; + } + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + switch (serr->ee.ee_origin) { + case SO_EE_ORIGIN_ICMP6: + _net("Rx ICMP6"); + srx->transport.sin6.sin6_port = serr->port; + memcpy(&srx->transport.sin6.sin6_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in6_addr)); + break; + case SO_EE_ORIGIN_ICMP: + _net("Rx ICMP on v6 sock"); + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = serr->port; + memcpy(&srx->transport.sin.sin_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in_addr)); + break; + default: + memcpy(&srx->transport.sin6.sin6_addr, + &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + break; + } + break; +#endif + + default: + BUG(); + } + + return rxrpc_lookup_peer_rcu(local, srx); +} + +/* + * Handle an MTU/fragmentation problem. + */ +static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) +{ + u32 mtu = serr->ee.ee_info; + + _net("Rx ICMP Fragmentation Needed (%d)", mtu); + + /* wind down the local interface MTU */ + if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) { + peer->if_mtu = mtu; + _net("I/F MTU %u", mtu); + } + + if (mtu == 0) { + /* they didn't give us a size, estimate one */ + mtu = peer->if_mtu; + if (mtu > 1500) { + mtu >>= 1; + if (mtu < 1500) + mtu = 1500; + } else { + mtu -= 100; + if (mtu < peer->hdrsize) + mtu = peer->hdrsize + 4; + } + } + + if (mtu < peer->mtu) { + spin_lock_bh(&peer->lock); + peer->mtu = mtu; + peer->maxdata = peer->mtu - peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", + peer->mtu, peer->maxdata); + } +} + +/* + * Handle an error received on the local endpoint. + */ +void rxrpc_error_report(struct sock *sk) +{ + struct sock_exterr_skb *serr; + struct sockaddr_rxrpc srx; + struct rxrpc_local *local; + struct rxrpc_peer *peer; + struct sk_buff *skb; + + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); + return; + } + _enter("%p{%d}", sk, local->debug_id); + + /* Clear the outstanding error value on the socket so that it doesn't + * cause kernel_sendmsg() to return it later. + */ + sock_error(sk); + + skb = sock_dequeue_err_skb(sk); + if (!skb) { + rcu_read_unlock(); + _leave("UDP socket errqueue empty"); + return; + } + rxrpc_new_skb(skb, rxrpc_skb_received); + serr = SKB_EXT_ERR(skb); + if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { + _leave("UDP empty message"); + rcu_read_unlock(); + rxrpc_free_skb(skb, rxrpc_skb_freed); + return; + } + + peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + rcu_read_unlock(); + rxrpc_free_skb(skb, rxrpc_skb_freed); + _leave(" [no peer]"); + return; + } + + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); + + if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && + serr->ee.ee_type == ICMP_DEST_UNREACH && + serr->ee.ee_code == ICMP_FRAG_NEEDED)) { + rxrpc_adjust_mtu(peer, serr); + rcu_read_unlock(); + rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_put_peer(peer); + _leave(" [MTU update]"); + return; + } + + rxrpc_store_error(peer, serr); + rcu_read_unlock(); + rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_put_peer(peer); + + _leave(""); +} + +/* + * Map an error report to error codes on the peer record. + */ +static void rxrpc_store_error(struct rxrpc_peer *peer, + struct sock_exterr_skb *serr) +{ + enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; + struct sock_extended_err *ee; + int err; + + _enter(""); + + ee = &serr->ee; + + err = ee->ee_errno; + + switch (ee->ee_origin) { + case SO_EE_ORIGIN_ICMP: + switch (ee->ee_type) { + case ICMP_DEST_UNREACH: + switch (ee->ee_code) { + case ICMP_NET_UNREACH: + _net("Rx Received ICMP Network Unreachable"); + break; + case ICMP_HOST_UNREACH: + _net("Rx Received ICMP Host Unreachable"); + break; + case ICMP_PORT_UNREACH: + _net("Rx Received ICMP Port Unreachable"); + break; + case ICMP_NET_UNKNOWN: + _net("Rx Received ICMP Unknown Network"); + break; + case ICMP_HOST_UNKNOWN: + _net("Rx Received ICMP Unknown Host"); + break; + default: + _net("Rx Received ICMP DestUnreach code=%u", + ee->ee_code); + break; + } + break; + + case ICMP_TIME_EXCEEDED: + _net("Rx Received ICMP TTL Exceeded"); + break; + + default: + _proto("Rx Received ICMP error { type=%u code=%u }", + ee->ee_type, ee->ee_code); + break; + } + break; + + case SO_EE_ORIGIN_NONE: + case SO_EE_ORIGIN_LOCAL: + _proto("Rx Received local error { error=%d }", err); + compl = RXRPC_CALL_LOCAL_ERROR; + break; + + case SO_EE_ORIGIN_ICMP6: + if (err == EACCES) + err = EHOSTUNREACH; + fallthrough; + default: + _proto("Rx Received error report { orig=%u }", ee->ee_origin); + break; + } + + rxrpc_distribute_error(peer, err, compl); +} + +/* + * Distribute an error that occurred on a peer. + */ +static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, + enum rxrpc_call_completion compl) +{ + struct rxrpc_call *call; + + hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { + rxrpc_see_call(call); + rxrpc_set_call_completion(call, compl, 0, -error); + } +} + +/* + * Perform keep-alive pings. + */ +static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, + struct list_head *collector, + time64_t base, + u8 cursor) +{ + struct rxrpc_peer *peer; + const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; + time64_t keepalive_at; + int slot; + + spin_lock_bh(&rxnet->peer_hash_lock); + + while (!list_empty(collector)) { + peer = list_entry(collector->next, + struct rxrpc_peer, keepalive_link); + + list_del_init(&peer->keepalive_link); + if (!rxrpc_get_peer_maybe(peer)) + continue; + + if (__rxrpc_use_local(peer->local)) { + spin_unlock_bh(&rxnet->peer_hash_lock); + + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; + } + + /* A transmission to this peer occurred since last we + * examined it so put it into the appropriate future + * bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_unuse_local(peer->local); + } + rxrpc_put_peer_locked(peer); + } + + spin_unlock_bh(&rxnet->peer_hash_lock); +} + +/* + * Perform keep-alive pings with VERSION packets to keep any NAT alive. + */ +void rxrpc_peer_keepalive_worker(struct work_struct *work) +{ + struct rxrpc_net *rxnet = + container_of(work, struct rxrpc_net, peer_keepalive_work); + const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1; + time64_t base, now, delay; + u8 cursor, stop; + LIST_HEAD(collector); + + now = ktime_get_seconds(); + base = rxnet->peer_keepalive_base; + cursor = rxnet->peer_keepalive_cursor; + _enter("%lld,%u", base - now, cursor); + + if (!rxnet->live) + return; + + /* Remove to a temporary list all the peers that are currently lodged + * in expired buckets plus all new peers. + * + * Everything in the bucket at the cursor is processed this + * second; the bucket at cursor + 1 goes at now + 1s and so + * on... + */ + spin_lock_bh(&rxnet->peer_hash_lock); + list_splice_init(&rxnet->peer_keepalive_new, &collector); + + stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); + while (base <= now && (s8)(cursor - stop) < 0) { + list_splice_tail_init(&rxnet->peer_keepalive[cursor & mask], + &collector); + base++; + cursor++; + } + + base = now; + spin_unlock_bh(&rxnet->peer_hash_lock); + + rxnet->peer_keepalive_base = base; + rxnet->peer_keepalive_cursor = cursor; + rxrpc_peer_keepalive_dispatch(rxnet, &collector, base, cursor); + ASSERT(list_empty(&collector)); + + /* Schedule the timer for the next occupied timeslot. */ + cursor = rxnet->peer_keepalive_cursor; + stop = cursor + RXRPC_KEEPALIVE_TIME - 1; + for (; (s8)(cursor - stop) < 0; cursor++) { + if (!list_empty(&rxnet->peer_keepalive[cursor & mask])) + break; + base++; + } + + now = ktime_get_seconds(); + delay = base - now; + if (delay < 1) + delay = 1; + delay *= HZ; + if (rxnet->live) + timer_reduce(&rxnet->peer_keepalive_timer, jiffies + delay); + + _leave(""); +} diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c new file mode 100644 index 000000000..26d2ae9ba --- /dev/null +++ b/net/rxrpc/peer_object.c @@ -0,0 +1,528 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC remote transport endpoint record management + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/udp.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/slab.h> +#include <linux/hashtable.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <net/ip.h> +#include <net/route.h> +#include <net/ip6_route.h> +#include "ar-internal.h" + +/* + * Hash a peer key. + */ +static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, + const struct sockaddr_rxrpc *srx) +{ + const u16 *p; + unsigned int i, size; + unsigned long hash_key; + + _enter(""); + + hash_key = (unsigned long)local / __alignof__(*local); + hash_key += srx->transport_type; + hash_key += srx->transport_len; + hash_key += srx->transport.family; + + switch (srx->transport.family) { + case AF_INET: + hash_key += (u16 __force)srx->transport.sin.sin_port; + size = sizeof(srx->transport.sin.sin_addr); + p = (u16 *)&srx->transport.sin.sin_addr; + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + hash_key += (u16 __force)srx->transport.sin.sin_port; + size = sizeof(srx->transport.sin6.sin6_addr); + p = (u16 *)&srx->transport.sin6.sin6_addr; + break; +#endif + default: + WARN(1, "AF_RXRPC: Unsupported transport address family\n"); + return 0; + } + + /* Step through the peer address in 16-bit portions for speed */ + for (i = 0; i < size; i += sizeof(*p), p++) + hash_key += *p; + + _leave(" 0x%lx", hash_key); + return hash_key; +} + +/* + * Compare a peer to a key. Return -ve, 0 or +ve to indicate less than, same + * or greater than. + * + * Unfortunately, the primitives in linux/hashtable.h don't allow for sorted + * buckets and mid-bucket insertion, so we don't make full use of this + * information at this point. + */ +static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, + struct rxrpc_local *local, + const struct sockaddr_rxrpc *srx, + unsigned long hash_key) +{ + long diff; + + diff = ((peer->hash_key - hash_key) ?: + ((unsigned long)peer->local - (unsigned long)local) ?: + (peer->srx.transport_type - srx->transport_type) ?: + (peer->srx.transport_len - srx->transport_len) ?: + (peer->srx.transport.family - srx->transport.family)); + if (diff != 0) + return diff; + + switch (srx->transport.family) { + case AF_INET: + return ((u16 __force)peer->srx.transport.sin.sin_port - + (u16 __force)srx->transport.sin.sin_port) ?: + memcmp(&peer->srx.transport.sin.sin_addr, + &srx->transport.sin.sin_addr, + sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + return ((u16 __force)peer->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); +#endif + default: + BUG(); + } +} + +/* + * Look up a remote transport endpoint for the specified address using RCU. + */ +static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( + struct rxrpc_local *local, + const struct sockaddr_rxrpc *srx, + unsigned long hash_key) +{ + struct rxrpc_peer *peer; + struct rxrpc_net *rxnet = local->rxnet; + + hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { + if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && + refcount_read(&peer->ref) > 0) + return peer; + } + + return NULL; +} + +/* + * Look up a remote transport endpoint for the specified address using RCU. + */ +struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, + const struct sockaddr_rxrpc *srx) +{ + struct rxrpc_peer *peer; + unsigned long hash_key = rxrpc_peer_hash_key(local, srx); + + peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); + if (peer) { + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); + } + return peer; +} + +/* + * assess the MTU size for the network interface through which this peer is + * reached + */ +static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, + struct rxrpc_peer *peer) +{ + struct net *net = sock_net(&rx->sk); + struct dst_entry *dst; + struct rtable *rt; + struct flowi fl; + struct flowi4 *fl4 = &fl.u.ip4; +#ifdef CONFIG_AF_RXRPC_IPV6 + struct flowi6 *fl6 = &fl.u.ip6; +#endif + + peer->if_mtu = 1500; + + memset(&fl, 0, sizeof(fl)); + switch (peer->srx.transport.family) { + case AF_INET: + rt = ip_route_output_ports( + net, fl4, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), IPPROTO_UDP, 0, 0); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); + return; + } + dst = &rt->dst; + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + fl6->flowi6_iif = LOOPBACK_IFINDEX; + fl6->flowi6_scope = RT_SCOPE_UNIVERSE; + fl6->flowi6_proto = IPPROTO_UDP; + memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)); + fl6->fl6_dport = htons(7001); + fl6->fl6_sport = htons(7000); + dst = ip6_route_output(net, NULL, fl6); + if (dst->error) { + _leave(" [route err %d]", dst->error); + return; + } + break; +#endif + + default: + BUG(); + } + + peer->if_mtu = dst_mtu(dst); + dst_release(dst); + + _leave(" [if_mtu %u]", peer->if_mtu); +} + +/* + * Allocate a peer. + */ +struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_peer *peer; + + _enter(""); + + peer = kzalloc(sizeof(struct rxrpc_peer), gfp); + if (peer) { + refcount_set(&peer->ref, 1); + peer->local = rxrpc_get_local(local); + INIT_HLIST_HEAD(&peer->error_targets); + peer->service_conns = RB_ROOT; + seqlock_init(&peer->service_conn_lock); + spin_lock_init(&peer->lock); + spin_lock_init(&peer->rtt_input_lock); + peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + + rxrpc_peer_init_rtt(peer); + + if (RXRPC_TX_SMSS > 2190) + peer->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + peer->cong_cwnd = 3; + else + peer->cong_cwnd = 4; + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here); + } + + _leave(" = %p", peer); + return peer; +} + +/* + * Initialise peer record. + */ +static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer, + unsigned long hash_key) +{ + peer->hash_key = hash_key; + rxrpc_assess_MTU_size(rx, peer); + peer->mtu = peer->if_mtu; + peer->rtt_last_req = ktime_get_real(); + + switch (peer->srx.transport.family) { + case AF_INET: + peer->hdrsize = sizeof(struct iphdr); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + peer->hdrsize = sizeof(struct ipv6hdr); + break; +#endif + default: + BUG(); + } + + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: + BUG(); + } + + peer->hdrsize += sizeof(struct rxrpc_wire_header); + peer->maxdata = peer->mtu - peer->hdrsize; +} + +/* + * Set up a new peer. + */ +static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, + unsigned long hash_key, + gfp_t gfp) +{ + struct rxrpc_peer *peer; + + _enter(""); + + peer = rxrpc_alloc_peer(local, gfp); + if (peer) { + memcpy(&peer->srx, srx, sizeof(*srx)); + rxrpc_init_peer(rx, peer, hash_key); + } + + _leave(" = %p", peer); + return peer; +} + +static void rxrpc_free_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_local(peer->local); + kfree_rcu(peer, rcu); +} + +/* + * Set up a new incoming peer. There shouldn't be any other matching peers + * since we've already done a search in the list from the non-reentrant context + * (the data_ready handler) that is the only place we can add new peers. + */ +void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer) +{ + struct rxrpc_net *rxnet = local->rxnet; + unsigned long hash_key; + + hash_key = rxrpc_peer_hash_key(local, &peer->srx); + rxrpc_init_peer(rx, peer, hash_key); + + spin_lock(&rxnet->peer_hash_lock); + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); + spin_unlock(&rxnet->peer_hash_lock); +} + +/* + * obtain a remote transport endpoint for the specified address + */ +struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, gfp_t gfp) +{ + struct rxrpc_peer *peer, *candidate; + struct rxrpc_net *rxnet = local->rxnet; + unsigned long hash_key = rxrpc_peer_hash_key(local, srx); + + _enter("{%pISp}", &srx->transport); + + /* search the peer list first */ + rcu_read_lock(); + peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + rcu_read_unlock(); + + if (!peer) { + /* The peer is not yet present in hash - create a candidate + * for a new record and then redo the search. + */ + candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp); + if (!candidate) { + _leave(" = NULL [nomem]"); + return NULL; + } + + spin_lock_bh(&rxnet->peer_hash_lock); + + /* Need to check that we aren't racing with someone else */ + peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + hash_add_rcu(rxnet->peer_hash, + &candidate->hash_link, hash_key); + list_add_tail(&candidate->keepalive_link, + &rxnet->peer_keepalive_new); + } + + spin_unlock_bh(&rxnet->peer_hash_lock); + + if (peer) + rxrpc_free_peer(candidate); + else + peer = candidate; + } + + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); + + _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); + return peer; +} + +/* + * Get a ref on a peer record. + */ +struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) +{ + const void *here = __builtin_return_address(0); + int r; + + __refcount_inc(&peer->ref, &r); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + return peer; +} + +/* + * Get a ref on a peer record unless its usage has already reached 0. + */ +struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) +{ + const void *here = __builtin_return_address(0); + int r; + + if (peer) { + if (__refcount_inc_not_zero(&peer->ref, &r)) + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + else + peer = NULL; + } + return peer; +} + +/* + * Discard a peer record. + */ +static void __rxrpc_put_peer(struct rxrpc_peer *peer) +{ + struct rxrpc_net *rxnet = peer->local->rxnet; + + ASSERT(hlist_empty(&peer->error_targets)); + + spin_lock_bh(&rxnet->peer_hash_lock); + hash_del_rcu(&peer->hash_link); + list_del_init(&peer->keepalive_link); + spin_unlock_bh(&rxnet->peer_hash_lock); + + rxrpc_free_peer(peer); +} + +/* + * Drop a ref on a peer record. + */ +void rxrpc_put_peer(struct rxrpc_peer *peer) +{ + const void *here = __builtin_return_address(0); + unsigned int debug_id; + bool dead; + int r; + + if (peer) { + debug_id = peer->debug_id; + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) + __rxrpc_put_peer(peer); + } +} + +/* + * Drop a ref on a peer record where the caller already holds the + * peer_hash_lock. + */ +void rxrpc_put_peer_locked(struct rxrpc_peer *peer) +{ + const void *here = __builtin_return_address(0); + unsigned int debug_id = peer->debug_id; + bool dead; + int r; + + dead = __refcount_dec_and_test(&peer->ref, &r); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + if (dead) { + hash_del_rcu(&peer->hash_link); + list_del_init(&peer->keepalive_link); + rxrpc_free_peer(peer); + } +} + +/* + * Make sure all peer records have been discarded. + */ +void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) +{ + struct rxrpc_peer *peer; + int i; + + for (i = 0; i < HASH_SIZE(rxnet->peer_hash); i++) { + if (hlist_empty(&rxnet->peer_hash[i])) + continue; + + hlist_for_each_entry(peer, &rxnet->peer_hash[i], hash_link) { + pr_err("Leaked peer %u {%u} %pISp\n", + peer->debug_id, + refcount_read(&peer->ref), + &peer->srx.transport); + } + } +} + +/** + * rxrpc_kernel_get_peer - Get the peer address of a call + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srx: Where to place the result + * + * Get the address of the remote peer in a call. + */ +void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx) +{ + *_srx = call->peer->srx; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); + +/** + * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srtt: Where to store the SRTT value. + * + * Get the call's peer smoothed RTT in uS. + */ +bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call, + u32 *_srtt) +{ + struct rxrpc_peer *peer = call->peer; + + if (peer->rtt_count == 0) { + *_srtt = 1000000; /* 1S */ + return false; + } + + *_srtt = call->peer->srtt_us >> 3; + return true; +} +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c new file mode 100644 index 000000000..8967201fd --- /dev/null +++ b/net/rxrpc/proc.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* /proc/net/ support for AF_RXRPC + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/module.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { + [RXRPC_CONN_UNUSED] = "Unused ", + [RXRPC_CONN_CLIENT] = "Client ", + [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", + [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", + [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", + [RXRPC_CONN_SERVICE] = "SvSecure", + [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", + [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", +}; + +/* + * generate a list of extant and dead calls in /proc/net/rxrpc_calls + */ +static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rcu) + __acquires(rxnet->call_lock) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + rcu_read_lock(); + read_lock(&rxnet->call_lock); + return seq_list_start_head(&rxnet->calls, *_pos); +} + +static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->calls, pos); +} + +static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) + __releases(rxnet->call_lock) + __releases(rcu) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->call_lock); + rcu_read_unlock(); +} + +static int rxrpc_call_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_local *local; + struct rxrpc_sock *rx; + struct rxrpc_peer *peer; + struct rxrpc_call *call; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned long timeout = 0; + rxrpc_seq_t tx_hard_ack, rx_hard_ack; + char lbuff[50], rbuff[50]; + + if (v == &rxnet->calls) { + seq_puts(seq, + "Proto Local " + " Remote " + " SvID ConnID CallID End Use State Abort " + " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n"); + return 0; + } + + call = list_entry(v, struct rxrpc_call, link); + + rx = rcu_dereference(call->socket); + if (rx) { + local = READ_ONCE(rx->local); + if (local) + sprintf(lbuff, "%pISpc", &local->srx.transport); + else + strcpy(lbuff, "no_local"); + } else { + strcpy(lbuff, "no_socket"); + } + + peer = call->peer; + if (peer) + sprintf(rbuff, "%pISpc", &peer->srx.transport); + else + strcpy(rbuff, "no_connection"); + + if (call->state != RXRPC_CALL_SERVER_PREALLOC) { + timeout = READ_ONCE(call->expect_rx_by); + timeout -= jiffies; + } + + tx_hard_ack = READ_ONCE(call->tx_hard_ack); + rx_hard_ack = READ_ONCE(call->rx_hard_ack); + seq_printf(seq, + "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" + " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n", + lbuff, + rbuff, + call->service_id, + call->cid, + call->call_id, + rxrpc_is_service_call(call) ? "Svc" : "Clt", + refcount_read(&call->ref), + rxrpc_call_states[call->state], + call->abort_code, + call->debug_id, + tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack, + rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack, + call->rx_serial, + timeout); + + return 0; +} + +const struct seq_operations rxrpc_call_seq_ops = { + .start = rxrpc_call_seq_start, + .next = rxrpc_call_seq_next, + .stop = rxrpc_call_seq_stop, + .show = rxrpc_call_seq_show, +}; + +/* + * generate a list of extant virtual connections in /proc/net/rxrpc_conns + */ +static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rxnet->conn_lock) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_lock(&rxnet->conn_lock); + return seq_list_start_head(&rxnet->conn_proc_list, *_pos); +} + +static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + return seq_list_next(v, &rxnet->conn_proc_list, pos); +} + +static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) + __releases(rxnet->conn_lock) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + read_unlock(&rxnet->conn_lock); +} + +static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_connection *conn; + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + char lbuff[50], rbuff[50]; + + if (v == &rxnet->conn_proc_list) { + seq_puts(seq, + "Proto Local " + " Remote " + " SvID ConnID End Use State Key " + " Serial ISerial CallId0 CallId1 CallId2 CallId3\n" + ); + return 0; + } + + conn = list_entry(v, struct rxrpc_connection, proc_link); + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) { + strcpy(lbuff, "no_local"); + strcpy(rbuff, "no_connection"); + goto print; + } + + sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); + + sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); +print: + seq_printf(seq, + "UDP %-47.47s %-47.47s %4x %08x %s %3u" + " %s %08x %08x %08x %08x %08x %08x %08x\n", + lbuff, + rbuff, + conn->service_id, + conn->proto.cid, + rxrpc_conn_is_service(conn) ? "Svc" : "Clt", + refcount_read(&conn->ref), + rxrpc_conn_states[conn->state], + key_serial(conn->params.key), + atomic_read(&conn->serial), + conn->hi_serial, + conn->channels[0].call_id, + conn->channels[1].call_id, + conn->channels[2].call_id, + conn->channels[3].call_id); + + return 0; +} + +const struct seq_operations rxrpc_connection_seq_ops = { + .start = rxrpc_connection_seq_start, + .next = rxrpc_connection_seq_next, + .stop = rxrpc_connection_seq_stop, + .show = rxrpc_connection_seq_show, +}; + +/* + * generate a list of extant virtual peers in /proc/net/rxrpc/peers + */ +static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_peer *peer; + time64_t now; + char lbuff[50], rbuff[50]; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Proto Local " + " Remote " + " Use CW MTU LastUse RTT RTO\n" + ); + return 0; + } + + peer = list_entry(v, struct rxrpc_peer, hash_link); + + sprintf(lbuff, "%pISpc", &peer->local->srx.transport); + + sprintf(rbuff, "%pISpc", &peer->srx.transport); + + now = ktime_get_seconds(); + seq_printf(seq, + "UDP %-47.47s %-47.47s %3u" + " %3u %5u %6llus %8u %8u\n", + lbuff, + rbuff, + refcount_read(&peer->ref), + peer->cong_cwnd, + peer->mtu, + now - peer->last_tx_at, + peer->srtt_us >> 3, + jiffies_to_usecs(peer->rto_j)); + + return 0; +} + +static void *rxrpc_peer_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rcu) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned int bucket, n; + unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash); + void *p; + + rcu_read_lock(); + + if (*_pos >= UINT_MAX) + return NULL; + + n = *_pos & ((1U << shift) - 1); + bucket = *_pos >> shift; + for (;;) { + if (bucket >= HASH_SIZE(rxnet->peer_hash)) { + *_pos = UINT_MAX; + return NULL; + } + if (n == 0) { + if (bucket == 0) + return SEQ_START_TOKEN; + *_pos += 1; + n++; + } + + p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1); + if (p) + return p; + bucket++; + n = 1; + *_pos = (bucket << shift) | n; + } +} + +static void *rxrpc_peer_seq_next(struct seq_file *seq, void *v, loff_t *_pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned int bucket, n; + unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash); + void *p; + + if (*_pos >= UINT_MAX) + return NULL; + + bucket = *_pos >> shift; + + p = seq_hlist_next_rcu(v, &rxnet->peer_hash[bucket], _pos); + if (p) + return p; + + for (;;) { + bucket++; + n = 1; + *_pos = (bucket << shift) | n; + + if (bucket >= HASH_SIZE(rxnet->peer_hash)) { + *_pos = UINT_MAX; + return NULL; + } + if (n == 0) { + *_pos += 1; + n++; + } + + p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1); + if (p) + return p; + } +} + +static void rxrpc_peer_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + + +const struct seq_operations rxrpc_peer_seq_ops = { + .start = rxrpc_peer_seq_start, + .next = rxrpc_peer_seq_next, + .stop = rxrpc_peer_seq_stop, + .show = rxrpc_peer_seq_show, +}; + +/* + * Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals + */ +static int rxrpc_local_seq_show(struct seq_file *seq, void *v) +{ + struct rxrpc_local *local; + char lbuff[50]; + + if (v == SEQ_START_TOKEN) { + seq_puts(seq, + "Proto Local " + " Use Act\n"); + return 0; + } + + local = hlist_entry(v, struct rxrpc_local, link); + + sprintf(lbuff, "%pISpc", &local->srx.transport); + + seq_printf(seq, + "UDP %-47.47s %3u %3u\n", + lbuff, + refcount_read(&local->ref), + atomic_read(&local->active_users)); + + return 0; +} + +static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos) + __acquires(rcu) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + unsigned int n; + + rcu_read_lock(); + + if (*_pos >= UINT_MAX) + return NULL; + + n = *_pos; + if (n == 0) + return SEQ_START_TOKEN; + + return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1); +} + +static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos) +{ + struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); + + if (*_pos >= UINT_MAX) + return NULL; + + return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos); +} + +static void rxrpc_local_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) +{ + rcu_read_unlock(); +} + +const struct seq_operations rxrpc_local_seq_ops = { + .start = rxrpc_local_seq_start, + .next = rxrpc_local_seq_next, + .stop = rxrpc_local_seq_stop, + .show = rxrpc_local_seq_show, +}; diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h new file mode 100644 index 000000000..49bb97253 --- /dev/null +++ b/net/rxrpc/protocol.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* packet.h: Rx packet layout and definitions + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _LINUX_RXRPC_PACKET_H +#define _LINUX_RXRPC_PACKET_H + +typedef u32 rxrpc_seq_t; /* Rx message sequence number */ +typedef u32 rxrpc_serial_t; /* Rx message serial number */ +typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */ +typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ + +/*****************************************************************************/ +/* + * on-the-wire Rx packet header + * - all multibyte fields should be in network byte order + */ +struct rxrpc_wire_header { + __be32 epoch; /* client boot timestamp */ +#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ + + __be32 cid; /* connection and channel ID */ +#define RXRPC_MAXCALLS 4 /* max active calls per conn */ +#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */ +#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */ +#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */ +#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ + + __be32 callNumber; /* call ID (0 for connection-level packets) */ + __be32 seq; /* sequence number of pkt in call stream */ + __be32 serial; /* serial number of pkt sent to network */ + + uint8_t type; /* packet type */ +#define RXRPC_PACKET_TYPE_DATA 1 /* data */ +#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */ +#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */ +#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */ +#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */ +#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */ +#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */ +#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */ +#define RXRPC_PACKET_TYPE_PARAMS 9 /* Parameter negotiation (unspec'd, ignore) */ +#define RXRPC_PACKET_TYPE_10 10 /* Ignored */ +#define RXRPC_PACKET_TYPE_11 11 /* Ignored */ +#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ + + uint8_t flags; /* packet flags */ +#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ +#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */ +#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */ +#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */ +#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */ +#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */ + + uint8_t userStatus; /* app-layer defined status */ +#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */ + + uint8_t securityIndex; /* security protocol ID */ + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; + __be16 serviceId; /* service ID */ + +} __packed; + +/*****************************************************************************/ +/* + * jumbo packet secondary header + * - can be mapped to read header by: + * - new_serial = serial + 1 + * - new_seq = seq + 1 + * - new_flags = j_flags + * - new__rsvd = j__rsvd + * - duplicating all other fields + */ +struct rxrpc_jumbo_header { + uint8_t flags; /* packet flags (as per rxrpc_header) */ + uint8_t pad; + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; +}; + +#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ +#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) + +/* + * The maximum number of subpackets that can possibly fit in a UDP packet is: + * + * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1 + * = ((65535 - 28 - 28) / 1416) + 1 + * = 46 non-terminal packets and 1 terminal packet. + */ +#define RXRPC_MAX_NR_JUMBO 47 + +/*****************************************************************************/ +/* + * on-the-wire Rx ACK packet data payload + * - all multibyte fields should be in network byte order + */ +struct rxrpc_ackpacket { + __be16 bufferSpace; /* number of packet buffers available */ + __be16 maxSkew; /* diff between serno being ACK'd and highest serial no + * received */ + __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */ + __be32 previousPacket; /* sequence no of previous packet received */ + __be32 serial; /* serial no of packet that prompted this ACK */ + + uint8_t reason; /* reason for ACK */ +#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */ +#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */ +#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */ +#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */ +#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */ +#define RXRPC_ACK_PING 6 /* keep alive ACK */ +#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ +#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ +#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ + + uint8_t nAcks; /* number of ACKs */ +#define RXRPC_MAXACKS 255 + + uint8_t acks[0]; /* list of ACK/NAKs */ +#define RXRPC_ACK_TYPE_NACK 0 +#define RXRPC_ACK_TYPE_ACK 1 + +} __packed; + +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + +/* + * ACK packets can have a further piece of information tagged on the end + */ +struct rxrpc_ackinfo { + __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ + __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ + __be32 rwind; /* Rx window size (packets) [AFS 3.4] */ + __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */ +}; + +/*****************************************************************************/ +/* + * Kerberos security type-2 challenge packet + */ +struct rxkad_challenge { + __be32 version; /* version of this challenge type */ + __be32 nonce; /* encrypted random number */ + __be32 min_level; /* minimum security level */ + __be32 __padding; /* padding to 8-byte boundary */ +} __packed; + +/*****************************************************************************/ +/* + * Kerberos security type-2 response packet + */ +struct rxkad_response { + __be32 version; /* version of this response type */ + __be32 __pad; + + /* encrypted bit of the response */ + struct { + __be32 epoch; /* current epoch */ + __be32 cid; /* parent connection ID */ + __be32 checksum; /* checksum */ + __be32 securityIndex; /* security type */ + __be32 call_id[4]; /* encrypted call IDs */ + __be32 inc_nonce; /* challenge nonce + 1 */ + __be32 level; /* desired level */ + } encrypted; + + __be32 kvno; /* Kerberos key version number */ + __be32 ticket_len; /* Kerberos ticket length */ +} __packed; + +#endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c new file mode 100644 index 000000000..787826773 --- /dev/null +++ b/net/rxrpc/recvmsg.c @@ -0,0 +1,815 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC recvmsg() implementation + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/export.h> +#include <linux/sched/signal.h> + +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +/* + * Post a call for attention by the socket or kernel service. Further + * notifications are suppressed by putting recvmsg_link on a dummy queue. + */ +void rxrpc_notify_socket(struct rxrpc_call *call) +{ + struct rxrpc_sock *rx; + struct sock *sk; + + _enter("%d", call->debug_id); + + if (!list_empty(&call->recvmsg_link)) + return; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + sk = &rx->sk; + if (rx && sk->sk_state < RXRPC_CLOSE) { + if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); + call->notify_rx(sk, call, call->user_call_ID); + spin_unlock_bh(&call->notify_lock); + } else { + write_lock_bh(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + rxrpc_get_call(call, rxrpc_call_got); + list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); + } + write_unlock_bh(&rx->recvmsg_lock); + + if (!sock_flag(sk, SOCK_DEAD)) { + _debug("call %ps", sk->sk_data_ready); + sk->sk_data_ready(sk); + } + } + } + + rcu_read_unlock(); + _leave(""); +} + +/* + * Transition a call to the complete state. + */ +bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl, + call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); + wake_up(&call->waitq); + rxrpc_notify_socket(call); + return true; + } + return false; +} + +bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call successfully completed. + */ +bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret = false; + + if (call->state < RXRPC_CALL_COMPLETE) { + write_lock_bh(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + } + return ret; +} + +/* + * Record that a call is locally aborted. + */ +bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* + * Pass a call terminating message to userspace. + */ +static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) +{ + u32 tmp = 0; + int ret; + + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + ret = 0; + if (rxrpc_is_service_call(call)) + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); + break; + case RXRPC_CALL_REMOTELY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_LOCALLY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_NETWORK_ERROR: + tmp = -call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); + break; + case RXRPC_CALL_LOCAL_ERROR: + tmp = -call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); + break; + default: + pr_err("Invalid terminal call state %u\n", call->state); + BUG(); + break; + } + + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, + call->rx_pkt_offset, call->rx_pkt_len, ret); + return ret; +} + +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) +{ + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + + trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); + ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); + + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, false, true, + rxrpc_propose_ack_terminal_ack); + //rxrpc_send_ack_packet(call, false, NULL); + } + + write_lock_bh(&call->state_lock); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->tx_phase = true; + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; + write_unlock_bh(&call->state_lock); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false, true, + rxrpc_propose_ack_processing_op); + break; + default: + write_unlock_bh(&call->state_lock); + break; + } +} + +/* + * Discard a packet we've used up and advance the Rx window by one. + */ +static void rxrpc_rotate_rx_window(struct rxrpc_call *call) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; + bool last = false; + u8 subpacket; + int ix; + + _enter("%d", call->debug_id); + + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + ASSERT(before(hard_ack, top)); + + hard_ack++; + ix = hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb, rxrpc_skb_rotated); + sp = rxrpc_skb(skb); + + subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; + serial = sp->hdr.serial + subpacket; + + if (subpacket == sp->nr_subpackets - 1 && + sp->rx_flags & RXRPC_SKB_INCL_LAST) + last = true; + + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + /* Barrier against rxrpc_input_data(). */ + smp_store_release(&call->rx_hard_ack, hard_ack); + + rxrpc_free_skb(skb, rxrpc_skb_freed); + + trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); + if (last) { + rxrpc_end_rx_phase(call, serial); + } else { + /* Check to see if there's an ACK that needs sending. */ + if (atomic_inc_return(&call->ackr_nr_consumed) > 2) + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, serial, + true, false, + rxrpc_propose_ack_rotate_rx); + if (call->ackr_reason && call->ackr_reason != RXRPC_ACK_DELAY) + rxrpc_send_ack_packet(call, false, NULL); + } +} + +/* + * Decrypt and verify a (sub)packet. The packet's length may be changed due to + * padding, but if this is the case, the packet length will be resident in the + * socket buffer. Note that we can't modify the master skb info as the skb may + * be the home to multiple subpackets. + */ +static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + u8 annotation, + unsigned int offset, unsigned int len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + u16 cksum = sp->hdr.cksum; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; + + _enter(""); + + /* For all but the head jumbo subpacket, the security checksum is in a + * jumbo header immediately prior to the data. + */ + if (subpacket > 0) { + __be16 tmp; + if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) + BUG(); + cksum = ntohs(tmp); + seq += subpacket; + } + + return call->security->verify_packet(call, skb, offset, len, + seq, cksum); +} + +/* + * Locate the data within a packet. This is complicated by: + * + * (1) An skb may contain a jumbo packet - so we have to find the appropriate + * subpacket. + * + * (2) The (sub)packets may be encrypted and, if so, the encrypted portion + * contains an extra header which includes the true length of the data, + * excluding any encrypted padding. + */ +static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, + unsigned int *_offset, unsigned int *_len, + bool *_last) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len; + bool last = false; + int ret; + u8 annotation = *_annotation; + u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET; + + /* Locate the subpacket */ + offset += subpacket * RXRPC_JUMBO_SUBPKTLEN; + len = skb->len - offset; + if (subpacket < sp->nr_subpackets - 1) + len = RXRPC_JUMBO_DATALEN; + else if (sp->rx_flags & RXRPC_SKB_INCL_LAST) + last = true; + + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); + if (ret < 0) + return ret; + *_annotation |= RXRPC_RX_ANNO_VERIFIED; + } + + *_offset = offset; + *_len = len; + *_last = last; + call->security->locate_data(call, skb, _offset, _len); + return 0; +} + +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. + */ +static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, struct iov_iter *iter, + size_t len, int flags, size_t *_offset) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; + bool rx_pkt_last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = -EAGAIN, ret2; + + if (test_and_clear_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags) && + call->ackr_reason) + rxrpc_send_ack_packet(call, false, NULL); + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; + rx_pkt_last = call->rx_pkt_last; + + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; + ret = 1; + goto done; + } + + /* Barriers against rxrpc_input_data(). */ + hard_ack = call->rx_hard_ack; + seq = hard_ack + 1; + + while (top = smp_load_acquire(&call->rx_top), + before_eq(seq, top) + ) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + if (!skb) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, + rx_pkt_offset, rx_pkt_len, 0); + break; + } + smp_rmb(); + rxrpc_see_skb(skb, rxrpc_skb_seen); + sp = rxrpc_skb(skb); + + if (!(flags & MSG_PEEK)) { + serial = sp->hdr.serial; + serial += call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET; + trace_rxrpc_receive(call, rxrpc_receive_front, + serial, seq); + } + + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + + if (rx_pkt_offset == 0) { + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len, + &rx_pkt_last); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); + if (ret2 < 0) { + ret = ret2; + goto out; + } + } else { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); + } + + /* We have to handle short, empty and used-up DATA packets. */ + remain = len - *_offset; + copy = rx_pkt_len; + if (copy > remain) + copy = remain; + if (copy > 0) { + ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret2 < 0) { + ret = ret2; + goto out; + } + + /* handle piecemeal consumption of data packets */ + rx_pkt_offset += copy; + rx_pkt_len -= copy; + *_offset += copy; + } + + if (rx_pkt_len > 0) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); + ASSERTCMP(*_offset, ==, len); + ret = 0; + break; + } + + /* The whole packet has been transferred. */ + if (!(flags & MSG_PEEK)) + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; + + if (rx_pkt_last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); + ret = 1; + goto out; + } + + seq++; + } + +out: + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; + call->rx_pkt_last = rx_pkt_last; + } +done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); + if (ret == -EAGAIN) + set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags); + return ret; +} + +/* + * Receive a message from an RxRPC socket + * - we need to be careful about two or more threads calling recvmsg + * simultaneously + */ +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + struct rxrpc_call *call; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct list_head *l; + size_t copied = 0; + long timeo; + int ret; + + DEFINE_WAIT(wait); + + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); + + if (flags & (MSG_OOB | MSG_TRUNC)) + return -EOPNOTSUPP; + + timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); + +try_again: + lock_sock(&rx->sk); + + /* Return immediately if a client socket has no outstanding calls */ + if (RB_EMPTY_ROOT(&rx->calls) && + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); + return -EAGAIN; + } + + if (list_empty(&rx->recvmsg_q)) { + ret = -EWOULDBLOCK; + if (timeo == 0) { + call = NULL; + goto error_no_call; + } + + release_sock(&rx->sk); + + /* Wait for something to happen */ + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, + TASK_INTERRUPTIBLE); + ret = sock_error(&rx->sk); + if (ret) + goto wait_error; + + if (list_empty(&rx->recvmsg_q)) { + if (signal_pending(current)) + goto wait_interrupted; + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, + 0, 0, 0, 0); + timeo = schedule_timeout(timeo); + } + finish_wait(sk_sleep(&rx->sk), &wait); + goto try_again; + } + + /* Find the next call and dequeue it if we're not just peeking. If we + * do dequeue it, that comes with a ref that we will need to release. + */ + write_lock_bh(&rx->recvmsg_lock); + l = rx->recvmsg_q.next; + call = list_entry(l, struct rxrpc_call, recvmsg_link); + if (!(flags & MSG_PEEK)) + list_del_init(&call->recvmsg_link); + else + rxrpc_get_call(call, rxrpc_call_got); + write_unlock_bh(&rx->recvmsg_lock); + + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + + /* We're going to drop the socket lock, so we need to lock the call + * against interference by sendmsg. + */ + if (!mutex_trylock(&call->user_mutex)) { + ret = -EWOULDBLOCK; + if (flags & MSG_DONTWAIT) + goto error_requeue_call; + ret = -ERESTARTSYS; + if (mutex_lock_interruptible(&call->user_mutex) < 0) + goto error_requeue_call; + } + + release_sock(&rx->sk); + + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) + BUG(); + + if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + unsigned long idl = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), &idl); + } + if (ret < 0) + goto error_unlock_call; + } + + if (msg->msg_name && call->peer) { + struct sockaddr_rxrpc *srx = msg->msg_name; + size_t len = sizeof(call->peer->srx); + + memcpy(msg->msg_name, &call->peer->srx, len); + srx->srx_service = call->service_id; + msg->msg_namelen = len; + } + + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) + ret = 0; + + if (after(call->rx_top, call->rx_hard_ack) && + call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) + rxrpc_notify_socket(call); + break; + default: + ret = 0; + break; + } + + if (ret < 0) + goto error_unlock_call; + + if (call->state == RXRPC_CALL_COMPLETE) { + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error_unlock_call; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; + } + + if (ret == 0) + msg->msg_flags |= MSG_MORE; + else + msg->msg_flags &= ~MSG_MORE; + ret = copied; + +error_unlock_call: + mutex_unlock(&call->user_mutex); + rxrpc_put_call(call, rxrpc_call_put); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + return ret; + +error_requeue_call: + if (!(flags & MSG_PEEK)) { + write_lock_bh(&rx->recvmsg_lock); + list_add(&call->recvmsg_link, &rx->recvmsg_q); + write_unlock_bh(&rx->recvmsg_lock); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0); + } else { + rxrpc_put_call(call, rxrpc_call_put); + } +error_no_call: + release_sock(&rx->sk); +error_trace: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); + return ret; + +wait_interrupted: + ret = sock_intr_errno(timeo); +wait_error: + finish_wait(sk_sleep(&rx->sk), &wait); + call = NULL; + goto error_trace; +} + +/** + * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info + * @sock: The socket that the call exists on + * @call: The call to send data through + * @iter: The buffer to receive into + * @want_more: True if more data is expected to be read + * @_abort: Where the abort code is stored if -ECONNABORTED is returned + * @_service: Where to store the actual service ID (may be upgraded) + * + * Allow a kernel service to receive data and pick up information about the + * state of a call. Returns 0 if got what was asked for and there's more + * available, 1 if we got what was asked for and we're at the end of the data + * and -EAGAIN if we need more data. + * + * Note that we may return -EAGAIN to drain empty packets at the end of the + * data, even if we've already copied over the requested data. + * + * *_abort should also be initialised to 0. + */ +int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, + struct iov_iter *iter, + bool want_more, u32 *_abort, u16 *_service) +{ + size_t offset = 0; + int ret; + + _enter("{%d,%s},%zu,%d", + call->debug_id, rxrpc_call_states[call->state], + iov_iter_count(iter), want_more); + + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING); + + mutex_lock(&call->user_mutex); + + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, NULL, iter, + iov_iter_count(iter), 0, + &offset); + if (ret < 0) + goto out; + + /* We can only reach here with a partially full buffer if we + * have reached the end of the data. We must otherwise have a + * full buffer or have been given -EAGAIN. + */ + if (ret == 1) { + if (iov_iter_count(iter) > 0) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; + goto out; + } + + if (!want_more) + goto excess_data; + goto out; + + case RXRPC_CALL_COMPLETE: + goto call_complete; + + default: + ret = -EINPROGRESS; + goto out; + } + +read_phase_complete: + ret = 1; +out: + switch (call->ackr_reason) { + case RXRPC_ACK_IDLE: + break; + case RXRPC_ACK_DELAY: + if (ret != -EAGAIN) + break; + fallthrough; + default: + rxrpc_send_ack_packet(call, false, NULL); + } + + if (_service) + *_service = call->service_id; + mutex_unlock(&call->user_mutex); + _leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort); + return ret; + +short_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data")); + ret = -EBADMSG; + goto out; +excess_data: + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data")); + ret = -EMSGSIZE; + goto out; +call_complete: + *_abort = call->abort_code; + ret = call->error; + if (call->completion == RXRPC_CALL_SUCCEEDED) { + ret = 1; + if (iov_iter_count(iter) > 0) + ret = -ECONNRESET; + } + goto out; +} +EXPORT_SYMBOL(rxrpc_kernel_recv_data); + +/** + * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet + * @sock: The socket that the call exists on + * @call: The call to query + * @_ts: Where to put the timestamp + * + * Retrieve the timestamp from the first DATA packet of the reply if it is + * in the ring. Returns true if successful, false if not. + */ +bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call, + ktime_t *_ts) +{ + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + bool success = false; + + mutex_lock(&call->user_mutex); + + if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY) + goto out; + + hard_ack = call->rx_hard_ack; + if (hard_ack != 0) + goto out; + + seq = hard_ack + 1; + top = smp_load_acquire(&call->rx_top); + if (after(seq, top)) + goto out; + + skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK]; + if (!skb) + goto out; + + *_ts = skb_get_ktime(skb); + success = true; + +out: + mutex_unlock(&call->user_mutex); + return success; +} +EXPORT_SYMBOL(rxrpc_kernel_get_reply_time); diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c new file mode 100644 index 000000000..be61d6f5b --- /dev/null +++ b/net/rxrpc/rtt.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* RTT/RTO calculation. + * + * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) + * + * https://tools.ietf.org/html/rfc6298 + * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 + * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf + */ + +#include <linux/net.h> +#include "ar-internal.h" + +#define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) +#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ +#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ + +static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +{ + return 200; +} + +static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +{ + return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); +} + +static u32 rxrpc_bound_rto(u32 rto) +{ + return min(rto, RXRPC_RTO_MAX); +} + +/* + * Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. + * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ +static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +{ + long m = sample_rtt_us; /* RTT */ + u32 srtt = peer->srtt_us; + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + * + * Funny. This algorithm seems to be very broken. + * These formulae increase RTO, when it should be decreased, increase + * too slowly, when it should be increased quickly, decrease too quickly + * etc. I guess in BSD RTO takes ONE value, so that it is absolutely + * does not matter how to _calculate_ it. Seems, it was trap + * that VJ failed to avoid. 8) + */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) { + m = -m; /* m is now abs(error) */ + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + } + + peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (peer->mdev_us > peer->mdev_max_us) { + peer->mdev_max_us = peer->mdev_us; + if (peer->mdev_max_us > peer->rttvar_us) + peer->rttvar_us = peer->mdev_max_us; + } + } else { + /* no previous measure. */ + srtt = m << 3; /* take the measured time to be rtt */ + peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ + peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); + peer->mdev_max_us = peer->rttvar_us; + } + + peer->srtt_us = max(1U, srtt); +} + +/* + * Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +static void rxrpc_set_rto(struct rxrpc_peer *peer) +{ + u32 rto; + + /* 1. If rtt variance happened to be less 50msec, it is hallucination. + * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some circumstances. + */ + rto = __rxrpc_set_rto(peer); + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exactly, which we pretend to do. + */ + + /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ + peer->rto_j = rxrpc_bound_rto(rto); +} + +static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) +{ + if (rtt_us < 0) + return; + + //rxrpc_update_rtt_min(peer, rtt_us); + rxrpc_rtt_estimator(peer, rtt_us); + rxrpc_set_rto(peer); + + /* RFC6298: only reset backoff on valid RTT measurement. */ + peer->backoff = 0; +} + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + int rtt_slot, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt_us; + + rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); + if (rtt_us < 0) + return; + + spin_lock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(peer, rtt_us); + if (peer->rtt_count < 3) + peer->rtt_count++; + spin_unlock(&peer->rtt_input_lock); + + trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, + peer->srtt_us >> 3, peer->rto_j); +} + +/* + * Get the retransmission timeout to set in jiffies, backing it off each time + * we retransmit. + */ +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +{ + u64 timo_j; + u8 backoff = READ_ONCE(peer->backoff); + + timo_j = peer->rto_j; + timo_j <<= backoff; + if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) + WRITE_ONCE(peer->backoff, backoff + 1); + + if (timo_j < 1) + timo_j = 1; + + return timo_j; +} + +void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +{ + peer->rto_j = RXRPC_TIMEOUT_INIT; + peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); + peer->backoff = 0; + //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c new file mode 100644 index 000000000..5345e8eef --- /dev/null +++ b/net/rxrpc/rxkad.c @@ -0,0 +1,1309 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Kerberos-based RxRPC security + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/skcipher.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/udp.h> +#include <linux/scatterlist.h> +#include <linux/ctype.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> +#include "ar-internal.h" + +#define RXKAD_VERSION 2 +#define MAXKRB5TICKETLEN 1024 +#define RXKAD_TKT_TYPE_KERBEROS_V5 256 +#define ANAME_SZ 40 /* size of authentication name */ +#define INST_SZ 40 /* size of principal's instance */ +#define REALM_SZ 40 /* size of principal's auth domain */ +#define SNAME_SZ 40 /* size of service name */ + +struct rxkad_level1_hdr { + __be32 data_size; /* true data size (excluding padding) */ +}; + +struct rxkad_level2_hdr { + __be32 data_size; /* true data size (excluding padding) */ + __be32 checksum; /* decrypted data checksum */ +}; + +/* + * this holds a pinned cipher so that keventd doesn't get called by the cipher + * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE + * packets + */ +static struct crypto_sync_skcipher *rxkad_ci; +static struct skcipher_request *rxkad_ci_req; +static DEFINE_MUTEX(rxkad_ci_mutex); + +/* + * initialise connection security + */ +static int rxkad_init_connection_security(struct rxrpc_connection *conn) +{ + struct crypto_sync_skcipher *ci; + struct rxrpc_key_token *token; + int ret; + + _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key)); + + token = conn->params.key->payload.data[0]; + conn->security_ix = token->security_index; + + ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); + if (IS_ERR(ci)) { + _debug("no cipher"); + ret = PTR_ERR(ci); + goto error; + } + + if (crypto_sync_skcipher_setkey(ci, token->kad->session_key, + sizeof(token->kad->session_key)) < 0) + BUG(); + + switch (conn->params.security_level) { + case RXRPC_SECURITY_PLAIN: + break; + case RXRPC_SECURITY_AUTH: + conn->size_align = 8; + conn->security_size = sizeof(struct rxkad_level1_hdr); + break; + case RXRPC_SECURITY_ENCRYPT: + conn->size_align = 8; + conn->security_size = sizeof(struct rxkad_level2_hdr); + break; + default: + ret = -EKEYREJECTED; + goto error; + } + + conn->cipher = ci; + ret = 0; +error: + _leave(" = %d", ret); + return ret; +} + +/* + * prime the encryption state with the invariant parts of a connection's + * description + */ +static int rxkad_prime_packet_security(struct rxrpc_connection *conn) +{ + struct skcipher_request *req; + struct rxrpc_key_token *token; + struct scatterlist sg; + struct rxrpc_crypt iv; + __be32 *tmpbuf; + size_t tmpsize = 4 * sizeof(__be32); + + _enter(""); + + if (!conn->params.key) + return 0; + + tmpbuf = kmalloc(tmpsize, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) { + kfree(tmpbuf); + return -ENOMEM; + } + + token = conn->params.key->payload.data[0]; + memcpy(&iv, token->kad->session_key, sizeof(iv)); + + tmpbuf[0] = htonl(conn->proto.epoch); + tmpbuf[1] = htonl(conn->proto.cid); + tmpbuf[2] = 0; + tmpbuf[3] = htonl(conn->security_ix); + + sg_init_one(&sg, tmpbuf, tmpsize); + skcipher_request_set_sync_tfm(req, conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); + crypto_skcipher_encrypt(req); + skcipher_request_free(req); + + memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); + kfree(tmpbuf); + _leave(" = 0"); + return 0; +} + +/* + * Allocate and prepare the crypto request on a call. For any particular call, + * this is called serially for the packets, so no lock should be necessary. + */ +static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) +{ + struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct skcipher_request *cipher_req = call->cipher_req; + + if (!cipher_req) { + cipher_req = skcipher_request_alloc(tfm, GFP_NOFS); + if (!cipher_req) + return NULL; + call->cipher_req = cipher_req; + } + + return cipher_req; +} + +/* + * Clean up the crypto on a call. + */ +static void rxkad_free_call_crypto(struct rxrpc_call *call) +{ + if (call->cipher_req) + skcipher_request_free(call->cipher_req); + call->cipher_req = NULL; +} + +/* + * partially encrypt a packet (level 1 security) + */ +static int rxkad_secure_packet_auth(const struct rxrpc_call *call, + struct sk_buff *skb, + u32 data_size, + void *sechdr, + struct skcipher_request *req) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxkad_level1_hdr hdr; + struct rxrpc_crypt iv; + struct scatterlist sg; + u16 check; + + _enter(""); + + check = sp->hdr.seq ^ call->call_id; + data_size |= (u32)check << 16; + + hdr.data_size = htonl(data_size); + memcpy(sechdr, &hdr, sizeof(hdr)); + + /* start the encryption afresh */ + memset(&iv, 0, sizeof(iv)); + + sg_init_one(&sg, sechdr, 8); + skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); + crypto_skcipher_encrypt(req); + skcipher_request_zero(req); + + _leave(" = 0"); + return 0; +} + +/* + * wholly encrypt a packet (level 2 security) + */ +static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, + struct sk_buff *skb, + u32 data_size, + void *sechdr, + struct skcipher_request *req) +{ + const struct rxrpc_key_token *token; + struct rxkad_level2_hdr rxkhdr; + struct rxrpc_skb_priv *sp; + struct rxrpc_crypt iv; + struct scatterlist sg[16]; + unsigned int len; + u16 check; + int err; + + sp = rxrpc_skb(skb); + + _enter(""); + + check = sp->hdr.seq ^ call->call_id; + + rxkhdr.data_size = htonl(data_size | (u32)check << 16); + rxkhdr.checksum = 0; + memcpy(sechdr, &rxkhdr, sizeof(rxkhdr)); + + /* encrypt from the session key */ + token = call->conn->params.key->payload.data[0]; + memcpy(&iv, token->kad->session_key, sizeof(iv)); + + sg_init_one(&sg[0], sechdr, sizeof(rxkhdr)); + skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, &sg[0], &sg[0], sizeof(rxkhdr), iv.x); + crypto_skcipher_encrypt(req); + + /* we want to encrypt the skbuff in-place */ + err = -EMSGSIZE; + if (skb_shinfo(skb)->nr_frags > 16) + goto out; + + len = data_size + call->conn->size_align - 1; + len &= ~(call->conn->size_align - 1); + + sg_init_table(sg, ARRAY_SIZE(sg)); + err = skb_to_sgvec(skb, sg, 0, len); + if (unlikely(err < 0)) + goto out; + skcipher_request_set_crypt(req, sg, sg, len, iv.x); + crypto_skcipher_encrypt(req); + + _leave(" = 0"); + err = 0; + +out: + skcipher_request_zero(req); + return err; +} + +/* + * checksum an RxRPC packet header + */ +static int rxkad_secure_packet(struct rxrpc_call *call, + struct sk_buff *skb, + size_t data_size, + void *sechdr) +{ + struct rxrpc_skb_priv *sp; + struct skcipher_request *req; + struct rxrpc_crypt iv; + struct scatterlist sg; + u32 x, y; + int ret; + + sp = rxrpc_skb(skb); + + _enter("{%d{%x}},{#%u},%zu,", + call->debug_id, key_serial(call->conn->params.key), + sp->hdr.seq, data_size); + + if (!call->conn->cipher) + return 0; + + ret = key_validate(call->conn->params.key); + if (ret < 0) + return ret; + + req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + + /* continue encrypting from where we left off */ + memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); + + /* calculate the security checksum */ + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); + x |= sp->hdr.seq & 0x3fffffff; + call->crypto_buf[0] = htonl(call->call_id); + call->crypto_buf[1] = htonl(x); + + sg_init_one(&sg, call->crypto_buf, 8); + skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); + crypto_skcipher_encrypt(req); + skcipher_request_zero(req); + + y = ntohl(call->crypto_buf[1]); + y = (y >> 16) & 0xffff; + if (y == 0) + y = 1; /* zero checksums are not permitted */ + sp->hdr.cksum = y; + + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_PLAIN: + ret = 0; + break; + case RXRPC_SECURITY_AUTH: + ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr, + req); + break; + case RXRPC_SECURITY_ENCRYPT: + ret = rxkad_secure_packet_encrypt(call, skb, data_size, + sechdr, req); + break; + default: + ret = -EPERM; + break; + } + + _leave(" = %d [set %hx]", ret, y); + return ret; +} + +/* + * decrypt partial encryption on a packet (level 1 security) + */ +static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, + struct skcipher_request *req) +{ + struct rxkad_level1_hdr sechdr; + struct rxrpc_crypt iv; + struct scatterlist sg[16]; + bool aborted; + u32 data_size, buf; + u16 check; + int ret; + + _enter(""); + + if (len < 8) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H", + RXKADSEALEDINCON); + goto protocol_error; + } + + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ + sg_init_table(sg, ARRAY_SIZE(sg)); + ret = skb_to_sgvec(skb, sg, offset, 8); + if (unlikely(ret < 0)) + return ret; + + /* start the decryption afresh */ + memset(&iv, 0, sizeof(iv)); + + skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, 8, iv.x); + crypto_skcipher_decrypt(req); + skcipher_request_zero(req); + + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1", + RXKADDATALEN); + goto protocol_error; + } + offset += sizeof(sechdr); + len -= sizeof(sechdr); + + buf = ntohl(sechdr.data_size); + data_size = buf & 0xffff; + + check = buf >> 16; + check ^= seq ^ call->call_id; + check &= 0xffff; + if (check != 0) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C", + RXKADSEALEDINCON); + goto protocol_error; + } + + if (data_size > len) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L", + RXKADDATALEN); + goto protocol_error; + } + + _leave(" = 0 [dlen=%x]", data_size); + return 0; + +protocol_error: + if (aborted) + rxrpc_send_abort_packet(call); + return -EPROTO; +} + +/* + * wholly decrypt a packet (level 2 security) + */ +static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, + struct skcipher_request *req) +{ + const struct rxrpc_key_token *token; + struct rxkad_level2_hdr sechdr; + struct rxrpc_crypt iv; + struct scatterlist _sg[4], *sg; + bool aborted; + u32 data_size, buf; + u16 check; + int nsg, ret; + + _enter(",{%d}", skb->len); + + if (len < 8) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H", + RXKADSEALEDINCON); + goto protocol_error; + } + + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ + sg = _sg; + nsg = skb_shinfo(skb)->nr_frags + 1; + if (nsg <= 4) { + nsg = 4; + } else { + sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); + if (!sg) + goto nomem; + } + + sg_init_table(sg, nsg); + ret = skb_to_sgvec(skb, sg, offset, len); + if (unlikely(ret < 0)) { + if (sg != _sg) + kfree(sg); + return ret; + } + + /* decrypt from the session key */ + token = call->conn->params.key->payload.data[0]; + memcpy(&iv, token->kad->session_key, sizeof(iv)); + + skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); + crypto_skcipher_decrypt(req); + skcipher_request_zero(req); + if (sg != _sg) + kfree(sg); + + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2", + RXKADDATALEN); + goto protocol_error; + } + offset += sizeof(sechdr); + len -= sizeof(sechdr); + + buf = ntohl(sechdr.data_size); + data_size = buf & 0xffff; + + check = buf >> 16; + check ^= seq ^ call->call_id; + check &= 0xffff; + if (check != 0) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C", + RXKADSEALEDINCON); + goto protocol_error; + } + + if (data_size > len) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L", + RXKADDATALEN); + goto protocol_error; + } + + _leave(" = 0 [dlen=%x]", data_size); + return 0; + +protocol_error: + if (aborted) + rxrpc_send_abort_packet(call); + return -EPROTO; + +nomem: + _leave(" = -ENOMEM"); + return -ENOMEM; +} + +/* + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). + */ +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) +{ + struct skcipher_request *req; + struct rxrpc_crypt iv; + struct scatterlist sg; + bool aborted; + u16 cksum; + u32 x, y; + + _enter("{%d{%x}},{#%u}", + call->debug_id, key_serial(call->conn->params.key), seq); + + if (!call->conn->cipher) + return 0; + + req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + + /* continue encrypting from where we left off */ + memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); + + /* validate the security checksum */ + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); + x |= seq & 0x3fffffff; + call->crypto_buf[0] = htonl(call->call_id); + call->crypto_buf[1] = htonl(x); + + sg_init_one(&sg, call->crypto_buf, 8); + skcipher_request_set_sync_tfm(req, call->conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, &sg, &sg, 8, iv.x); + crypto_skcipher_encrypt(req); + skcipher_request_zero(req); + + y = ntohl(call->crypto_buf[1]); + cksum = (y >> 16) & 0xffff; + if (cksum == 0) + cksum = 1; /* zero checksums are not permitted */ + + if (cksum != expected_cksum) { + aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK", + RXKADSEALEDINCON); + goto protocol_error; + } + + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_PLAIN: + return 0; + case RXRPC_SECURITY_AUTH: + return rxkad_verify_packet_1(call, skb, offset, len, seq, req); + case RXRPC_SECURITY_ENCRYPT: + return rxkad_verify_packet_2(call, skb, offset, len, seq, req); + default: + return -ENOANO; + } + +protocol_error: + if (aborted) + rxrpc_send_abort_packet(call); + return -EPROTO; +} + +/* + * Locate the data contained in a packet that was partially encrypted. + */ +static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level1_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in a packet that was completely encrypted. + */ +static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level2_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in an already decrypted packet. + */ +static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_AUTH: + rxkad_locate_data_1(call, skb, _offset, _len); + return; + case RXRPC_SECURITY_ENCRYPT: + rxkad_locate_data_2(call, skb, _offset, _len); + return; + default: + return; + } +} + +/* + * issue a challenge + */ +static int rxkad_issue_challenge(struct rxrpc_connection *conn) +{ + struct rxkad_challenge challenge; + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[2]; + size_t len; + u32 serial; + int ret; + + _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + + ret = key_validate(conn->server_key); + if (ret < 0) + return ret; + + get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce)); + + challenge.version = htonl(2); + challenge.nonce = htonl(conn->security_nonce); + challenge.min_level = htonl(0); + challenge.__padding = 0; + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(conn->proto.cid); + whdr.callNumber = 0; + whdr.seq = 0; + whdr.type = RXRPC_PACKET_TYPE_CHALLENGE; + whdr.flags = conn->out_clientflag; + whdr.userStatus = 0; + whdr.securityIndex = conn->security_ix; + whdr._rsvd = 0; + whdr.serviceId = htons(conn->service_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &challenge; + iov[1].iov_len = sizeof(challenge); + + len = iov[0].iov_len + iov[1].iov_len; + + serial = atomic_inc_return(&conn->serial); + whdr.serial = htonl(serial); + _proto("Tx CHALLENGE %%%u", serial); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_rxkad_challenge); + return -EAGAIN; + } + + conn->params.peer->last_tx_at = ktime_get_seconds(); + trace_rxrpc_tx_packet(conn->debug_id, &whdr, + rxrpc_tx_point_rxkad_challenge); + _leave(" = 0"); + return 0; +} + +/* + * send a Kerberos security response + */ +static int rxkad_send_response(struct rxrpc_connection *conn, + struct rxrpc_host_header *hdr, + struct rxkad_response *resp, + const struct rxkad_key *s2) +{ + struct rxrpc_wire_header whdr; + struct msghdr msg; + struct kvec iov[3]; + size_t len; + u32 serial; + int ret; + + _enter(""); + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + memset(&whdr, 0, sizeof(whdr)); + whdr.epoch = htonl(hdr->epoch); + whdr.cid = htonl(hdr->cid); + whdr.type = RXRPC_PACKET_TYPE_RESPONSE; + whdr.flags = conn->out_clientflag; + whdr.securityIndex = hdr->securityIndex; + whdr.serviceId = htons(hdr->serviceId); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = resp; + iov[1].iov_len = sizeof(*resp); + iov[2].iov_base = (void *)s2->ticket; + iov[2].iov_len = s2->ticket_len; + + len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; + + serial = atomic_inc_return(&conn->serial); + whdr.serial = htonl(serial); + _proto("Tx RESPONSE %%%u", serial); + + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len); + if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_point_rxkad_response); + return -EAGAIN; + } + + conn->params.peer->last_tx_at = ktime_get_seconds(); + _leave(" = 0"); + return 0; +} + +/* + * calculate the response checksum + */ +static void rxkad_calc_response_checksum(struct rxkad_response *response) +{ + u32 csum = 1000003; + int loop; + u8 *p = (u8 *) response; + + for (loop = sizeof(*response); loop > 0; loop--) + csum = csum * 0x10204081 + *p++; + + response->encrypted.checksum = htonl(csum); +} + +/* + * encrypt the response packet + */ +static int rxkad_encrypt_response(struct rxrpc_connection *conn, + struct rxkad_response *resp, + const struct rxkad_key *s2) +{ + struct skcipher_request *req; + struct rxrpc_crypt iv; + struct scatterlist sg[1]; + + req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) + return -ENOMEM; + + /* continue encrypting from where we left off */ + memcpy(&iv, s2->session_key, sizeof(iv)); + + sg_init_table(sg, 1); + sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); + skcipher_request_set_sync_tfm(req, conn->cipher); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); + crypto_skcipher_encrypt(req); + skcipher_request_free(req); + return 0; +} + +/* + * respond to a challenge packet + */ +static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + const struct rxrpc_key_token *token; + struct rxkad_challenge challenge; + struct rxkad_response *resp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + const char *eproto; + u32 version, nonce, min_level, abort_code; + int ret; + + _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); + + eproto = tracepoint_string("chall_no_key"); + abort_code = RX_PROTOCOL_ERROR; + if (!conn->params.key) + goto protocol_error; + + abort_code = RXKADEXPIRED; + ret = key_validate(conn->params.key); + if (ret < 0) + goto other_error; + + eproto = tracepoint_string("chall_short"); + abort_code = RXKADPACKETSHORT; + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &challenge, sizeof(challenge)) < 0) + goto protocol_error; + + version = ntohl(challenge.version); + nonce = ntohl(challenge.nonce); + min_level = ntohl(challenge.min_level); + + _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", + sp->hdr.serial, version, nonce, min_level); + + eproto = tracepoint_string("chall_ver"); + abort_code = RXKADINCONSISTENCY; + if (version != RXKAD_VERSION) + goto protocol_error; + + abort_code = RXKADLEVELFAIL; + ret = -EACCES; + if (conn->params.security_level < min_level) + goto other_error; + + token = conn->params.key->payload.data[0]; + + /* build the response packet */ + resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); + if (!resp) + return -ENOMEM; + + resp->version = htonl(RXKAD_VERSION); + resp->encrypted.epoch = htonl(conn->proto.epoch); + resp->encrypted.cid = htonl(conn->proto.cid); + resp->encrypted.securityIndex = htonl(conn->security_ix); + resp->encrypted.inc_nonce = htonl(nonce + 1); + resp->encrypted.level = htonl(conn->params.security_level); + resp->kvno = htonl(token->kad->kvno); + resp->ticket_len = htonl(token->kad->ticket_len); + resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter); + resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter); + resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter); + resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter); + + /* calculate the response checksum and then do the encryption */ + rxkad_calc_response_checksum(resp); + ret = rxkad_encrypt_response(conn, resp, token->kad); + if (ret == 0) + ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); + kfree(resp); + return ret; + +protocol_error: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + ret = -EPROTO; +other_error: + *_abort_code = abort_code; + return ret; +} + +/* + * decrypt the kerberos IV ticket in the response + */ +static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, + struct sk_buff *skb, + void *ticket, size_t ticket_len, + struct rxrpc_crypt *_session_key, + time64_t *_expiry, + u32 *_abort_code) +{ + struct skcipher_request *req; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt iv, key; + struct scatterlist sg[1]; + struct in_addr addr; + unsigned int life; + const char *eproto; + time64_t issue, now; + bool little_endian; + int ret; + u32 abort_code; + u8 *p, *q, *name, *end; + + _enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key)); + + *_expiry = 0; + + ret = key_validate(conn->server_key); + if (ret < 0) { + switch (ret) { + case -EKEYEXPIRED: + abort_code = RXKADEXPIRED; + goto other_error; + default: + abort_code = RXKADNOAUTH; + goto other_error; + } + } + + ASSERT(conn->server_key->payload.data[0] != NULL); + ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); + + memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); + + ret = -ENOMEM; + req = skcipher_request_alloc(conn->server_key->payload.data[0], + GFP_NOFS); + if (!req) + goto temporary_error; + + sg_init_one(&sg[0], ticket, ticket_len); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, ticket_len, iv.x); + crypto_skcipher_decrypt(req); + skcipher_request_free(req); + + p = ticket; + end = p + ticket_len; + +#define Z(field) \ + ({ \ + u8 *__str = p; \ + eproto = tracepoint_string("rxkad_bad_"#field); \ + q = memchr(p, 0, end - p); \ + if (!q || q - p > (field##_SZ)) \ + goto bad_ticket; \ + for (; p < q; p++) \ + if (!isprint(*p)) \ + goto bad_ticket; \ + p++; \ + __str; \ + }) + + /* extract the ticket flags */ + _debug("KIV FLAGS: %x", *p); + little_endian = *p & 1; + p++; + + /* extract the authentication name */ + name = Z(ANAME); + _debug("KIV ANAME: %s", name); + + /* extract the principal's instance */ + name = Z(INST); + _debug("KIV INST : %s", name); + + /* extract the principal's authentication domain */ + name = Z(REALM); + _debug("KIV REALM: %s", name); + + eproto = tracepoint_string("rxkad_bad_len"); + if (end - p < 4 + 8 + 4 + 2) + goto bad_ticket; + + /* get the IPv4 address of the entity that requested the ticket */ + memcpy(&addr, p, sizeof(addr)); + p += 4; + _debug("KIV ADDR : %pI4", &addr); + + /* get the session key from the ticket */ + memcpy(&key, p, sizeof(key)); + p += 8; + _debug("KIV KEY : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1])); + memcpy(_session_key, &key, sizeof(key)); + + /* get the ticket's lifetime */ + life = *p++ * 5 * 60; + _debug("KIV LIFE : %u", life); + + /* get the issue time of the ticket */ + if (little_endian) { + __le32 stamp; + memcpy(&stamp, p, 4); + issue = rxrpc_u32_to_time64(le32_to_cpu(stamp)); + } else { + __be32 stamp; + memcpy(&stamp, p, 4); + issue = rxrpc_u32_to_time64(be32_to_cpu(stamp)); + } + p += 4; + now = ktime_get_real_seconds(); + _debug("KIV ISSUE: %llx [%llx]", issue, now); + + /* check the ticket is in date */ + if (issue > now) { + abort_code = RXKADNOAUTH; + ret = -EKEYREJECTED; + goto other_error; + } + + if (issue < now - life) { + abort_code = RXKADEXPIRED; + ret = -EKEYEXPIRED; + goto other_error; + } + + *_expiry = issue + life; + + /* get the service name */ + name = Z(SNAME); + _debug("KIV SNAME: %s", name); + + /* get the service instance name */ + name = Z(INST); + _debug("KIV SINST: %s", name); + return 0; + +bad_ticket: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + abort_code = RXKADBADTICKET; + ret = -EPROTO; +other_error: + *_abort_code = abort_code; + return ret; +temporary_error: + return ret; +} + +/* + * decrypt the response packet + */ +static void rxkad_decrypt_response(struct rxrpc_connection *conn, + struct rxkad_response *resp, + const struct rxrpc_crypt *session_key) +{ + struct skcipher_request *req = rxkad_ci_req; + struct scatterlist sg[1]; + struct rxrpc_crypt iv; + + _enter(",,%08x%08x", + ntohl(session_key->n[0]), ntohl(session_key->n[1])); + + mutex_lock(&rxkad_ci_mutex); + if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, + sizeof(*session_key)) < 0) + BUG(); + + memcpy(&iv, session_key, sizeof(iv)); + + sg_init_table(sg, 1); + sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted)); + skcipher_request_set_sync_tfm(req, rxkad_ci); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); + crypto_skcipher_decrypt(req); + skcipher_request_zero(req); + + mutex_unlock(&rxkad_ci_mutex); + + _leave(""); +} + +/* + * verify a response + */ +static int rxkad_verify_response(struct rxrpc_connection *conn, + struct sk_buff *skb, + u32 *_abort_code) +{ + struct rxkad_response *response; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_crypt session_key; + const char *eproto; + time64_t expiry; + void *ticket; + u32 abort_code, version, kvno, ticket_len, level; + __be32 csum; + int ret, i; + + _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); + + ret = -ENOMEM; + response = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); + if (!response) + goto temporary_error; + + eproto = tracepoint_string("rxkad_rsp_short"); + abort_code = RXKADPACKETSHORT; + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + response, sizeof(*response)) < 0) + goto protocol_error; + if (!pskb_pull(skb, sizeof(*response))) + BUG(); + + version = ntohl(response->version); + ticket_len = ntohl(response->ticket_len); + kvno = ntohl(response->kvno); + _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", + sp->hdr.serial, version, kvno, ticket_len); + + eproto = tracepoint_string("rxkad_rsp_ver"); + abort_code = RXKADINCONSISTENCY; + if (version != RXKAD_VERSION) + goto protocol_error; + + eproto = tracepoint_string("rxkad_rsp_tktlen"); + abort_code = RXKADTICKETLEN; + if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) + goto protocol_error; + + eproto = tracepoint_string("rxkad_rsp_unkkey"); + abort_code = RXKADUNKNOWNKEY; + if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) + goto protocol_error; + + /* extract the kerberos ticket and decrypt and decode it */ + ret = -ENOMEM; + ticket = kmalloc(ticket_len, GFP_NOFS); + if (!ticket) + goto temporary_error_free_resp; + + eproto = tracepoint_string("rxkad_tkt_short"); + abort_code = RXKADPACKETSHORT; + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + ticket, ticket_len) < 0) + goto protocol_error_free; + + ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, + &expiry, _abort_code); + if (ret < 0) + goto temporary_error_free_ticket; + + /* use the session key from inside the ticket to decrypt the + * response */ + rxkad_decrypt_response(conn, response, &session_key); + + eproto = tracepoint_string("rxkad_rsp_param"); + abort_code = RXKADSEALEDINCON; + if (ntohl(response->encrypted.epoch) != conn->proto.epoch) + goto protocol_error_free; + if (ntohl(response->encrypted.cid) != conn->proto.cid) + goto protocol_error_free; + if (ntohl(response->encrypted.securityIndex) != conn->security_ix) + goto protocol_error_free; + csum = response->encrypted.checksum; + response->encrypted.checksum = 0; + rxkad_calc_response_checksum(response); + eproto = tracepoint_string("rxkad_rsp_csum"); + if (response->encrypted.checksum != csum) + goto protocol_error_free; + + spin_lock(&conn->bundle->channel_lock); + for (i = 0; i < RXRPC_MAXCALLS; i++) { + struct rxrpc_call *call; + u32 call_id = ntohl(response->encrypted.call_id[i]); + + eproto = tracepoint_string("rxkad_rsp_callid"); + if (call_id > INT_MAX) + goto protocol_error_unlock; + + eproto = tracepoint_string("rxkad_rsp_callctr"); + if (call_id < conn->channels[i].call_counter) + goto protocol_error_unlock; + + eproto = tracepoint_string("rxkad_rsp_callst"); + if (call_id > conn->channels[i].call_counter) { + call = rcu_dereference_protected( + conn->channels[i].call, + lockdep_is_held(&conn->bundle->channel_lock)); + if (call && call->state < RXRPC_CALL_COMPLETE) + goto protocol_error_unlock; + conn->channels[i].call_counter = call_id; + } + } + spin_unlock(&conn->bundle->channel_lock); + + eproto = tracepoint_string("rxkad_rsp_seq"); + abort_code = RXKADOUTOFSEQUENCE; + if (ntohl(response->encrypted.inc_nonce) != conn->security_nonce + 1) + goto protocol_error_free; + + eproto = tracepoint_string("rxkad_rsp_level"); + abort_code = RXKADLEVELFAIL; + level = ntohl(response->encrypted.level); + if (level > RXRPC_SECURITY_ENCRYPT) + goto protocol_error_free; + conn->params.security_level = level; + + /* create a key to hold the security data and expiration time - after + * this the connection security can be handled in exactly the same way + * as for a client connection */ + ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno); + if (ret < 0) + goto temporary_error_free_ticket; + + kfree(ticket); + kfree(response); + _leave(" = 0"); + return 0; + +protocol_error_unlock: + spin_unlock(&conn->bundle->channel_lock); +protocol_error_free: + kfree(ticket); +protocol_error: + kfree(response); + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto); + *_abort_code = abort_code; + return -EPROTO; + +temporary_error_free_ticket: + kfree(ticket); +temporary_error_free_resp: + kfree(response); +temporary_error: + /* Ignore the response packet if we got a temporary error such as + * ENOMEM. We just want to send the challenge again. Note that we + * also come out this way if the ticket decryption fails. + */ + return ret; +} + +/* + * clear the connection security + */ +static void rxkad_clear(struct rxrpc_connection *conn) +{ + _enter(""); + + if (conn->cipher) + crypto_free_sync_skcipher(conn->cipher); +} + +/* + * Initialise the rxkad security service. + */ +static int rxkad_init(void) +{ + struct crypto_sync_skcipher *tfm; + struct skcipher_request *req; + + /* pin the cipher we need so that the crypto layer doesn't invoke + * keventd to go get it */ + tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = skcipher_request_alloc(&tfm->base, GFP_KERNEL); + if (!req) + goto nomem_tfm; + + rxkad_ci_req = req; + rxkad_ci = tfm; + return 0; + +nomem_tfm: + crypto_free_sync_skcipher(tfm); + return -ENOMEM; +} + +/* + * Clean up the rxkad security service. + */ +static void rxkad_exit(void) +{ + crypto_free_sync_skcipher(rxkad_ci); + skcipher_request_free(rxkad_ci_req); +} + +/* + * RxRPC Kerberos-based security + */ +const struct rxrpc_security rxkad = { + .name = "rxkad", + .security_index = RXRPC_SECURITY_RXKAD, + .no_key_abort = RXKADUNKNOWNKEY, + .init = rxkad_init, + .exit = rxkad_exit, + .init_connection_security = rxkad_init_connection_security, + .prime_packet_security = rxkad_prime_packet_security, + .secure_packet = rxkad_secure_packet, + .verify_packet = rxkad_verify_packet, + .free_call_crypto = rxkad_free_call_crypto, + .locate_data = rxkad_locate_data, + .issue_challenge = rxkad_issue_challenge, + .respond_to_challenge = rxkad_respond_to_challenge, + .verify_response = rxkad_verify_response, + .clear = rxkad_clear, +}; diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c new file mode 100644 index 000000000..9b1fb9ed0 --- /dev/null +++ b/net/rxrpc/security.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC security handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <linux/udp.h> +#include <linux/crypto.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include <keys/rxrpc-type.h> +#include "ar-internal.h" + +static const struct rxrpc_security *rxrpc_security_types[] = { + [RXRPC_SECURITY_NONE] = &rxrpc_no_security, +#ifdef CONFIG_RXKAD + [RXRPC_SECURITY_RXKAD] = &rxkad, +#endif +}; + +int __init rxrpc_init_security(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) { + if (rxrpc_security_types[i]) { + ret = rxrpc_security_types[i]->init(); + if (ret < 0) + goto failed; + } + } + + return 0; + +failed: + for (i--; i >= 0; i--) + if (rxrpc_security_types[i]) + rxrpc_security_types[i]->exit(); + return ret; +} + +void rxrpc_exit_security(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rxrpc_security_types); i++) + if (rxrpc_security_types[i]) + rxrpc_security_types[i]->exit(); +} + +/* + * look up an rxrpc security module + */ +static const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) +{ + if (security_index >= ARRAY_SIZE(rxrpc_security_types)) + return NULL; + return rxrpc_security_types[security_index]; +} + +/* + * initialise the security on a client connection + */ +int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) +{ + const struct rxrpc_security *sec; + struct rxrpc_key_token *token; + struct key *key = conn->params.key; + int ret; + + _enter("{%d},{%x}", conn->debug_id, key_serial(key)); + + if (!key) + return 0; + + ret = key_validate(key); + if (ret < 0) + return ret; + + token = key->payload.data[0]; + if (!token) + return -EKEYREJECTED; + + sec = rxrpc_security_lookup(token->security_index); + if (!sec) + return -EKEYREJECTED; + conn->security = sec; + + ret = conn->security->init_connection_security(conn); + if (ret < 0) { + conn->security = &rxrpc_no_security; + return ret; + } + + _leave(" = 0"); + return 0; +} + +/* + * Find the security key for a server connection. + */ +bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock *rx, + const struct rxrpc_security **_sec, + struct key **_key, + struct sk_buff *skb) +{ + const struct rxrpc_security *sec; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + key_ref_t kref = NULL; + char kdesc[5 + 1 + 3 + 1]; + + _enter(""); + + sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex); + + sec = rxrpc_security_lookup(sp->hdr.securityIndex); + if (!sec) { + trace_rxrpc_abort(0, "SVS", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EKEYREJECTED); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = RX_INVALID_OPERATION; + return false; + } + + if (sp->hdr.securityIndex == RXRPC_SECURITY_NONE) + goto out; + + if (!rx->securities) { + trace_rxrpc_abort(0, "SVR", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EKEYREJECTED); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = RX_INVALID_OPERATION; + return false; + } + + /* look through the service's keyring */ + kref = keyring_search(make_key_ref(rx->securities, 1UL), + &key_type_rxrpc_s, kdesc, true); + if (IS_ERR(kref)) { + trace_rxrpc_abort(0, "SVK", + sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + sec->no_key_abort, EKEYREJECTED); + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; + skb->priority = sec->no_key_abort; + return false; + } + +out: + *_sec = sec; + *_key = key_ref_to_ptr(kref); + return true; +} diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c new file mode 100644 index 000000000..1882fea71 --- /dev/null +++ b/net/rxrpc/sendmsg.c @@ -0,0 +1,899 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* AF_RXRPC sendmsg() implementation. + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/gfp.h> +#include <linux/skbuff.h> +#include <linux/export.h> +#include <linux/sched/signal.h> + +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +/* + * Return true if there's sufficient Tx queue space. + */ +static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) +{ + unsigned int win_size = + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra); + rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack); + + if (_tx_win) + *_tx_win = tx_win; + return call->tx_top - tx_win < win_size; +} + +/* + * Wait for space to appear in the Tx queue or a signal to occur. + */ +static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (rxrpc_check_tx_space(call, NULL)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + if (signal_pending(current)) + return sock_intr_errno(*timeo); + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + *timeo = schedule_timeout(*timeo); + } +} + +/* + * Wait for space to appear in the Tx queue uninterruptibly, but with + * a timeout of 2*RTT if no progress was made and a signal occurred. + */ +static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, + struct rxrpc_call *call) +{ + rxrpc_seq_t tx_start, tx_win; + signed long rtt, timeout; + + rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = usecs_to_jiffies(rtt) * 2; + if (rtt < 2) + rtt = 2; + + timeout = rtt; + tx_start = READ_ONCE(call->tx_hard_ack); + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + + tx_win = READ_ONCE(call->tx_hard_ack); + if (rxrpc_check_tx_space(call, &tx_win)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + if (timeout == 0 && + tx_win == tx_start && signal_pending(current)) + return -EINTR; + + if (tx_win != tx_start) { + timeout = rtt; + tx_start = tx_win; + } + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + timeout = schedule_timeout(timeout); + } +} + +/* + * Wait for space to appear in the Tx queue uninterruptibly. + */ +static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (rxrpc_check_tx_space(call, NULL)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + *timeo = schedule_timeout(*timeo); + } +} + +/* + * wait for space to appear in the transmit/ACK window + * - caller holds the socket locked + */ +static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo, + bool waitall) +{ + DECLARE_WAITQUEUE(myself, current); + int ret; + + _enter(",{%u,%u,%u}", + call->tx_hard_ack, call->tx_top, call->tx_winsize); + + add_wait_queue(&call->waitq, &myself); + + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + if (waitall) + ret = rxrpc_wait_for_tx_window_waitall(rx, call); + else + ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); + break; + case RXRPC_PREINTERRUPTIBLE: + case RXRPC_UNINTERRUPTIBLE: + default: + ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo); + break; + } + + remove_wait_queue(&call->waitq, &myself); + set_current_state(TASK_RUNNING); + _leave(" = %d", ret); + return ret; +} + +/* + * Schedule an instant Tx resend. + */ +static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) +{ + spin_lock_bh(&call->lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + call->rxtx_annotations[ix] = + (call->rxtx_annotations[ix] & RXRPC_TX_ANNO_LAST) | + RXRPC_TX_ANNO_RETRANS; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + } + + spin_unlock_bh(&call->lock); +} + +/* + * Notify the owner of the call that the transmit phase is ended and the last + * packet has been queued. + */ +static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, + rxrpc_notify_end_tx_t notify_end_tx) +{ + if (notify_end_tx) + notify_end_tx(&rx->sk, call, call->user_call_ID); +} + +/* + * Queue a DATA packet for transmission, set the resend timeout and send + * the packet immediately. Returns the error from rxrpc_send_data_packet() + * in case the caller wants to do something with it. + */ +static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned long now; + rxrpc_seq_t seq = sp->hdr.seq; + int ret, ix; + u8 annotation = RXRPC_TX_ANNO_UNACK; + + _net("queue skb %p [%d]", skb, seq); + + ASSERTCMP(seq, ==, call->tx_top + 1); + + if (last) + annotation |= RXRPC_TX_ANNO_LAST; + + /* We have to set the timestamp before queueing as the retransmit + * algorithm can see the packet as soon as we queue it. + */ + skb->tstamp = ktime_get_real(); + + ix = seq & RXRPC_RXTX_BUFF_MASK; + rxrpc_get_skb(skb, rxrpc_skb_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + call->tx_top = seq; + if (last) + trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + else + trace_rxrpc_transmit(call, rxrpc_transmit_queue); + + if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { + _debug("________awaiting reply/ACK__________"); + write_lock_bh(&call->state_lock); + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + rxrpc_notify_end_tx(rx, call, notify_end_tx); + break; + case RXRPC_CALL_SERVER_ACK_REQUEST: + call->state = RXRPC_CALL_SERVER_SEND_REPLY; + now = jiffies; + WRITE_ONCE(call->ack_at, now + MAX_JIFFY_OFFSET); + if (call->ackr_reason == RXRPC_ACK_DELAY) + call->ackr_reason = 0; + trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now); + if (!last) + break; + fallthrough; + case RXRPC_CALL_SERVER_SEND_REPLY: + call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + rxrpc_notify_end_tx(rx, call, notify_end_tx); + break; + default: + break; + } + write_unlock_bh(&call->state_lock); + } + + if (seq == 1 && rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + + ret = rxrpc_send_data_packet(call, skb, false); + if (ret < 0) { + switch (ret) { + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + 0, ret); + goto out; + } + _debug("need instant resend %d", ret); + rxrpc_instant_resend(call, ix); + } else { + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; + + WRITE_ONCE(call->resend_at, resend_at); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_send); + } + +out: + rxrpc_free_skb(skb, rxrpc_skb_freed); + _leave(" = %d", ret); + return ret; +} + +/* + * send data through a socket + * - must be called in process context + * - The caller holds the call user access mutex, but not the socket lock. + */ +static int rxrpc_send_data(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, size_t len, + rxrpc_notify_end_tx_t notify_end_tx, + bool *_dropped_lock) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + struct sock *sk = &rx->sk; + enum rxrpc_call_state state; + long timeo; + bool more = msg->msg_flags & MSG_MORE; + int ret, copied = 0; + + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + /* this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + +reload: + ret = -EPIPE; + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto maybe_error; + state = READ_ONCE(call->state); + ret = -ESHUTDOWN; + if (state >= RXRPC_CALL_COMPLETE) + goto maybe_error; + ret = -EPROTO; + if (state != RXRPC_CALL_CLIENT_SEND_REQUEST && + state != RXRPC_CALL_SERVER_ACK_REQUEST && + state != RXRPC_CALL_SERVER_SEND_REPLY) + goto maybe_error; + + ret = -EMSGSIZE; + if (call->tx_total_len != -1) { + if (len - copied > call->tx_total_len) + goto maybe_error; + if (!more && len - copied != call->tx_total_len) + goto maybe_error; + } + + skb = call->tx_pending; + call->tx_pending = NULL; + rxrpc_see_skb(skb, rxrpc_skb_seen); + + do { + /* Check to see if there's a ping ACK to reply to. */ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_send_ack_packet(call, false, NULL); + + if (!skb) { + size_t size, chunk, max, space; + + _debug("alloc"); + + if (!rxrpc_check_tx_space(call, NULL)) + goto wait_for_space; + + max = RXRPC_JUMBO_DATALEN; + max -= call->conn->security_size; + max &= ~(call->conn->size_align - 1UL); + + chunk = max; + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); + + space = chunk + call->conn->size_align; + space &= ~(call->conn->size_align - 1UL); + + size = space + call->conn->security_size; + + _debug("SIZE: %zu/%zu/%zu", chunk, space, size); + + /* create a buffer that we can retain until it's ACK'd */ + skb = sock_alloc_send_skb( + sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto maybe_error; + + sp = rxrpc_skb(skb); + sp->rx_flags |= RXRPC_SKB_TX_BUFFER; + rxrpc_new_skb(skb, rxrpc_skb_new); + + _debug("ALLOC SEND %p", skb); + + ASSERTCMP(skb->mark, ==, 0); + + _debug("HS: %u", call->conn->security_size); + skb_reserve(skb, call->conn->security_size); + skb->len += call->conn->security_size; + + sp->remain = chunk; + if (sp->remain > skb_tailroom(skb)) + sp->remain = skb_tailroom(skb); + + _net("skb: hr %d, tr %d, hl %d, rm %d", + skb_headroom(skb), + skb_tailroom(skb), + skb_headlen(skb), + sp->remain); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + _debug("append"); + sp = rxrpc_skb(skb); + + /* append next segment of data to the current buffer */ + if (msg_data_left(msg) > 0) { + int copy = skb_tailroom(skb); + ASSERTCMP(copy, >, 0); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); + if (copy > sp->remain) + copy = sp->remain; + + _debug("add"); + ret = skb_add_data(skb, &msg->msg_iter, copy); + _debug("added"); + if (ret < 0) + goto efault; + sp->remain -= copy; + skb->mark += copy; + copied += copy; + if (call->tx_total_len != -1) + call->tx_total_len -= copy; + } + + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + + /* add the packet to the send queue if it's now full */ + if (sp->remain <= 0 || + (msg_data_left(msg) == 0 && !more)) { + struct rxrpc_connection *conn = call->conn; + uint32_t seq; + size_t pad; + + /* pad out if we're using security */ + if (conn->security_ix) { + pad = conn->security_size + skb->mark; + pad = conn->size_align - pad; + pad &= conn->size_align - 1; + _debug("pad %zu", pad); + if (pad) + skb_put_zero(skb, pad); + } + + seq = call->tx_top + 1; + + sp->hdr.seq = seq; + sp->hdr._rsvd = 0; + sp->hdr.flags = conn->out_clientflag; + + if (msg_data_left(msg) == 0 && !more) + sp->hdr.flags |= RXRPC_LAST_PACKET; + else if (call->tx_top - call->tx_hard_ack < + call->tx_winsize) + sp->hdr.flags |= RXRPC_MORE_PACKETS; + + ret = call->security->secure_packet( + call, skb, skb->mark, skb->head); + if (ret < 0) + goto out; + + ret = rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); + /* Should check for failure here */ + skb = NULL; + } + } while (msg_data_left(msg) > 0); + +success: + ret = copied; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { + read_lock_bh(&call->state_lock); + if (call->error < 0) + ret = call->error; + read_unlock_bh(&call->state_lock); + } +out: + call->tx_pending = skb; + _leave(" = %d", ret); + return ret; + +call_terminated: + rxrpc_free_skb(skb, rxrpc_skb_freed); + _leave(" = %d", call->error); + return call->error; + +maybe_error: + if (copied) + goto success; + goto out; + +efault: + ret = -EFAULT; + goto out; + +wait_for_space: + ret = -EAGAIN; + if (msg->msg_flags & MSG_DONTWAIT) + goto maybe_error; + mutex_unlock(&call->user_mutex); + *_dropped_lock = true; + ret = rxrpc_wait_for_tx_window(rx, call, &timeo, + msg->msg_flags & MSG_WAITALL); + if (ret < 0) + goto maybe_error; + if (call->interruptibility == RXRPC_INTERRUPTIBLE) { + if (mutex_lock_interruptible(&call->user_mutex) < 0) { + ret = sock_intr_errno(timeo); + goto maybe_error; + } + } else { + mutex_lock(&call->user_mutex); + } + *_dropped_lock = false; + goto reload; +} + +/* + * extract control messages from the sendmsg() control buffer + */ +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p) +{ + struct cmsghdr *cmsg; + bool got_user_ID = false; + int len; + + if (msg->msg_controllen == 0) + return -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + len = cmsg->cmsg_len - sizeof(struct cmsghdr); + _debug("CMSG %d, %d, %d", + cmsg->cmsg_level, cmsg->cmsg_type, len); + + if (cmsg->cmsg_level != SOL_RXRPC) + continue; + + switch (cmsg->cmsg_type) { + case RXRPC_USER_CALL_ID: + if (msg->msg_flags & MSG_CMSG_COMPAT) { + if (len != sizeof(u32)) + return -EINVAL; + p->call.user_call_ID = *(u32 *)CMSG_DATA(cmsg); + } else { + if (len != sizeof(unsigned long)) + return -EINVAL; + p->call.user_call_ID = *(unsigned long *) + CMSG_DATA(cmsg); + } + got_user_ID = true; + break; + + case RXRPC_ABORT: + if (p->command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + p->command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(p->abort_code)) + return -EINVAL; + p->abort_code = *(unsigned int *)CMSG_DATA(cmsg); + if (p->abort_code == 0) + return -EINVAL; + break; + + case RXRPC_CHARGE_ACCEPT: + if (p->command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + p->command = RXRPC_CMD_CHARGE_ACCEPT; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_EXCLUSIVE_CALL: + p->exclusive = true; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_UPGRADE_SERVICE: + p->upgrade = true; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_TX_LENGTH: + if (p->call.tx_total_len != -1 || len != sizeof(__s64)) + return -EINVAL; + p->call.tx_total_len = *(__s64 *)CMSG_DATA(cmsg); + if (p->call.tx_total_len < 0) + return -EINVAL; + break; + + case RXRPC_SET_CALL_TIMEOUT: + if (len & 3 || len < 4 || len > 12) + return -EINVAL; + memcpy(&p->call.timeouts, CMSG_DATA(cmsg), len); + p->call.nr_timeouts = len / 4; + if (p->call.timeouts.hard > INT_MAX / HZ) + return -ERANGE; + if (p->call.nr_timeouts >= 2 && p->call.timeouts.idle > 60 * 60 * 1000) + return -ERANGE; + if (p->call.nr_timeouts >= 3 && p->call.timeouts.normal > 60 * 60 * 1000) + return -ERANGE; + break; + + default: + return -EINVAL; + } + } + + if (!got_user_ID) + return -EINVAL; + if (p->call.tx_total_len != -1 && p->command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + _leave(" = 0"); + return 0; +} + +/* + * Create a new client call for sendmsg(). + * - Called with the socket lock held, which it must release. + * - If it returns a call, the call's lock will need releasing by the caller. + */ +static struct rxrpc_call * +rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, + struct rxrpc_send_params *p) + __releases(&rx->sk.sk_lock.slock) + __acquires(&call->user_mutex) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_call *call; + struct key *key; + + DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); + + _enter(""); + + if (!msg->msg_name) { + release_sock(&rx->sk); + return ERR_PTR(-EDESTADDRREQ); + } + + key = rx->key; + if (key && !rx->key->payload.data[0]) + key = NULL; + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = rx->key; + cp.security_level = rx->min_sec_level; + cp.exclusive = rx->exclusive | p->exclusive; + cp.upgrade = p->upgrade; + cp.service_id = srx->srx_service; + call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); + /* The socket is now unlocked */ + + rxrpc_put_peer(cp.peer); + _leave(" = %p\n", call); + return call; +} + +/* + * send a message forming part of a client call through an RxRPC socket + * - caller holds the socket locked + * - the socket may be either a client socket or a server socket + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) + __releases(&rx->sk.sk_lock.slock) + __releases(&call->user_mutex) +{ + enum rxrpc_call_state state; + struct rxrpc_call *call; + unsigned long now, j; + bool dropped_lock = false; + int ret; + + struct rxrpc_send_params p = { + .call.tx_total_len = -1, + .call.user_call_ID = 0, + .call.nr_timeouts = 0, + .call.interruptibility = RXRPC_INTERRUPTIBLE, + .abort_code = 0, + .command = RXRPC_CMD_SEND_DATA, + .exclusive = false, + .upgrade = false, + }; + + _enter(""); + + ret = rxrpc_sendmsg_cmsg(msg, &p); + if (ret < 0) + goto error_release_sock; + + if (p.command == RXRPC_CMD_CHARGE_ACCEPT) { + ret = -EINVAL; + if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) + goto error_release_sock; + ret = rxrpc_user_charge_accept(rx, p.call.user_call_ID); + goto error_release_sock; + } + + call = rxrpc_find_call_by_user_ID(rx, p.call.user_call_ID); + if (!call) { + ret = -EBADSLT; + if (p.command != RXRPC_CMD_SEND_DATA) + goto error_release_sock; + call = rxrpc_new_client_call_for_sendmsg(rx, msg, &p); + /* The socket is now unlocked... */ + if (IS_ERR(call)) + return PTR_ERR(call); + /* ... and we have the call lock. */ + ret = 0; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + goto out_put_unlock; + } else { + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_UNINITIALISED: + case RXRPC_CALL_CLIENT_AWAIT_CONN: + case RXRPC_CALL_SERVER_PREALLOC: + case RXRPC_CALL_SERVER_SECURING: + rxrpc_put_call(call, rxrpc_call_put); + ret = -EBUSY; + goto error_release_sock; + default: + break; + } + + ret = mutex_lock_interruptible(&call->user_mutex); + release_sock(&rx->sk); + if (ret < 0) { + ret = -ERESTARTSYS; + goto error_put; + } + + if (p.call.tx_total_len != -1) { + ret = -EINVAL; + if (call->tx_total_len != -1 || + call->tx_pending || + call->tx_top != 0) + goto out_put_unlock; + call->tx_total_len = p.call.tx_total_len; + } + } + + switch (p.call.nr_timeouts) { + case 3: + j = msecs_to_jiffies(p.call.timeouts.normal); + if (p.call.timeouts.normal > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_rx_timo, j); + fallthrough; + case 2: + j = msecs_to_jiffies(p.call.timeouts.idle); + if (p.call.timeouts.idle > 0 && j == 0) + j = 1; + WRITE_ONCE(call->next_req_timo, j); + fallthrough; + case 1: + if (p.call.timeouts.hard > 0) { + j = p.call.timeouts.hard * HZ; + now = jiffies; + j += now; + WRITE_ONCE(call->expect_term_by, j); + rxrpc_reduce_call_timer(call, j, now, + rxrpc_timer_set_for_hard); + } + break; + } + + state = READ_ONCE(call->state); + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, state, call->conn); + + if (state >= RXRPC_CALL_COMPLETE) { + /* it's too late for this call */ + ret = -ESHUTDOWN; + } else if (p.command == RXRPC_CMD_SEND_ABORT) { + ret = 0; + if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED)) + ret = rxrpc_send_abort_packet(call); + } else if (p.command != RXRPC_CMD_SEND_DATA) { + ret = -EINVAL; + } else { + ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock); + } + +out_put_unlock: + if (!dropped_lock) + mutex_unlock(&call->user_mutex); +error_put: + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ret; + +error_release_sock: + release_sock(&rx->sk); + return ret; +} + +/** + * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on + * @call: The call to send data through + * @msg: The data to send + * @len: The amount of data to send + * @notify_end_tx: Notification that the last packet is queued. + * + * Allow a kernel service to send data on a call. The call must be in an state + * appropriate to sending data. No control data should be supplied in @msg, + * nor should an address be supplied. MSG_MORE should be flagged if there's + * more data to come, otherwise this data will end the transmission phase. + */ +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len, + rxrpc_notify_end_tx_t notify_end_tx) +{ + bool dropped_lock = false; + int ret; + + _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + + ASSERTCMP(msg->msg_name, ==, NULL); + ASSERTCMP(msg->msg_control, ==, NULL); + + mutex_lock(&call->user_mutex); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + case RXRPC_CALL_SERVER_SEND_REPLY: + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, + notify_end_tx, &dropped_lock); + break; + case RXRPC_CALL_COMPLETE: + read_lock_bh(&call->state_lock); + ret = call->error; + read_unlock_bh(&call->state_lock); + break; + default: + /* Request phase complete for this client call */ + trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send")); + ret = -EPROTO; + break; + } + + if (!dropped_lock) + mutex_unlock(&call->user_mutex); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(rxrpc_kernel_send_data); + +/** + * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on + * @call: The call to be aborted + * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: 3-char string indicating why. + * + * Allow a kernel service to abort a call, if it's still in an abortable state + * and return true if the call was aborted, false if it was already complete. + */ +bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code, int error, const char *why) +{ + bool aborted; + + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + + mutex_lock(&call->user_mutex); + + aborted = rxrpc_abort_call(why, call, 0, abort_code, error); + if (aborted) + rxrpc_send_abort_packet(call); + + mutex_unlock(&call->user_mutex); + return aborted; +} +EXPORT_SYMBOL(rxrpc_kernel_abort_call); + +/** + * rxrpc_kernel_set_tx_length - Set the total Tx length on a call + * @sock: The socket the call is on + * @call: The call to be informed + * @tx_total_len: The amount of data to be transmitted for this call + * + * Allow a kernel service to set the total transmit length on a call. This + * allows buffer-to-packet encrypt-and-copy to be performed. + * + * This function is primarily for use for setting the reply length since the + * request length can be set when beginning the call. + */ +void rxrpc_kernel_set_tx_length(struct socket *sock, struct rxrpc_call *call, + s64 tx_total_len) +{ + WARN_ON(call->tx_total_len != -1); + call->tx_total_len = tx_total_len; +} +EXPORT_SYMBOL(rxrpc_kernel_set_tx_length); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c new file mode 100644 index 000000000..580a5acff --- /dev/null +++ b/net/rxrpc/skbuff.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* ar-skbuff.c: socket buffer destruction handling + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/net.h> +#include <linux/skbuff.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER) +#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) + +/* + * Note the allocation or reception of a socket buffer. + */ +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); +} + +/* + * Note the re-emergence of a socket buffer from a queue or buffer. + */ +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n = atomic_read(select_skb_count(skb)); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); + } +} + +/* + * Note the addition of a ref on a socket buffer. + */ +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); + skb_get(skb); +} + +/* + * Note the dropping of a ref on a socket buffer by the core. + */ +void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&rxrpc_n_rx_skbs); + trace_rxrpc_skb(skb, op, 0, n, 0, here); +} + +/* + * Note the destruction of a socket buffer. + */ +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + n = atomic_dec_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); + kfree_skb(skb); + } +} + +/* + * Clear a queue of socket buffers. + */ +void rxrpc_purge_queue(struct sk_buff_head *list) +{ + const void *here = __builtin_return_address(0); + struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { + int n = atomic_dec_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, rxrpc_skb_purged, + refcount_read(&skb->users), n, + rxrpc_skb(skb)->rx_flags, here); + kfree_skb(skb); + } +} diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c new file mode 100644 index 000000000..555e09107 --- /dev/null +++ b/net/rxrpc/sysctl.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* sysctls for configuring RxRPC operating parameters + * + * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/sysctl.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +static struct ctl_table_header *rxrpc_sysctl_reg_table; +static const unsigned int four = 4; +static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; +static const unsigned int n_65535 = 65535; +static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; +static const unsigned long one_jiffy = 1; +static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; + +/* + * RxRPC operating parameters. + * + * See Documentation/networking/rxrpc.rst and the variable definitions for more + * information on the individual parameters. + */ +static struct ctl_table rxrpc_sysctl_table[] = { + /* Values measured in milliseconds but used in jiffies */ + { + .procname = "req_ack_delay", + .data = &rxrpc_requested_ack_delay, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, + }, + { + .procname = "soft_ack_delay", + .data = &rxrpc_soft_ack_delay, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, + }, + { + .procname = "idle_ack_delay", + .data = &rxrpc_idle_ack_delay, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, + }, + { + .procname = "idle_conn_expiry", + .data = &rxrpc_conn_idle_client_expiry, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, + }, + { + .procname = "idle_conn_fast_expiry", + .data = &rxrpc_conn_idle_client_fast_expiry, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, + .extra1 = (void *)&one_jiffy, + .extra2 = (void *)&max_jiffies, + }, + + /* Non-time values */ + { + .procname = "reap_client_conns", + .data = &rxrpc_reap_client_connections, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + .extra2 = (void *)&n_65535, + }, + { + .procname = "max_backlog", + .data = &rxrpc_max_backlog, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&four, + .extra2 = (void *)&max_backlog, + }, + { + .procname = "rx_window_size", + .data = &rxrpc_rx_window_size, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + .extra2 = (void *)&n_max_acks, + }, + { + .procname = "rx_mtu", + .data = &rxrpc_rx_mtu, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + .extra2 = (void *)&n_65535, + }, + { + .procname = "rx_jumbo_max", + .data = &rxrpc_rx_jumbo_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + .extra2 = (void *)&four, + }, + + { } +}; + +int __init rxrpc_sysctl_init(void) +{ + rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc", + rxrpc_sysctl_table); + if (!rxrpc_sysctl_reg_table) + return -ENOMEM; + return 0; +} + +void rxrpc_sysctl_exit(void) +{ + if (rxrpc_sysctl_reg_table) + unregister_net_sysctl_table(rxrpc_sysctl_reg_table); +} diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c new file mode 100644 index 000000000..2e4b9d86e --- /dev/null +++ b/net/rxrpc/utils.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Utility routines + * + * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include "ar-internal.h" + +/* + * Fill out a peer address from a socket buffer containing a packet. + */ +int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) +{ + memset(srx, 0, sizeof(*srx)); + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_port = udp_hdr(skb)->source; + srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + return 0; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case ETH_P_IPV6: + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = udp_hdr(skb)->source; + srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr; + return 0; +#endif + + default: + pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n", + ntohs(skb->protocol)); + return -EAFNOSUPPORT; + } +} |