diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:02:30 +0000 |
commit | 76cb841cb886eef6b3bee341a2266c76578724ad (patch) | |
tree | f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /net/tipc | |
parent | Initial commit. (diff) | |
download | linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip |
Adding upstream version 4.19.249. (refs: upstream/4.19.249, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'net/tipc')
43 files changed, 21687 insertions, 0 deletions
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig new file mode 100644 index 000000000..e45021212 --- /dev/null +++ b/net/tipc/Kconfig @@ -0,0 +1,44 @@ +# +# TIPC configuration +# + +menuconfig TIPC + tristate "The TIPC Protocol" + depends on INET + ---help--- + The Transparent Inter Process Communication (TIPC) protocol is + specially designed for intra cluster communication. This protocol + originates from Ericsson where it has been used in carrier grade + cluster applications for many years. + + For more information about TIPC, see http://tipc.sourceforge.net. + + This protocol support is also available as a module ( = code which + can be inserted in and removed from the running kernel whenever you + want). The module will be called tipc. If you want to compile it + as a module, say M here and read <file:Documentation/kbuild/modules.txt>. + + If in doubt, say N. + +config TIPC_MEDIA_IB + bool "InfiniBand media type support" + depends on TIPC && INFINIBAND_IPOIB + help + Saying Y here will enable support for running TIPC on + IP-over-InfiniBand devices. +config TIPC_MEDIA_UDP + bool "IP/UDP media type support" + depends on TIPC + select NET_UDP_TUNNEL + help + Saying Y here will enable support for running TIPC over IP/UDP + bool + default y + +config TIPC_DIAG + tristate "TIPC: socket monitoring interface" + depends on TIPC + default y + ---help--- + Support for TIPC socket monitoring interface used by ss tool. + If unsure, say Y. 
diff --git a/net/tipc/Makefile b/net/tipc/Makefile new file mode 100644 index 000000000..aca168f2a --- /dev/null +++ b/net/tipc/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux TIPC layer +# + +obj-$(CONFIG_TIPC) := tipc.o + +tipc-y += addr.o bcast.o bearer.o \ + core.o link.o discover.o msg.o \ + name_distr.o subscr.o monitor.o name_table.o net.o \ + netlink.o netlink_compat.o node.o socket.o eth_media.o \ + topsrv.o socket.o group.o + +tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o +tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o +tipc-$(CONFIG_SYSCTL) += sysctl.o + + +obj-$(CONFIG_TIPC_DIAG) += diag.o + +tipc_diag-y := diag.o diff --git a/net/tipc/addr.c b/net/tipc/addr.c new file mode 100644 index 000000000..0f1eaed1b --- /dev/null +++ b/net/tipc/addr.c @@ -0,0 +1,124 @@ +/* + * net/tipc/addr.c: TIPC address utility routines + * + * Copyright (c) 2000-2006, 2018, Ericsson AB + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "addr.h" +#include "core.h" + +bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr) +{ + if (!domain || (domain == addr)) + return true; + if (!legacy_format) + return false; + if (domain == tipc_cluster_mask(addr)) /* domain <Z.C.0> */ + return true; + if (domain == (addr & TIPC_ZONE_CLUSTER_MASK)) /* domain <Z.C.0> */ + return true; + if (domain == (addr & TIPC_ZONE_MASK)) /* domain <Z.0.0> */ + return true; + return false; +} + +void tipc_set_node_id(struct net *net, u8 *id) +{ + struct tipc_net *tn = tipc_net(net); + u32 *tmp = (u32 *)id; + + memcpy(tn->node_id, id, NODE_ID_LEN); + tipc_nodeid2string(tn->node_id_string, id); + tn->trial_addr = tmp[0] ^ tmp[1] ^ tmp[2] ^ tmp[3]; + pr_info("Own node identity %s, cluster identity %u\n", + tipc_own_id_string(net), tn->net_id); +} + +void tipc_set_node_addr(struct net *net, u32 addr) +{ + struct tipc_net *tn = tipc_net(net); + u8 node_id[NODE_ID_LEN] = {0,}; + + tn->node_addr = addr; + if (!tipc_own_id(net)) { + sprintf(node_id, "%x", addr); + tipc_set_node_id(net, node_id); + } + tn->trial_addr = addr; + tn->addr_trial_end = jiffies; + pr_info("32-bit node address hash set to %x\n", addr); +} + +char 
*tipc_nodeid2string(char *str, u8 *id) +{ + int i; + u8 c; + + /* Already a string ? */ + for (i = 0; i < NODE_ID_LEN; i++) { + c = id[i]; + if (c >= '0' && c <= '9') + continue; + if (c >= 'A' && c <= 'Z') + continue; + if (c >= 'a' && c <= 'z') + continue; + if (c == '.') + continue; + if (c == ':') + continue; + if (c == '_') + continue; + if (c == '-') + continue; + if (c == '@') + continue; + if (c != 0) + break; + } + if (i == NODE_ID_LEN) { + memcpy(str, id, NODE_ID_LEN); + str[NODE_ID_LEN] = 0; + return str; + } + + /* Translate to hex string */ + for (i = 0; i < NODE_ID_LEN; i++) + sprintf(&str[2 * i], "%02x", id[i]); + + /* Strip off trailing zeroes */ + for (i = NODE_ID_STR_LEN - 2; str[i] == '0'; i--) + str[i] = 0; + + return str; +} diff --git a/net/tipc/addr.h b/net/tipc/addr.h new file mode 100644 index 000000000..31bee0ea7 --- /dev/null +++ b/net/tipc/addr.h @@ -0,0 +1,91 @@ +/* + * net/tipc/addr.h: Include file for TIPC address utility routines + * + * Copyright (c) 2000-2006, 2018, Ericsson AB + * Copyright (c) 2004-2005, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_ADDR_H +#define _TIPC_ADDR_H + +#include <linux/types.h> +#include <linux/tipc.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include "core.h" + +static inline u32 tipc_own_addr(struct net *net) +{ + return tipc_net(net)->node_addr; +} + +static inline u8 *tipc_own_id(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + if (!strlen(tn->node_id_string)) + return NULL; + return tn->node_id; +} + +static inline char *tipc_own_id_string(struct net *net) +{ + return tipc_net(net)->node_id_string; +} + +static inline u32 tipc_cluster_mask(u32 addr) +{ + return addr & TIPC_ZONE_CLUSTER_MASK; +} + +static inline int tipc_node2scope(u32 node) +{ + return node ? TIPC_NODE_SCOPE : TIPC_CLUSTER_SCOPE; +} + +static inline int tipc_scope2node(struct net *net, int sc) +{ + return sc != TIPC_NODE_SCOPE ? 
0 : tipc_own_addr(net); +} + +static inline int in_own_node(struct net *net, u32 addr) +{ + return addr == tipc_own_addr(net) || !addr; +} + +bool tipc_in_scope(bool legacy_format, u32 domain, u32 addr); +void tipc_set_node_id(struct net *net, u8 *id); +void tipc_set_node_addr(struct net *net, u32 addr); +char *tipc_nodeid2string(char *str, u8 *id); +u32 tipc_node_id2hash(u8 *id128); + +#endif diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c new file mode 100644 index 000000000..68107bf92 --- /dev/null +++ b/net/tipc/bcast.c @@ -0,0 +1,578 @@ +/* + * net/tipc/bcast.c: TIPC broadcast code + * + * Copyright (c) 2004-2006, 2014-2017, Ericsson AB + * Copyright (c) 2004, Intel Corporation. + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/tipc_config.h> +#include "socket.h" +#include "msg.h" +#include "bcast.h" +#include "link.h" +#include "name_table.h" + +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ + +const char tipc_bclink_name[] = "broadcast-link"; + +/** + * struct tipc_bc_base - base structure for keeping broadcast send state + * @link: broadcast send link structure + * @inputq: data input queue; will only carry SOCK_WAKEUP messages + * @dests: array keeping number of reachable destinations per bearer + * @primary_bearer: a bearer having links to all broadcast destinations, if any + * @bcast_support: indicates if primary bearer, if any, supports broadcast + * @rcast_support: indicates if all peer nodes support replicast + * @rc_ratio: dest count as percentage of cluster size where send method changes + * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast + */ +struct tipc_bc_base { + struct tipc_link *link; + struct sk_buff_head inputq; + int dests[MAX_BEARERS]; + int primary_bearer; + bool bcast_support; + bool rcast_support; + int rc_ratio; + int bc_threshold; +}; + +static struct tipc_bc_base *tipc_bc_base(struct net *net) +{ + return tipc_net(net)->bcbase; +} + +/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link + * Note: the MTU is decremented to give room for a tunnel header, in + * case the message 
needs to be sent as replicast + */ +int tipc_bcast_get_mtu(struct net *net) +{ + return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; +} + +void tipc_bcast_disable_rcast(struct net *net) +{ + tipc_bc_base(net)->rcast_support = false; +} + +static void tipc_bcbase_calc_bc_threshold(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + int cluster_size = tipc_link_bc_peers(tipc_bc_sndlink(net)); + + bb->bc_threshold = 1 + (cluster_size * bb->rc_ratio / 100); +} + +/* tipc_bcbase_select_primary(): find a bearer with links to all destinations, + * if any, and make it primary bearer + */ +static void tipc_bcbase_select_primary(struct net *net) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + int all_dests = tipc_link_bc_peers(bb->link); + int i, mtu, prim; + + bb->primary_bearer = INVALID_BEARER_ID; + bb->bcast_support = true; + + if (!all_dests) + return; + + for (i = 0; i < MAX_BEARERS; i++) { + if (!bb->dests[i]) + continue; + + mtu = tipc_bearer_mtu(net, i); + if (mtu < tipc_link_mtu(bb->link)) + tipc_link_set_mtu(bb->link, mtu); + bb->bcast_support &= tipc_bearer_bcast_support(net, i); + if (bb->dests[i] < all_dests) + continue; + + bb->primary_bearer = i; + + /* Reduce risk that all nodes select same primary */ + if ((i ^ tipc_own_addr(net)) & 1) + break; + } + prim = bb->primary_bearer; + if (prim != INVALID_BEARER_ID) + bb->bcast_support = tipc_bearer_bcast_support(net, prim); +} + +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + tipc_bcast_lock(net); + bb->dests[bearer_id]++; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); +} + +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + + tipc_bcast_lock(net); + bb->dests[bearer_id]--; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); +} + +/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers + * + * Note that 
number of reachable destinations, as indicated in the dests[] + * array, may transitionally differ from the number of destinations indicated + * in each sent buffer. We can sustain this. Excess destination nodes will + * drop and never acknowledge the unexpected packets, and missing destinations + * will either require retransmission (if they are just about to be added to + * the bearer), or be removed from the buffer's 'ackers' counter (if they + * just went down) + */ +static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) +{ + int bearer_id; + struct tipc_bc_base *bb = tipc_bc_base(net); + struct sk_buff *skb, *_skb; + struct sk_buff_head _xmitq; + + if (skb_queue_empty(xmitq)) + return; + + /* The typical case: at least one bearer has links to all nodes */ + bearer_id = bb->primary_bearer; + if (bearer_id >= 0) { + tipc_bearer_bc_xmit(net, bearer_id, xmitq); + return; + } + + /* We have to transmit across all bearers */ + __skb_queue_head_init(&_xmitq); + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + if (!bb->dests[bearer_id]) + continue; + + skb_queue_walk(xmitq, skb) { + _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_queue_tail(&_xmitq, _skb); + } + tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); + } + __skb_queue_purge(xmitq); + __skb_queue_purge(&_xmitq); +} + +static void tipc_bcast_select_xmit_method(struct net *net, int dests, + struct tipc_mc_method *method) +{ + struct tipc_bc_base *bb = tipc_bc_base(net); + unsigned long exp = method->expires; + + /* Broadcast supported by used bearer/bearers? */ + if (!bb->bcast_support) { + method->rcast = true; + return; + } + /* Any destinations which don't support replicast ? */ + if (!bb->rcast_support) { + method->rcast = false; + return; + } + /* Can current method be changed ? 
*/ + method->expires = jiffies + TIPC_METHOD_EXPIRE; + if (method->mandatory || time_before(jiffies, exp)) + return; + + /* Determine method to use now */ + method->rcast = dests <= bb->bc_threshold; +} + +/* tipc_bcast_xmit - broadcast the buffer chain to all external nodes + * @net: the applicable net namespace + * @pkts: chain of buffers containing message + * @cong_link_cnt: set to 1 if broadcast link is congested, otherwise 0 + * Consumes the buffer chain. + * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE + */ +static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, + u16 *cong_link_cnt) +{ + struct tipc_link *l = tipc_bc_sndlink(net); + struct sk_buff_head xmitq; + int rc = 0; + + __skb_queue_head_init(&xmitq); + tipc_bcast_lock(net); + if (tipc_link_bc_peers(l)) + rc = tipc_link_xmit(l, pkts, &xmitq); + tipc_bcast_unlock(net); + tipc_bcbase_xmit(net, &xmitq); + __skb_queue_purge(pkts); + if (rc == -ELINKCONG) { + *cong_link_cnt = 1; + rc = 0; + } + return rc; +} + +/* tipc_rcast_xmit - replicate and send a message to given destination nodes + * @net: the applicable net namespace + * @pkts: chain of buffers containing message + * @dests: list of destination nodes + * @cong_link_cnt: returns number of congested links + * @cong_links: returns identities of congested links + * Returns 0 if success, otherwise errno + */ +static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, + struct tipc_nlist *dests, u16 *cong_link_cnt) +{ + struct tipc_dest *dst, *tmp; + struct sk_buff_head _pkts; + u32 dnode, selector; + + selector = msg_link_selector(buf_msg(skb_peek(pkts))); + __skb_queue_head_init(&_pkts); + + list_for_each_entry_safe(dst, tmp, &dests->list, list) { + dnode = dst->node; + if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts)) + return -ENOMEM; + + /* Any other return value than -ELINKCONG is ignored */ + if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) + (*cong_link_cnt)++; + } + return 0; +} + +/* 
tipc_mcast_xmit - deliver message to indicated destination nodes + * and to identified node local sockets + * @net: the applicable net namespace + * @pkts: chain of buffers containing message + * @method: send method to be used + * @dests: destination nodes for message. + * @cong_link_cnt: returns number of encountered congested destination links + * Consumes buffer chain. + * Returns 0 if success, otherwise errno + */ +int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, + struct tipc_mc_method *method, struct tipc_nlist *dests, + u16 *cong_link_cnt) +{ + struct sk_buff_head inputq, localq; + int rc = 0; + + skb_queue_head_init(&inputq); + __skb_queue_head_init(&localq); + + /* Clone packets before they are consumed by next call */ + if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { + rc = -ENOMEM; + goto exit; + } + /* Send according to determined transmit method */ + if (dests->remote) { + tipc_bcast_select_xmit_method(net, dests->remote, method); + if (method->rcast) + rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); + else + rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); + } + + if (dests->local) + tipc_sk_mcast_rcv(net, &localq, &inputq); +exit: + /* This queue should normally be empty by now */ + __skb_queue_purge(pkts); + return rc; +} + +/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link + * + * RCU is locked, no other locks set + */ +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + int rc; + + __skb_queue_head_init(&xmitq); + + if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { + kfree_skb(skb); + return 0; + } + + tipc_bcast_lock(net); + if (msg_user(hdr) == BCAST_PROTOCOL) + rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); + else + rc = tipc_link_rcv(l, skb, NULL); + tipc_bcast_unlock(net); + + tipc_bcbase_xmit(net, &xmitq); + + /* Any 
socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); + + return rc; +} + +/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge + * + * RCU is locked, no other locks set + */ +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) +{ + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + u16 acked = msg_bcast_ack(hdr); + struct sk_buff_head xmitq; + + /* Ignore bc acks sent by peer before bcast synch point was received */ + if (msg_bc_ack_invalid(hdr)) + return; + + __skb_queue_head_init(&xmitq); + + tipc_bcast_lock(net); + tipc_link_bc_ack_rcv(l, acked, &xmitq); + tipc_bcast_unlock(net); + + tipc_bcbase_xmit(net, &xmitq); + + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} + +/* tipc_bcast_synch_rcv - check and update rcv link with peer's send state + * + * RCU is locked, no other locks set + */ +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) +{ + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + int rc = 0; + + __skb_queue_head_init(&xmitq); + + tipc_bcast_lock(net); + if (msg_type(hdr) != STATE_MSG) { + tipc_link_bc_init_rcv(l, hdr); + } else if (!msg_bc_ack_invalid(hdr)) { + tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); + rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); + } + tipc_bcast_unlock(net); + + tipc_bcbase_xmit(net, &xmitq); + + /* Any socket wakeup messages ? 
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); + return rc; +} + +/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); + + tipc_bcast_lock(net); + tipc_link_add_bc_peer(snd_l, uc_l, xmitq); + tipc_bcbase_select_primary(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); +} + +/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + tipc_bcast_lock(net); + tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); + tipc_bcbase_select_primary(net); + tipc_bcbase_calc_bc_threshold(net); + tipc_bcast_unlock(net); + + tipc_bcbase_xmit(net, &xmitq); + + /* Any socket wakeup messages ? 
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} + +int tipc_bclink_reset_stats(struct net *net) +{ + struct tipc_link *l = tipc_bc_sndlink(net); + + if (!l) + return -ENOPROTOOPT; + + tipc_bcast_lock(net); + tipc_link_reset_stats(l); + tipc_bcast_unlock(net); + return 0; +} + +static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) +{ + struct tipc_link *l = tipc_bc_sndlink(net); + + if (!l) + return -ENOPROTOOPT; + if (limit < BCLINK_WIN_MIN) + limit = BCLINK_WIN_MIN; + if (limit > TIPC_MAX_LINK_WIN) + return -EINVAL; + tipc_bcast_lock(net); + tipc_link_set_queue_limits(l, limit); + tipc_bcast_unlock(net); + return 0; +} + +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) +{ + int err; + u32 win; + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + if (!attrs[TIPC_NLA_LINK_PROP]) + return -EINVAL; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); + if (err) + return err; + + if (!props[TIPC_NLA_PROP_WIN]) + return -EOPNOTSUPP; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + + return tipc_bc_link_set_queue_limits(net, win); +} + +int tipc_bcast_init(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bc_base *bb = NULL; + struct tipc_link *l = NULL; + + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) + goto enomem; + tn->bcbase = bb; + spin_lock_init(&tipc_net(net)->bclock); + + if (!tipc_link_bc_create(net, 0, 0, + FB_MTU, + BCLINK_WIN_DEFAULT, + 0, + &bb->inputq, + NULL, + NULL, + &l)) + goto enomem; + bb->link = l; + tn->bcl = l; + bb->rc_ratio = 25; + bb->rcast_support = true; + return 0; +enomem: + kfree(bb); + kfree(l); + return -ENOMEM; +} + +void tipc_bcast_stop(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + synchronize_net(); + kfree(tn->bcbase); + kfree(tn->bcl); +} + +void tipc_nlist_init(struct tipc_nlist *nl, u32 self) +{ + memset(nl, 0, sizeof(*nl)); + INIT_LIST_HEAD(&nl->list); + nl->self = self; +} + +void tipc_nlist_add(struct 
tipc_nlist *nl, u32 node) +{ + if (node == nl->self) + nl->local = true; + else if (tipc_dest_push(&nl->list, node, 0)) + nl->remote++; +} + +void tipc_nlist_del(struct tipc_nlist *nl, u32 node) +{ + if (node == nl->self) + nl->local = false; + else if (tipc_dest_del(&nl->list, node, 0)) + nl->remote--; +} + +void tipc_nlist_purge(struct tipc_nlist *nl) +{ + tipc_dest_list_purge(&nl->list); + nl->remote = 0; + nl->local = false; +} diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h new file mode 100644 index 000000000..751530ab0 --- /dev/null +++ b/net/tipc/bcast.h @@ -0,0 +1,110 @@ +/* + * net/tipc/bcast.h: Include file for TIPC broadcast code + * + * Copyright (c) 2003-2006, 2014-2015, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_BCAST_H +#define _TIPC_BCAST_H + +#include "core.h" + +struct tipc_node; +struct tipc_msg; +struct tipc_nl_msg; +struct tipc_nlist; +struct tipc_nitem; +extern const char tipc_bclink_name[]; + +#define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000) + +struct tipc_nlist { + struct list_head list; + u32 self; + u16 remote; + bool local; +}; + +void tipc_nlist_init(struct tipc_nlist *nl, u32 self); +void tipc_nlist_purge(struct tipc_nlist *nl); +void tipc_nlist_add(struct tipc_nlist *nl, u32 node); +void tipc_nlist_del(struct tipc_nlist *nl, u32 node); + +/* Cookie to be used between socket and broadcast layer + * @rcast: replicast (instead of broadcast) was used at previous xmit + * @mandatory: broadcast/replicast indication was set by user + * @expires: re-evaluate non-mandatory transmit method if we are past this + */ +struct tipc_mc_method { + bool rcast; + bool mandatory; + unsigned long expires; +}; + +int tipc_bcast_init(struct net *net); +void tipc_bcast_stop(struct net *net); +void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); +int tipc_bcast_get_mtu(struct net *net); +void tipc_bcast_disable_rcast(struct net *net); +int tipc_mcast_xmit(struct net *net, struct 
sk_buff_head *pkts, + struct tipc_mc_method *method, struct tipc_nlist *dests, + u16 *cong_link_cnt); +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); +int tipc_bclink_reset_stats(struct net *net); + +static inline void tipc_bcast_lock(struct net *net) +{ + spin_lock_bh(&tipc_net(net)->bclock); +} + +static inline void tipc_bcast_unlock(struct net *net) +{ + spin_unlock_bh(&tipc_net(net)->bclock); +} + +static inline struct tipc_link *tipc_bc_sndlink(struct net *net) +{ + return tipc_net(net)->bcl; +} + +#endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c new file mode 100644 index 000000000..0f970259d --- /dev/null +++ b/net/tipc/bearer.c @@ -0,0 +1,1242 @@ +/* + * net/tipc/bearer.c: TIPC bearer code + * + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2004-2006, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/sock.h> +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "discover.h" +#include "monitor.h" +#include "bcast.h" +#include "netlink.h" +#include "udp_media.h" + +#define MAX_ADDR_STR 60 + +static struct tipc_media * const media_info_array[] = { + &eth_media_info, +#ifdef CONFIG_TIPC_MEDIA_IB + &ib_media_info, +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP + &udp_media_info, +#endif + NULL +}; + +static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + + return rcu_dereference_rtnl(tn->bearer_list[bearer_id]); +} + +static void bearer_disable(struct net *net, struct tipc_bearer *b); +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); + +/** + * tipc_media_find - locates specified media object by name + */ +struct tipc_media *tipc_media_find(const char *name) +{ + u32 i; + + for (i = 0; media_info_array[i] != NULL; i++) { + if
(!strcmp(media_info_array[i]->name, name)) + break; + } + return media_info_array[i]; +} + +/** + * media_find_id - locates specified media object by type identifier + */ +static struct tipc_media *media_find_id(u8 type) +{ + u32 i; + + for (i = 0; media_info_array[i] != NULL; i++) { + if (media_info_array[i]->type_id == type) + break; + } + return media_info_array[i]; +} + +/** + * tipc_media_addr_printf - record media address in print buffer + * @buf: output buffer + * @len: size of @buf in bytes + * @a: media address to format + * + * Writes "<media name>(<address>)" when the media type is known and its + * addr2str() hook succeeds; otherwise writes "UNKNOWN(<media id>)" followed + * by every byte of @a->value as "-xx" hex pairs. + */ +void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a) +{ + char addr_str[MAX_ADDR_STR]; + struct tipc_media *m; + int ret; + + m = media_find_id(a->media_id); + + if (m && !m->addr2str(a, addr_str, sizeof(addr_str))) + ret = scnprintf(buf, len, "%s(%s)", m->name, addr_str); + else { + u32 i; + + ret = scnprintf(buf, len, "UNKNOWN(%u)", a->media_id); + /* append after the text already written and shrink the space left */ + for (i = 0; i < sizeof(a->value); i++) + ret += scnprintf(buf + ret, len - ret, + "-%02x", a->value[i]); + } +} + +/** + * bearer_name_validate - validate & (optionally) deconstruct bearer name + * @name: ptr to bearer name string + * @name_parts: ptr to area for bearer name components (or NULL if not needed) + * + * Returns 1 if bearer name is valid, otherwise 0.
+ */ +static int bearer_name_validate(const char *name, + struct tipc_bearer_names *name_parts) +{ + char name_copy[TIPC_MAX_BEARER_NAME]; + char *media_name; + char *if_name; + u32 media_len; + u32 if_len; + + /* copy bearer name & ensure length is OK */ + name_copy[TIPC_MAX_BEARER_NAME - 1] = 0; + /* need above in case non-Posix strncpy() doesn't pad with nulls */ + strncpy(name_copy, name, TIPC_MAX_BEARER_NAME); + if (name_copy[TIPC_MAX_BEARER_NAME - 1] != 0) + return 0; + + /* ensure all component parts of bearer name are present */ + media_name = name_copy; + if_name = strchr(media_name, ':'); + if (if_name == NULL) + return 0; + *(if_name++) = 0; + media_len = if_name - media_name; + if_len = strlen(if_name) + 1; + + /* validate component parts of bearer name */ + if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) || + (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME)) + return 0; + + /* return bearer name components, if necessary */ + if (name_parts) { + strcpy(name_parts->media_name, media_name); + strcpy(name_parts->if_name, if_name); + } + return 1; +} + +/** + * tipc_bearer_find - locates bearer object with matching bearer name + */ +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b; + u32 i; + + for (i = 0; i < MAX_BEARERS; i++) { + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (!strcmp(b->name, name))) + return b; + } + return NULL; +} + +/* tipc_bearer_get_name - get the bearer name from its id. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @bearer_id: the id to get the name from. 
+ */ +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + return -EINVAL; + + strcpy(name, b->name); + return 0; +} + +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_add_dest(b->disc); + rcu_read_unlock(); +} + +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (b) + tipc_disc_remove_dest(b->disc); + rcu_read_unlock(); +} + +/** + * tipc_enable_bearer - enable bearer with the given name + */ +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 prio, + struct nlattr *attr[], + struct netlink_ext_ack *extack) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer_names b_names; + int with_this_prio = 1; + struct tipc_bearer *b; + struct tipc_media *m; + struct sk_buff *skb; + int bearer_id = 0; + int res = -EINVAL; + char *errstr = ""; + u32 i; + + if (!bearer_name_validate(name, &b_names)) { + NL_SET_ERR_MSG(extack, "Illegal name"); + return res; + } + + if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { + errstr = "illegal priority"; + NL_SET_ERR_MSG(extack, "Illegal priority"); + goto rejected; + } + + m = tipc_media_find(b_names.media_name); + if (!m) { + errstr = "media not registered"; + NL_SET_ERR_MSG(extack, "Media not registered"); + goto rejected; + } + + if (prio == TIPC_MEDIA_LINK_PRI) + prio = m->priority; + + /* Check new bearer vs existing ones and find free bearer id if any */ + bearer_id = MAX_BEARERS; + i = 
MAX_BEARERS; + while (i-- != 0) { + b = rtnl_dereference(tn->bearer_list[i]); + if (!b) { + bearer_id = i; + continue; + } + if (!strcmp(name, b->name)) { + errstr = "already enabled"; + NL_SET_ERR_MSG(extack, "Already enabled"); + goto rejected; + } + + if (b->priority == prio && + (++with_this_prio > 2)) { + pr_warn("Bearer <%s>: already 2 bearers with priority %u\n", + name, prio); + + if (prio == TIPC_MIN_LINK_PRI) { + errstr = "cannot adjust to lower"; + NL_SET_ERR_MSG(extack, "Cannot adjust to lower"); + goto rejected; + } + + pr_warn("Bearer <%s>: trying with adjusted priority\n", + name); + prio--; + bearer_id = MAX_BEARERS; + i = MAX_BEARERS; + with_this_prio = 1; + } + } + + if (bearer_id >= MAX_BEARERS) { + errstr = "max 3 bearers permitted"; + NL_SET_ERR_MSG(extack, "Max 3 bearers permitted"); + goto rejected; + } + + b = kzalloc(sizeof(*b), GFP_ATOMIC); + if (!b) + return -ENOMEM; + + strcpy(b->name, name); + b->media = m; + res = m->enable_media(net, b, attr); + if (res) { + kfree(b); + errstr = "failed to enable media"; + NL_SET_ERR_MSG(extack, "Failed to enable media"); + goto rejected; + } + + b->identity = bearer_id; + b->tolerance = m->tolerance; + b->window = m->window; + b->domain = disc_domain; + b->net_plane = bearer_id + 'A'; + b->priority = prio; + test_and_set_bit_lock(0, &b->up); + + res = tipc_disc_create(net, b, &b->bcast_addr, &skb); + if (res) { + bearer_disable(net, b); + errstr = "failed to create discoverer"; + NL_SET_ERR_MSG(extack, "Failed to create discoverer"); + goto rejected; + } + + rcu_assign_pointer(tn->bearer_list[bearer_id], b); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); + + if (tipc_mon_create(net, bearer_id)) { + bearer_disable(net, b); + return -ENOMEM; + } + + pr_info("Enabled bearer <%s>, priority %u\n", name, prio); + + return res; +rejected: + pr_warn("Enabling of bearer <%s> rejected, %s\n", name, errstr); + return res; +} + +/** + * tipc_reset_bearer - Reset all links established 
over this bearer + */ +static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) +{ + pr_info("Resetting bearer <%s>\n", b->name); + tipc_node_delete_links(net, b->identity); + tipc_disc_reset(net, b); + return 0; +} + +/** + * bearer_disable + * + * Note: This routine assumes caller holds RTNL lock. + */ +static void bearer_disable(struct net *net, struct tipc_bearer *b) +{ + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; + + pr_info("Disabling bearer <%s>\n", b->name); + clear_bit_unlock(0, &b->up); + tipc_node_delete_links(net, bearer_id); + b->media->disable_media(b); + RCU_INIT_POINTER(b->media_ptr, NULL); + if (b->disc) + tipc_disc_delete(b->disc); + RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); + kfree_rcu(b, rcu); + tipc_mon_delete(net, bearer_id); +} + +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]) +{ + char *dev_name = strchr((const char *)b->name, ':') + 1; + int hwaddr_len = b->media->hwaddr_len; + u8 node_id[NODE_ID_LEN] = {0,}; + struct net_device *dev; + + /* Find device with specified name */ + dev = dev_get_by_name(net, dev_name); + if (!dev) + return -ENODEV; + if (tipc_mtu_bad(dev, 0)) { + dev_put(dev); + return -EINVAL; + } + + /* Autoconfigure own node identity if needed */ + if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { + memcpy(node_id, dev->dev_addr, hwaddr_len); + tipc_net_init(net, node_id, 0); + } + if (!tipc_own_id(net)) { + dev_put(dev); + pr_warn("Failed to obtain node identity\n"); + return -EINVAL; + } + + /* Associate TIPC bearer with L2 bearer */ + rcu_assign_pointer(b->media_ptr, dev); + b->pt.dev = dev; + b->pt.type = htons(ETH_P_TIPC); + b->pt.func = tipc_l2_rcv_msg; + dev_add_pack(&b->pt); + memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); + memcpy(b->bcast_addr.value, dev->broadcast, hwaddr_len); + b->bcast_addr.media_id = b->media->type_id; + b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; + b->mtu = dev->mtu; + b->media->raw2addr(b, 
&b->addr, (char *)dev->dev_addr); + rcu_assign_pointer(dev->tipc_ptr, b); + return 0; +} + +/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface + * + * Mark L2 bearer as inactive so that incoming buffers are thrown away + */ +void tipc_disable_l2_media(struct tipc_bearer *b) +{ + struct net_device *dev; + + dev = (struct net_device *)rtnl_dereference(b->media_ptr); + dev_remove_pack(&b->pt); + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); + dev_put(dev); +} + +/** + * tipc_l2_send_msg - send a TIPC packet out over an L2 interface + * @skb: the packet to be sent + * @b: the bearer through which the packet is to be sent + * @dest: peer destination address + */ +int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, struct tipc_media_addr *dest) +{ + struct net_device *dev; + int delta; + + dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); + if (!dev) + return 0; + + delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { + kfree_skb(skb); + return 0; + } + skb_reset_network_header(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_TIPC); + dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, skb->len); + dev_queue_xmit(skb); + return 0; +} + +bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id) +{ + bool supp = false; + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (b) + supp = (b->bcast_addr.broadcast == TIPC_BROADCAST_SUPPORT); + rcu_read_unlock(); + return supp; +} + +int tipc_bearer_mtu(struct net *net, u32 bearer_id) +{ + int mtu = 0; + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]); + if (b) + mtu = b->mtu; + rcu_read_unlock(); + return mtu; +} + +/* tipc_bearer_xmit_skb - sends buffer to destination over bearer + */ +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct 
sk_buff *skb, + struct tipc_media_addr *dest) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_bearer *b; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) + b->media->send_msg(net, skb, b, dest); + else + kfree_skb(skb); + rcu_read_unlock(); +} + +/* tipc_bearer_xmit() -send buffer to destination over bearer + */ +void tipc_bearer_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr *dst) +{ + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + + if (skb_queue_empty(xmitq)) + return; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + __skb_dequeue(xmitq); + if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) + b->media->send_msg(net, skb, b, dst); + else + kfree_skb(skb); + } + rcu_read_unlock(); +} + +/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations + */ +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq) +{ + struct tipc_net *tn = tipc_net(net); + int net_id = tn->net_id; + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + + rcu_read_lock(); + b = bearer_get(net, bearer_id); + if (unlikely(!b || !test_bit(0, &b->up))) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); + } + rcu_read_unlock(); +} + +/** + * tipc_l2_rcv_msg - handle incoming TIPC message from an interface + * @buf: the received packet + * @dev: the net device that the packet was received on + * @pt: the packet_type structure which was used to register this handler + * @orig_dev: the original receive net device in case the device is a bond + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores 
packets sent using interface multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(dev->tipc_ptr) ?: + rcu_dereference_rtnl(orig_dev->tipc_ptr); + if (likely(b && test_bit(0, &b->up) && + (skb->pkt_type <= PACKET_MULTICAST))) { + skb->next = NULL; + tipc_rcv(dev_net(b->pt.dev), skb, b); + rcu_read_unlock(); + return NET_RX_SUCCESS; + } + rcu_read_unlock(); + kfree_skb(skb); + return NET_RX_DROP; +} + +/** + * tipc_l2_device_event - handle device events from network device + * @nb: the context of the notification + * @evt: the type of event + * @ptr: the net device that the event was on + * + * This function is called by the Ethernet driver in case of link + * change event. + */ +static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, + void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct tipc_bearer *b; + + b = rtnl_dereference(dev->tipc_ptr); + if (!b) + return NOTIFY_DONE; + + switch (evt) { + case NETDEV_CHANGE: + if (netif_carrier_ok(dev) && netif_oper_up(dev)) { + test_and_set_bit_lock(0, &b->up); + break; + } + /* fall through */ + case NETDEV_GOING_DOWN: + clear_bit_unlock(0, &b->up); + tipc_reset_bearer(net, b); + break; + case NETDEV_UP: + test_and_set_bit_lock(0, &b->up); + break; + case NETDEV_CHANGEMTU: + if (tipc_mtu_bad(dev, 0)) { + bearer_disable(net, b); + break; + } + b->mtu = dev->mtu; + tipc_reset_bearer(net, b); + break; + case NETDEV_CHANGEADDR: + b->media->raw2addr(b, &b->addr, + (char *)dev->dev_addr); + tipc_reset_bearer(net, b); + break; + case NETDEV_UNREGISTER: + case NETDEV_CHANGENAME: + bearer_disable(net, b); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block notifier = { 
+ .notifier_call = tipc_l2_device_event, + .priority = 0, +}; + +int tipc_bearer_setup(void) +{ + return register_netdevice_notifier(&notifier); +} + +void tipc_bearer_cleanup(void) +{ + unregister_netdevice_notifier(&notifier); +} + +void tipc_bearer_stop(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_bearer *b; + u32 i; + + for (i = 0; i < MAX_BEARERS; i++) { + b = rtnl_dereference(tn->bearer_list[i]); + if (b) { + bearer_disable(net, b); + tn->bearer_list[i] = NULL; + } + } +} + +/* Caller should hold rtnl_lock to protect the bearer */ +static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, + struct tipc_bearer *bearer, int nlflags) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_BEARER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_BEARER); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_BEARER_NAME, bearer->name)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_BEARER_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, bearer->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window)) + goto prop_msg_full; + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { + if (tipc_udp_nl_add_bearer_data(msg, bearer)) + goto attr_msg_full; + } +#endif + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return
-EMSGSIZE; +} + +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + int i = cb->args[0]; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + if (i == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (i = 0; i < MAX_BEARERS; i++) { + bearer = rtnl_dereference(tn->bearer_list[i]); + if (!bearer) + continue; + + err = __tipc_nl_add_bearer(&msg, bearer, NLM_F_MULTI); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct sk_buff *rep; + struct tipc_bearer *bearer; + struct tipc_nl_msg msg; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = genl_info_net(info); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + bearer = tipc_bearer_find(net, name); + if (!bearer) { + err = -EINVAL; + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + goto err_out; + } + + err = __tipc_nl_add_bearer(&msg, bearer, 0); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = 
sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + bearer = tipc_bearer_find(net, name); + if (!bearer) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + return -EINVAL; + } + + bearer_disable(net, bearer); + + return 0; +} + +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_disable(skb, info); + rtnl_unlock(); + + return err; +} + +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *bearer; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + u32 domain = 0; + u32 prio; + + prio = TIPC_MEDIA_LINK_PRI; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + if (attrs[TIPC_NLA_BEARER_DOMAIN]) + domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + } + + return tipc_enable_bearer(net, bearer, domain, prio, attrs, + info->extack); +} + +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_enable(skb, info); + rtnl_unlock(); + + return err; +} + +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) +{ + int err; + 
char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, name); + if (!b) { + rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + return -EINVAL; + } + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + err = tipc_udp_nl_bearer_add(b, + attrs[TIPC_NLA_BEARER_UDP_OPTS]); + if (err) { + rtnl_unlock(); + return err; + } + } +#endif + rtnl_unlock(); + + return 0; +} + +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + char *name; + int err; + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + b = tipc_bearer_find(net, name); + if (!b) { + NL_SET_ERR_MSG(info->extack, "Bearer not found"); + return -EINVAL; + } + + if (attrs[TIPC_NLA_BEARER_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_TOL]) { + b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_TOL); + } + if (props[TIPC_NLA_PROP_PRIO]) + b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if 
(props[TIPC_NLA_PROP_MTU]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); + return -EINVAL; + } +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); + return -EINVAL; + } + b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); + tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU); +#endif + } + } + + return 0; +} + +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_set(skb, info); + rtnl_unlock(); + + return err; +} + +static int __tipc_nl_add_media(struct tipc_nl_msg *msg, + struct tipc_media *media, int nlflags) +{ + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_MEDIA_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MEDIA); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_MEDIA_NAME, media->name)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, media->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window)) + goto prop_msg_full; + if (media->type_id == TIPC_MEDIA_TYPE_UDP) + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu)) + goto prop_msg_full; + + nla_nest_end(msg->skb, prop); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + 
int i = cb->args[0]; + struct tipc_nl_msg msg; + + if (i == MAX_MEDIA) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (; media_info_array[i] != NULL; i++) { + err = __tipc_nl_add_media(&msg, media_info_array[i], + NLM_F_MULTI); + if (err) + break; + } + rtnl_unlock(); + + cb->args[0] = i; + return skb->len; +} + +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_nl_msg msg; + struct tipc_media *media; + struct sk_buff *rep; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!rep) + return -ENOMEM; + + msg.skb = rep; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + rtnl_lock(); + media = tipc_media_find(name); + if (!media) { + NL_SET_ERR_MSG(info->extack, "Media not found"); + err = -EINVAL; + goto err_out; + } + + err = __tipc_nl_add_media(&msg, media, 0); + if (err) + goto err_out; + rtnl_unlock(); + + return genlmsg_reply(rep, info); +err_out: + rtnl_unlock(); + nlmsg_free(rep); + + return err; +} + +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_media *m; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + + if (!info->attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MEDIA_MAX, + info->attrs[TIPC_NLA_MEDIA], + tipc_nl_media_policy, info->extack); + + if (!attrs[TIPC_NLA_MEDIA_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); + + m = tipc_media_find(name); + if (!m) { + NL_SET_ERR_MSG(info->extack, "Media not found"); + return -EINVAL; + 
} + if (attrs[TIPC_NLA_MEDIA_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], + props); + if (err) + return err; + + if (props[TIPC_NLA_PROP_TOL]) + m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if (props[TIPC_NLA_PROP_PRIO]) + m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (props[TIPC_NLA_PROP_WIN]) + m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if (props[TIPC_NLA_PROP_MTU]) { + if (m->type_id != TIPC_MEDIA_TYPE_UDP) { + NL_SET_ERR_MSG(info->extack, + "MTU property is unsupported"); + return -EINVAL; + } +#ifdef CONFIG_TIPC_MEDIA_UDP + if (tipc_udp_mtu_bad(nla_get_u32 + (props[TIPC_NLA_PROP_MTU]))) { + NL_SET_ERR_MSG(info->extack, + "MTU value is out-of-range"); + return -EINVAL; + } + m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]); +#endif + } + } + + return 0; +} + +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_media_set(skb, info); + rtnl_unlock(); + + return err; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h new file mode 100644 index 000000000..394290cbb --- /dev/null +++ b/net/tipc/bearer.h @@ -0,0 +1,245 @@ +/* + * net/tipc/bearer.h: Include file for TIPC bearer code + * + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_BEARER_H +#define _TIPC_BEARER_H + +#include "netlink.h" +#include "core.h" +#include "msg.h" +#include <net/genetlink.h> + +#define MAX_MEDIA 3 + +/* Identifiers associated with TIPC message header media address info + * - address info field is 32 bytes long + * - the field's actual content and length is defined per media + * - remaining unused bytes in the field are set to zero + */ +#define TIPC_MEDIA_INFO_SIZE 32 +#define TIPC_MEDIA_TYPE_OFFSET 3 +#define TIPC_MEDIA_ADDR_OFFSET 4 + +/* + * Identifiers of supported TIPC media types + */ +#define TIPC_MEDIA_TYPE_ETH 1 +#define TIPC_MEDIA_TYPE_IB 2 +#define TIPC_MEDIA_TYPE_UDP 3 + +/* Minimum bearer MTU */ +#define TIPC_MIN_BEARER_MTU (MAX_H_SIZE + INT_H_SIZE) + +/* Identifiers for distinguishing between broadcast/multicast and replicast + */ +#define TIPC_BROADCAST_SUPPORT 1 +#define TIPC_REPLICAST_SUPPORT 2 + +/** + * struct tipc_media_addr - destination address used by TIPC bearers + * @value: address info (format defined by media) + * @media_id: TIPC media type identifier + * @broadcast: non-zero if address is a broadcast address + */ +struct tipc_media_addr { + u8 value[TIPC_MEDIA_INFO_SIZE]; + u8 media_id; + u8 broadcast; +}; + +struct tipc_bearer; + +/** + * struct tipc_media - Media specific info exposed to generic bearer layer + * @send_msg: routine which handles buffer transmission + * @enable_media: routine which enables a media + * @disable_media: routine which disables a media + * @addr2str: convert media address format to string + * @addr2msg: convert from media addr format to discovery msg addr format + * @msg2addr: convert from discovery msg addr format to media addr format + * @raw2addr: convert from raw addr format to media addr format + * @priority: default link (and bearer) priority + * @tolerance: default time (in ms) before declaring link failure + * @window: default window (in packets) before declaring link congestion + * @mtu: max packet size bearer can support for 
media type not dependent on + * underlying device MTU + * @type_id: TIPC media identifier + * @hwaddr_len: TIPC media address len + * @name: media name + */ +struct tipc_media { + int (*send_msg)(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, + struct tipc_media_addr *dest); + int (*enable_media)(struct net *net, struct tipc_bearer *b, + struct nlattr *attr[]); + void (*disable_media)(struct tipc_bearer *b); + int (*addr2str)(struct tipc_media_addr *addr, + char *strbuf, + int bufsz); + int (*addr2msg)(char *msg, struct tipc_media_addr *addr); + int (*msg2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg); + int (*raw2addr)(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *raw); + u32 priority; + u32 tolerance; + u32 window; + u32 mtu; + u32 type_id; + u32 hwaddr_len; + char name[TIPC_MAX_MEDIA_NAME]; +}; + +/** + * struct tipc_bearer - Generic TIPC bearer structure + * @media_ptr: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) + * @media: ptr to media structure associated with bearer + * @bcast_addr: media address used in broadcasting + * @pt: packet type for bearer + * @rcu: rcu struct for tipc_bearer + * @priority: default link priority for bearer + * @window: default window size for bearer + * @tolerance: default link tolerance for bearer + * @domain: network domain to which links can be established + * @identity: array index of this bearer within TIPC bearer array + * @disc: ptr to (optional) structure making periodic link setup requests + * @net_plane: network plane ('A' through 'H') currently associated with bearer + * + * Note: media-specific code is responsible for initialization of the fields + * indicated below when a bearer is enabled; TIPC's generic bearer code takes + * care of initializing all other fields.
+ */ +struct tipc_bearer { + void __rcu *media_ptr; /* initialized by media */ + u32 mtu; /* initialized by media */ + struct tipc_media_addr addr; /* initialized by media */ + char name[TIPC_MAX_BEARER_NAME]; + struct tipc_media *media; + struct tipc_media_addr bcast_addr; + struct packet_type pt; + struct rcu_head rcu; + u32 priority; + u32 window; + u32 tolerance; + u32 domain; + u32 identity; + struct tipc_discoverer *disc; + char net_plane; + unsigned long up; +}; + +struct tipc_bearer_names { + char media_name[TIPC_MAX_MEDIA_NAME]; + char if_name[TIPC_MAX_IF_NAME]; +}; + +/* + * TIPC routines available to supported media types + */ + +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b); + +/* + * Routines made available to TIPC by supported media types + */ +extern struct tipc_media eth_media_info; + +#ifdef CONFIG_TIPC_MEDIA_IB +extern struct tipc_media ib_media_info; +#endif +#ifdef CONFIG_TIPC_MEDIA_UDP +extern struct tipc_media udp_media_info; +#endif + +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); + +int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); + +int tipc_media_set_priority(const char *name, u32
new_value); +int tipc_media_set_window(const char *name, u32 new_value); +void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); +int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]); +void tipc_disable_l2_media(struct tipc_bearer *b); +int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b, struct tipc_media_addr *dest); + +void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); +void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); +struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); +struct tipc_media *tipc_media_find(const char *name); +int tipc_bearer_setup(void); +void tipc_bearer_cleanup(void); +void tipc_bearer_stop(struct net *net); +int tipc_bearer_mtu(struct net *net, u32 bearer_id); +bool tipc_bearer_bcast_support(struct net *net, u32 bearer_id); +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest); +void tipc_bearer_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr *dst); +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq); + +/* check if device MTU is too low for tipc headers plus @reserve extra bytes; + * logs a warning on @dev and returns true if it is, returns false otherwise + */ +static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) +{ + if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve) + return false; + netdev_warn(dev, "MTU too low for tipc bearer\n"); + return true; +} + +#endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/core.c b/net/tipc/core.c new file mode 100644 index 000000000..ce0f067d0 --- /dev/null +++ b/net/tipc/core.c @@ -0,0 +1,194 @@ +/* + * net/tipc/core.c: TIPC module code + * + * Copyright (c) 2003-2006, 2013, Ericsson AB + * Copyright (c) 2005-2006, 2010-2013, Wind River Systems + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "core.h" +#include "name_table.h" +#include "subscr.h" +#include "bearer.h" +#include "net.h" +#include "socket.h" +#include "bcast.h" + +#include <linux/module.h> + +/* configurable TIPC parameters */ +unsigned int tipc_net_id __read_mostly; +int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ +/* tipc_init_net - per-namespace init: fill in the default values of the + * namespace's struct tipc_net, then initialize the socket hash table, the + * name table and the broadcast link, in that order. If a step fails, the + * steps already completed are undone in reverse order and the error is + * returned. + */ +static int __net_init tipc_init_net(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + int err; + + tn->net_id = 4711; + tn->node_addr = 0; + tn->trial_addr = 0; + tn->addr_trial_end = 0; + memset(tn->node_id, 0, sizeof(tn->node_id)); + memset(tn->node_id_string, 0, sizeof(tn->node_id_string)); + tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; + get_random_bytes(&tn->random, sizeof(int)); + INIT_LIST_HEAD(&tn->node_list); + spin_lock_init(&tn->node_list_lock); + + err = tipc_sk_rht_init(net); + if (err) + goto out_sk_rht; + + err = tipc_nametbl_init(net); + if (err) + goto out_nametbl; + + INIT_LIST_HEAD(&tn->dist_queue); + + err = tipc_bcast_init(net); + if (err) + goto out_bclink; + + return 0; + /* error unwind: each label undoes the step before the one that failed */ +out_bclink: + tipc_nametbl_stop(net); +out_nametbl: + tipc_sk_rht_destroy(net); +out_sk_rht: + return err; +} +/* tipc_exit_net - per-namespace exit: calls tipc_net_stop(), then stops the + * broadcast link and the name table and destroys the socket hash table, i.e. + * the reverse of the order used in tipc_init_net() + */ +static void __net_exit tipc_exit_net(struct net *net) +{ + tipc_net_stop(net); + + /* Make sure the tipc_net_finalize_work stopped + * before releasing the resources.
+ */ + flush_scheduled_work(); + tipc_bcast_stop(net); + tipc_nametbl_stop(net); + tipc_sk_rht_destroy(net); +} + +static struct pernet_operations tipc_net_ops = { + .init = tipc_init_net, + .exit = tipc_exit_net, + .id = &tipc_net_id, + .size = sizeof(struct tipc_net), +}; + +static struct pernet_operations tipc_topsrv_net_ops = { + .init = tipc_topsrv_init_net, + .exit = tipc_topsrv_exit_net, +}; +/* tipc_init - module init: set the receive buffer sysctl defaults, then bring + * up, in order, the sysctl table, the per-namespace core ops, the socket + * layer, the per-namespace topology server ops, the bearer layer, netlink and + * the netlink compat layer. On failure the completed steps are undone in + * reverse order through the labels below and the error is returned. + */ +static int __init tipc_init(void) +{ + int err; + + pr_info("Activated (version " TIPC_MOD_VER ")\n"); + + sysctl_tipc_rmem[0] = RCVBUF_MIN; + sysctl_tipc_rmem[1] = RCVBUF_DEF; + sysctl_tipc_rmem[2] = RCVBUF_MAX; + + err = tipc_register_sysctl(); + if (err) + goto out_sysctl; + + err = register_pernet_device(&tipc_net_ops); + if (err) + goto out_pernet; + + err = tipc_socket_init(); + if (err) + goto out_socket; + + err = register_pernet_device(&tipc_topsrv_net_ops); + if (err) + goto out_pernet_topsrv; + + err = tipc_bearer_setup(); + if (err) + goto out_bearer; + + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + + pr_info("Started in single node mode\n"); + return 0; + +out_netlink_compat: + tipc_netlink_stop(); +out_netlink: + tipc_bearer_cleanup(); +out_bearer: + unregister_pernet_device(&tipc_topsrv_net_ops); +out_pernet_topsrv: + tipc_socket_stop(); +out_socket: + unregister_pernet_device(&tipc_net_ops); +out_pernet: + tipc_unregister_sysctl(); +out_sysctl: + pr_err("Unable to start in single node mode\n"); + return err; +} +/* tipc_exit - module exit: tear everything down in the reverse order of + * tipc_init() + */ +static void __exit tipc_exit(void) +{ + tipc_netlink_compat_stop(); + tipc_netlink_stop(); + tipc_bearer_cleanup(); + unregister_pernet_device(&tipc_topsrv_net_ops); + tipc_socket_stop(); + unregister_pernet_device(&tipc_net_ops); + tipc_unregister_sysctl(); + + pr_info("Deactivated\n"); +} + +module_init(tipc_init); +module_exit(tipc_exit); + +MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); +MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(TIPC_MOD_VER); diff --git a/net/tipc/core.h b/net/tipc/core.h new file mode 100644 index 000000000..8020a6c36 --- /dev/null +++ b/net/tipc/core.h @@ -0,0 +1,189 @@ +/* + * net/tipc/core.h: Include file for TIPC global declarations + * + * Copyright (c) 2005-2006, 2013-2018 Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_CORE_H +#define _TIPC_CORE_H + +#include <linux/tipc.h> +#include <linux/tipc_config.h> +#include <linux/tipc_netlink.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/timer.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/atomic.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/rtnetlink.h> +#include <linux/etherdevice.h> +#include <net/netns/generic.h> +#include <linux/rhashtable.h> +#include <net/genetlink.h> + +struct tipc_node; +struct tipc_bearer; +struct tipc_bc_base; +struct tipc_link; +struct tipc_name_table; +struct tipc_topsrv; +struct tipc_monitor; + +#define TIPC_MOD_VER "2.0.0" + +#define NODE_HTABLE_SIZE 512 +#define MAX_BEARERS 3 +#define TIPC_DEF_MON_THRESHOLD 32 +#define NODE_ID_LEN 16 +#define NODE_ID_STR_LEN (NODE_ID_LEN * 2 + 1) + +extern unsigned int tipc_net_id __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; +extern int sysctl_tipc_named_timeout __read_mostly; +/* Per network namespace TIPC state; looked up via net_generic(net, tipc_net_id) */ +struct tipc_net { + u8 node_id[NODE_ID_LEN]; + u32 node_addr; + u32 trial_addr; + unsigned long addr_trial_end; + char node_id_string[NODE_ID_STR_LEN]; + int net_id; + int random; + bool legacy_addr_format; + + /* Node table and node list */ + spinlock_t node_list_lock; + struct hlist_head
node_htable[NODE_HTABLE_SIZE]; + struct list_head node_list; + u32 num_nodes; + u32 num_links; + + /* Neighbor monitoring list */ + struct tipc_monitor *monitors[MAX_BEARERS]; + int mon_threshold; + + /* Bearer list */ + struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; + + /* Broadcast link */ + spinlock_t bclock; + struct tipc_bc_base *bcbase; + struct tipc_link *bcl; + + /* Socket hash table */ + struct rhashtable sk_rht; + + /* Name table */ + spinlock_t nametbl_lock; + struct name_table *nametbl; + + /* Name dist queue */ + struct list_head dist_queue; + + /* Topology subscription server */ + struct tipc_topsrv *topsrv; + atomic_t subscription_count; +}; +/* Return the TIPC state attached to namespace @net */ +static inline struct tipc_net *tipc_net(struct net *net) +{ + return net_generic(net, tipc_net_id); +} +/* The accessors below return single fields of the namespace's struct tipc_net */ +static inline int tipc_netid(struct net *net) +{ + return tipc_net(net)->net_id; +} + +static inline struct list_head *tipc_nodes(struct net *net) +{ + return &tipc_net(net)->node_list; +} + +static inline struct name_table *tipc_name_table(struct net *net) +{ + return tipc_net(net)->nametbl; +} + +static inline struct tipc_topsrv *tipc_topsrv(struct net *net) +{ + return tipc_net(net)->topsrv; +} +/* Reduce @addr to an index below NODE_HTABLE_SIZE; the mask only covers every + * index because NODE_HTABLE_SIZE is a power of two + */ +static inline unsigned int tipc_hashfn(u32 addr) +{ + return addr & (NODE_HTABLE_SIZE - 1); +} +/* 16-bit sequence number helpers. Comparisons are done modulo 2^16, so they + * give the expected answer across wraparound as long as the two values are + * less than 32768 apart. + */ +static inline u16 mod(u16 x) +{ + return x & 0xffffu; +} +/* true if @right equals @left or is ahead of it by less than 32768 */ +static inline int less_eq(u16 left, u16 right) +{ + return mod(right - left) < 32768u; +} +/* true if @left is ahead of @right, i.e. the negation of less_eq() */ +static inline int more(u16 left, u16 right) +{ + return !less_eq(left, right); +} +/* true if less_eq() holds and the two values differ */ +static inline int less(u16 left, u16 right) +{ + return less_eq(left, right) && (mod(right) != mod(left)); +} +/* true if @val is neither before @min nor after @max (wraparound-aware) */ +static inline int in_range(u16 val, u16 min, u16 max) +{ + return !less(val, min) && !more(val, max); +} + +#ifdef CONFIG_SYSCTL +int tipc_register_sysctl(void); +void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif +#endif diff --git a/net/tipc/diag.c b/net/tipc/diag.c new file mode 100644 index
000000000..73137f4ae --- /dev/null +++ b/net/tipc/diag.c @@ -0,0 +1,116 @@ +/* + * net/tipc/diag.c: TIPC socket diag + * + * Copyright (c) 2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ */ + +#include "core.h" +#include "socket.h" +#include <linux/sock_diag.h> +#include <linux/tipc_sockets_diag.h> +/* Return the cookie that sock_diag_save_cookie() stores for @sk as two u32 + * words, reinterpreted as one u64 + */ +static u64 __tipc_diag_gen_cookie(struct sock *sk) +{ + u32 res[2]; + + sock_diag_save_cookie(sk, res); + return *((u64 *)res); +} +/* Add one SOCK_DIAG_BY_FAMILY answer for @tsk to the dump buffer @skb. + * Returns 0 on success, -EMSGSIZE if nlmsg_put_answer() fails, or the error + * returned by tipc_sk_fill_sock_diag(). + */ +static int __tipc_add_sock_diag(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + struct tipc_sock_diag_req *req = nlmsg_data(cb->nlh); + struct nlmsghdr *nlh; + int err; + + nlh = nlmsg_put_answer(skb, cb, SOCK_DIAG_BY_FAMILY, 0, + NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states, + __tipc_diag_gen_cookie); + if (err) + return err; + + nlmsg_end(skb, nlh); + return 0; +} +/* Dump callback: hands __tipc_add_sock_diag() to tipc_nl_sk_walk() */ +static int tipc_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_add_sock_diag); +} +/* sock_diag handler for AF_TIPC. Returns -EINVAL if the message payload is + * shorter than struct tipc_sock_diag_req and -EOPNOTSUPP for anything but a + * dump request (NLM_F_DUMP). + * NOTE(review): the return value of netlink_dump_start() is not propagated; + * a started dump always reports 0 - confirm this is intended. + */ +static int tipc_sock_diag_handler_dump(struct sk_buff *skb, + struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct tipc_sock_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = tipc_dump_start, + .dump = tipc_diag_dump, + .done = tipc_dump_done, + }; + netlink_dump_start(net->diag_nlsk, skb, h, &c); + return 0; + } + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler tipc_sock_diag_handler = { + .family = AF_TIPC, + .dump = tipc_sock_diag_handler_dump, +}; +/* Module init/exit: register and unregister the AF_TIPC sock_diag handler */ +static int __init tipc_diag_init(void) +{ + return sock_diag_register(&tipc_sock_diag_handler); +} + +static void __exit tipc_diag_exit(void) +{ + sock_diag_unregister(&tipc_sock_diag_handler); +} + +module_init(tipc_diag_init); +module_exit(tipc_diag_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); diff --git a/net/tipc/discover.c b/net/tipc/discover.c new file mode 100644 index 000000000..c138d68e8 --- /dev/null +++ b/net/tipc/discover.c @@ -0,0
+1,415 @@ +/* + * net/tipc/discover.c + * + * Copyright (c) 2003-2006, 2014-2018, Ericsson AB + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "node.h" +#include "discover.h" + +/* min delay during bearer start up */ +#define TIPC_DISC_INIT msecs_to_jiffies(125) +/* max delay if bearer has no links */ +#define TIPC_DISC_FAST msecs_to_jiffies(1000) +/* max delay if bearer has links */ +#define TIPC_DISC_SLOW msecs_to_jiffies(60000) +/* indicates no timer in use */ +#define TIPC_DISC_INACTIVE 0xffffffff + +/** + * struct tipc_discoverer - information about an ongoing link setup request + * @bearer_id: identity of bearer issuing requests + * @dest: destination address for request messages + * @net: network namespace instance + * @domain: network domain to which links can be established + * @num_nodes: number of nodes currently discovered (i.e. with an active link) + * @lock: spinlock for controlling access to requests + * @skb: request message to be (repeatedly) sent + * @timer: timer governing period between requests + * @timer_intv: current interval between requests (in jiffies), or + * TIPC_DISC_INACTIVE + */ +struct tipc_discoverer { + u32 bearer_id; + struct tipc_media_addr dest; + struct net *net; + u32 domain; + int num_nodes; + spinlock_t lock; + struct sk_buff *skb; + struct timer_list timer; + unsigned long timer_intv; +}; + +/** + * tipc_disc_init_msg - initialize a link setup message + * @net: the applicable net namespace + * @skb: buffer whose message header is initialized + * @mtyp: message type (request or response) + * @b: ptr to bearer issuing message + */ +static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, + u32 mtyp, struct tipc_bearer *b) +{ + struct tipc_net *tn = tipc_net(net); + u32 dest_domain = b->domain; + struct tipc_msg *hdr; + + hdr = buf_msg(skb); + tipc_msg_init(tn->trial_addr, hdr, LINK_CONFIG, mtyp, + MAX_H_SIZE, dest_domain); + msg_set_size(hdr, MAX_H_SIZE + NODE_ID_LEN); + msg_set_non_seq(hdr, 1); + msg_set_node_sig(hdr, tn->random); + msg_set_node_capabilities(hdr, TIPC_NODE_CAPABILITIES); + msg_set_dest_domain(hdr, dest_domain); + msg_set_bc_netid(hdr, tn->net_id); +
b->media->addr2msg(msg_media_addr(hdr), &b->addr); + msg_set_node_id(hdr, tipc_own_id(net)); +} +/** + * tipc_disc_msg_xmit - build and send a single discovery message + * @net: the applicable net namespace + * @mtyp: message type + * @dst: value written to the destination domain field of the message + * @src: not used by this function + * @sugg_addr: value written to the suggested node address field + * @maddr: media address the message is sent to + * @b: bearer the message is sent on + * + * Returns silently, without sending, if no buffer can be acquired. + */ +static void tipc_disc_msg_xmit(struct net *net, u32 mtyp, u32 dst, + u32 src, u32 sugg_addr, + struct tipc_media_addr *maddr, + struct tipc_bearer *b) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + + skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC); + if (!skb) + return; + hdr = buf_msg(skb); + tipc_disc_init_msg(net, skb, mtyp, b); + msg_set_sugg_node_addr(hdr, sugg_addr); + msg_set_dest_domain(hdr, dst); + tipc_bearer_xmit_skb(net, b->identity, skb, maddr); +} + +/** + * disc_dupl_alert - issue node address duplication alert + * @b: pointer to bearer detecting duplication + * @node_addr: duplicated node address + * @media_addr: media address advertised by duplicated node + */ +static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr, + struct tipc_media_addr *media_addr) +{ + char media_addr_str[64]; + + tipc_media_addr_printf(media_addr_str, sizeof(media_addr_str), + media_addr); + pr_warn("Duplicate %x using %s seen on <%s>\n", node_addr, + media_addr_str, b->name); +} + +/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer + * Returns true if message should be dropped by caller, i.e., if it is a + * trial message or we are inside trial period. Otherwise false.
+ */ +static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, + struct tipc_media_addr *maddr, + struct tipc_bearer *b, + u32 dst, u32 src, + u32 sugg_addr, + u8 *peer_id, + int mtyp) +{ + struct net *net = d->net; + struct tipc_net *tn = tipc_net(net); + bool trial = time_before(jiffies, tn->addr_trial_end); + u32 self = tipc_own_addr(net); + + if (mtyp == DSC_TRIAL_FAIL_MSG) { + if (!trial) + return true; + + /* Ignore if somebody else already gave new suggestion */ + if (dst != tn->trial_addr) + return true; + + /* Otherwise update trial address and restart trial period */ + tn->trial_addr = sugg_addr; + msg_set_prevnode(buf_msg(d->skb), sugg_addr); + tn->addr_trial_end = jiffies + msecs_to_jiffies(1000); + return true; + } + + /* Apply trial address if we just left trial period */ + if (!trial && !self) { + tipc_sched_net_finalize(net, tn->trial_addr); + msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + } + + /* Accept regular link requests/responses only after trial period */ + if (mtyp != DSC_TRIAL_MSG) + return trial; + + /* Peer is trying out an address: answer only if we object to it */ + sugg_addr = tipc_node_try_addr(net, peer_id, src); + if (sugg_addr) + tipc_disc_msg_xmit(net, DSC_TRIAL_FAIL_MSG, src, + self, sugg_addr, maddr, b); + return true; +} + +/** + * tipc_disc_rcv - handle incoming discovery message (request or response) + * @net: applicable net namespace + * @skb: buffer containing message + * @b: bearer that message arrived on + * + * Consumes @skb on every path. A message that cannot be linearized is + * dropped without further processing. + */ +void tipc_disc_rcv(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_msg *hdr = buf_msg(skb); + u16 caps = msg_node_capabilities(hdr); + bool legacy = tn->legacy_addr_format; + u32 sugg = msg_sugg_node_addr(hdr); + u32 signature = msg_node_sig(hdr); + u8 peer_id[NODE_ID_LEN] = {0,}; + u32 dst = msg_dest_domain(hdr); + u32 net_id = msg_bc_netid(hdr); + struct tipc_media_addr maddr; + u32 src = msg_prevnode(hdr); + u32 mtyp = msg_type(hdr); + bool dupl_addr =
false; + bool respond = false; + u32 self; + int err; + + /* The node id and media address are read from the linear data area + * below, so drop the message if linearizing it fails instead of + * parsing a header that may not be fully accessible. + */ + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } + hdr = buf_msg(skb); + + if (caps & TIPC_NODE_ID128) + memcpy(peer_id, msg_node_id(hdr), NODE_ID_LEN); + else + sprintf(peer_id, "%x", src); + + err = b->media->msg2addr(b, &maddr, msg_media_addr(hdr)); + kfree_skb(skb); + if (err || maddr.broadcast) { + pr_warn_ratelimited("Rcv corrupt discovery message\n"); + return; + } + /* Ignore discovery messages from own node */ + if (!memcmp(&maddr, &b->addr, sizeof(maddr))) + return; + if (net_id != tn->net_id) + return; + if (tipc_disc_addr_trial_msg(b->disc, &maddr, b, dst, + src, sugg, peer_id, mtyp)) + return; + self = tipc_own_addr(net); + + /* Message from somebody using this node's address */ + if (in_own_node(net, src)) { + disc_dupl_alert(b, self, &maddr); + return; + } + if (!tipc_in_scope(legacy, dst, self)) + return; + if (!tipc_in_scope(legacy, b->domain, src)) + return; + tipc_node_check_dest(net, src, peer_id, b, caps, signature, + &maddr, &respond, &dupl_addr); + if (dupl_addr) + disc_dupl_alert(b, src, &maddr); + if (!respond) + return; + /* Only requests are answered; responses never trigger a reply */ + if (mtyp != DSC_REQ_MSG) + return; + tipc_disc_msg_xmit(net, DSC_RESP_MSG, src, self, 0, &maddr, b); +} + +/* tipc_disc_add_dest - increment set of discovered nodes + */ +void tipc_disc_add_dest(struct tipc_discoverer *d) +{ + spin_lock_bh(&d->lock); + d->num_nodes++; + spin_unlock_bh(&d->lock); +} + +/* tipc_disc_remove_dest - decrement set of discovered nodes + * When the last node is gone and the timer was inactive or running slower + * than the fast rate, discovery is restarted at the initial interval. + */ +void tipc_disc_remove_dest(struct tipc_discoverer *d) +{ + int intv, num; + + spin_lock_bh(&d->lock); + d->num_nodes--; + num = d->num_nodes; + intv = d->timer_intv; + if (!num && (intv == TIPC_DISC_INACTIVE || intv > TIPC_DISC_FAST)) { + d->timer_intv = TIPC_DISC_INIT; + mod_timer(&d->timer, jiffies + d->timer_intv); + } + spin_unlock_bh(&d->lock); +} + +/* tipc_disc_timeout - send a periodic link setup request + * Called whenever a link setup request timer associated with a bearer expires.
+ * - Keep doubling time between sent request until limit is reached; + * - Hold at fast polling rate if we don't have any associated nodes + * - Otherwise hold at slow polling rate + */ +static void tipc_disc_timeout(struct timer_list *t) +{ + struct tipc_discoverer *d = from_timer(d, t, timer); + struct tipc_net *tn = tipc_net(d->net); + struct tipc_media_addr maddr; + struct sk_buff *skb = NULL; + struct net *net = d->net; + u32 bearer_id; + + spin_lock_bh(&d->lock); + + /* Stop searching if only desired node has been found */ + if (tipc_node(d->domain) && d->num_nodes) { + d->timer_intv = TIPC_DISC_INACTIVE; + goto exit; + } + + /* Did we just leave trial period ? */ + if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) { + mod_timer(&d->timer, jiffies + TIPC_DISC_INIT); + spin_unlock_bh(&d->lock); + tipc_sched_net_finalize(net, tn->trial_addr); + return; + } + + /* Adjust timeout interval according to discovery phase */ + if (time_before(jiffies, tn->addr_trial_end)) { + d->timer_intv = TIPC_DISC_INIT; + } else { + d->timer_intv *= 2; + if (d->num_nodes && d->timer_intv > TIPC_DISC_SLOW) + d->timer_intv = TIPC_DISC_SLOW; + else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST) + d->timer_intv = TIPC_DISC_FAST; + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); + } + + mod_timer(&d->timer, jiffies + d->timer_intv); + /* Copy what the transmit needs so it can run after the lock is dropped */ + memcpy(&maddr, &d->dest, sizeof(maddr)); + skb = skb_clone(d->skb, GFP_ATOMIC); + bearer_id = d->bearer_id; +exit: + spin_unlock_bh(&d->lock); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &maddr); +} + +/** + * tipc_disc_create - create object to send periodic link setup requests + * @net: the applicable net namespace + * @b: ptr to bearer issuing requests + * @dest: destination address for request messages + * @skb: set to a clone of the initial request message (result of skb_clone()) + * + * Returns 0 if successful, otherwise -errno.
+ */ +int tipc_disc_create(struct net *net, struct tipc_bearer *b, + struct tipc_media_addr *dest, struct sk_buff **skb) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_discoverer *d; + + d = kmalloc(sizeof(*d), GFP_ATOMIC); + if (!d) + return -ENOMEM; + d->skb = tipc_buf_acquire(MAX_H_SIZE + NODE_ID_LEN, GFP_ATOMIC); + if (!d->skb) { + kfree(d); + return -ENOMEM; + } + tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b); + + /* Do we need an address trial period first ? If tipc_own_addr() is + * still zero, open a 1000 ms trial window and send trial messages + * instead of regular requests. + */ + if (!tipc_own_addr(net)) { + tn->addr_trial_end = jiffies + msecs_to_jiffies(1000); + msg_set_type(buf_msg(d->skb), DSC_TRIAL_MSG); + } + memcpy(&d->dest, dest, sizeof(*dest)); + d->net = net; + d->bearer_id = b->identity; + d->domain = b->domain; + d->num_nodes = 0; + d->timer_intv = TIPC_DISC_INIT; + spin_lock_init(&d->lock); + timer_setup(&d->timer, tipc_disc_timeout, 0); + mod_timer(&d->timer, jiffies + d->timer_intv); + b->disc = d; + *skb = skb_clone(d->skb, GFP_ATOMIC); + return 0; +} + +/** + * tipc_disc_delete - destroy object sending periodic link setup requests + * @d: ptr to link request structure + * + * Deletes the timer with del_timer_sync() before freeing the request + * message and the object itself. + */ +void tipc_disc_delete(struct tipc_discoverer *d) +{ + del_timer_sync(&d->timer); + kfree_skb(d->skb); + kfree(d); +} + +/** + * tipc_disc_reset - reset object to send periodic link setup requests + * @net: the applicable net namespace + * @b: ptr to bearer issuing requests + * + * Re-initializes the request message and the node count, restarts the timer + * at the initial interval and, if the message can be cloned, sends one + * request right away. + */ +void tipc_disc_reset(struct net *net, struct tipc_bearer *b) +{ + struct tipc_discoverer *d = b->disc; + struct tipc_media_addr maddr; + struct sk_buff *skb; + + spin_lock_bh(&d->lock); + tipc_disc_init_msg(net, d->skb, DSC_REQ_MSG, b); + d->net = net; + d->bearer_id = b->identity; + d->domain = b->domain; + d->num_nodes = 0; + d->timer_intv = TIPC_DISC_INIT; + memcpy(&maddr, &d->dest, sizeof(maddr)); + mod_timer(&d->timer, jiffies + d->timer_intv); + skb = skb_clone(d->skb, GFP_ATOMIC); + spin_unlock_bh(&d->lock); + if (skb) +
tipc_bearer_xmit_skb(net, b->identity, skb, &maddr); +} diff --git a/net/tipc/discover.h b/net/tipc/discover.h new file mode 100644 index 000000000..521d96c41 --- /dev/null +++ b/net/tipc/discover.h @@ -0,0 +1,51 @@ +/* + * net/tipc/discover.h + * + * Copyright (c) 2003-2006, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_DISCOVER_H +#define _TIPC_DISCOVER_H + +struct tipc_discoverer; + +int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, + struct tipc_media_addr *dest, struct sk_buff **skb); +void tipc_disc_delete(struct tipc_discoverer *req); +void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); +void tipc_disc_add_dest(struct tipc_discoverer *req); +void tipc_disc_remove_dest(struct tipc_discoverer *req); +void tipc_disc_rcv(struct net *net, struct sk_buff *buf, + struct tipc_bearer *b_ptr); + +#endif diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c new file mode 100644 index 000000000..f69a2fde9 --- /dev/null +++ b/net/tipc/eth_media.c @@ -0,0 +1,99 @@ +/* + * net/tipc/eth_media.c: Ethernet bearer support for TIPC + * + * Copyright (c) 2001-2007, 2013-2014, Ericsson AB + * Copyright (c) 2005-2008, 2011-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "bearer.h" + +/* Convert Ethernet address (media address format) to string; returns 0 on success, 1 if bufsz is below 18 */ +static int tipc_eth_addr2str(struct tipc_media_addr *addr, + char *strbuf, int bufsz) +{ + if (bufsz < 18) /* 18 = the 17 characters of "aa:bb:cc:dd:ee:ff" plus the terminating NUL */ + return 1; + + sprintf(strbuf, "%pM", addr->value); + return 0; +} + +/* Convert from media address format to discovery message addr format; always returns 0 */ +static int tipc_eth_addr2msg(char *msg, struct tipc_media_addr *addr) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_ETH; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, addr->value, ETH_ALEN); + return 0; +} + +/* Convert raw mac address format to media addr format; marks the result as broadcast when the address is ff:ff:ff:ff:ff:ff, always returns 0 */ +static int tipc_eth_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + char bcast_mac[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + + memset(addr, 0, sizeof(*addr)); + ether_addr_copy(addr->value, msg); + addr->media_id = TIPC_MEDIA_TYPE_ETH; + addr->broadcast = !memcmp(addr->value, bcast_mac, ETH_ALEN); + return 0; +} + +/* Convert discovery msg addr format to Ethernet media addr format */ +static int tipc_eth_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + /* Skip past preamble: the address sits TIPC_MEDIA_ADDR_OFFSET bytes into msg */ + msg += TIPC_MEDIA_ADDR_OFFSET; + return tipc_eth_raw2addr(b, addr, msg); +} + +/* Ethernet media registration info */ +struct tipc_media eth_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_eth_addr2str, + .addr2msg = tipc_eth_addr2msg, + .msg2addr = tipc_eth_msg2addr, + .raw2addr = tipc_eth_raw2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_ETH, + .hwaddr_len = ETH_ALEN, + .name = "eth" +}; diff --git a/net/tipc/group.c b/net/tipc/group.c new file mode 100644 index 000000000..b656385ef --- /dev/null +++ b/net/tipc/group.c @@ -0,0 
+1,955 @@ +/* + * net/tipc/group.c: TIPC group messaging code + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "addr.h" +#include "group.h" +#include "bcast.h" +#include "topsrv.h" +#include "msg.h" +#include "socket.h" +#include "node.h" +#include "name_table.h" +#include "subscr.h" + +#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1) +#define ADV_IDLE ADV_UNIT +#define ADV_ACTIVE (ADV_UNIT * 12) + +enum mbr_state { + MBR_JOINING, + MBR_PUBLISHED, + MBR_JOINED, + MBR_PENDING, + MBR_ACTIVE, + MBR_RECLAIMING, + MBR_REMITTED, + MBR_LEAVING +}; + +struct tipc_member { + struct rb_node tree_node; + struct list_head list; + struct list_head small_win; + struct sk_buff_head deferredq; + struct tipc_group *group; + u32 node; + u32 port; + u32 instance; + enum mbr_state state; + u16 advertised; + u16 window; + u16 bc_rcv_nxt; + u16 bc_syncpt; + u16 bc_acked; +}; + +struct tipc_group { + struct rb_root members; + struct list_head small_win; + struct list_head pending; + struct list_head active; + struct tipc_nlist dests; + struct net *net; + int subid; + u32 type; + u32 instance; + u32 scope; + u32 portid; + u16 member_cnt; + u16 active_cnt; + u16 max_active; + u16 bc_snd_nxt; + u16 bc_ackers; + bool *open; + bool loopback; + bool events; +}; + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq); + +static void tipc_group_open(struct tipc_member *m, bool *wakeup) +{ + *wakeup = false; + if (list_empty(&m->small_win)) + return; + list_del_init(&m->small_win); + *m->group->open = true; + *wakeup = true; +} + +static void tipc_group_decr_active(struct tipc_group *grp, + struct tipc_member *m) +{ + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) + grp->active_cnt--; +} + +static int tipc_group_rcvbuf_limit(struct tipc_group *grp) +{ + int max_active, active_pool, idle_pool; + int mcnt = grp->member_cnt + 1; + + /* Limit simultaneous reception from other members */ + max_active = min(mcnt / 8, 64); + max_active = 
max(max_active, 16); + grp->max_active = max_active; + + /* Reserve blocks for active and idle members */ + active_pool = max_active * ADV_ACTIVE; + idle_pool = (mcnt - max_active) * ADV_IDLE; + + /* Scale to bytes, considering worst-case truesize/msgsize ratio */ + return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4; +} + +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) +{ + return grp->bc_snd_nxt; +} + +static bool tipc_group_is_receiver(struct tipc_member *m) +{ + return m && m->state != MBR_JOINING && m->state != MBR_LEAVING; +} + +static bool tipc_group_is_sender(struct tipc_member *m) +{ + return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED; +} + +u32 tipc_group_exclude(struct tipc_group *grp) +{ + if (!grp->loopback) + return grp->portid; + return 0; +} + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq, + bool *group_is_open) +{ + u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; + bool global = mreq->scope != TIPC_NODE_SCOPE; + struct tipc_group *grp; + u32 type = mreq->type; + + grp = kzalloc(sizeof(*grp), GFP_ATOMIC); + if (!grp) + return NULL; + tipc_nlist_init(&grp->dests, tipc_own_addr(net)); + INIT_LIST_HEAD(&grp->small_win); + INIT_LIST_HEAD(&grp->active); + INIT_LIST_HEAD(&grp->pending); + grp->members = RB_ROOT; + grp->net = net; + grp->portid = portid; + grp->type = type; + grp->instance = mreq->instance; + grp->scope = mreq->scope; + grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; + grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; + grp->open = group_is_open; + *grp->open = false; + filter |= global ? 
TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, + filter, &grp->subid)) + return grp; + kfree(grp); + return NULL; +} + +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); + tipc_group_update_member(m, 0); + } + tipc_node_distr_xmit(net, &xmitq); + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + +void tipc_group_delete(struct net *net, struct tipc_group *grp) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq); + __skb_queue_purge(&m->deferredq); + list_del(&m->list); + kfree(m); + } + tipc_node_distr_xmit(net, &xmitq); + tipc_nlist_purge(&grp->dests); + tipc_topsrv_kern_unsubscr(net, grp->subid); + kfree(grp); +} + +static struct tipc_member *tipc_group_find_member(struct tipc_group *grp, + u32 node, u32 port) +{ + struct rb_node *n = grp->members.rb_node; + u64 nkey, key = (u64)node << 32 | port; + struct tipc_member *m; + + while (n) { + m = container_of(n, struct tipc_member, tree_node); + nkey = (u64)m->node << 32 | m->port; + if (key < nkey) + n = n->rb_left; + else if (key > nkey) + n = n->rb_right; + else + return m; + } + return NULL; +} + +static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp, + u32 node, u32 port) +{ + struct tipc_member *m; + + m = tipc_group_find_member(grp, node, port); + if (m && tipc_group_is_receiver(m)) + return m; + return NULL; +} + +static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, + u32 node) +{ + struct tipc_member *m; + struct rb_node *n; + + for 
(n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (m->node == node) + return m; + } + return NULL; +} + +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) +{ + u64 nkey, key = (u64)m->node << 32 | m->port; + struct rb_node **n, *parent = NULL; + struct tipc_member *tmp; + + n = &grp->members.rb_node; + while (*n) { + tmp = container_of(*n, struct tipc_member, tree_node); + parent = *n; + tmp = container_of(parent, struct tipc_member, tree_node); + nkey = (u64)tmp->node << 32 | tmp->port; + if (key < nkey) + n = &(*n)->rb_left; + else if (key > nkey) + n = &(*n)->rb_right; + else + return -EEXIST; + } + rb_link_node(&m->tree_node, parent, n); + rb_insert_color(&m->tree_node, &grp->members); + return 0; +} + +static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, + u32 node, u32 port, + u32 instance, int state) +{ + struct tipc_member *m; + int ret; + + m = kzalloc(sizeof(*m), GFP_ATOMIC); + if (!m) + return NULL; + INIT_LIST_HEAD(&m->list); + INIT_LIST_HEAD(&m->small_win); + __skb_queue_head_init(&m->deferredq); + m->group = grp; + m->node = node; + m->port = port; + m->instance = instance; + m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } + grp->member_cnt++; + tipc_nlist_add(&grp->dests, m->node); + m->state = state; + return m; +} + +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance) +{ + tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED); +} + +static void tipc_group_delete_member(struct tipc_group *grp, + struct tipc_member *m) +{ + rb_erase(&m->tree_node, &grp->members); + grp->member_cnt--; + + /* Check if we were waiting for replicast ack from this member */ + if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1)) + grp->bc_ackers--; + + list_del_init(&m->list); + list_del_init(&m->small_win); + 
tipc_group_decr_active(grp, m); + + /* If last member on a node, remove node from dest list */ + if (!tipc_group_find_node(grp, m->node)) + tipc_nlist_del(&grp->dests, m->node); + + kfree(m); +} + +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) +{ + return &grp->dests; +} + +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope) +{ + seq->type = grp->type; + seq->lower = grp->instance; + seq->upper = grp->instance; + *scope = grp->scope; +} + +void tipc_group_update_member(struct tipc_member *m, int len) +{ + struct tipc_group *grp = m->group; + struct tipc_member *_m, *tmp; + + if (!tipc_group_is_receiver(m)) + return; + + m->window -= len; + + if (m->window >= ADV_IDLE) + return; + + list_del_init(&m->small_win); + + /* Sort member into small_window members' list */ + list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) { + if (_m->window > m->window) + break; + } + list_add_tail(&m->small_win, &_m->small_win); +} + +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) +{ + u16 prev = grp->bc_snd_nxt - 1; + struct tipc_member *m; + struct rb_node *n; + u16 ackers = 0; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (tipc_group_is_receiver(m)) { + tipc_group_update_member(m, len); + m->bc_acked = prev; + ackers++; + } + } + + /* Mark number of acknowledges to expect, if any */ + if (ack) + grp->bc_ackers = ackers; + grp->bc_snd_nxt++; +} + +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **mbr) +{ + struct sk_buff_head xmitq; + struct tipc_member *m; + int adv, state; + + m = tipc_group_find_dest(grp, dnode, dport); + if (!tipc_group_is_receiver(m)) { + *mbr = NULL; + return false; + } + *mbr = m; + + if (m->window >= len) + return false; + + *grp->open = false; + + /* If not fully advertised, do it now to prevent mutual blocking */ + adv = m->advertised; + state = 
m->state; + if (state == MBR_JOINED && adv == ADV_IDLE) + return true; + if (state == MBR_ACTIVE && adv == ADV_ACTIVE) + return true; + if (state == MBR_PENDING && adv == ADV_IDLE) + return true; + __skb_queue_head_init(&xmitq); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); + tipc_node_distr_xmit(grp->net, &xmitq); + return true; +} + +bool tipc_group_bc_cong(struct tipc_group *grp, int len) +{ + struct tipc_member *m = NULL; + + /* If prev bcast was replicast, reject until all receivers have acked */ + if (grp->bc_ackers) { + *grp->open = false; + return true; + } + if (list_empty(&grp->small_win)) + return false; + + m = list_first_entry(&grp->small_win, struct tipc_member, small_win); + if (m->window >= len) + return false; + + return tipc_group_cong(grp, m->node, m->port, len, &m); +} + +/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number + */ +static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) +{ + struct tipc_msg *_hdr, *hdr = buf_msg(skb); + u16 bc_seqno = msg_grp_bc_seqno(hdr); + struct sk_buff *_skb, *tmp; + int mtyp = msg_type(hdr); + + /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */ + if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (!less(bc_seqno, msg_grp_bc_seqno(_hdr))) + continue; + __skb_queue_before(defq, _skb, skb); + return; + } + /* Bcast was not bypassed, - add to tail */ + } + /* Unicasts are never bypassed, - always add to tail */ + __skb_queue_tail(defq, skb); +} + +/* tipc_group_filter_msg() - determine if we should accept arriving message + */ +void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + bool ack, deliver, update, leave = false; + struct sk_buff_head *defq; + struct tipc_member *m; + struct tipc_msg *hdr; + u32 node, port; + int mtyp, blks; + + if (!skb) + 
return; + + hdr = buf_msg(skb); + node = msg_orignode(hdr); + port = msg_origport(hdr); + + if (!msg_in_group(hdr)) + goto drop; + + m = tipc_group_find_member(grp, node, port); + if (!tipc_group_is_sender(m)) + goto drop; + + if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + goto drop; + + TIPC_SKB_CB(skb)->orig_member = m->instance; + defq = &m->deferredq; + tipc_group_sort_msg(skb, defq); + + while ((skb = skb_peek(defq))) { + hdr = buf_msg(skb); + mtyp = msg_type(hdr); + blks = msg_blocks(hdr); + deliver = true; + ack = false; + update = false; + + if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + break; + + /* Decide what to do with message */ + switch (mtyp) { + case TIPC_GRP_MCAST_MSG: + if (msg_nameinst(hdr) != grp->instance) { + update = true; + deliver = false; + } + /* Fall thru */ + case TIPC_GRP_BCAST_MSG: + m->bc_rcv_nxt++; + ack = msg_grp_bc_ack_req(hdr); + break; + case TIPC_GRP_UCAST_MSG: + break; + case TIPC_GRP_MEMBER_EVT: + if (m->state == MBR_LEAVING) + leave = true; + if (!grp->events) + deliver = false; + break; + default: + break; + } + + /* Execute decisions */ + __skb_dequeue(defq); + if (deliver) + __skb_queue_tail(inputq, skb); + else + kfree_skb(skb); + + if (ack) + tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); + + if (leave) { + __skb_queue_purge(defq); + tipc_group_delete_member(grp, m); + break; + } + if (!update) + continue; + + tipc_group_update_rcv_win(grp, blks, node, port, xmitq); + } + return; +drop: + kfree_skb(skb); +} + +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq) +{ + struct list_head *active = &grp->active; + int max_active = grp->max_active; + int reclaim_limit = max_active * 3 / 4; + int active_cnt = grp->active_cnt; + struct tipc_member *m, *rm, *pm; + + m = tipc_group_find_member(grp, node, port); + if (!m) + return; + + m->advertised -= blks; + + switch (m->state) { + case MBR_JOINED: + /* First, decide if member can go active */ + if 
(active_cnt <= max_active) { + m->state = MBR_ACTIVE; + list_add_tail(&m->list, active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } else { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + } + + if (active_cnt < reclaim_limit) + break; + + /* Reclaim from oldest active member, if possible */ + if (!list_empty(active)) { + rm = list_first_entry(active, struct tipc_member, list); + rm->state = MBR_RECLAIMING; + list_del_init(&rm->list); + tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); + break; + } + /* Nobody to reclaim from; - revert oldest pending to JOINED */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + list_del_init(&pm->list); + pm->state = MBR_JOINED; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; + case MBR_ACTIVE: + if (!list_is_last(&m->list, &grp->active)) + list_move_tail(&m->list, &grp->active); + if (m->advertised > (ADV_ACTIVE * 3 / 4)) + break; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + break; + case MBR_REMITTED: + if (m->advertised > ADV_IDLE) + break; + m->state = MBR_JOINED; + grp->active_cnt--; + if (m->advertised < ADV_IDLE) { + pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } + + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; + case MBR_RECLAIMING: + case MBR_JOINING: + case MBR_LEAVING: + default: + break; + } +} + +static void tipc_group_create_event(struct tipc_group *grp, + struct tipc_member *m, + u32 event, u16 seqno, + struct sk_buff_head *inputq) +{ u32 dnode = tipc_own_addr(grp->net); + struct tipc_event evt; + struct sk_buff *skb; + struct tipc_msg *hdr; + + memset(&evt, 0, sizeof(evt)); + 
evt.event = event; + evt.found_lower = m->instance; + evt.found_upper = m->instance; + evt.port.ref = m->port; + evt.port.node = m->node; + evt.s.seq.type = grp->type; + evt.s.seq.lower = m->instance; + evt.s.seq.upper = m->instance; + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT, + GROUP_H_SIZE, sizeof(evt), dnode, m->node, + grp->portid, m->port, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + msg_set_dest_droppable(hdr, true); + msg_set_grp_bc_seqno(hdr, seqno); + memcpy(msg_data(hdr), &evt, sizeof(evt)); + TIPC_SKB_CB(skb)->orig_member = m->instance; + __skb_queue_tail(inputq, skb); +} + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + int adv = 0; + + skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0, + m->node, tipc_own_addr(grp->net), + m->port, grp->portid, 0); + if (!skb) + return; + + if (m->state == MBR_ACTIVE) + adv = ADV_ACTIVE - m->advertised; + else if (m->state == MBR_JOINED || m->state == MBR_PENDING) + adv = ADV_IDLE - m->advertised; + + hdr = buf_msg(skb); + + if (mtyp == GRP_JOIN_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_LEAVE_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + } else if (mtyp == GRP_ADV_MSG) { + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_ACK_MSG) { + msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt); + } else if (mtyp == GRP_REMIT_MSG) { + msg_set_grp_remitted(hdr, m->window); + } + msg_set_dest_droppable(hdr, true); + __skb_queue_tail(xmitq, skb); +} + +void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, + struct tipc_msg *hdr, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + u32 node = msg_orignode(hdr); + u32 port = msg_origport(hdr); + struct 
tipc_member *m, *pm; + u16 remitted, in_flight; + + if (!grp) + return; + + if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net)) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (msg_type(hdr)) { + case GRP_JOIN_MSG: + if (!m) + m = tipc_group_create_member(grp, node, port, + 0, MBR_JOINING); + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + m->bc_rcv_nxt = m->bc_syncpt; + m->window += msg_adv_win(hdr); + + /* Wait until PUBLISH event is received if necessary */ + if (m->state != MBR_PUBLISHED) + return; + + /* Member can be taken into service */ + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + return; + case GRP_LEAVE_MSG: + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + list_del_init(&m->list); + tipc_group_open(m, usr_wakeup); + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); + return; + case GRP_ADV_MSG: + if (!m) + return; + m->window += msg_adv_win(hdr); + tipc_group_open(m, usr_wakeup); + return; + case GRP_ACK_MSG: + if (!m) + return; + m->bc_acked = msg_grp_bc_acked(hdr); + if (--grp->bc_ackers) + return; + list_del_init(&m->small_win); + *m->group->open = true; + *usr_wakeup = true; + tipc_group_update_member(m, 0); + return; + case GRP_RECLAIM_MSG: + if (!m) + return; + tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); + m->window = ADV_IDLE; + tipc_group_open(m, usr_wakeup); + return; + case GRP_REMIT_MSG: + if (!m || m->state != MBR_RECLAIMING) + return; + + remitted = msg_grp_remitted(hdr); + + /* Messages preceding the REMIT still in receive queue */ + if (m->advertised > remitted) { + m->state = MBR_REMITTED; + in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; + } + /* This should never 
happen */ + if (m->advertised < remitted) + pr_warn_ratelimited("Unexpected REMIT msg\n"); + + /* All messages preceding the REMIT have been read */ + m->state = MBR_JOINED; + grp->active_cnt--; + m->advertised = ADV_IDLE; + + /* Set oldest pending member to active and advertise */ + if (list_empty(&grp->pending)) + return; + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + if (pm->advertised <= (ADV_ACTIVE * 3 / 4)) + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + return; + default: + pr_warn("Received unknown GROUP_PROTO message\n"); + } +} + +/* tipc_group_member_evt() - receive and handle a member up/down event + */ +void tipc_group_member_evt(struct tipc_group *grp, + bool *usr_wakeup, + int *sk_rcvbuf, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct tipc_event *evt = (void *)msg_data(hdr); + u32 instance = evt->found_lower; + u32 node = evt->port.node; + u32 port = evt->port.ref; + int event = evt->event; + struct tipc_member *m; + struct net *net; + u32 self; + + if (!grp) + return; + + net = grp->net; + self = tipc_own_addr(net); + if (!grp->loopback && node == self && port == grp->portid) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (event) { + case TIPC_PUBLISHED: + /* Send and wait for arrival of JOIN message if necessary */ + if (!m) { + m = tipc_group_create_member(grp, node, port, instance, + MBR_PUBLISHED); + if (!m) + break; + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + break; + } + + if (m->state != MBR_JOINING) + break; + + /* Member can be taken into service */ + m->instance = instance; + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + break; + 
case TIPC_WITHDRAWN: + if (!m) + break; + + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + list_del_init(&m->list); + tipc_group_open(m, usr_wakeup); + + /* Only send event if no LEAVE message can be expected */ + if (!tipc_node_is_up(net, node)) + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_rcv_nxt, inputq); + break; + default: + break; + } + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + +/* tipc_group_fill_sock_diag() - add a TIPC_NLA_SOCK_GROUP nest describing the group to a netlink message + * Returns 0 on success, or -1 if the nest could not be started or an attribute could not be added + */ +int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) +{ + struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + + /* nla_nest_start() returns NULL when the nest header does not fit in skb; bail out before nla_nest_end() would dereference it */ + if (!group) + return -1; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, + grp->type) || + nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE, + grp->instance) || + nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT, + grp->bc_snd_nxt)) + goto group_msg_cancel; + + if (grp->scope == TIPC_NODE_SCOPE) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE)) + goto group_msg_cancel; + + if (grp->scope == TIPC_CLUSTER_SCOPE) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE)) + goto group_msg_cancel; + + if (*grp->open) + if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN)) + goto group_msg_cancel; + + nla_nest_end(skb, group); + return 0; + +group_msg_cancel: + nla_nest_cancel(skb, group); + return -1; +} diff --git a/net/tipc/group.h b/net/tipc/group.h new file mode 100644 index 000000000..76b4e5a7b --- /dev/null +++ b/net/tipc/group.h @@ -0,0 +1,76 @@ +/* + * net/tipc/group.h: Include file for TIPC group unicast/multicast functions + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_GROUP_H +#define _TIPC_GROUP_H + +#include "core.h" + +struct tipc_group; +struct tipc_member; +struct tipc_msg; + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq, + bool *group_is_open); +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); +void tipc_group_delete(struct net *net, struct tipc_group *grp); +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance); +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope); +u32 tipc_group_exclude(struct tipc_group *grp); +void tipc_group_filter_msg(struct tipc_group *grp, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, + int *sk_rcvbuf, struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **m); +bool tipc_group_bc_cong(struct tipc_group *grp, int len); +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq); +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); +void tipc_group_update_member(struct tipc_member *m, int len); +int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb); +#endif diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c new file mode 100644 index 000000000..e8c16718e --- /dev/null +++ b/net/tipc/ib_media.c @@ -0,0 +1,101 @@ +/* + * net/tipc/ib_media.c: Infiniband bearer support for TIPC + * + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + * + * Based on 
eth_media.c, which carries the following copyright notice: + * + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2008, 2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/if_infiniband.h> +#include "core.h" +#include "bearer.h" + +/* Convert InfiniBand media address to string. + * Returns 1 without writing when str_size is below 60; otherwise formats + * a->value into str_buf with %20phC and returns 0. + */ +static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf, + int str_size) +{ + if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ + return 1; + + sprintf(str_buf, "%20phC", a->value); + + return 0; +} + +/* Convert from media address format to discovery message addr format: + * clears TIPC_MEDIA_INFO_SIZE bytes of msg, then copies INFINIBAND_ALEN + * bytes of addr->value into it. Always returns 0. + */ +static int tipc_ib_addr2msg(char *msg, struct tipc_media_addr *addr) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + memcpy(msg, addr->value, INFINIBAND_ALEN); + return 0; +} + +/* Convert raw InfiniBand address format to media addr format. + * The result is flagged as broadcast when the INFINIBAND_ALEN bytes of msg + * equal b->bcast_addr.value. Always returns 0. + */ +static int tipc_ib_raw2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + memset(addr, 0, sizeof(*addr)); + memcpy(addr->value, msg, INFINIBAND_ALEN); + addr->media_id = TIPC_MEDIA_TYPE_IB; + addr->broadcast = !memcmp(msg, b->bcast_addr.value, + INFINIBAND_ALEN); + return 0; +} + +/* Convert discovery msg addr format to InfiniBand media addr format; + * delegates to tipc_ib_raw2addr() and returns its result. + */ +static int tipc_ib_msg2addr(struct tipc_bearer *b, + struct tipc_media_addr *addr, + char *msg) +{ + return tipc_ib_raw2addr(b, addr, msg); +} + +/* InfiniBand media registration info */ +struct tipc_media ib_media_info = { + .send_msg = tipc_l2_send_msg, + .enable_media = tipc_enable_l2_media, + .disable_media = tipc_disable_l2_media, + .addr2str = tipc_ib_addr2str, + .addr2msg = tipc_ib_addr2msg, + .msg2addr = tipc_ib_msg2addr, + .raw2addr = tipc_ib_raw2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .type_id = TIPC_MEDIA_TYPE_IB, + .hwaddr_len = INFINIBAND_ALEN, + .name = "ib" +}; diff --git a/net/tipc/link.c b/net/tipc/link.c new file mode 100644 index 000000000..0d2ee4eb1 --- /dev/null +++ b/net/tipc/link.c @@ -0,0 +1,2285 @@ +/* + * net/tipc/link.c: TIPC link code + * + * Copyright (c) 1996-2007, 2012-2016, Ericsson AB + * 
Copyright (c) 2004-2007, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "subscr.h" +#include "link.h" +#include "bcast.h" +#include "socket.h" +#include "name_distr.h" +#include "discover.h" +#include "netlink.h" +#include "monitor.h" + +#include <linux/pkt_sched.h> + +struct tipc_stats { + u32 sent_pkts; + u32 recv_pkts; + u32 sent_states; + u32 recv_states; + u32 sent_probes; + u32 recv_probes; + u32 sent_nacks; + u32 recv_nacks; + u32 sent_acks; + u32 sent_bundled; + u32 sent_bundles; + u32 recv_bundled; + u32 recv_bundles; + u32 retransmitted; + u32 sent_fragmented; + u32 sent_fragments; + u32 recv_fragmented; + u32 recv_fragments; + u32 link_congs; /* # port sends blocked by congestion */ + u32 deferred_recv; + u32 duplicates; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* sum of sampled send queue sizes, for profiling */ + u32 queue_sz_counts; /* # of send queue size samples taken */ + u32 msg_length_counts; /* # of messages sampled for length profiling */ + u32 msg_lengths_total; /* sum of sampled message lengths */ + u32 msg_length_profile[7]; /* histogram of sampled msg. 
lengths: <= 64, 256, 1024, 4096, 16384, 32768 bytes, and larger */ +}; + +/** + * struct tipc_link - TIPC link data structure + * @addr: network address of link's peer node + * @name: link name character string + * @net: pointer to namespace struct + * @peer_session: link session # being used by peer end of link + * @session: own link session #; set at creation, incremented at each link reset + * @peer_bearer_id: bearer id used by link's peer endpoint + * @bearer_id: local bearer id used by link + * @tolerance: minimum link continuity loss needed to reset link [in ms] + * @abort_limit: # of unacknowledged continuity probes needed to reset link + * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node + * @active: flag set through tipc_link_set_active() + * @silent_intv_cnt: # of timer intervals without any reception from peer + * @if_name: associated interface name + * @priority: current link priority + * @net_plane: current link network plane ('A' through 'H') + * @mon_state: cookie with information needed by link monitor + * @rst_cnt: # of timeouts handled in state LINK_RESET; paces RESET_MSG sending + * @drop_point: seq # below which tunnelled messages are dropped at reception + * @failover_reasm_skb: reassembly head used instead of @reasm_buf for tunnelled messages + * @mtu: current maximum packet size for this link + * @advertised_mtu: advertised own mtu when link is being established + * @transmq: queue for sent, non-acked messages + * @backlogq: queue for messages waiting to be sent + * @backlog: per importance level: backlog length, congestion limit and bundle being filled + * @snd_nxt: next sequence number to use for outbound messages + * @last_retransm: sequence number of most recently retransmitted message + * @window: send window; max # of packets held in @transmq + * @stale_cnt: counter for number of identical retransmit attempts + * @stale_limit: time when repeated identical retransmits must force link reset + * @ackers: # of peers that needs to ack each packet before it can be released + * @acked: # last 
packet acked by a certain peer. Used for broadcast. + * @rcv_nxt: next sequence number to expect for inbound messages + * @deferdq: deferred queue saved OOS b'cast message received from node + * @rcv_unacked: # of inbound messages rx'd without ack'ing back to peer + * @inputq: buffer queue for messages to be delivered upwards + * @namedq: buffer queue for name table messages to be delivered upwards + * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate + * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvlink: the peer specific link used for broadcast reception + * @bc_sndlink: the namespace global link used for broadcast sending + * @stats: collects statistics regarding link activity + */ +struct tipc_link { + u32 addr; + char name[TIPC_MAX_LINK_NAME]; + struct net *net; + + /* Management and link supervision data */ + u16 peer_session; + u16 session; + u16 snd_nxt_state; + u16 rcv_nxt_state; + u32 peer_bearer_id; + u32 bearer_id; + u32 tolerance; + u32 abort_limit; + u32 state; + u16 peer_caps; + bool in_session; + bool active; + u32 silent_intv_cnt; + char if_name[TIPC_MAX_IF_NAME]; + u32 priority; + char net_plane; + struct tipc_mon_state mon_state; + u16 rst_cnt; + + /* Failover/synch */ + u16 drop_point; + struct sk_buff *failover_reasm_skb; + + /* Max packet negotiation */ + u16 mtu; + u16 advertised_mtu; + + /* Sending */ + struct sk_buff_head transmq; + struct sk_buff_head backlogq; + struct { + u16 len; + u16 limit; + struct sk_buff *target_bskb; + } backlog[5]; + u16 snd_nxt; + u16 last_retransm; + u16 window; + u16 stale_cnt; + unsigned long stale_limit; + + /* Reception */ + u16 rcv_nxt; + u32 rcv_unacked; + struct sk_buff_head deferdq; + struct sk_buff_head *inputq; + struct sk_buff_head *namedq; + + /* Congestion handling */ + struct sk_buff_head wakeupq; + + /* Fragmentation/reassembly */ + struct sk_buff *reasm_buf; + + /* 
Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + unsigned long prev_retr; + u16 prev_from; + u16 prev_to; + u8 nack_state; + bool bc_peer_is_up; + + /* Statistics */ + struct tipc_stats stats; +}; + +/* + * Error message prefixes + */ +static const char *link_co_err = "Link tunneling error, "; +static const char *link_rst_msg = "Resetting link "; + +/* Send states for broadcast NACKs + */ +enum { + BC_NACK_SND_CONDITIONAL, + BC_NACK_SND_UNCONDITIONAL, + BC_NACK_SND_SUPPRESS, +}; + +#define TIPC_BC_RETR_LIMIT 10 /* [ms] */ + +/* + * Interval between NACKs when packets arrive out of order + */ +#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2) + +/* Link FSM states: + * Each state value occupies a 4-bit field of its own, so l->state can be + * tested against a set of states with a single bitwise AND. + */ +enum { + LINK_ESTABLISHED = 0xe, + LINK_ESTABLISHING = 0xe << 4, + LINK_RESET = 0x1 << 8, + LINK_RESETTING = 0x2 << 12, + LINK_PEER_RESET = 0xd << 16, + LINK_FAILINGOVER = 0xf << 20, + LINK_SYNCHING = 0xc << 24 +}; + +/* Link FSM state checking routines + */ +static int link_is_up(struct tipc_link *l) +{ + return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); +} + +static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, + struct sk_buff_head *xmitq); +static void link_print(struct tipc_link *l, const char *str); +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); + +/* + * Simple non-static link routines (i.e. 
referenced outside this file) + */ +bool tipc_link_is_up(struct tipc_link *l) +{ + return link_is_up(l); +} + +bool tipc_link_peer_is_down(struct tipc_link *l) +{ + return l->state == LINK_PEER_RESET; +} + +bool tipc_link_is_reset(struct tipc_link *l) +{ + return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); +} + +bool tipc_link_is_establishing(struct tipc_link *l) +{ + return l->state == LINK_ESTABLISHING; +} + +bool tipc_link_is_synching(struct tipc_link *l) +{ + return l->state == LINK_SYNCHING; +} + +bool tipc_link_is_failingover(struct tipc_link *l) +{ + return l->state == LINK_FAILINGOVER; +} + +bool tipc_link_is_blocked(struct tipc_link *l) +{ + return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); +} + +static bool link_is_bc_sndlink(struct tipc_link *l) +{ /* a link holding no bc_sndlink reference is treated as the broadcast send link */ + return !l->bc_sndlink; +} + +static bool link_is_bc_rcvlink(struct tipc_link *l) +{ + return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); +} + +void tipc_link_set_active(struct tipc_link *l, bool active) +{ + l->active = active; +} + +u32 tipc_link_id(struct tipc_link *l) +{ /* peer bearer id above bit 15, own bearer id in the low bits */ + return l->peer_bearer_id << 16 | l->bearer_id; +} + +int tipc_link_window(struct tipc_link *l) +{ + return l->window; +} + +int tipc_link_prio(struct tipc_link *l) +{ + return l->priority; +} + +unsigned long tipc_link_tolerance(struct tipc_link *l) +{ + return l->tolerance; +} + +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) +{ + return l->inputq; +} + +char tipc_link_plane(struct tipc_link *l) +{ + return l->net_plane; +} + +void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) +{ + l->peer_caps = capabilities; +} + +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *rcv_l = uc_l->bc_rcvlink; + + snd_l->ackers++; + rcv_l->acked = snd_l->snd_nxt - 1; + snd_l->state = LINK_ESTABLISHED; + tipc_link_build_bc_init_msg(uc_l, xmitq); +} + +void tipc_link_remove_bc_peer(struct tipc_link 
*snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq) +{ + u16 ack = snd_l->snd_nxt - 1; + + snd_l->ackers--; + rcv_l->bc_peer_is_up = true; + rcv_l->state = LINK_ESTABLISHED; + tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); + tipc_link_reset(rcv_l); + rcv_l->state = LINK_RESET; + if (!snd_l->ackers) { + tipc_link_reset(snd_l); + snd_l->state = LINK_RESET; + __skb_queue_purge(xmitq); + } +} + +int tipc_link_bc_peers(struct tipc_link *l) +{ + return l->ackers; +} + +static u16 link_bc_rcv_gap(struct tipc_link *l) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 gap = 0; + + if (more(l->snd_nxt, l->rcv_nxt)) + gap = l->snd_nxt - l->rcv_nxt; + if (skb) + gap = buf_seqno(skb) - l->rcv_nxt; + return gap; +} + +void tipc_link_set_mtu(struct tipc_link *l, int mtu) +{ + l->mtu = mtu; +} + +int tipc_link_mtu(struct tipc_link *l) +{ + return l->mtu; +} + +u16 tipc_link_rcv_nxt(struct tipc_link *l) +{ + return l->rcv_nxt; +} + +u16 tipc_link_acked(struct tipc_link *l) +{ + return l->acked; +} + +char *tipc_link_name(struct tipc_link *l) +{ + return l->name; +} + +u32 tipc_link_state(struct tipc_link *l) +{ + return l->state; +} + +/** + * tipc_link_create - create a new link + * @net: pointer to namespace struct + * @if_name: associated interface name + * @bearer_id: id (index) of associated bearer + * @tolerance: link tolerance to be used by link + * @net_plane: network plane (A,B,C..) 
this link belongs to + * @mtu: mtu to be advertised by link + * @priority: priority to be used by link + * @window: send window to be used by link + * @session: session to be used by link + * @self: identity of own node + * @peer: node id of peer node + * @peer_id: peer node identity, converted to string for the link name; + * NULL for broadcast links + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending + * @bc_rcvlink: the peer specific link used for broadcast reception + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @link: return value, pointer to put the created link + * + * Returns true if link was created, otherwise false + */ +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 self, + u32 peer, u8 *peer_id, u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link **link) +{ + char peer_str[NODE_ID_STR_LEN] = {0,}; + char self_str[NODE_ID_STR_LEN] = {0,}; + struct tipc_link *l; + + l = kzalloc(sizeof(*l), GFP_ATOMIC); + if (!l) + return false; + *link = l; + l->session = session; + + /* Set link name for unicast links only */ + if (peer_id) { + tipc_nodeid2string(self_str, tipc_own_id(net)); + if (strlen(self_str) > 16) + sprintf(self_str, "%x", self); + tipc_nodeid2string(peer_str, peer_id); + if (strlen(peer_str) > 16) + sprintf(peer_str, "%x", peer); + } + /* Peer i/f name will be completed by reset/activate message */ + snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown", + self_str, if_name, peer_str); + + strcpy(l->if_name, if_name); + l->addr = peer; + l->peer_caps = peer_caps; + l->net = net; + l->in_session = false; + l->bearer_id = bearer_id; + l->tolerance = tolerance; + if (bc_rcvlink) + bc_rcvlink->tolerance = tolerance; + l->net_plane = net_plane; + l->advertised_mtu = mtu; + 
l->mtu = mtu; + l->priority = priority; + tipc_link_set_queue_limits(l, window); + l->ackers = 1; + l->bc_sndlink = bc_sndlink; + l->bc_rcvlink = bc_rcvlink; + l->inputq = inputq; + l->namedq = namedq; + l->state = LINK_RESETTING; + __skb_queue_head_init(&l->transmq); + __skb_queue_head_init(&l->backlogq); + __skb_queue_head_init(&l->deferdq); + skb_queue_head_init(&l->wakeupq); + skb_queue_head_init(l->inputq); + return true; +} + +/** + * tipc_link_bc_create - create new link to be used for broadcast + * @net: pointer to namespace struct + * @ownnode: identity of own node + * @peer: node id of peer node + * @mtu: mtu to be used initially if no peers + * @window: send window to be used + * @peer_caps: bitmap describing peer node capabilities + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @bc_sndlink: the namespace global link used for broadcast sending; + * NULL when the link being created is that send link itself + * @link: return value, pointer to put the created link + * + * Returns true if link was created, otherwise false + */ +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link) +{ + struct tipc_link *l; + + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, + 0, ownnode, peer, NULL, peer_caps, bc_sndlink, + NULL, inputq, namedq, link)) + return false; + + l = *link; + strcpy(l->name, tipc_bclink_name); + tipc_link_reset(l); + l->state = LINK_RESET; + l->ackers = 0; + l->bc_rcvlink = l; + + /* Broadcast send link is always up */ + if (link_is_bc_sndlink(l)) + l->state = LINK_ESTABLISHED; + + /* Disable replicast if even a single peer doesn't support it */ + if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) + tipc_bcast_disable_rcast(net); + + return true; +} + +/** + * tipc_link_fsm_evt - link finite state machine + * @l: pointer to link + * @evt: state machine event to be processed + * + * Returns 0, or TIPC_LINK_DOWN_EVT when a peer reset or failure event takes + * an established or synching link down. Illegal events and unknown states + * are logged and leave the state unchanged. + */ +int tipc_link_fsm_evt(struct tipc_link *l, int evt) +{ + int rc = 0; + + switch (l->state) { + case LINK_RESETTING: + switch (evt) { + case 
LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + break; + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_FAILURE_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILOVER_END_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_RESET: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_ESTABLISHING; + break; + case LINK_FAILOVER_BEGIN_EVT: + l->state = LINK_FAILINGOVER; /* fall through */ + case LINK_FAILURE_EVT: + case LINK_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILOVER_END_EVT: + break; + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_PEER_RESET: + switch (evt) { + case LINK_RESET_EVT: + l->state = LINK_ESTABLISHING; + break; + case LINK_PEER_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILURE_EVT: + break; + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_FAILINGOVER: + switch (evt) { + case LINK_FAILOVER_END_EVT: + l->state = LINK_RESET; + break; + case LINK_PEER_RESET_EVT: + case LINK_RESET_EVT: + case LINK_ESTABLISH_EVT: + case LINK_FAILURE_EVT: + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_ESTABLISHING: + switch (evt) { + case LINK_ESTABLISH_EVT: + l->state = LINK_ESTABLISHED; + break; + case LINK_FAILOVER_BEGIN_EVT: + l->state = LINK_FAILINGOVER; + break; + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_FAILURE_EVT: + case LINK_PEER_RESET_EVT: + case LINK_SYNCH_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + break; + case LINK_SYNCH_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_ESTABLISHED: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + rc |= TIPC_LINK_DOWN_EVT; + break; + case 
LINK_FAILURE_EVT: + l->state = LINK_RESETTING; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_ESTABLISH_EVT: + case LINK_SYNCH_END_EVT: + break; + case LINK_SYNCH_BEGIN_EVT: + l->state = LINK_SYNCHING; + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + case LINK_SYNCHING: + switch (evt) { + case LINK_PEER_RESET_EVT: + l->state = LINK_PEER_RESET; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_FAILURE_EVT: + l->state = LINK_RESETTING; + rc |= TIPC_LINK_DOWN_EVT; + break; + case LINK_RESET_EVT: + l->state = LINK_RESET; + break; + case LINK_ESTABLISH_EVT: + case LINK_SYNCH_BEGIN_EVT: + break; + case LINK_SYNCH_END_EVT: + l->state = LINK_ESTABLISHED; + break; + case LINK_FAILOVER_BEGIN_EVT: + case LINK_FAILOVER_END_EVT: + default: + goto illegal_evt; + } + break; + default: + pr_err("Unknown FSM state %x in %s\n", l->state, l->name); + } + return rc; +illegal_evt: + pr_err("Illegal FSM event %x in state %x on link %s\n", + evt, l->state, l->name); + return rc; +} + +/* link_profile_stats - update statistical profiling of traffic + * Samples the transmit queue length and the size of the message at its head; + * of a fragmented message only the first fragment is counted, using the size + * of the wrapped message. + */ +static void link_profile_stats(struct tipc_link *l) +{ + struct sk_buff *skb; + struct tipc_msg *msg; + int length; + + /* Update counters used in statistical profiling of send traffic */ + l->stats.accu_queue_sz += skb_queue_len(&l->transmq); + l->stats.queue_sz_counts++; + + skb = skb_peek(&l->transmq); + if (!skb) + return; + msg = buf_msg(skb); + length = msg_size(msg); + + if (msg_user(msg) == MSG_FRAGMENTER) { + if (msg_type(msg) != FIRST_FRAGMENT) + return; + length = msg_size(msg_get_wrapped(msg)); + } + l->stats.msg_lengths_total += length; + l->stats.msg_length_counts++; + if (length <= 64) + l->stats.msg_length_profile[0]++; + else if (length <= 256) + l->stats.msg_length_profile[1]++; + else if (length <= 1024) + l->stats.msg_length_profile[2]++; + else if (length <= 4096) + 
l->stats.msg_length_profile[3]++; + else if (length <= 16384) + l->stats.msg_length_profile[4]++; + else if (length <= 32768) + l->stats.msg_length_profile[5]++; + else + l->stats.msg_length_profile[6]++; +} + +/* tipc_link_timeout - perform periodic task as instructed from node timeout + * Returns the result of the LINK_FAILURE_EVT state transition when the monitor + * state orders a reset or silent_intv_cnt exceeds abort_limit; otherwise 0. + */ +int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + int mtyp = 0; + int rc = 0; + bool state = false; + bool probe = false; + bool setup = false; + u16 bc_snt = l->bc_sndlink->snd_nxt - 1; + u16 bc_acked = l->bc_rcvlink->acked; + struct tipc_mon_state *mstate = &l->mon_state; + + switch (l->state) { + case LINK_ESTABLISHED: + case LINK_SYNCHING: + mtyp = STATE_MSG; + link_profile_stats(l); + tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id); + if (mstate->reset || (l->silent_intv_cnt > l->abort_limit)) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + state = bc_acked != bc_snt; + state |= l->bc_rcvlink->rcv_unacked; + state |= l->rcv_unacked; + state |= !skb_queue_empty(&l->transmq); + state |= !skb_queue_empty(&l->deferdq); + probe = mstate->probing; + probe |= l->silent_intv_cnt; + if (probe || mstate->monitoring) + l->silent_intv_cnt++; + break; + case LINK_RESET: + setup = l->rst_cnt++ <= 4; /* first five timeouts after rst_cnt was cleared */ + setup |= !(l->rst_cnt % 16); /* then whenever the incremented count is a multiple of 16 */ + mtyp = RESET_MSG; + break; + case LINK_ESTABLISHING: + setup = true; + mtyp = ACTIVATE_MSG; + break; + case LINK_PEER_RESET: + case LINK_RESETTING: + case LINK_FAILINGOVER: + break; + default: + break; + } + + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); + + return rc; +} + +/** + * link_schedule_user - schedule a message sender for wakeup after congestion + * @l: congested link + * @hdr: header of message that is being sent + * Create pseudo msg to send back to user when congestion abates + * + * Returns -ELINKCONG once the wakeup message is queued, or -ENOBUFS if it + * could not be created + */ +static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) +{ + u32 dnode = tipc_own_addr(l->net); + u32 dport = msg_origport(hdr); + struct sk_buff *skb; + + /* 
Create and schedule wakeup pseudo message */ + skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, + dnode, l->addr, dport, 0, 0); + if (!skb) + return -ENOBUFS; + msg_set_dest_droppable(buf_msg(skb), true); + TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); + skb_queue_tail(&l->wakeupq, skb); + l->stats.link_congs++; + return -ELINKCONG; +} + +/** + * link_prepare_wakeup - prepare users for wakeup after congestion + * @l: congested link + * Wake up a number of waiting users, as permitted by available space + * in the backlog, counted per importance level. The released wakeup + * messages are moved to the tail of the link input queue. + */ +static void link_prepare_wakeup(struct tipc_link *l) +{ + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; + struct sk_buff *skb, *tmp; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; + + skb_queue_walk_safe(wakeupq, skb, tmp) { + imp = TIPC_SKB_CB(skb)->chain_imp; + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); + } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + +} + +void tipc_link_reset(struct tipc_link *l) +{ + struct sk_buff_head list; + u32 imp; + + __skb_queue_head_init(&list); + + l->in_session = false; + l->session++; + l->mtu = l->advertised_mtu; + + spin_lock_bh(&l->wakeupq.lock); + skb_queue_splice_init(&l->wakeupq, &list); + spin_unlock_bh(&l->wakeupq.lock); + + spin_lock_bh(&l->inputq->lock); + skb_queue_splice_init(&list, l->inputq); + spin_unlock_bh(&l->inputq->lock); + + __skb_queue_purge(&l->transmq); + __skb_queue_purge(&l->deferdq); + __skb_queue_purge(&l->backlogq); + for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { + l->backlog[imp].len = 0; + l->backlog[imp].target_bskb = NULL; + } + kfree_skb(l->reasm_buf); + kfree_skb(l->failover_reasm_skb); + l->reasm_buf = NULL; + 
l->failover_reasm_skb = NULL; + l->rcv_unacked = 0; + l->snd_nxt = 1; + l->rcv_nxt = 1; + l->snd_nxt_state = 1; + l->rcv_nxt_state = 1; + l->acked = 0; + l->silent_intv_cnt = 0; + l->rst_cnt = 0; + l->stale_cnt = 0; + l->bc_peer_is_up = false; + memset(&l->mon_state, 0, sizeof(l->mon_state)); + tipc_link_reset_stats(l); +} + +/** + * tipc_link_xmit(): enqueue buffer list according to queue situation + * @l: link to use + * @list: chain of buffers containing message + * @xmitq: returned list of packets to be sent by caller + * + * Consumes the buffer chain. + * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS + * When the backlog level of the message is full, a TIPC_SYSTEM_IMPORTANCE + * message is rejected with -ENOBUFS; at other importance levels the chain is + * still queued and the result of scheduling a sender wakeup is returned. + * NOTE(review): the TIPC_SYSTEM_IMPORTANCE overflow return does not purge + * @list - confirm that callers release the chain in that case. + */ +int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, + struct sk_buff_head *xmitq) +{ + unsigned int maxwin = l->window; + unsigned int mtu = l->mtu; + u16 ack = l->rcv_nxt - 1; + u16 seqno = l->snd_nxt; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + struct sk_buff_head *transmq = &l->transmq; + struct sk_buff_head *backlogq = &l->backlogq; + struct sk_buff *skb, *_skb, **tskb; + int pkt_cnt = skb_queue_len(list); + struct tipc_msg *hdr; + int rc = 0; + int imp; + + if (pkt_cnt <= 0) + return 0; + + hdr = buf_msg(skb_peek(list)); + if (unlikely(msg_size(hdr) > mtu)) { + __skb_queue_purge(list); + return -EMSGSIZE; + } + + imp = msg_importance(hdr); + /* Allow oversubscription of one data msg per source at congestion */ + if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { + if (imp == TIPC_SYSTEM_IMPORTANCE) { + pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); + return -ENOBUFS; + } + rc = link_schedule_user(l, hdr); + } + + if (pkt_cnt > 1) { + l->stats.sent_fragmented++; + l->stats.sent_fragments += pkt_cnt; + } + + /* Prepare each packet for sending, and add to relevant queue: */ + while (skb_queue_len(list)) { + skb = skb_peek(list); + hdr = buf_msg(skb); + msg_set_seqno(hdr, seqno); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + 
+ if (likely(skb_queue_len(transmq) < maxwin)) { + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) { + __skb_queue_purge(list); + return -ENOBUFS; + } + __skb_dequeue(list); + __skb_queue_tail(transmq, skb); + __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; + l->rcv_unacked = 0; + l->stats.sent_pkts++; + seqno++; + continue; + } + tskb = &l->backlog[imp].target_bskb; /* send window full: try bundling before using the backlog */ + if (tipc_msg_bundle(*tskb, hdr, mtu)) { + kfree_skb(__skb_dequeue(list)); + l->stats.sent_bundled++; + continue; + } + if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) { + kfree_skb(__skb_dequeue(list)); + __skb_queue_tail(backlogq, *tskb); + l->backlog[imp].len++; + l->stats.sent_bundled++; + l->stats.sent_bundles++; + continue; + } + l->backlog[imp].target_bskb = NULL; + l->backlog[imp].len += skb_queue_len(list); + skb_queue_splice_tail_init(list, backlogq); + } + l->snd_nxt = seqno; + return rc; +} + +static void tipc_link_advance_backlog(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *_skb; + struct tipc_msg *hdr; + u16 seqno = l->snd_nxt; + u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u32 imp; + + while (skb_queue_len(&l->transmq) < l->window) { /* move backlog packets to transmq while the send window has room */ + skb = skb_peek(&l->backlogq); + if (!skb) + break; + _skb = skb_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_dequeue(&l->backlogq); + hdr = buf_msg(skb); + imp = msg_importance(hdr); + l->backlog[imp].len--; + if (unlikely(skb == l->backlog[imp].target_bskb)) + l->backlog[imp].target_bskb = NULL; + __skb_queue_tail(&l->transmq, skb); + __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; + msg_set_seqno(hdr, seqno); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + l->rcv_unacked = 0; + l->stats.sent_pkts++; + seqno++; + } + l->snd_nxt = seqno; +} + +static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) +{ + struct tipc_msg *hdr = buf_msg(skb); + + pr_warn("Retransmission failure on link <%s>\n", l->name); + 
link_print(l, "State of link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, src: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); +} + +/* tipc_link_retrans() - retransmit one or more packets + * @l: the link to transmit on + * @r: the receiving link ordering the retransmit. Same as l if unicast + * @from: retransmit from (inclusive) this sequence number + * @to: retransmit to (inclusive) this sequence number + * @xmitq: queue for accumulating the retransmitted packets + * + * Returns 0 normally. When the same head packet of the transmit queue has + * been requested more than 99 times in a row and stale_limit (tolerance ms + * after the first request) has passed, returns TIPC_LINK_DOWN_EVT for the + * broadcast send link, otherwise the result of the LINK_FAILURE_EVT + * state transition. + */ +static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r, + u16 from, u16 to, struct sk_buff_head *xmitq) +{ + struct sk_buff *_skb, *skb = skb_peek(&l->transmq); + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u16 ack = l->rcv_nxt - 1; + struct tipc_msg *hdr; + + if (!skb) + return 0; + + /* Detect repeated retransmit failures on same packet */ + if (r->last_retransm != buf_seqno(skb)) { + r->last_retransm = buf_seqno(skb); + r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); + r->stale_cnt = 0; + } else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) { + link_retransmit_failure(l, skb); + if (link_is_bc_sndlink(l)) + return TIPC_LINK_DOWN_EVT; + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + + skb_queue_walk(&l->transmq, skb) { + hdr = buf_msg(skb); + if (less(msg_seqno(hdr), from)) + continue; + if (more(msg_seqno(hdr), to)) + break; + _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + if (!_skb) + return 0; + hdr = buf_msg(_skb); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); + _skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, _skb); + l->stats.retransmitted++; + } + return 0; +} + +/* tipc_data_input - deliver data and name distr msgs to upper layer + * + * Consumes buffer if message is of right type + * Node lock must be held + * Returns true if the buffer was queued or dropped here, false if it is + * left to the caller for further processing + */ +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, + 
struct sk_buff_head *inputq) +{ + struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; + struct tipc_msg *hdr = buf_msg(skb); + + switch (msg_user(hdr)) { + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: + if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { + skb_queue_tail(mc_inputq, skb); + return true; + } + /* else: fall through */ + case CONN_MANAGER: + skb_queue_tail(inputq, skb); + return true; + case GROUP_PROTOCOL: + skb_queue_tail(mc_inputq, skb); + return true; + case NAME_DISTRIBUTOR: + l->bc_rcvlink->state = LINK_ESTABLISHED; + skb_queue_tail(l->namedq, skb); + return true; + case MSG_BUNDLER: + case TUNNEL_PROTOCOL: + case MSG_FRAGMENTER: + case BCAST_PROTOCOL: + return false; + default: + pr_warn("Dropping received illegal msg type\n"); + kfree_skb(skb); + return true; + }; +} + +/* tipc_link_input - process packet that has passed link protocol check + * + * Consumes buffer + */ +static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *inputq) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff **reasm_skb = &l->reasm_buf; + struct sk_buff *iskb; + struct sk_buff_head tmpq; + int usr = msg_user(hdr); + int rc = 0; + int pos = 0; + int ipos = 0; + + if (unlikely(usr == TUNNEL_PROTOCOL)) { + if (msg_type(hdr) == SYNCH_MSG) { + __skb_queue_purge(&l->deferdq); + goto drop; + } + if (!tipc_msg_extract(skb, &iskb, &ipos)) + return rc; + kfree_skb(skb); + skb = iskb; + hdr = buf_msg(skb); + if (less(msg_seqno(hdr), l->drop_point)) + goto drop; + if (tipc_data_input(l, skb, inputq)) + return rc; + usr = msg_user(hdr); + reasm_skb = &l->failover_reasm_skb; + } + + if (usr == MSG_BUNDLER) { + skb_queue_head_init(&tmpq); + l->stats.recv_bundles++; + l->stats.recv_bundled += msg_msgcnt(hdr); + while (tipc_msg_extract(skb, &iskb, &pos)) + tipc_data_input(l, iskb, &tmpq); + tipc_skb_queue_splice_tail(&tmpq, inputq); + return 0; + } else if (usr == 
MSG_FRAGMENTER) { + l->stats.recv_fragments++; + if (tipc_buf_append(reasm_skb, &skb)) { + l->stats.recv_fragmented++; + tipc_data_input(l, skb, inputq); + } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { + pr_warn_ratelimited("Unable to build fragment list\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + return 0; + } else if (usr == BCAST_PROTOCOL) { + tipc_bcast_lock(l->net); + tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); + tipc_bcast_unlock(l->net); + } +drop: + kfree_skb(skb); + return 0; +} + +static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) +{ + bool released = false; + struct sk_buff *skb, *tmp; + + skb_queue_walk_safe(&l->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + __skb_unlink(skb, &l->transmq); + kfree_skb(skb); + released = true; + } + return released; +} + +/* tipc_link_build_state_msg: prepare link state message for transmission + * + * Note that sending of broadcast ack is coordinated among nodes, to reduce + * risk of ack storms towards the sender + */ +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + if (!l) + return 0; + + /* Broadcast ACK must be sent via a unicast link => defer to caller */ + if (link_is_bc_rcvlink(l)) { + if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) + return 0; + l->rcv_unacked = 0; + + /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ + l->snd_nxt = l->rcv_nxt; + return TIPC_LINK_SND_STATE; + } + + /* Unicast ACK */ + l->rcv_unacked = 0; + l->stats.sent_acks++; + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); + return 0; +} + +/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message + */ +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + int mtyp = RESET_MSG; + struct sk_buff *skb; + + if (l->state == LINK_ESTABLISHING) + mtyp = ACTIVATE_MSG; + + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); + + /* Inform peer that this endpoint is going 
down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); +} + +/* tipc_link_build_nack_msg: prepare link nack message for transmission + * Note that sending of broadcast NACK is coordinated among nodes, to + * reduce the risk of NACK storms towards the sender + */ +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 def_cnt = ++l->stats.deferred_recv; + int match1, match2; + + if (link_is_bc_rcvlink(l)) { + match1 = def_cnt & 0xf; + match2 = tipc_own_addr(l->net) & 0xf; + if (match1 == match2) + return TIPC_LINK_SND_STATE; + return 0; + } + + if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); + return 0; +} + +/* tipc_link_rcv - process TIPC packets/messages arriving from off-node + * @l: the link that should handle the message + * @skb: TIPC packet + * @xmitq: queue to place packets to be sent after this call + */ +int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head *defq = &l->deferdq; + struct tipc_msg *hdr; + u16 seqno, rcv_nxt, win_lim; + int rc = 0; + + do { + hdr = buf_msg(skb); + seqno = msg_seqno(hdr); + rcv_nxt = l->rcv_nxt; + win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; + + /* Verify and update link state */ + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); + + if (unlikely(!link_is_up(l))) { + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + goto drop; + } + + /* Don't send probe at next timeout expiration */ + l->silent_intv_cnt = 0; + + /* Drop if outside receive window */ + if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { + l->stats.duplicates++; + goto drop; + } + + /* Forward queues and wake up waiting users */ + if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) { + l->stale_cnt = 0; + tipc_link_advance_backlog(l, xmitq); 
+ if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } + + /* Defer delivery if sequence gap */ + if (unlikely(seqno != rcv_nxt)) { + __tipc_skb_queue_sorted(defq, seqno, skb); + rc |= tipc_link_build_nack_msg(l, xmitq); + break; + } + + /* Deliver packet */ + l->rcv_nxt++; + l->stats.recv_pkts++; + if (!tipc_data_input(l, skb, l->inputq)) + rc |= tipc_link_input(l, skb, l->inputq); + if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) + rc |= tipc_link_build_state_msg(l, xmitq); + if (unlikely(rc & ~TIPC_LINK_SND_STATE)) + break; + } while ((skb = __skb_dequeue(defq))); + + return rc; +drop: + kfree_skb(skb); + return rc; +} + +static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, + struct sk_buff_head *xmitq) +{ + struct tipc_link *bcl = l->bc_rcvlink; + struct sk_buff *skb; + struct tipc_msg *hdr; + struct sk_buff_head *dfq = &l->deferdq; + bool node_up = link_is_up(bcl); + struct tipc_mon_state *mstate = &l->mon_state; + int dlen = 0; + void *data; + + /* Don't send protocol message during reset or link failover */ + if (tipc_link_is_blocked(l)) + return; + + if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) + return; + + if (!skb_queue_empty(dfq)) + rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; + + skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, + tipc_max_domain_size, l->addr, + tipc_own_addr(l->net), 0, 0, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + data = msg_data(hdr); + msg_set_session(hdr, l->session); + msg_set_bearer_id(hdr, l->bearer_id); + msg_set_net_plane(hdr, l->net_plane); + msg_set_next_sent(hdr, l->snd_nxt); + msg_set_ack(hdr, l->rcv_nxt - 1); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_ack_invalid(hdr, !node_up); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_link_tolerance(hdr, tolerance); + msg_set_linkprio(hdr, priority); + msg_set_redundant_link(hdr, node_up); + msg_set_seq_gap(hdr, 
0); + msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); + + if (mtyp == STATE_MSG) { + if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) + msg_set_seqno(hdr, l->snd_nxt_state++); + msg_set_seq_gap(hdr, rcvgap); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); + msg_set_probe(hdr, probe); + msg_set_is_keepalive(hdr, probe || probe_reply); + tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + dlen); + skb_trim(skb, INT_H_SIZE + dlen); + l->stats.sent_states++; + l->rcv_unacked = 0; + } else { + /* RESET_MSG or ACTIVATE_MSG */ + msg_set_max_pkt(hdr, l->advertised_mtu); + strcpy(data, l->if_name); + msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); + skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); + } + if (probe) + l->stats.sent_probes++; + if (rcvgap) + l->stats.sent_nacks++; + skb->priority = TC_PRIO_CONTROL; + __skb_queue_tail(xmitq, skb); +} + +void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 onode = tipc_own_addr(l->net); + struct tipc_msg *hdr, *ihdr; + struct sk_buff_head tnlq; + struct sk_buff *skb; + u32 dnode = l->addr; + + __skb_queue_head_init(&tnlq); + skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, + INT_H_SIZE, BASIC_H_SIZE, + dnode, onode, 0, 0, 0); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, 1); + msg_set_bearer_id(hdr, l->peer_bearer_id); + + ihdr = (struct tipc_msg *)msg_data(hdr); + tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, dnode); + msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, xmitq); +} + +/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets + * with contents of the link's transmit and backlog queues. 
+ */ +void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, + int mtyp, struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *tnlskb; + struct tipc_msg *hdr, tnlhdr; + struct sk_buff_head *queue = &l->transmq; + struct sk_buff_head tmpxq, tnlq; + u16 pktlen, pktcnt, seqno = l->snd_nxt; + + if (!tnl) + return; + + __skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tmpxq); + + /* At least one packet required for safe algorithm => add dummy */ + skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), + 0, 0, TIPC_ERR_NO_PORT); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, &tmpxq); + __skb_queue_purge(&tmpxq); + + /* Initialize reusable tunnel packet header */ + tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, + mtyp, INT_H_SIZE, l->addr); + pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq); + msg_set_msgcnt(&tnlhdr, pktcnt); + msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); +tnl: + /* Wrap each packet into a tunnel packet */ + skb_queue_walk(queue, skb) { + hdr = buf_msg(skb); + if (queue == &l->backlogq) + msg_set_seqno(hdr, seqno++); + pktlen = msg_size(hdr); + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); + if (!tnlskb) { + pr_warn("%sunable to send packet\n", link_co_err); + return; + } + skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen); + __skb_queue_tail(&tnlq, tnlskb); + } + if (queue != &l->backlogq) { + queue = &l->backlogq; + goto tnl; + } + + tipc_link_xmit(tnl, &tnlq, xmitq); + + if (mtyp == FAILOVER_MSG) { + tnl->drop_point = l->rcv_nxt; + tnl->failover_reasm_skb = l->reasm_buf; + l->reasm_buf = NULL; + } +} + +/* tipc_link_validate_msg(): validate message against current link state + * Returns true if message should be 
accepted, otherwise false + */ +bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) +{ + u16 curr_session = l->peer_session; + u16 session = msg_session(hdr); + int mtyp = msg_type(hdr); + + if (msg_user(hdr) != LINK_PROTOCOL) + return true; + + switch (mtyp) { + case RESET_MSG: + if (!l->in_session) + return true; + /* Accept only RESET with new session number */ + return more(session, curr_session); + case ACTIVATE_MSG: + if (!l->in_session) + return true; + /* Accept only ACTIVATE with new or current session number */ + return !less(session, curr_session); + case STATE_MSG: + /* Accept only STATE with current session number */ + if (!l->in_session) + return false; + if (session != curr_session) + return false; + /* Extra sanity check */ + if (!link_is_up(l) && msg_ack(hdr)) + return false; + if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) + return true; + /* Accept only STATE with new sequence number */ + return !less(msg_seqno(hdr), l->rcv_nxt_state); + default: + return false; + } +} + +/* tipc_link_proto_rcv(): receive link level protocol message : + * Note that network plane id propagates through the network, and may + * change at any time. 
The node with lowest numerical id determines + * network plane + */ +static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u16 rcvgap = 0; + u16 ack = msg_ack(hdr); + u16 gap = msg_seq_gap(hdr); + u16 peers_snd_nxt = msg_next_sent(hdr); + u16 peers_tol = msg_link_tolerance(hdr); + u16 peers_prio = msg_linkprio(hdr); + u16 rcv_nxt = l->rcv_nxt; + u32 dlen = msg_data_sz(hdr); + int mtyp = msg_type(hdr); + bool reply = msg_probe(hdr); + void *data; + char *if_name; + int rc = 0; + + if (dlen > U16_MAX) + goto exit; + + if (tipc_link_is_blocked(l) || !xmitq) + goto exit; + + if (tipc_own_addr(l->net) > msg_prevnode(hdr)) + l->net_plane = msg_net_plane(hdr); + + skb_linearize(skb); + hdr = buf_msg(skb); + data = msg_data(hdr); + + if (!tipc_link_validate_msg(l, hdr)) + goto exit; + + switch (mtyp) { + case RESET_MSG: + case ACTIVATE_MSG: + /* Complete own link name with peer's interface name */ + if_name = strrchr(l->name, ':') + 1; + if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME) + break; + if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) + break; + strncpy(if_name, data, TIPC_MAX_IF_NAME); + + /* Update own tolerance if peer indicates a non-zero value */ + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + l->tolerance = peers_tol; + l->bc_rcvlink->tolerance = peers_tol; + } + /* Update own priority if peer's priority is higher */ + if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) + l->priority = peers_prio; + + /* If peer is going down we want full re-establish cycle */ + if (msg_peer_stopping(hdr)) { + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ + if (mtyp == RESET_MSG || !link_is_up(l)) + rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); + + /* ACTIVATE_MSG takes up link if it was already locally reset */ + if (mtyp == ACTIVATE_MSG && l->state == 
LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + + l->peer_session = msg_session(hdr); + l->in_session = true; + l->peer_bearer_id = msg_bearer_id(hdr); + if (l->mtu > msg_max_pkt(hdr)) + l->mtu = msg_max_pkt(hdr); + break; + + case STATE_MSG: + l->rcv_nxt_state = msg_seqno(hdr) + 1; + + /* Update own tolerance if peer indicates a non-zero value */ + if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { + l->tolerance = peers_tol; + l->bc_rcvlink->tolerance = peers_tol; + } + /* Update own prio if peer indicates a different value */ + if ((peers_prio != l->priority) && + in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { + l->priority = peers_prio; + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + + l->silent_intv_cnt = 0; + l->stats.recv_states++; + if (msg_probe(hdr)) + l->stats.recv_probes++; + + if (!link_is_up(l)) { + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + break; + } + tipc_mon_rcv(l->net, data, dlen, l->addr, + &l->mon_state, l->bearer_id); + + /* Send NACK if peer has sent pkts we haven't received yet */ + if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) + rcvgap = peers_snd_nxt - l->rcv_nxt; + if (rcvgap || reply) + tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + rcvgap, 0, 0, xmitq); + tipc_link_release_pkts(l, ack); + + /* If NACK, retransmit will now start at right position */ + if (gap) { + rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq); + l->stats.recv_nacks++; + } + + tipc_link_advance_backlog(l, xmitq); + if (unlikely(!skb_queue_empty(&l->wakeupq))) + link_prepare_wakeup(l); + } +exit: + kfree_skb(skb); + return rc; +} + +/* tipc_link_build_bc_proto_msg() - create broadcast protocol message + */ +static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, + u16 peers_snd_nxt, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + struct tipc_msg *hdr; + struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); + u16 ack = l->rcv_nxt - 1; + u16 gap_to = peers_snd_nxt - 1; + + skb = 
tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, + 0, l->addr, tipc_own_addr(l->net), 0, 0, 0); + if (!skb) + return false; + hdr = buf_msg(skb); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_bcast_ack(hdr, ack); + msg_set_bcgap_after(hdr, ack); + if (dfrd_skb) + gap_to = buf_seqno(dfrd_skb) - 1; + msg_set_bcgap_to(hdr, gap_to); + msg_set_non_seq(hdr, bcast); + __skb_queue_tail(xmitq, skb); + return true; +} + +/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. + */ +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head list; + + __skb_queue_head_init(&list); + if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) + return; + msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); + tipc_link_xmit(l, &list, xmitq); +} + +/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer + */ +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) +{ + int mtyp = msg_type(hdr); + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (link_is_up(l)) + return; + + if (msg_user(hdr) == BCAST_PROTOCOL) { + l->rcv_nxt = peers_snd_nxt; + l->state = LINK_ESTABLISHED; + return; + } + + if (l->peer_caps & TIPC_BCAST_SYNCH) + return; + + if (msg_peer_node_is_up(hdr)) + return; + + /* Compatibility: accept older, less safe initial synch data */ + if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) + l->rcv_nxt = peers_snd_nxt; +} + +/* link_bc_retr eval()- check if the indicated range can be retransmitted now + * - Adjust permitted range if there is overlap with previous retransmission + */ +static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to) +{ + unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr); + + if (less(*to, *from)) + return false; + + /* New retransmission request */ + if 
((elapsed > TIPC_BC_RETR_LIMIT) || + less(*to, l->prev_from) || more(*from, l->prev_to)) { + l->prev_from = *from; + l->prev_to = *to; + l->prev_retr = jiffies; + return true; + } + + /* Inside range of previous retransmit */ + if (!less(*from, l->prev_from) && !more(*to, l->prev_to)) + return false; + + /* Fully or partially outside previous range => exclude overlap */ + if (less(*from, l->prev_from)) { + *to = l->prev_from - 1; + l->prev_from = *from; + } + if (more(*to, l->prev_to)) { + *from = l->prev_to + 1; + l->prev_to = *to; + } + l->prev_retr = jiffies; + return true; +} + +/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state + */ +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) +{ + struct tipc_link *snd_l = l->bc_sndlink; + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + u16 from = msg_bcast_ack(hdr) + 1; + u16 to = from + msg_bc_gap(hdr) - 1; + int rc = 0; + + if (!link_is_up(l)) + return rc; + + if (!msg_peer_node_is_up(hdr)) + return rc; + + /* Open when peer ackowledges our bcast init msg (pkt #1) */ + if (msg_ack(hdr)) + l->bc_peer_is_up = true; + + if (!l->bc_peer_is_up) + return rc; + + l->stats.recv_nacks++; + + /* Ignore if peers_snd_nxt goes beyond receive window */ + if (more(peers_snd_nxt, l->rcv_nxt + l->window)) + return rc; + + if (link_bc_retr_eval(snd_l, &from, &to)) + rc = tipc_link_retrans(snd_l, l, from, to, xmitq); + + l->snd_nxt = peers_snd_nxt; + if (link_bc_rcv_gap(l)) + rc |= TIPC_LINK_SND_STATE; + + /* Return now if sender supports nack via STATE messages */ + if (l->peer_caps & TIPC_BCAST_STATE_NACK) + return rc; + + /* Otherwise, be backwards compatible */ + + if (!more(peers_snd_nxt, l->rcv_nxt)) { + l->nack_state = BC_NACK_SND_CONDITIONAL; + return 0; + } + + /* Don't NACK if one was recently sent or peeked */ + if (l->nack_state == BC_NACK_SND_SUPPRESS) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + return 0; + } + + /* Conditionally delay NACK sending 
until next synch rcv */ + if (l->nack_state == BC_NACK_SND_CONDITIONAL) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) + return 0; + } + + /* Send NACK now but suppress next one */ + tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); + l->nack_state = BC_NACK_SND_SUPPRESS; + return 0; +} + +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *tmp; + struct tipc_link *snd_l = l->bc_sndlink; + + if (!link_is_up(l) || !l->bc_peer_is_up) + return; + + if (!more(acked, l->acked)) + return; + + /* Skip over packets peer has already acked */ + skb_queue_walk(&snd_l->transmq, skb) { + if (more(buf_seqno(skb), l->acked)) + break; + } + + /* Update/release the packets peer is acking now */ + skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + if (!--TIPC_SKB_CB(skb)->ackers) { + __skb_unlink(skb, &snd_l->transmq); + kfree_skb(skb); + } + } + l->acked = acked; + tipc_link_advance_backlog(snd_l, xmitq); + if (unlikely(!skb_queue_empty(&snd_l->wakeupq))) + link_prepare_wakeup(snd_l); +} + +/* tipc_link_bc_nack_rcv(): receive broadcast nack message + * This function is here for backwards compatibility, since + * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. 
+ */ +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u32 dnode = msg_destnode(hdr); + int mtyp = msg_type(hdr); + u16 acked = msg_bcast_ack(hdr); + u16 from = acked + 1; + u16 to = msg_bcgap_to(hdr); + u16 peers_snd_nxt = to + 1; + int rc = 0; + + kfree_skb(skb); + + if (!tipc_link_is_up(l) || !l->bc_peer_is_up) + return 0; + + if (mtyp != STATE_MSG) + return 0; + + if (dnode == tipc_own_addr(l->net)) { + tipc_link_bc_ack_rcv(l, acked, xmitq); + rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq); + l->stats.recv_nacks++; + return rc; + } + + /* Msg for other node => suppress own NACK at next sync if applicable */ + if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) + l->nack_state = BC_NACK_SND_SUPPRESS; + + return 0; +} + +void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) +{ + int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE); + + l->window = win; + l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win); + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2); + l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3); + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4); + l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; +} + +/** + * link_reset_stats - reset link statistics + * @l: pointer to link + */ +void tipc_link_reset_stats(struct tipc_link *l) +{ + memset(&l->stats, 0, sizeof(l->stats)); +} + +static void link_print(struct tipc_link *l, const char *str) +{ + struct sk_buff *hskb = skb_peek(&l->transmq); + u16 head = hskb ? 
msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; + u16 tail = l->snd_nxt - 1; + + pr_info("%s Link <%s> state %x\n", str, l->name, l->state); + pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n", + skb_queue_len(&l->transmq), head, tail, + skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt); +} + +/* Parse and validate nested (link) properties valid for media, bearer and link + */ +int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) +{ + int err; + + err = nla_parse_nested(props, TIPC_NLA_PROP_MAX, prop, + tipc_nl_prop_policy, NULL); + if (err) + return err; + + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + if (prio > TIPC_MAX_LINK_PRI) + return -EINVAL; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) + return -EINVAL; + } + + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN)) + return -EINVAL; + } + + return 0; +} + +static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) +{ + int i; + struct nlattr *stats; + + struct nla_map { + u32 key; + u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, 0}, + {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, 0}, + {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled}, + {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ? 
+ s->msg_length_counts : 1}, + {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts}, + {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total}, + {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]}, + {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]}, + {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]}, + {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]}, + {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]}, + {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]}, + {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]}, + {TIPC_NLA_STATS_RX_STATES, s->recv_states}, + {TIPC_NLA_STATS_RX_PROBES, s->recv_probes}, + {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv}, + {TIPC_NLA_STATS_TX_STATES, s->sent_states}, + {TIPC_NLA_STATS_TX_PROBES, s->sent_probes}, + {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, s->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, s->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, s->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ? 
+ (s->accu_queue_sz / s->queue_sz_counts) : 0} + }; + + stats = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!stats) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, stats); + + return 0; +msg_full: + nla_nest_cancel(skb, stats); + + return -EMSGSIZE; +} + +/* Caller should hold appropriate locks to protect the link */ +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags) +{ + u32 self = tipc_own_addr(net); + struct nlattr *attrs; + struct nlattr *prop; + void *hdr; + int err; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + nlflags, TIPC_NL_LINK_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self))) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts)) + goto attr_msg_full; + + if (tipc_link_is_up(link)) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + if (link->active) + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, + link->window)) + goto prop_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = 
__tipc_nl_add_stats(msg->skb, &link->stats); + if (err) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, + struct tipc_stats *stats) +{ + int i; + struct nlattr *nest; + + struct nla_map { + __u32 key; + __u32 val; + }; + + struct nla_map map[] = { + {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts}, + {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, + {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, + {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, + {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, + {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts}, + {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, + {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, + {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, + {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, + {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, + {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, + {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, + {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, + {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, + {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, + {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, + {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, + {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? 
+ (stats->accu_queue_sz / stats->queue_sz_counts) : 0} + }; + + nest = nla_nest_start(skb, TIPC_NLA_LINK_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < ARRAY_SIZE(map); i++) + if (nla_put_u32(skb, map[i].key, map[i].val)) + goto msg_full; + + nla_nest_end(skb, nest); + + return 0; +msg_full: + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) +{ + int err; + void *hdr; + struct nlattr *attrs; + struct nlattr *prop; + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *bcl = tn->bcl; + + if (!bcl) + return 0; + + tipc_bcast_lock(net); + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_LINK_GET); + if (!hdr) { + tipc_bcast_unlock(net); + return -EMSGSIZE; + } + + attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); + if (!attrs) + goto msg_full; + + /* The broadcast link is always up */ + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) + goto attr_msg_full; + + if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) + goto attr_msg_full; + + prop = nla_nest_start(msg->skb, TIPC_NLA_LINK_PROP); + if (!prop) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window)) + goto prop_msg_full; + nla_nest_end(msg->skb, prop); + + err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); + if (err) + goto attr_msg_full; + + tipc_bcast_unlock(net); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +prop_msg_full: + nla_nest_cancel(msg->skb, prop); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + tipc_bcast_unlock(net); + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct 
sk_buff_head *xmitq) +{ + l->tolerance = tol; + if (l->bc_rcvlink) + l->bc_rcvlink->tolerance = tol; + if (link_is_up(l)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); +} + +void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq) +{ + l->priority = prio; + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); +} + +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) +{ + l->abort_limit = limit; +} diff --git a/net/tipc/link.h b/net/tipc/link.h new file mode 100644 index 000000000..90488c538 --- /dev/null +++ b/net/tipc/link.h @@ -0,0 +1,150 @@ +/* + * net/tipc/link.h: Include file for TIPC link code + * + * Copyright (c) 1995-2006, 2013-2014, Ericsson AB + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_LINK_H +#define _TIPC_LINK_H + +#include <net/genetlink.h> +#include "msg.h" +#include "node.h" + +/* TIPC-specific error codes +*/ +#define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ + +/* Link FSM events: + */ +enum { + LINK_ESTABLISH_EVT = 0xec1ab1e, + LINK_PEER_RESET_EVT = 0x9eed0e, + LINK_FAILURE_EVT = 0xfa110e, + LINK_RESET_EVT = 0x10ca1d0e, + LINK_FAILOVER_BEGIN_EVT = 0xfa110bee, + LINK_FAILOVER_END_EVT = 0xfa110ede, + LINK_SYNCH_BEGIN_EVT = 0xc1ccbee, + LINK_SYNCH_END_EVT = 0xc1ccede +}; + +/* Events returned from link at packet reception or at timeout + */ +enum { + TIPC_LINK_UP_EVT = 1, + TIPC_LINK_DOWN_EVT = (1 << 1), + TIPC_LINK_SND_STATE = (1 << 2) +}; + +/* Starting value for maximum packet size negotiation on unicast links + * (unless bearer MTU is less) + */ +#define MAX_PKT_DEFAULT 1500 + +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, + u32 peer, u8 *peer_id, u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link **link); +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link); +void 
tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, + int mtyp, struct sk_buff_head *xmitq); +void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, + struct sk_buff_head *xmitq); +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_fsm_evt(struct tipc_link *l, int evt); +bool tipc_link_is_up(struct tipc_link *l); +bool tipc_link_peer_is_down(struct tipc_link *l); +bool tipc_link_is_reset(struct tipc_link *l); +bool tipc_link_is_establishing(struct tipc_link *l); +bool tipc_link_is_synching(struct tipc_link *l); +bool tipc_link_is_failingover(struct tipc_link *l); +bool tipc_link_is_blocked(struct tipc_link *l); +void tipc_link_set_active(struct tipc_link *l, bool active); +void tipc_link_reset(struct tipc_link *l); +void tipc_link_reset_stats(struct tipc_link *l); +int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, + struct sk_buff_head *xmitq); +struct sk_buff_head *tipc_link_inputq(struct tipc_link *l); +u16 tipc_link_rcv_nxt(struct tipc_link *l); +u16 tipc_link_acked(struct tipc_link *l); +u32 tipc_link_id(struct tipc_link *l); +char *tipc_link_name(struct tipc_link *l); +u32 tipc_link_state(struct tipc_link *l); +char tipc_link_plane(struct tipc_link *l); +int tipc_link_prio(struct tipc_link *l); +int tipc_link_window(struct tipc_link *l); +void tipc_link_update_caps(struct tipc_link *l, u16 capabilities); +bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr); +unsigned long tipc_link_tolerance(struct tipc_link *l); +void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct sk_buff_head *xmitq); +void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq); +void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); +int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, + struct tipc_link *link, int nlflags); +int tipc_nl_parse_link_prop(struct 
nlattr *prop, struct nlattr *props[]); +int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq); +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq); +int tipc_link_bc_peers(struct tipc_link *l); +void tipc_link_set_mtu(struct tipc_link *l, int mtu); +int tipc_link_mtu(struct tipc_link *l); +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq); +void tipc_link_build_bc_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); +#endif diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c new file mode 100644 index 000000000..7b6c1c5c3 --- /dev/null +++ b/net/tipc/monitor.c @@ -0,0 +1,835 @@ +/* + * net/tipc/monitor.c + * + * Copyright (c) 2016, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <net/genetlink.h> +#include "core.h" +#include "addr.h" +#include "monitor.h" +#include "bearer.h" + +/* MAX_MON_DOMAIN: capacity of members[] in a domain record, and the upper + * bound returned by dom_size() + * MON_TIMEOUT: base period, in milliseconds, of the monitor timer + * MAX_PEER_DOWN_EVENTS: down_cnt value at which tipc_mon_get_state() sets + * state->reset + */ +#define MAX_MON_DOMAIN 64 +#define MON_TIMEOUT 120000 +#define MAX_PEER_DOWN_EVENTS 4 + +/* struct tipc_mon_domain: domain record to be transferred between peers + * @len: actual size of domain record + * @gen: current generation of sender's domain + * @ack_gen: most recent generation of self's domain acked by peer + * @member_cnt: number of domain member nodes described in this record + * @up_map: bit map indicating which of the members the sender considers up + * @members: identity of the domain members + */ +struct tipc_mon_domain { + u16 len; + u16 gen; + u16 ack_gen; + u16 member_cnt; + u64 up_map; + u32 members[MAX_MON_DOMAIN]; +}; + +/* struct tipc_peer: state of a peer node and its domain + * @addr: tipc node identity of peer + * @domain: most recent domain record from peer + * @hash: position in hashed lookup list + * @list: position in linked list, in circular ascending order by 'addr' + * @applied: number of reported domain members applied on this monitor list + * @down_cnt: number of other peers which have reported this one as lost + * @is_up: peer is up as seen from this node + * @is_head: peer is assigned domain head as seen from this node + * @is_local: peer is in local domain and should be continuously monitored + */ +struct tipc_peer { + u32 addr; + struct tipc_mon_domain *domain; + struct hlist_node hash; + struct list_head list; + u8 applied; + u8 down_cnt; + bool is_up; + bool is_head; + bool is_local; +}; + +/* struct tipc_monitor: per-bearer monitoring state + * @peers: hash table of peers, indexed by tipc_hashfn(addr) + * @peer_cnt: number of nodes in the monitor list; starts at 1 for self + * @self: the tipc_peer entry representing this node + * @lock: rwlock taken around accesses to the peer list and domain records + * @cache: copy of the own domain record in network byte order, copied into + * outgoing messages by tipc_mon_prep() + * @list_gen: incremented each time mon_assign_roles() has run + * @dom_gen: incremented each time the own domain record has changed + * @net: the struct net this monitor was created for + * @timer: timer running mon_timeout() + * @timer_intv: timer period in jiffies + */ +struct tipc_monitor { + struct hlist_head peers[NODE_HTABLE_SIZE]; + int peer_cnt; + struct tipc_peer *self; + rwlock_t lock; + struct tipc_mon_domain cache; + u16 list_gen; + u16 dom_gen; + struct net *net; + struct timer_list timer; + unsigned long timer_intv; +}; + +static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) +{ + return
tipc_net(net)->monitors[bearer_id]; +} + +const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); + +/* dom_rec_len(): actual length of domain record for transport, i.e. the + * fixed part preceding members[] plus mcnt 32-bit member entries + */ +static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) +{ + return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32)); +} + +/* dom_size() : calculate size of own domain based on number of peers; + * the smallest i with i * i >= peers, capped at MAX_MON_DOMAIN + */ +static int dom_size(int peers) +{ + int i = 0; + + while ((i * i) < peers) + i++; + return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN; +} + +/* map_set() : clear bit i of the up map, then OR in v shifted to position i + */ +static void map_set(u64 *up_map, int i, unsigned int v) +{ + *up_map &= ~(1ULL << i); + *up_map |= ((u64)v << i); +} + +/* map_get() : return bit i of the up map as 0 or 1 + * The mask must be a 64-bit constant: the map is a u64 and i ranges up to + * MAX_MON_DOMAIN - 1, while a plain int 1 cannot be shifted that far. + */ +static int map_get(u64 up_map, int i) +{ + return (up_map & (1ULL << i)) >> i; +} + +/* peer_prev() : entry preceding peer in the circular monitor list + */ +static struct tipc_peer *peer_prev(struct tipc_peer *peer) +{ + return list_last_entry(&peer->list, struct tipc_peer, list); +} + +/* peer_nxt() : entry following peer in the circular monitor list + */ +static struct tipc_peer *peer_nxt(struct tipc_peer *peer) +{ + return list_first_entry(&peer->list, struct tipc_peer, list); +} + +/* peer_head() : walk backwards from peer to the first entry flagged is_head + */ +static struct tipc_peer *peer_head(struct tipc_peer *peer) +{ + while (!peer->is_head) + peer = peer_prev(peer); + return peer; +} + +/* get_peer() : look up a peer by address in the hash table, NULL if absent + */ +static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) +{ + struct tipc_peer *peer; + unsigned int thash = tipc_hashfn(addr); + + hlist_for_each_entry(peer, &mon->peers[thash], hash) { + if (peer->addr == addr) + return peer; + } + return NULL; +} + +/* get_self() : own peer entry of the monitor belonging to bearer_id + */ +static struct tipc_peer *get_self(struct net *net, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + + return mon->self; +} + +/* tipc_mon_is_active() : true when peer_cnt exceeds the configured threshold + */ +static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) +{ + struct tipc_net *tn = tipc_net(net); + + return mon->peer_cnt > tn->mon_threshold; +} + +/* mon_identify_lost_members() : - identify and mark potentially lost members + */ +static void mon_identify_lost_members(struct tipc_peer *peer, + struct tipc_mon_domain *dom_bef, + int applied_bef) +{ + struct tipc_peer *member = peer; + struct tipc_mon_domain 
*dom_aft = peer->domain; + int applied_aft = peer->applied; + int i; + + for (i = 0; i < applied_bef; i++) { + member = peer_nxt(member); + + /* Do nothing if self or peer already see member as down */ + if (!member->is_up || !map_get(dom_bef->up_map, i)) + continue; + + /* Loss of local node must be detected by active probing */ + if (member->is_local) + continue; + + /* Start probing if member was removed from applied domain */ + if (!applied_aft || (applied_aft < i)) { + member->down_cnt = 1; + continue; + } + + /* Member loss is confirmed if it is still in applied domain */ + if (!map_get(dom_aft->up_map, i)) + member->down_cnt++; + } +} + +/* mon_apply_domain() : match a peer's domain record against monitor list + */ +static void mon_apply_domain(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + struct tipc_mon_domain *dom = peer->domain; + struct tipc_peer *member; + u32 addr; + int i; + + if (!dom || !peer->is_up) + return; + + /* Scan across domain members and match against monitor list */ + peer->applied = 0; + member = peer_nxt(peer); + for (i = 0; i < dom->member_cnt; i++) { + addr = dom->members[i]; + if (addr != member->addr) + return; + peer->applied++; + member = peer_nxt(member); + } +} + +/* mon_update_local_domain() : update after peer addition/removal/up/down + */ +static void mon_update_local_domain(struct tipc_monitor *mon) +{ + struct tipc_peer *self = mon->self; + struct tipc_mon_domain *cache = &mon->cache; + struct tipc_mon_domain *dom = self->domain; + struct tipc_peer *peer = self; + u64 prev_up_map = dom->up_map; + u16 member_cnt, i; + bool diff; + + /* Update local domain size based on current size of cluster */ + member_cnt = dom_size(mon->peer_cnt) - 1; + self->applied = member_cnt; + + /* Update native and cached outgoing local domain records */ + dom->len = dom_rec_len(dom, member_cnt); + diff = dom->member_cnt != member_cnt; + dom->member_cnt = member_cnt; + for (i = 0; i < member_cnt; i++) { + peer = peer_nxt(peer); + diff 
|= dom->members[i] != peer->addr; + dom->members[i] = peer->addr; + map_set(&dom->up_map, i, peer->is_up); + cache->members[i] = htonl(peer->addr); + } + diff |= dom->up_map != prev_up_map; + if (!diff) + return; + dom->gen = ++mon->dom_gen; + cache->len = htons(dom->len); + cache->gen = htons(dom->gen); + cache->member_cnt = htons(member_cnt); + cache->up_map = cpu_to_be64(dom->up_map); + mon_apply_domain(mon, self); +} + +/* mon_update_neighbors() : update preceding neighbors of added/removed peer + */ +static void mon_update_neighbors(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + int dz, i; + + dz = dom_size(mon->peer_cnt); + for (i = 0; i < dz; i++) { + mon_apply_domain(mon, peer); + peer = peer_prev(peer); + } +} + +/* mon_assign_roles() : reassign peer roles after a network change + * The monitor list is consistent at this stage; i.e., each peer is monitoring + * a set of domain members as matched between domain record and the monitor list + */ +static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) +{ + struct tipc_peer *peer = peer_nxt(head); + struct tipc_peer *self = mon->self; + int i = 0; + + for (; peer != self; peer = peer_nxt(peer)) { + peer->is_local = false; + + /* Update domain member */ + if (i++ < head->applied) { + peer->is_head = false; + if (head == self) + peer->is_local = true; + continue; + } + /* Assign next domain head */ + if (!peer->is_up) + continue; + if (peer->is_head) + break; + head = peer; + head->is_head = true; + i = 0; + } + mon->list_gen++; +} + +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *prev, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) + goto exit; + prev = peer_prev(peer); + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + mon->peer_cnt--; + head = 
peer_head(prev); + if (head == self) + mon_update_local_domain(mon); + mon_update_neighbors(mon, prev); + + /* Revert to full-mesh monitoring if we reach threshold */ + if (!tipc_mon_is_active(net, mon)) { + list_for_each_entry(peer, &self->list, list) { + kfree(peer->domain); + peer->domain = NULL; + peer->applied = 0; + } + } + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, + struct tipc_peer **peer) +{ + struct tipc_peer *self = mon->self; + struct tipc_peer *cur, *prev, *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + *peer = p; + if (!p) + return false; + p->addr = addr; + + /* Add new peer to lookup list */ + INIT_LIST_HEAD(&p->list); + hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); + + /* Sort new peer into iterator list, in ascending circular order */ + prev = self; + list_for_each_entry(cur, &self->list, list) { + if ((addr > prev->addr) && (addr < cur->addr)) + break; + if (((addr < cur->addr) || (addr > prev->addr)) && + (prev->addr > cur->addr)) + break; + prev = cur; + } + list_add_tail(&p->list, &cur->list); + mon->peer_cnt++; + mon_update_neighbors(mon, p); + return true; +} + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) + goto exit; + peer->is_up = true; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + struct tipc_mon_domain *dom; + int applied; + + 
write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) { + pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); + goto exit; + } + applied = peer->applied; + peer->applied = 0; + dom = peer->domain; + peer->domain = NULL; + if (peer->is_head) + mon_identify_lost_members(peer, dom, applied); + kfree(dom); + peer->is_up = false; + peer->is_head = false; + peer->is_local = false; + peer->down_cnt = 0; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +/* tipc_mon_rcv - process monitor domain event message + */ +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *arrv_dom = data; + struct tipc_mon_domain dom_bef; + struct tipc_mon_domain *dom; + struct tipc_peer *peer; + u16 new_member_cnt = ntohs(arrv_dom->member_cnt); + int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); + u16 new_gen = ntohs(arrv_dom->gen); + u16 acked_gen = ntohs(arrv_dom->ack_gen); + bool probing = state->probing; + int i, applied_bef; + + state->probing = false; + + /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; + if (dlen < dom_rec_len(arrv_dom, 0)) + return; + if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) + return; + if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) + return; + + /* Synch generation numbers with peer if link just came up */ + if (!state->synched) { + state->peer_gen = new_gen - 1; + state->acked_gen = acked_gen; + state->synched = true; + } + + if (more(acked_gen, state->acked_gen)) + state->acked_gen = acked_gen; + + /* Drop duplicate unless we are waiting for a probe response */ + if (!more(new_gen, state->peer_gen) && !probing) + return; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer || !peer->is_up) + goto exit; + + /* 
Peer is confirmed, stop any ongoing probing */ + peer->down_cnt = 0; + + /* Task is done for duplicate record */ + if (!more(new_gen, state->peer_gen)) + goto exit; + + state->peer_gen = new_gen; + + /* Cache current domain record for later use */ + dom_bef.member_cnt = 0; + dom = peer->domain; + if (dom) + memcpy(&dom_bef, dom, dom->len); + + /* Transform and store received domain record */ + if (!dom || (dom->len < new_dlen)) { + kfree(dom); + dom = kmalloc(new_dlen, GFP_ATOMIC); + peer->domain = dom; + if (!dom) + goto exit; + } + dom->len = new_dlen; + dom->gen = new_gen; + dom->member_cnt = new_member_cnt; + dom->up_map = be64_to_cpu(arrv_dom->up_map); + for (i = 0; i < new_member_cnt; i++) + dom->members[i] = ntohl(arrv_dom->members[i]); + + /* Update peers affected by this domain record */ + applied_bef = peer->applied; + mon_apply_domain(mon, peer); + mon_identify_lost_members(peer, &dom_bef, applied_bef); + mon_assign_roles(mon, peer_head(peer)); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *dom = data; + u16 gen = mon->dom_gen; + u16 len; + + /* Send invalid record if not active */ + if (!tipc_mon_is_active(net, mon)) { + dom->len = 0; + return; + } + + /* Send only a dummy record with ack if peer has acked our last sent */ + if (likely(state->acked_gen == gen)) { + len = dom_rec_len(dom, 0); + *dlen = len; + dom->len = htons(len); + dom->gen = htons(gen); + dom->ack_gen = htons(state->peer_gen); + dom->member_cnt = 0; + return; + } + /* Send the full record */ + read_lock_bh(&mon->lock); + len = ntohs(mon->cache.len); + *dlen = len; + memcpy(data, &mon->cache, len); + read_unlock_bh(&mon->lock); + dom->ack_gen = htons(state->peer_gen); +} + +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id) +{ + struct tipc_monitor 
*mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer; + + if (!tipc_mon_is_active(net, mon)) { + state->probing = false; + state->monitoring = true; + return; + } + + /* Used cached state if table has not changed */ + if (!state->probing && + (state->list_gen == mon->list_gen) && + (state->acked_gen == mon->dom_gen)) + return; + + read_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (peer) { + state->probing = state->acked_gen != mon->dom_gen; + state->probing |= peer->down_cnt; + state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; + state->monitoring = peer->is_local; + state->monitoring |= peer->is_head; + state->list_gen = mon->list_gen; + } + read_unlock_bh(&mon->lock); +} + +static void mon_timeout(struct timer_list *t) +{ + struct tipc_monitor *mon = from_timer(mon, t, timer); + struct tipc_peer *self; + int best_member_cnt = dom_size(mon->peer_cnt) - 1; + + write_lock_bh(&mon->lock); + self = mon->self; + if (self && (best_member_cnt != self->applied)) { + mon_update_local_domain(mon); + mon_assign_roles(mon, self); + } + write_unlock_bh(&mon->lock); + mod_timer(&mon->timer, jiffies + mon->timer_intv); +} + +int tipc_mon_create(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon; + struct tipc_peer *self; + struct tipc_mon_domain *dom; + + if (tn->monitors[bearer_id]) + return 0; + + mon = kzalloc(sizeof(*mon), GFP_ATOMIC); + self = kzalloc(sizeof(*self), GFP_ATOMIC); + dom = kzalloc(sizeof(*dom), GFP_ATOMIC); + if (!mon || !self || !dom) { + kfree(mon); + kfree(self); + kfree(dom); + return -ENOMEM; + } + tn->monitors[bearer_id] = mon; + rwlock_init(&mon->lock); + mon->net = net; + mon->peer_cnt = 1; + mon->self = self; + self->domain = dom; + self->addr = tipc_own_addr(net); + self->is_up = true; + self->is_head = true; + INIT_LIST_HEAD(&self->list); + timer_setup(&mon->timer, mon_timeout, 0); + mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); + 
mod_timer(&mon->timer, jiffies + mon->timer_intv); + return 0; +} + +void tipc_mon_delete(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self; + struct tipc_peer *peer, *tmp; + + if (!mon) + return; + + self = get_self(net, bearer_id); + write_lock_bh(&mon->lock); + tn->monitors[bearer_id] = NULL; + list_for_each_entry_safe(peer, tmp, &self->list, list) { + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + } + mon->self = NULL; + write_unlock_bh(&mon->lock); + del_timer_sync(&mon->timer); + kfree(self->domain); + kfree(self); + kfree(mon); +} + +void tipc_mon_reinit_self(struct net *net) +{ + struct tipc_monitor *mon; + int bearer_id; + + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + mon = tipc_monitor(net, bearer_id); + if (!mon) + continue; + write_lock_bh(&mon->lock); + mon->self->addr = tipc_own_addr(net); + write_unlock_bh(&mon->lock); + } +} + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) +{ + struct tipc_net *tn = tipc_net(net); + + if (cluster_size > TIPC_CLUSTER_SIZE) + return -EINVAL; + + tn->mon_threshold = cluster_size; + + return 0; +} + +int tipc_nl_monitor_get_threshold(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + return tn->mon_threshold; +} + +static int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, + struct tipc_nl_msg *msg) +{ + struct tipc_mon_domain *dom = peer->domain; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_PEER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) + goto attr_msg_full; + + if (peer->is_up) + if 
(nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) + goto attr_msg_full; + if (peer->is_local) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) + goto attr_msg_full; + if (peer->is_head) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) + goto attr_msg_full; + + if (dom) { + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, + dom->up_map, TIPC_NLA_MON_PEER_PAD)) + goto attr_msg_full; + if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, + dom->member_cnt * sizeof(u32), &dom->members)) + goto attr_msg_full; + } + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer; + + if (!mon) + return -EINVAL; + + read_lock_bh(&mon->lock); + peer = mon->self; + do { + if (*prev_node) { + if (peer->addr == *prev_node) + *prev_node = 0; + else + continue; + } + if (__tipc_nl_add_monitor_peer(peer, msg)) { + *prev_node = peer->addr; + read_unlock_bh(&mon->lock); + return -EMSGSIZE; + } + } while ((peer = peer_nxt(peer)) != mon->self); + read_unlock_bh(&mon->lock); + + return 0; +} + +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + char bearer_name[TIPC_MAX_BEARER_NAME]; + struct nlattr *attrs; + void *hdr; + int ret; + + ret = tipc_bearer_get_name(net, bearer_name, bearer_id); + if (ret || !mon) + return 0; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + read_lock_bh(&mon->lock); 
+ if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id)) + goto attr_msg_full; + if (tipc_mon_is_active(net, mon)) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen)) + goto attr_msg_full; + + read_unlock_bh(&mon->lock); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + read_unlock_bh(&mon->lock); + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h new file mode 100644 index 000000000..ed63d2e65 --- /dev/null +++ b/net/tipc/monitor.h @@ -0,0 +1,83 @@ +/* + * net/tipc/monitor.h + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_MONITOR_H +#define _TIPC_MONITOR_H + +#include "netlink.h" + +/* struct tipc_mon_state: link instance's cache of monitor list and domain state + * @list_gen: current generation of this node's monitor list + * @peer_gen: most recent domain generation received from peer + * @acked_gen: most recent generation of self's domain acked by peer + * @monitoring: this peer endpoint should be continuously monitored + * @probing: peer endpoint should be temporarily probed for potential loss + * @reset: set by tipc_mon_get_state() once the down count of the peer has + * reached MAX_PEER_DOWN_EVENTS + * @synched: domain record's generation has been synched with peer after reset + */ +struct tipc_mon_state { + u16 list_gen; + u16 peer_gen; + u16 acked_gen; + bool monitoring :1; + bool probing :1; + bool reset :1; + bool synched :1; +}; + +int tipc_mon_create(struct net *net, int bearer_id); +void tipc_mon_delete(struct net *net, int bearer_id); + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id); +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id); +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_rcv(struct net *net, void 
*data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id); +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); +int tipc_nl_monitor_get_threshold(struct net *net); +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id); +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node); +void tipc_mon_reinit_self(struct net *net); + +extern const int tipc_max_domain_size; +#endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c new file mode 100644 index 000000000..0ac270444 --- /dev/null +++ b/net/tipc/msg.c @@ -0,0 +1,710 @@ +/* + * net/tipc/msg.c: TIPC message header routines + * + * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/sock.h> +#include "core.h" +#include "msg.h" +#include "addr.h" +#include "name_table.h" + +#define MAX_FORWARD_SIZE 1024 +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_TAILROOM 16 + +static unsigned int align(unsigned int i) +{ + return (i + 3) & ~3u; +} + +/** + * tipc_buf_acquire - creates a TIPC message buffer + * @size: message size (including TIPC header) + * + * Returns a new buffer with data pointers set to the specified size. + * + * NOTE: Headroom is reserved to allow prepending of a data link header. + * There may also be unrequested tailroom present at the buffer's end. 
+ */ +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) +{ + struct sk_buff *skb; + unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; + + skb = alloc_skb_fclone(buf_size, gfp); + if (skb) { + skb_reserve(skb, BUF_HEADROOM); + skb_put(skb, size); + skb->next = NULL; + } + return skb; +} + +void tipc_msg_init(u32 own_node, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 dnode) +{ + memset(m, 0, hsize); + msg_set_version(m); + msg_set_user(m, user); + msg_set_hdr_sz(m, hsize); + msg_set_size(m, hsize); + msg_set_prevnode(m, own_node); + msg_set_type(m, type); + if (hsize > SHORT_H_SIZE) { + msg_set_orignode(m, own_node); + msg_set_destnode(m, dnode); + } +} + +struct sk_buff *tipc_msg_create(uint user, uint type, + uint hdr_sz, uint data_sz, u32 dnode, + u32 onode, u32 dport, u32 oport, int errcode) +{ + struct tipc_msg *msg; + struct sk_buff *buf; + + buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); + if (unlikely(!buf)) + return NULL; + + msg = buf_msg(buf); + tipc_msg_init(onode, msg, user, type, hdr_sz, dnode); + msg_set_size(msg, hdr_sz + data_sz); + msg_set_origport(msg, oport); + msg_set_destport(msg, dport); + msg_set_errcode(msg, errcode); + if (hdr_sz > SHORT_H_SIZE) { + msg_set_orignode(msg, onode); + msg_set_destnode(msg, dnode); + } + return buf; +} + +/* tipc_buf_append(): Append a buffer to the fragment list of another buffer + * @*headbuf: in: NULL for first frag, otherwise value returned from prev call + * out: set when successful non-complete reassembly, otherwise NULL + * @*buf: in: the buffer to append. 
Always defined
+ *            out: head buf after successful complete reassembly, otherwise NULL
+ * Returns 1 when reassembly complete, otherwise 0
+ *
+ * On any failure both *buf and *headbuf are released and set to NULL.
+ */
+int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
+{
+	struct sk_buff *head = *headbuf;
+	struct sk_buff *frag = *buf;
+	struct sk_buff *tail = NULL;
+	struct tipc_msg *msg;
+	u32 fragid;
+	int delta;
+	bool headstolen;
+
+	if (!frag)
+		goto err;
+
+	msg = buf_msg(frag);
+	fragid = msg_type(msg);
+	frag->next = NULL;
+	skb_pull(frag, msg_hdr_sz(msg));
+
+	if (fragid == FIRST_FRAGMENT) {
+		if (unlikely(head))
+			goto err;
+		/* A failed __skb_linearize() leaves the skb allocated, so
+		 * *buf must still reference it for the error path to free it.
+		 */
+		if (skb_has_frag_list(frag) && __skb_linearize(frag))
+			goto err;
+		/* skb_unshare() disposes of the original itself when it has
+		 * to copy or fails; drop our reference before calling it.
+		 */
+		*buf = NULL;
+		frag = skb_unshare(frag, GFP_ATOMIC);
+		if (unlikely(!frag))
+			goto err;
+		head = *headbuf = frag;
+		TIPC_SKB_CB(head)->tail = NULL;
+		return 0;
+	}
+
+	if (!head)
+		goto err;
+
+	/* From here on the fragment is either freed (coalesced) or owned by
+	 * head's frag_list. Clear the caller's reference now so that the
+	 * error path below cannot free it a second time.
+	 */
+	*buf = NULL;
+	if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
+		kfree_skb_partial(frag, headstolen);
+	} else {
+		tail = TIPC_SKB_CB(head)->tail;
+		if (!skb_has_frag_list(head))
+			skb_shinfo(head)->frag_list = frag;
+		else
+			tail->next = frag;
+		head->truesize += frag->truesize;
+		head->data_len += frag->len;
+		head->len += frag->len;
+		TIPC_SKB_CB(head)->tail = frag;
+	}
+
+	if (fragid == LAST_FRAGMENT) {
+		TIPC_SKB_CB(head)->validated = false;
+		if (unlikely(!tipc_msg_validate(&head))) {
+			/* tipc_msg_validate() may have replaced head with a
+			 * copy and freed the old one; make the error path
+			 * release the buffer that is actually still alive.
+			 */
+			*headbuf = head;
+			goto err;
+		}
+		*buf = head;
+		TIPC_SKB_CB(head)->tail = NULL;
+		*headbuf = NULL;
+		return 1;
+	}
+	return 0;
+err:
+	kfree_skb(*buf);
+	kfree_skb(*headbuf);
+	*buf = *headbuf = NULL;
+	return 0;
+}
+
+/* tipc_msg_validate - validate basic format of received message
+ *
+ * This routine ensures a TIPC message has an acceptable header, and at least
+ * as much data as the header indicates it should. The routine also ensures
+ * that the entire message header is stored in the main fragment of the message
+ * buffer, to simplify future access to message header fields.
+ *
+ * Note: Having extra info present in the message header or data areas is OK.
+ * TIPC will ignore the excess, under the assumption that it is optional info
+ * introduced by a later release of the protocol.
+ */
+bool tipc_msg_validate(struct sk_buff **_skb)
+{
+	/* Returns true if the message passed all checks (or was already
+	 * marked validated), false otherwise. *_skb may be replaced by a
+	 * copy, in which case the original buffer is freed here; callers
+	 * must therefore continue with *_skb, not with their old pointer.
+	 */
+	struct sk_buff *skb = *_skb;
+	struct tipc_msg *hdr;
+	int msz, hsz;
+
+	/* Ensure that flow control ratio condition is satisfied */
+	if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) {
+		skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC);
+		if (!skb)
+			return false;
+		kfree_skb(*_skb);
+		*_skb = skb;
+	}
+
+	if (unlikely(TIPC_SKB_CB(skb)->validated))
+		return true;
+	/* Make the minimal header linear before reading the header size */
+	if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE)))
+		return false;
+
+	hsz = msg_hdr_sz(buf_msg(skb));
+	if (unlikely(hsz < MIN_H_SIZE) || (hsz > MAX_H_SIZE))
+		return false;
+	if (unlikely(!pskb_may_pull(skb, hsz)))
+		return false;
+
+	/* Re-read the header pointer after the pull above */
+	hdr = buf_msg(skb);
+	if (unlikely(msg_version(hdr) != TIPC_VERSION))
+		return false;
+
+	msz = msg_size(hdr);
+	if (unlikely(msz < hsz))
+		return false;
+	if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE))
+		return false;
+	/* The buffer must hold at least as many bytes as the header claims */
+	if (unlikely(skb->len < msz))
+		return false;
+
+	TIPC_SKB_CB(skb)->validated = true;
+	return true;
+}
+
+/**
+ * tipc_msg_build - create buffer chain containing specified header and data
+ * @mhdr: Message header, to be prepended to data
+ * @m: User message
+ * @offset: only forwarded to the recursive fallback call; not otherwise read
+ * @dsz: Total length of user data
+ * @pktmax: Max packet size that can be used
+ * @list: Buffer or chain of buffers to be returned to caller
+ *
+ * Note that the recursive call we are making here is safe, since it can
+ * logically go only one further level down.
+ *
+ * Returns message data size or errno: -ENOMEM, -EFAULT
+ */
+int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
+		   int dsz, int pktmax, struct sk_buff_head *list)
+{
+	int mhsz = msg_hdr_sz(mhdr);
+	struct tipc_msg pkthdr;
+	int msz = mhsz + dsz;
+	int pktrem = pktmax;
+	struct sk_buff *skb;
+	int drem = dsz;
+	int pktno = 1;
+	char *pktpos;
+	int pktsz;
+	int rc;
+
+	msg_set_size(mhdr, msz);
+
+	/* No fragmentation needed? */
+	if (likely(msz <= pktmax)) {
+		skb = tipc_buf_acquire(msz, GFP_KERNEL);
+
+		/* Fall back to smaller MTU if node local message */
+		if (unlikely(!skb)) {
+			if (pktmax != MAX_MSG_SIZE)
+				return -ENOMEM;
+			/* Build FB_MTU-sized fragments, then glue them back
+			 * into a single message with tipc_msg_assemble().
+			 */
+			rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+			if (rc != dsz)
+				return rc;
+			if (tipc_msg_assemble(list))
+				return dsz;
+			return -ENOMEM;
+		}
+		skb_orphan(skb);
+		__skb_queue_tail(list, skb);
+		skb_copy_to_linear_data(skb, mhdr, mhsz);
+		pktpos = skb->data + mhsz;
+		if (copy_from_iter_full(pktpos, dsz, &m->msg_iter))
+			return dsz;
+		rc = -EFAULT;
+		goto error;
+	}
+
+	/* Prepare reusable fragment header */
+	tipc_msg_init(msg_prevnode(mhdr), &pkthdr, MSG_FRAGMENTER,
+		      FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(mhdr));
+	msg_set_size(&pkthdr, pktmax);
+	msg_set_fragm_no(&pkthdr, pktno);
+	msg_set_importance(&pkthdr, msg_importance(mhdr));
+
+	/* Prepare first fragment */
+	skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	skb_orphan(skb);
+	__skb_queue_tail(list, skb);
+	pktpos = skb->data;
+	skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
+	pktpos += INT_H_SIZE;
+	pktrem -= INT_H_SIZE;
+	/* The first fragment also carries the original message header */
+	skb_copy_to_linear_data_offset(skb, INT_H_SIZE, mhdr, mhsz);
+	pktpos += mhsz;
+	pktrem -= mhsz;
+
+	/* Each pass fills the current fragment with user data and, if data
+	 * remains, allocates and prepares the next one.
+	 */
+	do {
+		if (drem < pktrem)
+			pktrem = drem;
+
+		if (!copy_from_iter_full(pktpos, pktrem, &m->msg_iter)) {
+			rc = -EFAULT;
+			goto error;
+		}
+		drem -= pktrem;
+
+		if (!drem)
+			break;
+
+		/* Prepare new fragment: */
+		if (drem < (pktmax - INT_H_SIZE))
+			pktsz = drem + INT_H_SIZE;
+		else
+			pktsz = pktmax;
+		skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
+		if (!skb) {
+			rc = -ENOMEM;
+			goto error;
+		}
+		skb_orphan(skb);
+		__skb_queue_tail(list, skb);
+		msg_set_type(&pkthdr, FRAGMENT);
+		msg_set_size(&pkthdr, pktsz);
+		msg_set_fragm_no(&pkthdr, ++pktno);
+		skb_copy_to_linear_data(skb, &pkthdr, INT_H_SIZE);
+		pktpos = skb->data + INT_H_SIZE;
+		pktrem = pktsz - INT_H_SIZE;
+
+	} while (1);
+	/* skb is the fragment queued last; retag it as the final one */
+	msg_set_type(buf_msg(skb), LAST_FRAGMENT);
+	return dsz;
+error:
+	/* Drop everything queued so far and leave the list empty */
+	__skb_queue_purge(list);
+	__skb_queue_head_init(list);
+	return rc;
+}
+
+/**
+ * tipc_msg_bundle(): Append contents of a buffer to tail of an existing one
+ * @skb: the buffer to append to ("bundle")
+ * @msg:  message to be appended
+ * @mtu:  max allowable size for the bundle buffer
+ * Consumes buffer if successful
+ * Returns true if bundling could be performed, otherwise false
+ */
+bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
+{
+	struct tipc_msg *bmsg;
+	unsigned int bsz;
+	unsigned int msz = msg_size(msg);
+	u32 start, pad;
+	u32 max = mtu - INT_H_SIZE;
+
+	if (likely(msg_user(msg) == MSG_FRAGMENTER))
+		return false;
+	if (!skb)
+		return false;
+	bmsg = buf_msg(skb);
+	bsz = msg_size(bmsg);
+	/* The new message starts at the next 4-byte boundary in the bundle */
+	start = align(bsz);
+	pad = start - bsz;
+
+	if (unlikely(msg_user(msg) == TUNNEL_PROTOCOL))
+		return false;
+	if (unlikely(msg_user(msg) == BCAST_PROTOCOL))
+		return false;
+	if (unlikely(msg_user(bmsg) != MSG_BUNDLER))
+		return false;
+	if (unlikely(skb_tailroom(skb) < (pad + msz)))
+		return false;
+	if (unlikely(max < (start + msz)))
+		return false;
+	/* Refuse to add a lower-importance message to a system bundle */
+	if ((msg_importance(msg) < TIPC_SYSTEM_IMPORTANCE) &&
+	    (msg_importance(bmsg) == TIPC_SYSTEM_IMPORTANCE))
+		return false;
+
+	skb_put(skb, pad + msz);
+	skb_copy_to_linear_data_offset(skb, start, msg, msz);
+	msg_set_size(bmsg, start + msz);
+	msg_set_msgcnt(bmsg, msg_msgcnt(bmsg) + 1);
+	return true;
+}
+
+/**
+ *  tipc_msg_extract(): extract bundled inner packet from buffer
+ *  @skb: buffer to be extracted from.
+ *  @iskb: extracted inner buffer, to be returned
+ *  @pos: position in outer message of msg to be extracted.
+ *        Returns position of next msg
+ *  Consumes outer buffer when last packet extracted
+ *  Returns true when there is an extracted buffer, otherwise false
+ */
+bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
+{
+	struct tipc_msg *hdr, *ihdr;
+	int dsz, imsz;
+
+	*iskb = NULL;
+	if (unlikely(skb_linearize(skb)))
+		goto none;
+
+	hdr = buf_msg(skb);
+	/* Work in signed arithmetic: msg_data_sz() is unsigned, so the old
+	 * "data size - MIN_H_SIZE" wrapped around for short payloads and let
+	 * the bounds check pass.
+	 */
+	dsz = msg_data_sz(hdr);
+	if (unlikely(dsz < MIN_H_SIZE || *pos < 0 || *pos > dsz - MIN_H_SIZE))
+		goto none;
+
+	ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos);
+	imsz = msg_size(ihdr);
+
+	/* The inner message must hold at least a minimal header and must
+	 * fit inside what is left of the outer payload.
+	 */
+	if (unlikely(imsz < MIN_H_SIZE || imsz > dsz - *pos))
+		goto none;
+
+	*iskb = tipc_buf_acquire(imsz, GFP_ATOMIC);
+	if (!*iskb)
+		goto none;
+
+	skb_copy_to_linear_data(*iskb, ihdr, imsz);
+	if (unlikely(!tipc_msg_validate(iskb)))
+		goto none;
+
+	/* Bundled messages are laid out on 4-byte boundaries */
+	*pos += align(imsz);
+	return true;
+none:
+	kfree_skb(skb);
+	kfree_skb(*iskb);
+	*iskb = NULL;
+	return false;
+}
+
+/**
+ * tipc_msg_make_bundle(): Create bundle buf and append message to its tail
+ * @list: the buffer chain, where head is the buffer to replace/append
+ *        NOTE(review): the function has no parameter named 'list'; this
+ *        entry looks stale - confirm
+ * @skb: buffer to be created, appended to and returned in case of success
+ * @msg: message to be appended
+ * @mtu: max allowable size for the bundle buffer, inclusive header
+ * @dnode: destination node for message.
(Not always present in header)
+ * Returns true if success, otherwise false
+ */
+bool tipc_msg_make_bundle(struct sk_buff **skb,  struct tipc_msg *msg,
+			  u32 mtu, u32 dnode)
+{
+	struct sk_buff *_skb;
+	struct tipc_msg *bmsg;
+	u32 msz = msg_size(msg);
+	u32 max = mtu - INT_H_SIZE;
+
+	if (msg_user(msg) == MSG_FRAGMENTER)
+		return false;
+	if (msg_user(msg) == TUNNEL_PROTOCOL)
+		return false;
+	if (msg_user(msg) == BCAST_PROTOCOL)
+		return false;
+	/* Only bundle messages that leave room for at least one more */
+	if (msz > (max / 2))
+		return false;
+
+	_skb = tipc_buf_acquire(max, GFP_ATOMIC);
+	if (!_skb)
+		return false;
+
+	/* Start with a bundle that contains only its own header */
+	skb_trim(_skb, INT_H_SIZE);
+	bmsg = buf_msg(_skb);
+	tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0,
+		      INT_H_SIZE, dnode);
+	msg_set_importance(bmsg, msg_importance(msg));
+	msg_set_seqno(bmsg, msg_seqno(msg));
+	msg_set_ack(bmsg, msg_ack(msg));
+	msg_set_bcast_ack(bmsg, msg_bcast_ack(msg));
+	/* Do not report success with an empty bundle: if the message could
+	 * not be appended, release the new buffer and leave *skb untouched.
+	 */
+	if (unlikely(!tipc_msg_bundle(_skb, msg, mtu))) {
+		kfree_skb(_skb);
+		return false;
+	}
+	*skb = _skb;
+	return true;
+}
+
+/**
+ * tipc_msg_reverse(): swap source and destination addresses and add error code
+ * @own_node: originating node id for reversed message
+ * @skb:  buffer containing message to be reversed; may be replaced.
+ * @err:  error code to be set in message, if any
+ * Consumes buffer at failure
+ * Returns true if success, otherwise false
+ */
+bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
+{
+	struct sk_buff *_skb = *skb;
+	struct tipc_msg *hdr;
+	struct tipc_msg ohdr;
+	int dlen;
+
+	if (skb_linearize(_skb))
+		goto exit;
+	hdr = buf_msg(_skb);
+	/* At most MAX_FORWARD_SIZE bytes of payload are sent back */
+	dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
+	/* Droppable messages and messages already carrying an error code
+	 * are discarded instead of being returned.
+	 */
+	if (msg_dest_droppable(hdr))
+		goto exit;
+	if (msg_errcode(hdr))
+		goto exit;
+
+	/* Take a copy of original header before altering message */
+	memcpy(&ohdr, hdr, msg_hdr_sz(hdr));
+
+	/* Never return SHORT header; expand by replacing buffer if necessary */
+	if (msg_short(hdr)) {
+		*skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC);
+		/* On failure _skb still refers to the original buffer,
+		 * which the exit path frees.
+		 */
+		if (!*skb)
+			goto exit;
+		memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
+		kfree_skb(_skb);
+		_skb = *skb;
+		hdr = buf_msg(_skb);
+		memcpy(hdr, &ohdr, BASIC_H_SIZE);
+		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+	}
+
+	/* Now reverse the concerned fields */
+	msg_set_errcode(hdr, err);
+	msg_set_non_seq(hdr, 0);
+	msg_set_origport(hdr, msg_destport(&ohdr));
+	msg_set_destport(hdr, msg_origport(&ohdr));
+	msg_set_destnode(hdr, msg_prevnode(&ohdr));
+	msg_set_prevnode(hdr, own_node);
+	msg_set_orignode(hdr, own_node);
+	msg_set_size(hdr, msg_hdr_sz(hdr) + dlen);
+	/* Cut the buffer down to header plus the retained payload */
+	skb_trim(_skb, msg_size(hdr));
+	skb_orphan(_skb);
+	return true;
+exit:
+	kfree_skb(_skb);
+	*skb = NULL;
+	return false;
+}
+
+/**
+ * tipc_msg_lookup_dest(): try to find new destination for named message
+ * @net: network namespace used for the own-address, scope and name table
+ *       lookups
+ * @skb: the buffer containing the message.
+ * @err: error code to be used by caller if lookup fails + * Does not consume buffer + * Returns true if a destination is found, false otherwise + */ +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) +{ + struct tipc_msg *msg = buf_msg(skb); + u32 dport, dnode; + u32 onode = tipc_own_addr(net); + + if (!msg_isdata(msg)) + return false; + if (!msg_named(msg)) + return false; + if (msg_errcode(msg)) + return false; + *err = TIPC_ERR_NO_NAME; + if (skb_linearize(skb)) + return false; + msg = buf_msg(skb); + if (msg_reroute_cnt(msg)) + return false; + dnode = tipc_scope2node(net, msg_lookup_scope(msg)); + dport = tipc_nametbl_translate(net, msg_nametype(msg), + msg_nameinst(msg), &dnode); + if (!dport) + return false; + msg_incr_reroute_cnt(msg); + if (dnode != onode) + msg_set_prevnode(msg, onode); + msg_set_destnode(msg, dnode); + msg_set_destport(msg, dport); + *err = TIPC_OK; + + if (!skb_cloned(skb)) + return true; + + return true; +} + +/* tipc_msg_assemble() - assemble chain of fragments into one message + */ +bool tipc_msg_assemble(struct sk_buff_head *list) +{ + struct sk_buff *skb, *tmp = NULL; + + if (skb_queue_len(list) == 1) + return true; + + while ((skb = __skb_dequeue(list))) { + skb->next = NULL; + if (tipc_buf_append(&tmp, &skb)) { + __skb_queue_tail(list, skb); + return true; + } + if (!tmp) + break; + } + __skb_queue_purge(list); + __skb_queue_head_init(list); + pr_warn("Failed do assemble buffer\n"); + return false; +} + +/* tipc_msg_reassemble() - clone a buffer chain of fragments and + * reassemble the clones into one message + */ +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq) +{ + struct sk_buff *skb, *_skb; + struct sk_buff *frag = NULL; + struct sk_buff *head = NULL; + int hdr_len; + + /* Copy header if single buffer */ + if (skb_queue_len(list) == 1) { + skb = skb_peek(list); + hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); + _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC); + if 
(!_skb) + return false; + __skb_queue_tail(rcvq, _skb); + return true; + } + + /* Clone all fragments and reassemble */ + skb_queue_walk(list, skb) { + frag = skb_clone(skb, GFP_ATOMIC); + if (!frag) + goto error; + frag->next = NULL; + if (tipc_buf_append(&head, &frag)) + break; + if (!head) + goto error; + } + __skb_queue_tail(rcvq, frag); + return true; +error: + pr_warn("Failed do clone local mcast rcv buffer\n"); + kfree_skb(head); + return false; +} + +bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, + struct sk_buff_head *cpy) +{ + struct sk_buff *skb, *_skb; + + skb_queue_walk(msg, skb) { + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + __skb_queue_purge(cpy); + return false; + } + msg_set_destnode(buf_msg(_skb), dst); + __skb_queue_tail(cpy, _skb); + } + return true; +} + +/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number + * @list: list to be appended to + * @seqno: sequence number of buffer to add + * @skb: buffer to add + */ +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb) +{ + struct sk_buff *_skb, *tmp; + + if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) { + __skb_queue_head(list, skb); + return; + } + + if (more(seqno, buf_seqno(skb_peek_tail(list)))) { + __skb_queue_tail(list, skb); + return; + } + + skb_queue_walk_safe(list, _skb, tmp) { + if (more(seqno, buf_seqno(_skb))) + continue; + if (seqno == buf_seqno(_skb)) + break; + __skb_queue_before(list, _skb, skb); + return; + } + kfree_skb(skb); +} + +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + __skb_queue_tail(xmitq, skb); +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h new file mode 100644 index 000000000..a4e944d59 --- /dev/null +++ b/net/tipc/msg.h @@ -0,0 +1,1079 @@ +/* + * net/tipc/msg.h: Include file for TIPC message header routines + * + * Copyright (c) 2000-2007, 2014-2017 
Ericsson AB + * Copyright (c) 2005-2008, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_MSG_H +#define _TIPC_MSG_H + +#include <linux/tipc.h> +#include "core.h" + +/* + * Constants and routines used to read and write TIPC payload message headers + * + * Note: Some items are also used with TIPC internal message headers + */ +#define TIPC_VERSION 2 +struct plist; + +/* + * Payload message users are defined in TIPC's public API: + * - TIPC_LOW_IMPORTANCE + * - TIPC_MEDIUM_IMPORTANCE + * - TIPC_HIGH_IMPORTANCE + * - TIPC_CRITICAL_IMPORTANCE + */ +#define TIPC_SYSTEM_IMPORTANCE 4 + + +/* + * Payload message types + */ +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 +#define TIPC_GRP_MEMBER_EVT 4 +#define TIPC_GRP_BCAST_MSG 5 +#define TIPC_GRP_MCAST_MSG 6 +#define TIPC_GRP_UCAST_MSG 7 + +/* + * Internal message users + */ +#define BCAST_PROTOCOL 5 +#define MSG_BUNDLER 6 +#define LINK_PROTOCOL 7 +#define CONN_MANAGER 8 +#define GROUP_PROTOCOL 9 +#define TUNNEL_PROTOCOL 10 +#define NAME_DISTRIBUTOR 11 +#define MSG_FRAGMENTER 12 +#define LINK_CONFIG 13 +#define SOCK_WAKEUP 14 /* pseudo user */ +#define TOP_SRV 15 /* pseudo user */ + +/* + * Message header sizes + */ +#define SHORT_H_SIZE 24 /* In-cluster basic payload message */ +#define BASIC_H_SIZE 32 /* Basic payload message */ +#define NAMED_H_SIZE 40 /* Named payload message */ +#define MCAST_H_SIZE 44 /* Multicast payload message */ +#define GROUP_H_SIZE 44 /* Group payload message */ +#define INT_H_SIZE 40 /* Internal messages */ +#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ +#define MAX_H_SIZE 60 /* Largest possible TIPC header size */ + +#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) +#define FB_MTU 3744 +#define TIPC_MEDIA_INFO_OFFSET 5 + +struct tipc_skb_cb { + u32 bytes_read; + u32 orig_member; + struct sk_buff *tail; + bool validated; + u16 chain_imp; + u16 ackers; +}; + +#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) + +struct tipc_msg { + __be32 hdr[15]; +}; + +static 
inline struct tipc_msg *buf_msg(struct sk_buff *skb) +{ + return (struct tipc_msg *)skb->data; +} + +static inline u32 msg_word(struct tipc_msg *m, u32 pos) +{ + return ntohl(m->hdr[pos]); +} + +static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) +{ + m->hdr[w] = htonl(val); +} + +static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) +{ + return (msg_word(m, w) >> pos) & mask; +} + +static inline void msg_set_bits(struct tipc_msg *m, u32 w, + u32 pos, u32 mask, u32 val) +{ + val = (val & mask) << pos; + mask = mask << pos; + m->hdr[w] &= ~htonl(mask); + m->hdr[w] |= htonl(val); +} + +static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) +{ + u32 temp = msg->hdr[a]; + + msg->hdr[a] = msg->hdr[b]; + msg->hdr[b] = temp; +} + +/* + * Word 0 + */ +static inline u32 msg_version(struct tipc_msg *m) +{ + return msg_bits(m, 0, 29, 7); +} + +static inline void msg_set_version(struct tipc_msg *m) +{ + msg_set_bits(m, 0, 29, 7, TIPC_VERSION); +} + +static inline u32 msg_user(struct tipc_msg *m) +{ + return msg_bits(m, 0, 25, 0xf); +} + +static inline u32 msg_isdata(struct tipc_msg *m) +{ + return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE; +} + +static inline void msg_set_user(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 0, 25, 0xf, n); +} + +static inline u32 msg_hdr_sz(struct tipc_msg *m) +{ + return msg_bits(m, 0, 21, 0xf) << 2; +} + +static inline void msg_set_hdr_sz(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 0, 21, 0xf, n>>2); +} + +static inline u32 msg_size(struct tipc_msg *m) +{ + return msg_bits(m, 0, 0, 0x1ffff); +} + +static inline u32 msg_blocks(struct tipc_msg *m) +{ + return (msg_size(m) / 1024) + 1; +} + +static inline u32 msg_data_sz(struct tipc_msg *m) +{ + return msg_size(m) - msg_hdr_sz(m); +} + +static inline int msg_non_seq(struct tipc_msg *m) +{ + return msg_bits(m, 0, 20, 1); +} + +static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 0, 20, 1, n); +} + +static inline 
int msg_dest_droppable(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + +static inline int msg_is_keepalive(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + +static inline int msg_src_droppable(struct tipc_msg *m) +{ + return msg_bits(m, 0, 18, 1); +} + +static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 18, 1, d); +} + +static inline void msg_set_size(struct tipc_msg *m, u32 sz) +{ + m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); +} + +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + +static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +{ + return (struct tipc_msg *)msg_data(m); +} + +/* + * Word 1 + */ +static inline u32 msg_type(struct tipc_msg *m) +{ + return msg_bits(m, 1, 29, 0x7); +} + +static inline void msg_set_type(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 29, 0x7, n); +} + +static inline int msg_in_group(struct tipc_msg *m) +{ + int mtyp = msg_type(m); + + return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG; +} + +static inline bool msg_is_grp_evt(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_GRP_MEMBER_EVT; +} + +static inline u32 msg_named(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_NAMED_MSG; +} + +static inline u32 msg_mcast(struct tipc_msg *m) +{ + int mtyp = msg_type(m); + + return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) || + (mtyp == TIPC_GRP_MCAST_MSG)); +} + +static inline u32 msg_connected(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_CONN_MSG; +} + +static inline u32 msg_errcode(struct tipc_msg *m) +{ + return msg_bits(m, 1, 25, 0xf); +} + +static inline void msg_set_errcode(struct tipc_msg *m, u32 err) +{ + msg_set_bits(m, 1, 25, 0xf, 
err); +} + +static inline u32 msg_reroute_cnt(struct tipc_msg *m) +{ + return msg_bits(m, 1, 21, 0xf); +} + +static inline void msg_incr_reroute_cnt(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1); +} + +static inline void msg_reset_reroute_cnt(struct tipc_msg *m) +{ + msg_set_bits(m, 1, 21, 0xf, 0); +} + +static inline u32 msg_lookup_scope(struct tipc_msg *m) +{ + return msg_bits(m, 1, 19, 0x3); +} + +static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 19, 0x3, n); +} + +static inline u16 msg_bcast_ack(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} + + +/* + * Word 2 + */ +static inline u16 msg_ack(struct tipc_msg *m) +{ + return msg_bits(m, 2, 16, 0xffff); +} + +static inline void msg_set_ack(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 2, 16, 0xffff, n); +} + +static inline u16 msg_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 2, 0, 0xffff); +} + +static inline void msg_set_seqno(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 2, 0, 0xffff, n); +} + +/* + * Words 3-10 + */ +static inline u32 msg_importance(struct tipc_msg *m) +{ + int usr = msg_user(m); + + if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) + return usr; + if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) + return msg_bits(m, 9, 0, 0x7); + return TIPC_SYSTEM_IMPORTANCE; +} + +static inline void msg_set_importance(struct tipc_msg *m, u32 i) +{ + int usr = msg_user(m); + + if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) + msg_set_bits(m, 9, 0, 0x7, i); + else if (i < TIPC_SYSTEM_IMPORTANCE) + msg_set_user(m, i); + else + pr_warn("Trying to set illegal importance in message\n"); +} + +static inline u32 msg_prevnode(struct tipc_msg *m) +{ + return msg_word(m, 3); +} + +static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) +{ + msg_set_word(m, 3, a); +} + +static 
inline u32 msg_origport(struct tipc_msg *m) +{ + if (msg_user(m) == MSG_FRAGMENTER) + m = msg_get_wrapped(m); + return msg_word(m, 4); +} + +static inline void msg_set_origport(struct tipc_msg *m, u32 p) +{ + msg_set_word(m, 4, p); +} + +static inline u32 msg_destport(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + +static inline void msg_set_destport(struct tipc_msg *m, u32 p) +{ + msg_set_word(m, 5, p); +} + +static inline u32 msg_mc_netid(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + +static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) +{ + msg_set_word(m, 5, p); +} + +static inline int msg_short(struct tipc_msg *m) +{ + return msg_hdr_sz(m) == SHORT_H_SIZE; +} + +static inline u32 msg_orignode(struct tipc_msg *m) +{ + if (likely(msg_short(m))) + return msg_prevnode(m); + return msg_word(m, 6); +} + +static inline void msg_set_orignode(struct tipc_msg *m, u32 a) +{ + msg_set_word(m, 6, a); +} + +static inline u32 msg_destnode(struct tipc_msg *m) +{ + return msg_word(m, 7); +} + +static inline void msg_set_destnode(struct tipc_msg *m, u32 a) +{ + msg_set_word(m, 7, a); +} + +static inline u32 msg_nametype(struct tipc_msg *m) +{ + return msg_word(m, 8); +} + +static inline void msg_set_nametype(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 8, n); +} + +static inline u32 msg_nameinst(struct tipc_msg *m) +{ + return msg_word(m, 9); +} + +static inline u32 msg_namelower(struct tipc_msg *m) +{ + return msg_nameinst(m); +} + +static inline void msg_set_namelower(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 9, n); +} + +static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) +{ + msg_set_namelower(m, n); +} + +static inline u32 msg_nameupper(struct tipc_msg *m) +{ + return msg_word(m, 10); +} + +static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 10, n); +} + +/* + * Constants and routines used to read and write TIPC internal message headers + */ + +/* + * Connection management protocol message types + 
*/ +#define CONN_PROBE 0 +#define CONN_PROBE_REPLY 1 +#define CONN_ACK 2 + +/* + * Name distributor message types + */ +#define PUBLICATION 0 +#define WITHDRAWAL 1 + +/* + * Segmentation message types + */ +#define FIRST_FRAGMENT 0 +#define FRAGMENT 1 +#define LAST_FRAGMENT 2 + +/* + * Link management protocol message types + */ +#define STATE_MSG 0 +#define RESET_MSG 1 +#define ACTIVATE_MSG 2 + +/* + * Changeover tunnel message types + */ +#define SYNCH_MSG 0 +#define FAILOVER_MSG 1 + +/* + * Config protocol message types + */ +#define DSC_REQ_MSG 0 +#define DSC_RESP_MSG 1 +#define DSC_TRIAL_MSG 2 +#define DSC_TRIAL_FAIL_MSG 3 + +/* + * Group protocol message types + */ +#define GRP_JOIN_MSG 0 +#define GRP_LEAVE_MSG 1 +#define GRP_ADV_MSG 2 +#define GRP_ACK_MSG 3 +#define GRP_RECLAIM_MSG 4 +#define GRP_REMIT_MSG 5 + +/* + * Word 1 + */ +static inline u32 msg_seq_gap(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1fff); +} + +static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 16, 0x1fff, n); +} + +static inline u32 msg_node_sig(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} + +static inline u32 msg_node_capabilities(struct tipc_msg *m) +{ + return msg_bits(m, 1, 15, 0x1fff); +} + +static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 1, 15, 0x1fff, n); +} + +/* + * Word 2 + */ +static inline u32 msg_dest_domain(struct tipc_msg *m) +{ + return msg_word(m, 2); +} + +static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 2, n); +} + +static inline u32 msg_bcgap_after(struct tipc_msg *m) +{ + return msg_bits(m, 2, 16, 0xffff); +} + +static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 2, 16, 0xffff, n); +} + +static inline u32 msg_bcgap_to(struct tipc_msg *m) +{ + return msg_bits(m, 2, 0, 0xffff); +} + 
+static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 2, 0, 0xffff, n); +} + +/* + * Word 4 + */ +static inline u32 msg_last_bcast(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + +static inline u32 msg_bc_snd_nxt(struct tipc_msg *m) +{ + return msg_last_bcast(m) + 1; +} + +static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 16, 0xffff, n); +} + +static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 16, 0xffff, n); +} + + +static inline u16 msg_next_sent(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_next_sent(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline void msg_set_long_msgno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline u32 msg_bc_netid(struct tipc_msg *m) +{ + return msg_word(m, 4); +} + +static inline void msg_set_bc_netid(struct tipc_msg *m, u32 id) +{ + msg_set_word(m, 4, id); +} + +static inline u32 msg_link_selector(struct tipc_msg *m) +{ + if (msg_user(m) == MSG_FRAGMENTER) + m = (void *)msg_data(m); + return msg_bits(m, 4, 0, 1); +} + +/* + * Word 5 + */ +static inline u16 msg_session(struct tipc_msg *m) +{ + return msg_bits(m, 5, 16, 0xffff); +} + +static inline void msg_set_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 5, 16, 0xffff, n); +} + +static inline u32 msg_probe(struct tipc_msg *m) +{ + return msg_bits(m, 5, 0, 1); +} + +static inline void msg_set_probe(struct tipc_msg *m, u32 val) +{ + msg_set_bits(m, 5, 0, 1, val); +} + +static inline char msg_net_plane(struct tipc_msg *m) +{ + return msg_bits(m, 5, 1, 7) + 'A'; +} + +static inline void msg_set_net_plane(struct tipc_msg *m, char n) +{ + msg_set_bits(m, 5, 1, 7, (n - 'A')); +} + +static inline u32 msg_linkprio(struct tipc_msg *m) +{ + return msg_bits(m, 5, 4, 0x1f); +} + +static inline void msg_set_linkprio(struct tipc_msg *m, u32 n) +{ + 
msg_set_bits(m, 5, 4, 0x1f, n); +} + +static inline u32 msg_bearer_id(struct tipc_msg *m) +{ + return msg_bits(m, 5, 9, 0x7); +} + +static inline void msg_set_bearer_id(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 5, 9, 0x7, n); +} + +static inline u32 msg_redundant_link(struct tipc_msg *m) +{ + return msg_bits(m, 5, 12, 0x1); +} + +static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) +{ + msg_set_bits(m, 5, 12, 0x1, r); +} + +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + +static inline bool msg_bc_ack_invalid(struct tipc_msg *m) +{ + switch (msg_user(m)) { + case BCAST_PROTOCOL: + case NAME_DISTRIBUTOR: + case LINK_PROTOCOL: + return msg_bits(m, 5, 14, 0x1); + default: + return false; + } +} + +static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid) +{ + msg_set_bits(m, 5, 14, 0x1, invalid); +} + +static inline char *msg_media_addr(struct tipc_msg *m) +{ + return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; +} + +static inline u32 msg_bc_gap(struct tipc_msg *m) +{ + return msg_bits(m, 8, 0, 0x3ff); +} + +static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 8, 0, 0x3ff, n); +} + +/* + * Word 9 + */ +static inline u16 msg_msgcnt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u32 msg_conn_ack(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_adv_win(struct tipc_msg *m) +{ + return msg_bits(m, 9, 0, 0xffff); +} + +static inline void msg_set_adv_win(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 0, 0xffff, n); +} + +static inline u32 msg_max_pkt(struct tipc_msg 
*m) +{ + return msg_bits(m, 9, 16, 0xffff) * 4; +} + +static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 16, 0xffff, (n / 4)); +} + +static inline u32 msg_link_tolerance(struct tipc_msg *m) +{ + return msg_bits(m, 9, 0, 0xffff); +} + +static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 0, 0xffff, n); +} + +static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_bc_acked(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_remitted(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +/* Word 10 + */ +static inline u16 msg_grp_evt(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x3); +} + +static inline void msg_set_grp_evt(struct tipc_msg *m, int n) +{ + msg_set_bits(m, 10, 0, 0x3, n); +} + +static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x1); +} + +static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n) +{ + msg_set_bits(m, 10, 0, 0x1, n); +} + +static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 10, 16, 0xffff); +} + +static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 10, 16, 0xffff, n); +} + +static inline bool msg_peer_link_is_up(struct tipc_msg *m) +{ + if (likely(msg_user(m) != LINK_PROTOCOL)) + return true; + if (msg_type(m) == STATE_MSG) + return true; + return false; +} + +static inline bool msg_peer_node_is_up(struct tipc_msg *m) +{ + if (msg_peer_link_is_up(m)) + return true; + return 
msg_redundant_link(m); +} + +static inline bool msg_is_reset(struct tipc_msg *hdr) +{ + return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); +} + +static inline u32 msg_sugg_node_addr(struct tipc_msg *m) +{ + return msg_word(m, 14); +} + +static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n) +{ + msg_set_word(m, 14, n); +} + +static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id) +{ + memcpy(msg_data(hdr), id, 16); +} + +static inline u8 *msg_node_id(struct tipc_msg *hdr) +{ + return (u8 *)msg_data(hdr); +} + +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); +bool tipc_msg_validate(struct sk_buff **_skb); +bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq); +void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, + u32 hsize, u32 destnode); +struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, + uint data_sz, u32 dnode, u32 onode, + u32 dport, u32 oport, int errcode); +int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); +bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu); +bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, + u32 mtu, u32 dnode); +bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, + int offset, int dsz, int mtu, struct sk_buff_head *list); +bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); +bool tipc_msg_assemble(struct sk_buff_head *list); +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); +bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, + struct sk_buff_head *cpy); +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb); + +static inline u16 buf_seqno(struct sk_buff *skb) +{ + return msg_seqno(buf_msg(skb)); +} 
+ +static inline int buf_roundup_len(struct sk_buff *skb) +{ + return (skb->len / 1024 + 1) * 1024; +} + +/* tipc_skb_peek(): peek and reserve first buffer in list + * @list: list to be peeked in, accessed under the caller-supplied 'lock' + * Returns pointer to first buffer in list with an extra reference taken (skb_get), or NULL if the list is empty + */ +static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, + spinlock_t *lock) +{ + struct sk_buff *skb; + + spin_lock_bh(lock); + skb = skb_peek(list); + if (skb) + skb_get(skb); + spin_unlock_bh(lock); + return skb; +} + +/* tipc_skb_peek_port(): find a destination port, ignoring all destinations + * up to and including 'filter'. + * Note: ignoring previously tried destinations minimizes the risk of + * contention on the socket lock + * @list: list to be peeked in + * @filter: last destination to be ignored from search; 0 selects the first buffer + * Returns a destination port number, if applicable; 0 if the list is empty. + */ +static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) +{ + struct sk_buff *skb; + u32 dport = 0; + bool ignore = true; + + spin_lock_bh(&list->lock); + skb_queue_walk(list, skb) { + dport = msg_destport(buf_msg(skb)); + if (!filter || skb_queue_is_last(list, skb)) + break; + if (dport == filter) + ignore = false; + else if (!ignore) + break; + } + spin_unlock_bh(&list->lock); + return dport; +} + +/* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list + * @list: list to be unlinked from + * @dport: selection criteria for buffer to unlink; NULL is returned if no buffer matches + */ +static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, + u32 dport) +{ + struct sk_buff *_skb, *tmp, *skb = NULL; + + spin_lock_bh(&list->lock); + skb_queue_walk_safe(list, _skb, tmp) { + if (msg_destport(buf_msg(_skb)) == dport) { + __skb_unlink(_skb, list); + skb = _skb; + break; + } + } + spin_unlock_bh(&list->lock); + return skb; +} + +/* tipc_skb_queue_splice_tail - append an skb list to lock protected list + * @list: the new list to append. Not lock protected + * @head: target list. Lock protected.
+ */ +static inline void tipc_skb_queue_splice_tail(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + spin_lock_bh(&head->lock); + skb_queue_splice_tail(list, head); + spin_unlock_bh(&head->lock); +} + +/* tipc_skb_queue_splice_tail_init - merge two lock protected skb lists + * @list: the new list to add. Lock protected. Will be reinitialized + * @head: target list. Lock protected. + */ +static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, + struct sk_buff_head *head) +{ + struct sk_buff_head tmp; + + __skb_queue_head_init(&tmp); + + spin_lock_bh(&list->lock); + skb_queue_splice_tail_init(list, &tmp); + spin_unlock_bh(&list->lock); + tipc_skb_queue_splice_tail(&tmp, head); +} + +#endif diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c new file mode 100644 index 000000000..661bc2551 --- /dev/null +++ b/net/tipc/name_distr.c @@ -0,0 +1,350 @@ +/* + * net/tipc/name_distr.c: TIPC name distribution code + * + * Copyright (c) 2000-2006, 2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "link.h" +#include "name_distr.h" + +int sysctl_tipc_named_timeout __read_mostly = 2000; + +struct distr_queue_item { + struct distr_item i; + u32 dtype; + u32 node; + unsigned long expires; + struct list_head next; +}; + +/** + * publ_to_item - add publication info to a publication message + */ +static void publ_to_item(struct distr_item *i, struct publication *p) +{ + i->type = htonl(p->type); + i->lower = htonl(p->lower); + i->upper = htonl(p->upper); + i->port = htonl(p->port); + i->key = htonl(p->key); +} + +/** + * named_prepare_buf - allocate & initialize a publication message + * + * The buffer returned is of size INT_H_SIZE + payload size + */ +static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, + u32 dest) +{ + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); + u32 self = tipc_own_addr(net); + struct tipc_msg *msg; + + if (buf != NULL) { + msg = buf_msg(buf); + tipc_msg_init(self, msg, NAME_DISTRIBUTOR, + type, INT_H_SIZE, dest); + msg_set_size(msg, INT_H_SIZE + size); + } + return buf; +} + +/** + * tipc_named_publish - tell other nodes about a new publication by this node + */ 
+struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ) +{ + struct name_table *nt = tipc_name_table(net); + struct distr_item *item; + struct sk_buff *skb; + + if (publ->scope == TIPC_NODE_SCOPE) { + list_add_tail_rcu(&publ->binding_node, &nt->node_scope); + return NULL; + } + write_lock_bh(&nt->cluster_scope_lock); + list_add_tail(&publ->binding_node, &nt->cluster_scope); + write_unlock_bh(&nt->cluster_scope_lock); + skb = named_prepare_buf(net, PUBLICATION, ITEM_SIZE, 0); + if (!skb) { + pr_warn("Publication distribution failure\n"); + return NULL; + } + + item = (struct distr_item *)msg_data(buf_msg(skb)); + publ_to_item(item, publ); + return skb; +} + +/** + * tipc_named_withdraw - tell other nodes about a withdrawn publication by this node + */ +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ) +{ + struct name_table *nt = tipc_name_table(net); + struct sk_buff *buf; + struct distr_item *item; + + write_lock_bh(&nt->cluster_scope_lock); + list_del(&publ->binding_node); + write_unlock_bh(&nt->cluster_scope_lock); + if (publ->scope == TIPC_NODE_SCOPE) + return NULL; + + buf = named_prepare_buf(net, WITHDRAWAL, ITEM_SIZE, 0); + if (!buf) { + pr_warn("Withdrawal distribution failure\n"); + return NULL; + } + + item = (struct distr_item *)msg_data(buf_msg(buf)); + publ_to_item(item, publ); + return buf; +} + +/** + * named_distribute - prepare name info for bulk distribution to another node + * @list: list of messages (buffers) to be returned from this function + * @dnode: node to be updated + * @pls: linked list of publication items to be packed into buffer chain + */ +static void named_distribute(struct net *net, struct sk_buff_head *list, + u32 dnode, struct list_head *pls) +{ + struct publication *publ; + struct sk_buff *skb = NULL; + struct distr_item *item = NULL; + u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) / + ITEM_SIZE) * ITEM_SIZE; + u32 msg_rem = msg_dsz; + + 
list_for_each_entry(publ, pls, binding_node) { + /* Prepare next buffer: */ + if (!skb) { + skb = named_prepare_buf(net, PUBLICATION, msg_rem, + dnode); + if (!skb) { + pr_warn("Bulk publication failure\n"); + return; + } + msg_set_bc_ack_invalid(buf_msg(skb), true); + item = (struct distr_item *)msg_data(buf_msg(skb)); + } + + /* Pack publication into message: */ + publ_to_item(item, publ); + item++; + msg_rem -= ITEM_SIZE; + + /* Append full buffer to list: */ + if (!msg_rem) { + __skb_queue_tail(list, skb); + skb = NULL; + msg_rem = msg_dsz; + } + } + if (skb) { + msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem)); + skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); + __skb_queue_tail(list, skb); + } +} + +/** + * tipc_named_node_up - tell specified node about all publications by this node + */ +void tipc_named_node_up(struct net *net, u32 dnode) +{ + struct name_table *nt = tipc_name_table(net); + struct sk_buff_head head; + + __skb_queue_head_init(&head); + + read_lock_bh(&nt->cluster_scope_lock); + named_distribute(net, &head, dnode, &nt->cluster_scope); + tipc_node_xmit(net, &head, dnode, 0); + read_unlock_bh(&nt->cluster_scope_lock); +} + +/** + * tipc_publ_purge - remove publication associated with a failed node + * + * Invoked for each publication issued by a newly failed node. + * Removes publication structure from name table & deletes it. 
+ */ +static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) +{ + struct tipc_net *tn = tipc_net(net); + struct publication *p; + + spin_lock_bh(&tn->nametbl_lock); + p = tipc_nametbl_remove_publ(net, publ->type, publ->lower, publ->upper, + publ->node, publ->key); + if (p) + tipc_node_unsubscribe(net, &p->binding_node, addr); + spin_unlock_bh(&tn->nametbl_lock); + + if (p != publ) { + pr_err("Unable to remove publication from failed node\n" + " (type=%u, lower=%u, node=0x%x, port=%u, key=%u)\n", + publ->type, publ->lower, publ->node, publ->port, + publ->key); + } + + if (p) + kfree_rcu(p, rcu); +} + +/** + * tipc_dist_queue_purge - remove deferred updates from a node that went down + */ +static void tipc_dist_queue_purge(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct distr_queue_item *e, *tmp; + + spin_lock_bh(&tn->nametbl_lock); + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { + if (e->node != addr) + continue; + list_del(&e->next); + kfree(e); + } + spin_unlock_bh(&tn->nametbl_lock); +} + +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) +{ + struct publication *publ, *tmp; + + list_for_each_entry_safe(publ, tmp, nsub_list, binding_node) + tipc_publ_purge(net, publ, addr); + tipc_dist_queue_purge(net, addr); +} + +/** + * tipc_update_nametbl - try to process a nametable update and notify + * subscribers + * + * The name table lock (tn->nametbl_lock) must be held by the caller. + * Returns true if the publication was inserted or removed, otherwise false.
+ */ +static bool tipc_update_nametbl(struct net *net, struct distr_item *i, + u32 node, u32 dtype) +{ + struct publication *p = NULL; + u32 lower = ntohl(i->lower); + u32 upper = ntohl(i->upper); + u32 type = ntohl(i->type); + u32 port = ntohl(i->port); + u32 key = ntohl(i->key); + + if (dtype == PUBLICATION) { + p = tipc_nametbl_insert_publ(net, type, lower, upper, + TIPC_CLUSTER_SCOPE, node, + port, key); + if (p) { + tipc_node_subscribe(net, &p->binding_node, node); + return true; + } + } else if (dtype == WITHDRAWAL) { + p = tipc_nametbl_remove_publ(net, type, lower, + upper, node, key); + if (p) { + tipc_node_unsubscribe(net, &p->binding_node, node); + kfree_rcu(p, rcu); + return true; + } + pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n", + type, lower, node); + } else { + pr_warn_ratelimited("Unknown name table message received\n"); + } + return false; +} + +/** + * tipc_named_rcv - process name table update messages sent by another node + */ +void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_msg *msg; + struct distr_item *item; + uint count; + u32 node; + struct sk_buff *skb; + int mtype; + + spin_lock_bh(&tn->nametbl_lock); + for (skb = skb_dequeue(inputq); skb; skb = skb_dequeue(inputq)) { + skb_linearize(skb); /* NOTE(review): return value is ignored; if linearization fails the skb is still parsed below through buf_msg()/msg_data() as if it were linear - the skb should presumably be dropped instead, confirm and fix */ + msg = buf_msg(skb); + mtype = msg_type(msg); + item = (struct distr_item *)msg_data(msg); + count = msg_data_sz(msg) / ITEM_SIZE; /* integer division: only whole items are processed */ + node = msg_orignode(msg); + while (count--) { + tipc_update_nametbl(net, item, node, mtype); + item++; + } + kfree_skb(skb); + } + spin_unlock_bh(&tn->nametbl_lock); +} + +/** + * tipc_named_reinit - re-initialize local publications + * + * This routine is called whenever TIPC networking is enabled. + * All name table entries published by this node are updated to reflect + * the node's new network address.
+ */ +void tipc_named_reinit(struct net *net) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct publication *publ; + u32 self = tipc_own_addr(net); + + spin_lock_bh(&tn->nametbl_lock); + + list_for_each_entry_rcu(publ, &nt->node_scope, binding_node) + publ->node = self; + list_for_each_entry_rcu(publ, &nt->cluster_scope, binding_node) + publ->node = self; + + spin_unlock_bh(&tn->nametbl_lock); +} diff --git a/net/tipc/name_distr.h b/net/tipc/name_distr.h new file mode 100644 index 000000000..63fc73e0f --- /dev/null +++ b/net/tipc/name_distr.h @@ -0,0 +1,77 @@ +/* + * net/tipc/name_distr.h: Include file for TIPC name distribution code + * + * Copyright (c) 2000-2006, Ericsson AB + * Copyright (c) 2005, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NAME_DISTR_H +#define _TIPC_NAME_DISTR_H + +#include "name_table.h" + +#define ITEM_SIZE sizeof(struct distr_item) + +/** + * struct distr_item - publication info distributed to other nodes + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @port: publishing port reference + * @key: publication key + * + * ===> All fields are stored in network byte order. <=== + * + * First 3 fields identify (name or) name sequence being published. + * Port field uniquely identifies port that published name sequence. + * Key field uniquely identifies publication, in the event a port has + * multiple publications of the same name sequence. + * + * Note: There is no field that identifies the publishing node because it is + * the same for all items contained within a publication message.
+ */ +struct distr_item { + __be32 type; + __be32 lower; + __be32 upper; + __be32 port; + __be32 key; +}; + +struct sk_buff *tipc_named_publish(struct net *net, struct publication *publ); +struct sk_buff *tipc_named_withdraw(struct net *net, struct publication *publ); +void tipc_named_node_up(struct net *net, u32 dnode); +void tipc_named_rcv(struct net *net, struct sk_buff_head *msg_queue); +void tipc_named_reinit(struct net *net); +void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr); + +#endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c new file mode 100644 index 000000000..059ffb8b4 --- /dev/null +++ b/net/tipc/name_table.c @@ -0,0 +1,1057 @@ +/* + * net/tipc/name_table.c: TIPC name table code + * + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB + * Copyright (c) 2004-2008, 2010-2014, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
/**
 * struct service_range - container for all bindings of a service range
 * @lower: service range lower bound
 * @upper: service range upper bound
 * @tree_node: member of service range RB tree
 * @local_publ: list of identical publications made from this node
 *   Used by closest_first lookup and multicast lookup algorithm
 * @all_publ: all publications identical to this one, whatever node and scope
 *   Used by round-robin lookup algorithm
 *
 * Ranges belonging to one service are linked into that service's 'ranges'
 * tree through @tree_node; tipc_service_create_range() orders them by
 * @lower first and by @upper second.
 */
struct service_range {
	u32 lower;
	u32 upper;
	struct rb_node tree_node;
	struct list_head local_publ;
	struct list_head all_publ;
};
pertaining service ranges/publications + * @rcu: RCU callback head used for deferred freeing + */ +struct tipc_service { + u32 type; + struct rb_root ranges; + struct hlist_node service_list; + struct list_head subscriptions; + spinlock_t lock; /* Covers service range list */ + struct rcu_head rcu; +}; + +static int hash(int x) +{ + return x & (TIPC_NAMETBL_SIZE - 1); +} + +/** + * tipc_publ_create - create a publication structure + */ +static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper, + u32 scope, u32 node, u32 port, + u32 key) +{ + struct publication *publ = kzalloc(sizeof(*publ), GFP_ATOMIC); + + if (!publ) + return NULL; + + publ->type = type; + publ->lower = lower; + publ->upper = upper; + publ->scope = scope; + publ->node = node; + publ->port = port; + publ->key = key; + INIT_LIST_HEAD(&publ->binding_sock); + INIT_LIST_HEAD(&publ->binding_node); + INIT_LIST_HEAD(&publ->local_publ); + INIT_LIST_HEAD(&publ->all_publ); + return publ; +} + +/** + * tipc_service_create - create a service structure for the specified 'type' + * + * Allocates a single range structure and sets it to all 0's. 
+ */ +static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd) +{ + struct tipc_service *service = kzalloc(sizeof(*service), GFP_ATOMIC); + + if (!service) { + pr_warn("Service creation failed, no memory\n"); + return NULL; + } + + spin_lock_init(&service->lock); + service->type = type; + service->ranges = RB_ROOT; + INIT_HLIST_NODE(&service->service_list); + INIT_LIST_HEAD(&service->subscriptions); + hlist_add_head_rcu(&service->service_list, hd); + return service; +} + +/** + * tipc_service_first_range - find first service range in tree matching instance + * + * Very time-critical, so binary search through range rb tree + */ +static struct service_range *tipc_service_first_range(struct tipc_service *sc, + u32 instance) +{ + struct rb_node *n = sc->ranges.rb_node; + struct service_range *sr; + + while (n) { + sr = container_of(n, struct service_range, tree_node); + if (sr->lower > instance) + n = n->rb_left; + else if (sr->upper < instance) + n = n->rb_right; + else + return sr; + } + return NULL; +} + +/* tipc_service_find_range - find service range matching publication parameters + */ +static struct service_range *tipc_service_find_range(struct tipc_service *sc, + u32 lower, u32 upper) +{ + struct rb_node *n = sc->ranges.rb_node; + struct service_range *sr; + + sr = tipc_service_first_range(sc, lower); + if (!sr) + return NULL; + + /* Look for exact match */ + for (n = &sr->tree_node; n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + if (sr->upper == upper) + break; + } + if (!n || sr->lower != lower || sr->upper != upper) + return NULL; + + return sr; +} + +static struct service_range *tipc_service_create_range(struct tipc_service *sc, + u32 lower, u32 upper) +{ + struct rb_node **n, *parent = NULL; + struct service_range *sr, *tmp; + + n = &sc->ranges.rb_node; + while (*n) { + tmp = container_of(*n, struct service_range, tree_node); + parent = *n; + tmp = container_of(parent, struct service_range, 
tree_node); + if (lower < tmp->lower) + n = &(*n)->rb_left; + else if (lower > tmp->lower) + n = &(*n)->rb_right; + else if (upper < tmp->upper) + n = &(*n)->rb_left; + else if (upper > tmp->upper) + n = &(*n)->rb_right; + else + return tmp; + } + sr = kzalloc(sizeof(*sr), GFP_ATOMIC); + if (!sr) + return NULL; + sr->lower = lower; + sr->upper = upper; + INIT_LIST_HEAD(&sr->local_publ); + INIT_LIST_HEAD(&sr->all_publ); + rb_link_node(&sr->tree_node, parent, n); + rb_insert_color(&sr->tree_node, &sc->ranges); + return sr; +} + +static struct publication *tipc_service_insert_publ(struct net *net, + struct tipc_service *sc, + u32 type, u32 lower, + u32 upper, u32 scope, + u32 node, u32 port, + u32 key) +{ + struct tipc_subscription *sub, *tmp; + struct service_range *sr; + struct publication *p; + bool first = false; + + sr = tipc_service_create_range(sc, lower, upper); + if (!sr) + goto err; + + first = list_empty(&sr->all_publ); + + /* Return if the publication already exists */ + list_for_each_entry(p, &sr->all_publ, all_publ) { + if (p->key == key && (!p->node || p->node == node)) + return NULL; + } + + /* Create and insert publication */ + p = tipc_publ_create(type, lower, upper, scope, node, port, key); + if (!p) + goto err; + if (in_own_node(net, node)) + list_add(&p->local_publ, &sr->local_publ); + list_add(&p->all_publ, &sr->all_publ); + + /* Any subscriptions waiting for notification? 
*/ + list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { + tipc_sub_report_overlap(sub, p->lower, p->upper, TIPC_PUBLISHED, + p->port, p->node, p->scope, first); + } + return p; +err: + pr_warn("Failed to bind to %u,%u,%u, no memory\n", type, lower, upper); + return NULL; +} + +/** + * tipc_service_remove_publ - remove a publication from a service + */ +static struct publication *tipc_service_remove_publ(struct service_range *sr, + u32 node, u32 key) +{ + struct publication *p; + + list_for_each_entry(p, &sr->all_publ, all_publ) { + if (p->key != key || (node && node != p->node)) + continue; + list_del(&p->all_publ); + list_del(&p->local_publ); + return p; + } + return NULL; +} + +/** + * tipc_service_subscribe - attach a subscription, and optionally + * issue the prescribed number of events if there is any service + * range overlapping with the requested range + */ +static void tipc_service_subscribe(struct tipc_service *service, + struct tipc_subscription *sub) +{ + struct tipc_subscr *sb = &sub->evt.s; + struct service_range *sr; + struct tipc_name_seq ns; + struct publication *p; + struct rb_node *n; + bool first; + + ns.type = tipc_sub_read(sb, seq.type); + ns.lower = tipc_sub_read(sb, seq.lower); + ns.upper = tipc_sub_read(sb, seq.upper); + + tipc_sub_get(sub); + list_add(&sub->service_list, &service->subscriptions); + + if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS) + return; + + for (n = rb_first(&service->ranges); n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + if (sr->lower > ns.upper) + break; + if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper)) + continue; + first = true; + + list_for_each_entry(p, &sr->all_publ, all_publ) { + tipc_sub_report_overlap(sub, sr->lower, sr->upper, + TIPC_PUBLISHED, p->port, + p->node, p->scope, first); + first = false; + } + } +} + +static struct tipc_service *tipc_service_find(struct net *net, u32 type) +{ + struct name_table *nt = tipc_name_table(net); + struct 
hlist_head *service_head; + struct tipc_service *service; + + service_head = &nt->services[hash(type)]; + hlist_for_each_entry_rcu(service, service_head, service_list) { + if (service->type == type) + return service; + } + return NULL; +}; + +struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, + u32 lower, u32 upper, + u32 scope, u32 node, + u32 port, u32 key) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_service *sc; + struct publication *p; + + if (scope > TIPC_NODE_SCOPE || lower > upper) { + pr_debug("Failed to bind illegal {%u,%u,%u} with scope %u\n", + type, lower, upper, scope); + return NULL; + } + sc = tipc_service_find(net, type); + if (!sc) + sc = tipc_service_create(type, &nt->services[hash(type)]); + if (!sc) + return NULL; + + spin_lock_bh(&sc->lock); + p = tipc_service_insert_publ(net, sc, type, lower, upper, + scope, node, port, key); + spin_unlock_bh(&sc->lock); + return p; +} + +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 upper, + u32 node, u32 key) +{ + struct tipc_service *sc = tipc_service_find(net, type); + struct tipc_subscription *sub, *tmp; + struct service_range *sr = NULL; + struct publication *p = NULL; + bool last; + + if (!sc) + return NULL; + + spin_lock_bh(&sc->lock); + sr = tipc_service_find_range(sc, lower, upper); + if (!sr) + goto exit; + p = tipc_service_remove_publ(sr, node, key); + if (!p) + goto exit; + + /* Notify any waiting subscriptions */ + last = list_empty(&sr->all_publ); + list_for_each_entry_safe(sub, tmp, &sc->subscriptions, service_list) { + tipc_sub_report_overlap(sub, lower, upper, TIPC_WITHDRAWN, + p->port, node, p->scope, last); + } + + /* Remove service range item if this was its last publication */ + if (list_empty(&sr->all_publ)) { + rb_erase(&sr->tree_node, &sc->ranges); + kfree(sr); + } + + /* Delete service item if this no more publications and subscriptions */ + if (RB_EMPTY_ROOT(&sc->ranges) && 
list_empty(&sc->subscriptions)) { + hlist_del_init_rcu(&sc->service_list); + kfree_rcu(sc, rcu); + } +exit: + spin_unlock_bh(&sc->lock); + return p; +} + +/** + * tipc_nametbl_translate - perform service instance to socket translation + * + * On entry, 'dnode' is the search domain used during translation. + * + * On exit: + * - if translation is deferred to another node, leave 'dnode' unchanged and + * return 0 + * - if translation is attempted and succeeds, set 'dnode' to the publishing + * node and return the published (non-zero) port number + * - if translation is attempted and fails, set 'dnode' to 0 and return 0 + * + * Note that for legacy users (node configured with Z.C.N address format) the + * 'closest-first' lookup algorithm must be maintained, i.e., if dnode is 0 + * we must look in the local binding list first + */ +u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode) +{ + struct tipc_net *tn = tipc_net(net); + bool legacy = tn->legacy_addr_format; + u32 self = tipc_own_addr(net); + struct service_range *sr; + struct tipc_service *sc; + struct list_head *list; + struct publication *p; + u32 port = 0; + u32 node = 0; + + if (!tipc_in_scope(legacy, *dnode, self)) + return 0; + + rcu_read_lock(); + sc = tipc_service_find(net, type); + if (unlikely(!sc)) + goto not_found; + + spin_lock_bh(&sc->lock); + sr = tipc_service_first_range(sc, instance); + if (unlikely(!sr)) + goto no_match; + + /* Select lookup algorithm: local, closest-first or round-robin */ + if (*dnode == self) { + list = &sr->local_publ; + if (list_empty(list)) + goto no_match; + p = list_first_entry(list, struct publication, local_publ); + list_move_tail(&p->local_publ, &sr->local_publ); + } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) { + list = &sr->local_publ; + p = list_first_entry(list, struct publication, local_publ); + list_move_tail(&p->local_publ, &sr->local_publ); + } else { + list = &sr->all_publ; + p = list_first_entry(list, struct 
publication, all_publ); + list_move_tail(&p->all_publ, &sr->all_publ); + } + port = p->port; + node = p->node; +no_match: + spin_unlock_bh(&sc->lock); +not_found: + rcu_read_unlock(); + *dnode = node; + return port; +} + +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all) +{ + u32 self = tipc_own_addr(net); + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + + *dstcnt = 0; + rcu_read_lock(); + sc = tipc_service_find(net, type); + if (unlikely(!sc)) + goto exit; + + spin_lock_bh(&sc->lock); + + sr = tipc_service_first_range(sc, instance); + if (!sr) + goto no_match; + + list_for_each_entry(p, &sr->all_publ, all_publ) { + if (p->scope != scope) + continue; + if (p->port == exclude && p->node == self) + continue; + tipc_dest_push(dsts, p->node, p->port); + (*dstcnt)++; + if (all) + continue; + list_move_tail(&p->all_publ, &sr->all_publ); + break; + } +no_match: + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); + return !list_empty(dsts); +} + +void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) +{ + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + struct rb_node *n; + + rcu_read_lock(); + sc = tipc_service_find(net, type); + if (!sc) + goto exit; + + spin_lock_bh(&sc->lock); + + for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + if (sr->upper < lower) + continue; + if (sr->lower > upper) + break; + list_for_each_entry(p, &sr->local_publ, local_publ) { + if (p->scope == scope || (!exact && p->scope < scope)) + tipc_dest_push(dports, 0, p->port); + } + } + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); +} + +/* tipc_nametbl_lookup_dst_nodes - find broadcast destination nodes + * - Creates list of nodes that overlap the given multicast address + * - Determines if any node 
local destinations overlap + */ +void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, + u32 upper, struct tipc_nlist *nodes) +{ + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + struct rb_node *n; + + rcu_read_lock(); + sc = tipc_service_find(net, type); + if (!sc) + goto exit; + + spin_lock_bh(&sc->lock); + + for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + if (sr->upper < lower) + continue; + if (sr->lower > upper) + break; + list_for_each_entry(p, &sr->all_publ, all_publ) { + tipc_nlist_add(nodes, p->node); + } + } + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); +} + +/* tipc_nametbl_build_group - build list of communication group members + */ +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 scope) +{ + struct service_range *sr; + struct tipc_service *sc; + struct publication *p; + struct rb_node *n; + + rcu_read_lock(); + sc = tipc_service_find(net, type); + if (!sc) + goto exit; + + spin_lock_bh(&sc->lock); + for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + list_for_each_entry(p, &sr->all_publ, all_publ) { + if (p->scope != scope) + continue; + tipc_group_add_member(grp, p->node, p->port, p->lower); + } + } + spin_unlock_bh(&sc->lock); +exit: + rcu_read_unlock(); +} + +/* tipc_nametbl_publish - add service binding to name table + */ +struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, + u32 upper, u32 scope, u32 port, + u32 key) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct publication *p = NULL; + struct sk_buff *skb = NULL; + + spin_lock_bh(&tn->nametbl_lock); + + if (nt->local_publ_count >= TIPC_MAX_PUBL) { + pr_warn("Bind failed, max limit %u reached\n", TIPC_MAX_PUBL); + goto exit; + } + + p = tipc_nametbl_insert_publ(net, type, lower, upper, scope, + 
tipc_own_addr(net), port, key); + if (p) { + nt->local_publ_count++; + skb = tipc_named_publish(net, p); + } +exit: + spin_unlock_bh(&tn->nametbl_lock); + + if (skb) + tipc_node_broadcast(net, skb); + return p; +} + +/** + * tipc_nametbl_withdraw - withdraw a service binding + */ +int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, + u32 upper, u32 key) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + u32 self = tipc_own_addr(net); + struct sk_buff *skb = NULL; + struct publication *p; + + spin_lock_bh(&tn->nametbl_lock); + + p = tipc_nametbl_remove_publ(net, type, lower, upper, self, key); + if (p) { + nt->local_publ_count--; + skb = tipc_named_withdraw(net, p); + list_del_init(&p->binding_sock); + kfree_rcu(p, rcu); + } else { + pr_err("Failed to remove local publication {%u,%u,%u}/%u\n", + type, lower, upper, key); + } + spin_unlock_bh(&tn->nametbl_lock); + + if (skb) { + tipc_node_broadcast(net, skb); + return 1; + } + return 0; +} + +/** + * tipc_nametbl_subscribe - add a subscription object to the name table + */ +bool tipc_nametbl_subscribe(struct tipc_subscription *sub) +{ + struct name_table *nt = tipc_name_table(sub->net); + struct tipc_net *tn = tipc_net(sub->net); + struct tipc_subscr *s = &sub->evt.s; + u32 type = tipc_sub_read(s, seq.type); + struct tipc_service *sc; + bool res = true; + + spin_lock_bh(&tn->nametbl_lock); + sc = tipc_service_find(sub->net, type); + if (!sc) + sc = tipc_service_create(type, &nt->services[hash(type)]); + if (sc) { + spin_lock_bh(&sc->lock); + tipc_service_subscribe(sc, sub); + spin_unlock_bh(&sc->lock); + } else { + pr_warn("Failed to subscribe for {%u,%u,%u}\n", type, + tipc_sub_read(s, seq.lower), + tipc_sub_read(s, seq.upper)); + res = false; + } + spin_unlock_bh(&tn->nametbl_lock); + return res; +} + +/** + * tipc_nametbl_unsubscribe - remove a subscription object from name table + */ +void tipc_nametbl_unsubscribe(struct tipc_subscription *sub) +{ + struct 
tipc_net *tn = tipc_net(sub->net); + struct tipc_subscr *s = &sub->evt.s; + u32 type = tipc_sub_read(s, seq.type); + struct tipc_service *sc; + + spin_lock_bh(&tn->nametbl_lock); + sc = tipc_service_find(sub->net, type); + if (!sc) + goto exit; + + spin_lock_bh(&sc->lock); + list_del_init(&sub->service_list); + tipc_sub_put(sub); + + /* Delete service item if no more publications and subscriptions */ + if (RB_EMPTY_ROOT(&sc->ranges) && list_empty(&sc->subscriptions)) { + hlist_del_init_rcu(&sc->service_list); + kfree_rcu(sc, rcu); + } + spin_unlock_bh(&sc->lock); +exit: + spin_unlock_bh(&tn->nametbl_lock); +} + +int tipc_nametbl_init(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + struct name_table *nt; + int i; + + nt = kzalloc(sizeof(*nt), GFP_KERNEL); + if (!nt) + return -ENOMEM; + + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) + INIT_HLIST_HEAD(&nt->services[i]); + + INIT_LIST_HEAD(&nt->node_scope); + INIT_LIST_HEAD(&nt->cluster_scope); + rwlock_init(&nt->cluster_scope_lock); + tn->nametbl = nt; + spin_lock_init(&tn->nametbl_lock); + return 0; +} + +/** + * tipc_service_delete - purge all publications for a service and delete it + */ +static void tipc_service_delete(struct net *net, struct tipc_service *sc) +{ + struct service_range *sr, *tmpr; + struct publication *p, *tmp; + + spin_lock_bh(&sc->lock); + rbtree_postorder_for_each_entry_safe(sr, tmpr, &sc->ranges, tree_node) { + list_for_each_entry_safe(p, tmp, &sr->all_publ, all_publ) { + tipc_service_remove_publ(sr, p->node, p->key); + kfree_rcu(p, rcu); + } + rb_erase(&sr->tree_node, &sc->ranges); + kfree(sr); + } + hlist_del_init_rcu(&sc->service_list); + spin_unlock_bh(&sc->lock); + kfree_rcu(sc, rcu); +} + +void tipc_nametbl_stop(struct net *net) +{ + struct name_table *nt = tipc_name_table(net); + struct tipc_net *tn = tipc_net(net); + struct hlist_head *service_head; + struct tipc_service *service; + u32 i; + + /* Verify name table is empty and purge any lingering + * publications, then release 
the name table + */ + spin_lock_bh(&tn->nametbl_lock); + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { + if (hlist_empty(&nt->services[i])) + continue; + service_head = &nt->services[i]; + hlist_for_each_entry_rcu(service, service_head, service_list) { + tipc_service_delete(net, service); + } + } + spin_unlock_bh(&tn->nametbl_lock); + + synchronize_net(); + kfree(nt); +} + +static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, + struct tipc_service *service, + struct service_range *sr, + u32 *last_key) +{ + struct publication *p; + struct nlattr *attrs; + struct nlattr *b; + void *hdr; + + if (*last_key) { + list_for_each_entry(p, &sr->all_publ, all_publ) + if (p->key == *last_key) + break; + if (list_entry_is_head(p, &sr->all_publ, all_publ)) + return -EPIPE; + } else { + p = list_first_entry(&sr->all_publ, + struct publication, + all_publ); + } + + list_for_each_entry_from(p, &sr->all_publ, all_publ) { + *last_key = p->key; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_NAME_TABLE_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE); + if (!attrs) + goto msg_full; + + b = nla_nest_start(msg->skb, TIPC_NLA_NAME_TABLE_PUBL); + if (!b) + goto attr_msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_TYPE, service->type)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_LOWER, sr->lower)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_UPPER, sr->upper)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_SCOPE, p->scope)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_NODE, p->node)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_REF, p->port)) + goto publ_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_PUBL_KEY, p->key)) + goto publ_msg_full; + + nla_nest_end(msg->skb, b); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + } + *last_key = 0; + + return 0; + +publ_msg_full: + 
nla_nest_cancel(msg->skb, b); +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +static int __tipc_nl_service_range_list(struct tipc_nl_msg *msg, + struct tipc_service *sc, + u32 *last_lower, u32 *last_key) +{ + struct service_range *sr; + struct rb_node *n; + int err; + + for (n = rb_first(&sc->ranges); n; n = rb_next(n)) { + sr = container_of(n, struct service_range, tree_node); + if (sr->lower < *last_lower) + continue; + err = __tipc_nl_add_nametable_publ(msg, sc, sr, last_key); + if (err) { + *last_lower = sr->lower; + return err; + } + } + *last_lower = 0; + return 0; +} + +static int tipc_nl_service_list(struct net *net, struct tipc_nl_msg *msg, + u32 *last_type, u32 *last_lower, u32 *last_key) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_service *service = NULL; + struct hlist_head *head; + int err; + int i; + + if (*last_type) + i = hash(*last_type); + else + i = 0; + + for (; i < TIPC_NAMETBL_SIZE; i++) { + head = &tn->nametbl->services[i]; + + if (*last_type || + (!i && *last_key && (*last_lower == *last_key))) { + service = tipc_service_find(net, *last_type); + if (!service) + return -EPIPE; + } else { + hlist_for_each_entry_rcu(service, head, service_list) + break; + if (!service) + continue; + } + + hlist_for_each_entry_from_rcu(service, service_list) { + spin_lock_bh(&service->lock); + err = __tipc_nl_service_range_list(msg, service, + last_lower, + last_key); + + if (err) { + *last_type = service->type; + spin_unlock_bh(&service->lock); + return err; + } + spin_unlock_bh(&service->lock); + } + *last_type = 0; + } + return 0; +} + +int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 last_type = cb->args[0]; + u32 last_lower = cb->args[1]; + u32 last_key = cb->args[2]; + int done = cb->args[3]; + struct tipc_nl_msg msg; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = 
NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + err = tipc_nl_service_list(net, &msg, &last_type, + &last_lower, &last_key); + if (!err) { + done = 1; + } else if (err != -EMSGSIZE) { + /* We never set seq or call nl_dump_check_consistent() this + * means that setting prev_seq here will cause the consistence + * check to fail in the netlink callback handler. Resulting in + * the NLMSG_DONE message having the NLM_F_DUMP_INTR flag set if + * we got an error. + */ + cb->prev_seq = 1; + } + rcu_read_unlock(); + + cb->args[0] = last_type; + cb->args[1] = last_lower; + cb->args[2] = last_key; + cb->args[3] = done; + + return skb->len; +} + +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port) +{ + struct tipc_dest *dst; + + list_for_each_entry(dst, l, list) { + if (dst->node == node && dst->port == port) + return dst; + } + return NULL; +} + +bool tipc_dest_push(struct list_head *l, u32 node, u32 port) +{ + struct tipc_dest *dst; + + if (tipc_dest_find(l, node, port)) + return false; + + dst = kmalloc(sizeof(*dst), GFP_ATOMIC); + if (unlikely(!dst)) + return false; + dst->node = node; + dst->port = port; + list_add(&dst->list, l); + return true; +} + +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port) +{ + struct tipc_dest *dst; + + if (list_empty(l)) + return false; + dst = list_first_entry(l, typeof(*dst), list); + if (port) + *port = dst->port; + if (node) + *node = dst->node; + list_del(&dst->list); + kfree(dst); + return true; +} + +bool tipc_dest_del(struct list_head *l, u32 node, u32 port) +{ + struct tipc_dest *dst; + + dst = tipc_dest_find(l, node, port); + if (!dst) + return false; + list_del(&dst->list); + kfree(dst); + return true; +} + +void tipc_dest_list_purge(struct list_head *l) +{ + struct tipc_dest *dst, *tmp; + + list_for_each_entry_safe(dst, tmp, l, list) { + list_del(&dst->list); + kfree(dst); + } +} + +int tipc_dest_list_len(struct list_head *l) +{ + struct tipc_dest *dst; + int i = 
0; + + list_for_each_entry(dst, l, list) { + i++; + } + return i; +} diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h new file mode 100644 index 000000000..f79066334 --- /dev/null +++ b/net/tipc/name_table.h @@ -0,0 +1,148 @@ +/* + * net/tipc/name_table.h: Include file for TIPC name table code + * + * Copyright (c) 2000-2006, 2014-2018, Ericsson AB + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NAME_TABLE_H +#define _TIPC_NAME_TABLE_H + +struct tipc_subscription; +struct tipc_plist; +struct tipc_nlist; +struct tipc_group; + +/* + * TIPC name types reserved for internal TIPC use (both current and planned) + */ +#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1) +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ + +/** + * struct publication - info about a published (name or) name sequence + * @type: name sequence type + * @lower: name sequence lower bound + * @upper: name sequence upper bound + * @scope: scope of publication, TIPC_NODE_SCOPE or TIPC_CLUSTER_SCOPE + * @node: network address of publishing socket's node + * @port: publishing port + * @key: publication key, unique across the cluster + * @binding_node: all publications from the same node which bound this one + * - Remote publications: in node->publ_list + * Used by node/name distr to withdraw publications when node is lost + * - Local/node scope publications: in name_table->node_scope list + * - Local/cluster scope publications: in name_table->cluster_scope list + * @binding_sock: all publications from the same socket which bound this one + * Used by socket to withdraw publications when socket is unbound/released + * @local_publ: list of identical publications made from this node + * Used by closest_first and multicast receive lookup algorithms + * @all_publ: all publications 
/**
 * struct name_table - table containing all existing port name publications
 * @services: hash table of service lists, indexed by hash() of the service
 *       type
 * @node_scope: all local publications with node scope
 *       - used by name_distr during re-init of name table
 * @cluster_scope: all local publications with cluster scope
 *       - used by name_distr to send bulk updates to new nodes
 *       - used by name_distr during re-init of name table
 * @cluster_scope_lock: rwlock set up in tipc_nametbl_init(); presumably
 *       guards @cluster_scope - its users are outside this file, confirm
 * @local_publ_count: number of publications issued by this node
 */
struct name_table {
	struct hlist_head services[TIPC_NAMETBL_SIZE];
	struct list_head node_scope;
	struct list_head cluster_scope;
	rwlock_t cluster_scope_lock;
	u32 local_publ_count;
};
net *net, u32 type, + u32 lower, u32 upper, u32 scope, + u32 node, u32 ref, u32 key); +struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, + u32 lower, u32 upper, + u32 node, u32 key); +bool tipc_nametbl_subscribe(struct tipc_subscription *s); +void tipc_nametbl_unsubscribe(struct tipc_subscription *s); +int tipc_nametbl_init(struct net *net); +void tipc_nametbl_stop(struct net *net); + +struct tipc_dest { + struct list_head list; + u32 port; + u32 node; +}; + +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port); +bool tipc_dest_push(struct list_head *l, u32 node, u32 port); +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); +bool tipc_dest_del(struct list_head *l, u32 node, u32 port); +void tipc_dest_list_purge(struct list_head *l); +int tipc_dest_list_len(struct list_head *l); + +#endif diff --git a/net/tipc/net.c b/net/tipc/net.c new file mode 100644 index 000000000..2e2e938fe --- /dev/null +++ b/net/tipc/net.c @@ -0,0 +1,304 @@ +/* + * net/tipc/net.c: TIPC network routing code + * + * Copyright (c) 1995-2006, 2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "net.h" +#include "name_distr.h" +#include "subscr.h" +#include "socket.h" +#include "node.h" +#include "bcast.h" +#include "netlink.h" +#include "monitor.h" + +/* + * The TIPC locking policy is designed to ensure a very fine locking + * granularity, permitting complete parallel access to individual + * port and node/link instances. The code consists of four major + * locking domains, each protected with their own disjunct set of locks. + * + * 1: The bearer level. + * RTNL lock is used to serialize the process of configuring bearer + * on update side, and RCU lock is applied on read side to make + * bearer instance valid on both paths of message transmission and + * reception. + * + * 2: The node and link level. + * All node instances are saved into two tipc_node_list and node_htable + * lists. The two lists are protected by node_list_lock on write side, + * and they are guarded with RCU lock on read side. 
Especially node + * instance is destroyed only when TIPC module is removed, and we can + * confirm that there is no user who is accessing the node at the + * moment. Therefore, except for iterating the two lists within RCU + * protection, there is no need to hold RCU when we access node instance + * in other places. + * + * In addition, all members in node structure including link instances + * are protected by node spin lock. + * + * 3: The transport level of the protocol. + * This consists of the structures port, (and its user level + * representations, such as user_port and tipc_sock), reference and + * tipc_user (port.c, reg.c, socket.c). + * + * This layer has four different locks: + * - The tipc_port spin_lock. This is protecting each port instance + * from parallel data access and removal. Since we can not place + * this lock in the port itself, it has been placed in the + * corresponding reference table entry, which has the same life + * cycle as the module. This entry is difficult to access from + * outside the TIPC core, however, so a pointer to the lock has + * been added in the port instance, -to be used for unlocking + * only. + * - A read/write lock to protect the reference table itself (teg.c). + * (Nobody is using read-only access to this, so it can just as + * well be changed to a spin_lock) + * - A spin lock to protect the registry of kernel/driver users (reg.c) + * - A global spin_lock (tipc_port_lock), whose only task is to ensure + * consistency where more than one port is involved in an operation, + * i.e., when a port is part of a linked list of ports. + * There are two such lists; 'port_list', which is used for management, + * and 'wait_list', which is used to queue ports during congestion. + * + * 4: The name table (name_table.c, name_distr.c, subscription.c) + * - There is one big read/write-lock (tipc_nametbl_lock) protecting the + * overall name table structure. 
Nothing must be added/removed to + * this structure without holding write access to it. + * - There is one local spin_lock per sub_sequence, which can be seen + * as a sub-domain to the tipc_nametbl_lock domain. It is used only + * for translation operations, and is needed because a translation + * steps the root of the 'publication' linked list between each lookup. + * This is always used within the scope of a tipc_nametbl_lock(read). + * - A local spin_lock protecting the queue of subscriber events. +*/ + +struct tipc_net_work { + struct work_struct work; + struct net *net; + u32 addr; +}; + +static void tipc_net_finalize(struct net *net, u32 addr); + +int tipc_net_init(struct net *net, u8 *node_id, u32 addr) +{ + if (tipc_own_id(net)) { + pr_info("Cannot configure node identity twice\n"); + return -1; + } + pr_info("Started in network mode\n"); + + if (node_id) + tipc_set_node_id(net, node_id); + if (addr) + tipc_net_finalize(net, addr); + return 0; +} + +static void tipc_net_finalize(struct net *net, u32 addr) +{ + struct tipc_net *tn = tipc_net(net); + + if (cmpxchg(&tn->node_addr, 0, addr)) + return; + tipc_set_node_addr(net, addr); + tipc_named_reinit(net); + tipc_sk_reinit(net); + tipc_mon_reinit_self(net); + tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, + TIPC_CLUSTER_SCOPE, 0, addr); +} + +static void tipc_net_finalize_work(struct work_struct *work) +{ + struct tipc_net_work *fwork; + + fwork = container_of(work, struct tipc_net_work, work); + tipc_net_finalize(fwork->net, fwork->addr); + kfree(fwork); +} + +void tipc_sched_net_finalize(struct net *net, u32 addr) +{ + struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); + + if (!fwork) + return; + INIT_WORK(&fwork->work, tipc_net_finalize_work); + fwork->net = net; + fwork->addr = addr; + schedule_work(&fwork->work); +} + +void tipc_net_stop(struct net *net) +{ + if (!tipc_own_id(net)) + return; + + rtnl_lock(); + tipc_bearer_stop(net); + tipc_node_stop(net); + rtnl_unlock(); + + 
pr_info("Left network mode\n"); +} + +static int __tipc_nl_add_net(struct net *net, struct tipc_nl_msg *msg) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + u64 *w0 = (u64 *)&tn->node_id[0]; + u64 *w1 = (u64 *)&tn->node_id[8]; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_NET_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_NET); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_NET_ID, tn->net_id)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID, *w0, 0)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_NET_NODEID_W1, *w1, 0)) + goto attr_msg_full; + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int err; + int done = cb->args[0]; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + err = __tipc_nl_add_net(net, &msg); + if (err) + goto out; + + done = 1; +out: + cb->args[0] = done; + + return skb->len; +} + +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = tipc_net(net); + int err; + + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, + info->extack); + + if (err) + return err; + + /* Can't change net id once TIPC has joined a network */ + if (tipc_own_addr(net)) + return -EPERM; + + if (attrs[TIPC_NLA_NET_ID]) { + u32 val; + + val = nla_get_u32(attrs[TIPC_NLA_NET_ID]); + if (val < 1 
|| val > 9999) + return -EINVAL; + + tn->net_id = val; + } + + if (attrs[TIPC_NLA_NET_ADDR]) { + u32 addr; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + if (!addr) + return -EINVAL; + tn->legacy_addr_format = true; + tipc_net_init(net, NULL, addr); + } + + if (attrs[TIPC_NLA_NET_NODEID]) { + u8 node_id[NODE_ID_LEN]; + u64 *w0 = (u64 *)&node_id[0]; + u64 *w1 = (u64 *)&node_id[8]; + + if (!attrs[TIPC_NLA_NET_NODEID_W1]) + return -EINVAL; + *w0 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID]); + *w1 = nla_get_u64(attrs[TIPC_NLA_NET_NODEID_W1]); + tipc_net_init(net, node_id, 0); + } + return 0; +} + +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_net_set(skb, info); + rtnl_unlock(); + + return err; +} diff --git a/net/tipc/net.h b/net/tipc/net.h new file mode 100644 index 000000000..b7f2e364e --- /dev/null +++ b/net/tipc/net.h @@ -0,0 +1,51 @@ +/* + * net/tipc/net.h: Include file for TIPC network routing code + * + * Copyright (c) 1995-2006, 2014, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_NET_H +#define _TIPC_NET_H + +#include <net/genetlink.h> + +extern const struct nla_policy tipc_nl_net_policy[]; + +int tipc_net_init(struct net *net, u8 *node_id, u32 addr); +void tipc_sched_net_finalize(struct net *net, u32 addr); +void tipc_net_stop(struct net *net); +int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); + +#endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c new file mode 100644 index 000000000..9b36163d9 --- /dev/null +++ b/net/tipc/netlink.c @@ -0,0 +1,296 @@ +/* + * net/tipc/netlink.c: TIPC configuration handling + * + * Copyright (c) 2005-2006, 2014, Ericsson AB + * Copyright (c) 2005-2007, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "socket.h" +#include "name_table.h" +#include "bearer.h" +#include "link.h" +#include "node.h" +#include "net.h" +#include "udp_media.h" +#include <net/genetlink.h> + +static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { + [TIPC_NLA_UNSPEC] = { .type = NLA_UNSPEC, }, + [TIPC_NLA_BEARER] = { .type = NLA_NESTED, }, + [TIPC_NLA_SOCK] = { .type = NLA_NESTED, }, + [TIPC_NLA_PUBL] = { .type = NLA_NESTED, }, + [TIPC_NLA_LINK] = { .type = NLA_NESTED, }, + [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, + [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, + [TIPC_NLA_NET] = { .type = NLA_NESTED, }, + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }, + [TIPC_NLA_MON] = { .type = NLA_NESTED, }, +}; + +const struct nla_policy +tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { + [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } +}; + +const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = { + [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MON_REF] = { .type = NLA_U32 }, + [TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 }, +}; + +const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { + [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 }, + [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED }, + [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG } +}; + +const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = { + [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NET_ID] = { .type = NLA_U32 }, + [TIPC_NLA_NET_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_NET_NODEID] = { .type = NLA_U64 }, + [TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 }, +}; + +const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { + [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING, + .len = 
TIPC_MAX_LINK_NAME }, + [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG }, + [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED }, + [TIPC_NLA_LINK_RX] = { .type = NLA_U32 }, + [TIPC_NLA_LINK_TX] = { .type = NLA_U32 } +}; + +const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = { + [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 }, + [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG } +}; + +/* Properties valid for media, bearer and link */ +const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { + [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 } +}; + +const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = { + [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING, + .len = TIPC_MAX_BEARER_NAME }, + [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED }, + [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 } +}; + +const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = { + [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING }, + [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED } +}; + +const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = { + [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC}, + [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, + [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY, + .len = sizeof(struct sockaddr_storage)}, +}; + +/* Users of the legacy API (tipc-config) can't handle that we add operations, + * so we have a separate genl handling for the new API. 
+ */ +static const struct genl_ops tipc_genl_v2_ops[] = { + { + .cmd = TIPC_NL_BEARER_DISABLE, + .doit = tipc_nl_bearer_disable, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_ENABLE, + .doit = tipc_nl_bearer_enable, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_GET, + .doit = tipc_nl_bearer_get, + .dumpit = tipc_nl_bearer_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_ADD, + .doit = tipc_nl_bearer_add, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_BEARER_SET, + .doit = tipc_nl_bearer_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_SOCK_GET, + .start = tipc_dump_start, + .dumpit = tipc_nl_sk_dump, + .done = tipc_dump_done, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_PUBL_GET, + .dumpit = tipc_nl_publ_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_GET, + .doit = tipc_nl_node_get_link, + .dumpit = tipc_nl_node_dump_link, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_SET, + .doit = tipc_nl_node_set_link, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_LINK_RESET_STATS, + .doit = tipc_nl_node_reset_link_stats, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MEDIA_GET, + .doit = tipc_nl_media_get, + .dumpit = tipc_nl_media_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MEDIA_SET, + .doit = tipc_nl_media_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NODE_GET, + .dumpit = tipc_nl_node_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NET_GET, + .dumpit = tipc_nl_net_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NET_SET, + .doit = tipc_nl_net_set, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_NAME_TABLE_GET, + .dumpit = tipc_nl_name_table_dump, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_SET, + .doit = tipc_nl_node_set_monitor, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_GET, + .doit = tipc_nl_node_get_monitor, + .dumpit = tipc_nl_node_dump_monitor, + .policy = 
tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_PEER_GET, + .dumpit = tipc_nl_node_dump_monitor_peer, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_PEER_REMOVE, + .doit = tipc_nl_peer_rm, + .policy = tipc_nl_policy, + }, +#ifdef CONFIG_TIPC_MEDIA_UDP + { + .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .dumpit = tipc_udp_nl_dump_remoteip, + .policy = tipc_nl_policy, + }, +#endif +}; + +struct genl_family tipc_genl_family __ro_after_init = { + .name = TIPC_GENL_V2_NAME, + .version = TIPC_GENL_V2_VERSION, + .hdrsize = 0, + .maxattr = TIPC_NLA_MAX, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_v2_ops, + .n_ops = ARRAY_SIZE(tipc_genl_v2_ops), +}; + +int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) +{ + u32 maxattr = tipc_genl_family.maxattr; + + *attr = genl_family_attrbuf(&tipc_genl_family); + if (!*attr) + return -EOPNOTSUPP; + + return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy, + NULL); +} + +int __init tipc_netlink_start(void) +{ + int res; + + res = genl_register_family(&tipc_genl_family); + if (res) { + pr_err("Failed to register netlink interface\n"); + return res; + } + return 0; +} + +void tipc_netlink_stop(void) +{ + genl_unregister_family(&tipc_genl_family); +} diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h new file mode 100644 index 000000000..4ba0ad422 --- /dev/null +++ b/net/tipc/netlink.h @@ -0,0 +1,65 @@ +/* + * net/tipc/netlink.h: Include file for TIPC netlink code + * + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_NETLINK_H +#define _TIPC_NETLINK_H +#include <net/netlink.h> + +extern struct genl_family tipc_genl_family; +int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf); + +struct tipc_nl_msg { + struct sk_buff *skb; + u32 portid; + u32 seq; +}; + +extern const struct nla_policy tipc_nl_name_table_policy[]; +extern const struct nla_policy tipc_nl_sock_policy[]; +extern const struct nla_policy tipc_nl_net_policy[]; +extern const struct nla_policy tipc_nl_link_policy[]; +extern const struct nla_policy tipc_nl_node_policy[]; +extern const struct nla_policy tipc_nl_prop_policy[]; +extern const struct nla_policy tipc_nl_bearer_policy[]; +extern const struct nla_policy tipc_nl_media_policy[]; +extern const struct nla_policy tipc_nl_udp_policy[]; +extern const struct nla_policy tipc_nl_monitor_policy[]; + +int tipc_netlink_start(void); +int tipc_netlink_compat_start(void); +void tipc_netlink_stop(void); +void tipc_netlink_compat_stop(void); + +#endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c new file mode 100644 index 000000000..5086e27d3 --- /dev/null +++ b/net/tipc/netlink_compat.c @@ -0,0 +1,1359 @@ +/* + * Copyright (c) 2014, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "bearer.h" +#include "link.h" +#include "name_table.h" +#include "socket.h" +#include "node.h" +#include "net.h" +#include <net/genetlink.h> +#include <linux/tipc_config.h> + +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. 
+ */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "<truncated>\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_type; + int rep_size; + int req_type; + int req_size; + struct net *net; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*header)(struct tipc_nl_compat_msg *); + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) 
+{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static inline bool string_is_valid(char *s, int len) +{ + return memchr(s, '\0', len) ? 
true : false; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + if (__tipc_dump_start(&cb, msg->net)) { + kfree_skb(buf); + return -ENOMEM; + } + + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + struct nlattr **attrs; + + err = tipc_nlmsg_parse(nlmsg, &attrs); + if (err) + goto err_out; + + err = (*cmd->format)(msg, attrs); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + tipc_dump_done(&cb); + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. 
+ */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + struct nlmsghdr *nlh; + struct sk_buff *arg; + int err; + + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) + return -EINVAL; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + msg->rep = NULL; + return -ENOMEM; + } + + nlh = nlmsg_put(arg, 0, 0, tipc_genl_family.id, 0, NLM_F_MULTI); + if (!nlh) { + kfree_skb(arg); + kfree_skb(msg->rep); + msg->rep = NULL; + return -EMSGSIZE; + } + nlmsg_end(arg, nlh); + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + } + kfree_skb(arg); + + return err; +} + +static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *doit_buf; + struct sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1, + sizeof(struct nlattr *), + GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto attrbuf_out; + } + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + rtnl_lock(); + err = (*cmd->transcode)(cmd, trans_buf, msg); + if (err) + goto doit_out; + + err = nla_parse(attrbuf, 
tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); + if (err) + goto doit_out; + + doit_buf->sk = msg->dst_sk; + + err = (*cmd->doit)(doit_buf, &info); +doit_out: + rtnl_unlock(); + + kfree_skb(doit_buf); +attrbuf_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL, NULL); + if (err) + return err; + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + int len; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); + if (!string_is_valid(b->name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, 
ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + int len; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + 
nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + int err; + int len; + + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], + NULL, NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_PROP]) + return -EINVAL; + + err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL, NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_STATS]) + return -EINVAL; + + err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL, NULL); + if (err) + return err; + + name = (char *)TLV_DATA(msg->req); + + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); 
+ else + tipc_tlv_sprintf(msg->rep, " DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + 
nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + +static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct tipc_link_info link_info; + int err; + + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], + NULL, NULL); + if (err) + return err; + + link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST])); + link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); + nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], + TIPC_MAX_LINK_NAME); + + return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, + &link_info, sizeof(link_info)); +} + +static int __tipc_add_link_prop(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg, + struct tipc_link_config *lc) +{ + switch (msg->cmd) { + case TIPC_CMD_SET_LINK_PRI: + 
return nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_TOL: + return nla_put_u32(skb, TIPC_NLA_PROP_TOL, ntohl(lc->value)); + case TIPC_CMD_SET_LINK_WINDOW: + return nla_put_u32(skb, TIPC_NLA_PROP_WIN, ntohl(lc->value)); + } + + return -EINVAL; +} + +static int tipc_nl_compat_media_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *media; + struct tipc_link_config *lc; + int len; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + media = nla_nest_start(skb, TIPC_NLA_MEDIA); + if (!media) + return -EMSGSIZE; + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_MEDIA_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, media); + + return 0; +} + +static int tipc_nl_compat_bearer_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_link_config *lc; + int len; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, bearer); + + return 0; +} + +static int __tipc_nl_compat_link_set(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *link; + struct tipc_link_config *lc; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + link = 
nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name)) + return -EMSGSIZE; + + prop = nla_nest_start(skb, TIPC_NLA_LINK_PROP); + if (!prop) + return -EMSGSIZE; + + __tipc_add_link_prop(skb, msg, lc); + nla_nest_end(skb, prop); + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct tipc_link_config *lc; + struct tipc_bearer *bearer; + struct tipc_media *media; + int len; + + lc = (struct tipc_link_config *)TLV_DATA(msg->req); + + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + + media = tipc_media_find(lc->name); + if (media) { + cmd->doit = &__tipc_nl_media_set; + return tipc_nl_compat_media_set(skb, msg); + } + + bearer = tipc_bearer_find(msg->net, lc->name); + if (bearer) { + cmd->doit = &__tipc_nl_bearer_set; + return tipc_nl_compat_bearer_set(skb, msg); + } + + return __tipc_nl_compat_link_set(skb, msg); +} + +static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *link; + int len; + + name = (char *)TLV_DATA(msg->req); + + link = nla_nest_start(skb, TIPC_NLA_LINK); + if (!link) + return -EMSGSIZE; + + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, link); + + return 0; +} + +static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) +{ + int i; + u32 depth; + struct tipc_name_table_query *ntq; + static const char * const header[] = { + 
"Type ", + "Lower Upper ", + "Port Identity ", + "Publication Scope" + }; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + return -EINVAL; + + depth = ntohl(ntq->depth); + + if (depth > 4) + depth = 4; + for (i = 0; i < depth; i++) + tipc_tlv_sprintf(msg->rep, header[i]); + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char port_str[27]; + struct tipc_name_table_query *ntq; + struct nlattr *nt[TIPC_NLA_NAME_TABLE_MAX + 1]; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + u32 node, depth, type, lowbound, upbound; + static const char * const scope_str[] = {"", " zone", " cluster", + " node"}; + int err; + + if (!attrs[TIPC_NLA_NAME_TABLE]) + return -EINVAL; + + err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL, NULL); + if (err) + return err; + + if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL, NULL); + if (err) + return err; + + ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + + depth = ntohl(ntq->depth); + type = ntohl(ntq->type); + lowbound = ntohl(ntq->lowbound); + upbound = ntohl(ntq->upbound); + + if (!(depth & TIPC_NTQ_ALLTYPES) && + (type != nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]))) + return 0; + if (lowbound && (lowbound > nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]))) + return 0; + if (upbound && (upbound < nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]))) + return 0; + + tipc_tlv_sprintf(msg->rep, "%-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_TYPE])); + + if (depth == 1) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %-10u ", + nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]), + nla_get_u32(publ[TIPC_NLA_PUBL_UPPER])); + + if (depth == 2) + goto out; + + node = nla_get_u32(publ[TIPC_NLA_PUBL_NODE]); + sprintf(port_str, "<%u.%u.%u:%u>", 
tipc_zone(node), tipc_cluster(node), + tipc_node(node), nla_get_u32(publ[TIPC_NLA_PUBL_REF])); + tipc_tlv_sprintf(msg->rep, "%-26s ", port_str); + + if (depth == 3) + goto out; + + tipc_tlv_sprintf(msg->rep, "%-10u %s", + nla_get_u32(publ[TIPC_NLA_PUBL_KEY]), + scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); +out: + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + u32 type, lower, upper; + struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], + NULL, NULL); + if (err) + return err; + + type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); + lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); + upper = nla_get_u32(publ[TIPC_NLA_PUBL_UPPER]); + + if (lower == upper) + tipc_tlv_sprintf(msg->rep, " {%u,%u}", type, lower); + else + tipc_tlv_sprintf(msg->rep, " {%u,%u,%u}", type, lower, upper); + + return 0; +} + +static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) +{ + int err; + void *hdr; + struct nlattr *nest; + struct sk_buff *args; + struct tipc_nl_compat_cmd_dump dump; + + args = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!args) + return -ENOMEM; + + hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_PUBL_GET); + if (!hdr) { + kfree_skb(args); + return -EMSGSIZE; + } + + nest = nla_nest_start(args, TIPC_NLA_SOCK); + if (!nest) { + kfree_skb(args); + return -EMSGSIZE; + } + + if (nla_put_u32(args, TIPC_NLA_SOCK_REF, sock)) { + kfree_skb(args); + return -EMSGSIZE; + } + + nla_nest_end(args, nest); + genlmsg_end(args, hdr); + + dump.dumpit = tipc_nl_publ_dump; + dump.format = __tipc_nl_compat_publ_dump; + + err = __tipc_nl_compat_dumpit(&dump, msg, args); + + kfree_skb(args); + + return err; +} + +static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + int err; + 
u32 sock_ref; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], + NULL, NULL); + if (err) + return err; + + sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); + + if (sock[TIPC_NLA_SOCK_CON]) { + u32 node; + struct nlattr *con[TIPC_NLA_CON_MAX + 1]; + + err = nla_parse_nested(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], NULL, NULL); + + if (err) + return err; + + node = nla_get_u32(con[TIPC_NLA_CON_NODE]); + tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", + tipc_zone(node), + tipc_cluster(node), + tipc_node(node), + nla_get_u32(con[TIPC_NLA_CON_SOCK])); + + if (con[TIPC_NLA_CON_FLAG]) + tipc_tlv_sprintf(msg->rep, " via {%u,%u}\n", + nla_get_u32(con[TIPC_NLA_CON_TYPE]), + nla_get_u32(con[TIPC_NLA_CON_INST])); + else + tipc_tlv_sprintf(msg->rep, "\n"); + } else if (sock[TIPC_NLA_SOCK_HAS_PUBL]) { + tipc_tlv_sprintf(msg->rep, " bound to"); + + err = tipc_nl_compat_publ_dump(msg, sock_ref); + if (err) + return err; + } + tipc_tlv_sprintf(msg->rep, "\n"); + + return 0; +} + +static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_MEDIA]) + return -EINVAL; + + err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, + attrs[TIPC_NLA_MEDIA], NULL, NULL); + if (err) + return err; + + return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, + nla_data(media[TIPC_NLA_MEDIA_NAME]), + nla_len(media[TIPC_NLA_MEDIA_NAME])); +} + +static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct tipc_node_info node_info; + struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_NODE]) + return -EINVAL; + + err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], + NULL, NULL); + if (err) + return err; + + node_info.addr = 
htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); + node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_NODE_INFO, &node_info, + sizeof(node_info)); +} + +static int tipc_nl_compat_net_set(struct tipc_nl_compat_cmd_doit *cmd, + struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + u32 val; + struct nlattr *net; + + val = ntohl(*(__be32 *)TLV_DATA(msg->req)); + + net = nla_nest_start(skb, TIPC_NLA_NET); + if (!net) + return -EMSGSIZE; + + if (msg->cmd == TIPC_CMD_SET_NODE_ADDR) { + if (nla_put_u32(skb, TIPC_NLA_NET_ADDR, val)) + return -EMSGSIZE; + } else if (msg->cmd == TIPC_CMD_SET_NETID) { + if (nla_put_u32(skb, TIPC_NLA_NET_ID, val)) + return -EMSGSIZE; + } + nla_nest_end(skb, net); + + return 0; +} + +static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + __be32 id; + struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], + NULL, NULL); + if (err) + return err; + + id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); + + return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); +} + +static int tipc_cmd_show_stats_compat(struct tipc_nl_compat_msg *msg) +{ + msg->rep = tipc_tlv_alloc(ULTRA_STRING_MAX_LEN); + if (!msg->rep) + return -ENOMEM; + + tipc_tlv_init(msg->rep, TIPC_TLV_ULTRA_STRING); + tipc_tlv_sprintf(msg->rep, "TIPC version " TIPC_MOD_VER "\n"); + + return 0; +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; + + memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); + + switch (msg->cmd) { + case TIPC_CMD_NOOP: + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + return 0; + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + 
dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = __tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = __tipc_nl_bearer_disable; + doit.transcode = tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_node_dump_link; + dump.format = tipc_nl_compat_link_stat_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_LINKS: + msg->req_type = TIPC_TLV_NET_ADDR; + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump_link; + dump.format = tipc_nl_compat_link_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_LINK_TOL: + case TIPC_CMD_SET_LINK_PRI: + case TIPC_CMD_SET_LINK_WINDOW: + msg->req_type = TIPC_TLV_LINK_CONFIG; + doit.doit = tipc_nl_node_set_link; + doit.transcode = tipc_nl_compat_link_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_RESET_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + doit.doit = tipc_nl_node_reset_link_stats; + doit.transcode = tipc_nl_compat_link_reset_stats; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_NAME_TABLE: + msg->req_type = TIPC_TLV_NAME_TBL_QUERY; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.header = tipc_nl_compat_name_table_dump_header; + dump.dumpit = tipc_nl_name_table_dump; + dump.format = tipc_nl_compat_name_table_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_PORTS: + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + dump.dumpit = tipc_nl_sk_dump; + dump.format = tipc_nl_compat_sk_dump; 
+ return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_MEDIA_NAMES: + msg->rep_size = MAX_MEDIA * TLV_SPACE(TIPC_MAX_MEDIA_NAME); + dump.dumpit = tipc_nl_media_dump; + dump.format = tipc_nl_compat_media_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_GET_NODES: + msg->rep_size = ULTRA_STRING_MAX_LEN; + dump.dumpit = tipc_nl_node_dump; + dump.format = tipc_nl_compat_node_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SET_NODE_ADDR: + msg->req_type = TIPC_TLV_NET_ADDR; + doit.doit = __tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SET_NETID: + msg->req_type = TIPC_TLV_UNSIGNED; + doit.doit = __tipc_nl_net_set; + doit.transcode = tipc_nl_compat_net_set; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_GET_NETID: + msg->rep_size = sizeof(u32); + dump.dumpit = tipc_nl_net_dump; + dump.format = tipc_nl_compat_net_dump; + return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_SHOW_STATS: + return tipc_cmd_show_stats_compat(msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int len; + struct tipc_nl_compat_msg msg; + struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.net = genl_info_net(info); + msg.dst_sk = skb->sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = 
tipc_nl_compat_handle(&msg); + if ((err == -EOPNOTSUPP) || (err == -EPERM)) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(msg.net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + +static const struct genl_ops tipc_genl_compat_ops[] = { + { + .cmd = TIPC_GENL_CMD, + .doit = tipc_nl_compat_recv, + }, +}; + +static struct genl_family tipc_genl_compat_family __ro_after_init = { + .name = TIPC_GENL_NAME, + .version = TIPC_GENL_VERSION, + .hdrsize = TIPC_GENL_HDRLEN, + .maxattr = 0, + .netnsok = true, + .module = THIS_MODULE, + .ops = tipc_genl_compat_ops, + .n_ops = ARRAY_SIZE(tipc_genl_compat_ops), +}; + +int __init tipc_netlink_compat_start(void) +{ + int res; + + res = genl_register_family(&tipc_genl_compat_family); + if (res) { + pr_err("Failed to register legacy compat interface\n"); + return res; + } + + return 0; +} + +void tipc_netlink_compat_stop(void) +{ + genl_unregister_family(&tipc_genl_compat_family); +} diff --git a/net/tipc/node.c b/net/tipc/node.c new file mode 100644 index 000000000..a188c2590 --- /dev/null +++ b/net/tipc/node.c @@ -0,0 +1,2435 @@ +/* + * net/tipc/node.c: TIPC node management routines + * + * Copyright (c) 2000-2006, 2012-2016, Ericsson AB + * Copyright (c) 2005-2006, 2010-2014, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "core.h" +#include "link.h" +#include "node.h" +#include "name_distr.h" +#include "socket.h" +#include "bcast.h" +#include "monitor.h" +#include "discover.h" +#include "netlink.h" + +#define INVALID_NODE_SIG 0x10000 +#define NODE_CLEANUP_AFTER 300000 + +/* Flags used to take different actions according to flag type + * TIPC_NOTIFY_NODE_DOWN: notify node is down + * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type + */ +enum { + TIPC_NOTIFY_NODE_DOWN = (1 << 3), + TIPC_NOTIFY_NODE_UP = (1 << 4), + TIPC_NOTIFY_LINK_UP = (1 << 6), + TIPC_NOTIFY_LINK_DOWN = (1 << 7) +}; + +struct tipc_link_entry { + struct tipc_link *link; + spinlock_t lock; /* per link */ + u32 mtu; + struct sk_buff_head inputq; + struct tipc_media_addr maddr; +}; + +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + +/** + * struct tipc_node - TIPC node structure + * @addr: network address of node + * @ref: reference counter to node object + * @lock: rwlock governing access to structure + * @net: the applicable net namespace + * @hash: links to adjacent nodes in unsorted hash chain + * @inputq: pointer to input queue containing messages for msg event + * @namedq: pointer to name table input queue with name table messages + * @active_links: bearer ids of active links, used as index into links[] array + * @links: array containing references to all links to node + * @action_flags: bit mask of different types of node actions + * @state: connectivity state vs peer node + * @sync_point: sequence number where synch/failover is finished + * @list: links to adjacent nodes in sorted list of cluster's nodes + * @working_links: number of working links to node (both active and standby) + * @link_cnt: number of links to node + * @capabilities: bitmap, indicating peer node's functional capabilities + * @signature: 
node instance identifier + * @link_id: local and remote bearer ids of changing link, if any + * @publ_list: list of publications + * @rcu: rcu struct for tipc_node + * @delete_at: indicates the time for deleting a down node + */ +struct tipc_node { + u32 addr; + struct kref kref; + rwlock_t lock; + struct net *net; + struct hlist_node hash; + int active_links[2]; + struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; + int action_flags; + struct list_head list; + int state; + bool failover_sent; + u16 sync_point; + int link_cnt; + u16 working_links; + u16 capabilities; + u32 signature; + u32 link_id; + u8 peer_id[16]; + struct list_head publ_list; + struct list_head conn_sks; + unsigned long keepalive_intv; + struct timer_list timer; + struct rcu_head rcu; + unsigned long delete_at; +}; + +/* Node FSM states and events: + */ +enum { + SELF_DOWN_PEER_DOWN = 0xdd, + SELF_UP_PEER_UP = 0xaa, + SELF_DOWN_PEER_LEAVING = 0xd1, + SELF_UP_PEER_COMING = 0xac, + SELF_COMING_PEER_UP = 0xca, + SELF_LEAVING_PEER_DOWN = 0x1d, + NODE_FAILINGOVER = 0xf0, + NODE_SYNCHING = 0xcc +}; + +enum { + SELF_ESTABL_CONTACT_EVT = 0xece, + SELF_LOST_CONTACT_EVT = 0x1ce, + PEER_ESTABL_CONTACT_EVT = 0x9ece, + PEER_LOST_CONTACT_EVT = 0x91ce, + NODE_FAILOVER_BEGIN_EVT = 0xfbe, + NODE_FAILOVER_END_EVT = 0xfee, + NODE_SYNCH_BEGIN_EVT = 0xcbe, + NODE_SYNCH_END_EVT = 0xcee +}; + +static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, + struct sk_buff_head *xmitq, + struct tipc_media_addr **maddr); +static void tipc_node_link_down(struct tipc_node *n, int bearer_id, + bool delete); +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); +static void tipc_node_delete(struct tipc_node *node); +static void tipc_node_timeout(struct timer_list *t); +static void tipc_node_fsm_evt(struct tipc_node *n, int evt); +static struct tipc_node *tipc_node_find(struct net *net, u32 addr); +static struct tipc_node *tipc_node_find_by_id(struct net *net, 
u8 *id);
+static void tipc_node_put(struct tipc_node *node);
+static bool node_is_up(struct tipc_node *n);
+static void tipc_node_delete_from_list(struct tipc_node *node);
+
+/* One connection between a local socket and a socket on a peer node;
+ * entries are kept on tipc_node::conn_sks.
+ */
+struct tipc_sock_conn {
+	u32 port;
+	u32 peer_port;
+	u32 peer_node;
+	struct list_head list;
+};
+
+/* Return the active link in the slot chosen by the low bit of @sel,
+ * or NULL when that slot holds no valid bearer id.
+ */
+static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
+{
+	int id = n->active_links[sel & 1];
+
+	if (likely(id != INVALID_BEARER_ID))
+		return n->links[id].link;
+
+	return NULL;
+}
+
+/* Return the MTU recorded for the active link selected by @sel towards
+ * node @addr. Falls back to MAX_MSG_SIZE when the node is unknown or the
+ * slot holds no valid bearer id.
+ */
+int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
+{
+	unsigned int mtu = MAX_MSG_SIZE;
+	struct tipc_node *peer;
+	int id;
+
+	peer = tipc_node_find(net, addr);
+	if (likely(peer)) {
+		id = peer->active_links[sel & 1];
+		if (likely(id != INVALID_BEARER_ID))
+			mtu = peer->links[id].mtu;
+		tipc_node_put(peer);
+	}
+	return mtu;
+}
+
+/* Copy the node id belonging to address @addr into @id.
+ * Returns false only when @addr is neither the own address nor a known
+ * peer. Note that true is also returned, with @id left untouched, when
+ * tipc_own_id() yields NULL.
+ */
+bool tipc_node_get_id(struct net *net, u32 addr, u8 *id)
+{
+	u8 *own_id = tipc_own_id(net);
+	struct tipc_node *peer;
+
+	if (!own_id)
+		return true;
+
+	if (addr != tipc_own_addr(net)) {
+		peer = tipc_node_find(net, addr);
+		if (!peer)
+			return false;
+
+		memcpy(id, &peer->peer_id, TIPC_NODEID_LEN);
+		tipc_node_put(peer);
+	} else {
+		memcpy(id, own_id, TIPC_NODEID_LEN);
+	}
+	return true;
+}
+
+/* Return the capabilities recorded for node @addr, or
+ * TIPC_NODE_CAPABILITIES when the node is unknown.
+ */
+u16 tipc_node_get_capabilities(struct net *net, u32 addr)
+{
+	u16 caps = TIPC_NODE_CAPABILITIES;
+	struct tipc_node *peer;
+
+	peer = tipc_node_find(net, addr);
+	if (likely(peer)) {
+		caps = peer->capabilities;
+		tipc_node_put(peer);
+	}
+	return caps;
+}
+
+/* kref release callback: free the broadcast receive link at once and the
+ * node itself through kfree_rcu().
+ */
+static void tipc_node_kref_release(struct kref *kref)
+{
+	struct tipc_node *node = container_of(kref, struct tipc_node, kref);
+
+	kfree(node->bc_entry.link);
+	kfree_rcu(node, rcu);
+}
+
+/* Drop one reference; the last one triggers tipc_node_kref_release() */
+static void tipc_node_put(struct tipc_node *node)
+{
+	kref_put(&node->kref, tipc_node_kref_release);
+}
+
+/* Take one additional reference on @node */
+static void tipc_node_get(struct tipc_node *node)
+{
+	kref_get(&node->kref);
+}
+
+/*
+ * tipc_node_find - locate specified node object, if it exists
+ */
+static struct tipc_node
*tipc_node_find(struct net *net, u32 addr)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_node *node;
+	unsigned int thash = tipc_hashfn(addr);
+
+	/* Walk the hash bucket for @addr under RCU. On an address match a
+	 * reference is taken for the caller, who must release it with
+	 * tipc_node_put(); a node whose refcount already dropped to zero is
+	 * reported as not found.
+	 * NOTE(review): the no-match case relies on the iterator leaving
+	 * 'node' NULL once the bucket is exhausted - confirm against
+	 * hlist_for_each_entry_rcu().
+	 */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(node, &tn->node_htable[thash], hash) {
+		if (node->addr != addr)
+			continue;
+		if (!kref_get_unless_zero(&node->kref))
+			node = NULL;
+		break;
+	}
+	rcu_read_unlock();
+	return node;
+}
+
+/* tipc_node_find_by_id - locate specified node object by its 128-bit id
+ * Note: this function is called only when a discovery request failed
+ * to find the node by its 32-bit id, and is not time critical
+ *
+ * Scans the whole node list linearly. On success a reference has been
+ * taken on the returned node; the caller releases it with tipc_node_put().
+ */
+static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_node *n;
+	bool found = false;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(n, &tn->node_list, list) {
+		/* peer_id is compared under the node's read lock */
+		read_lock_bh(&n->lock);
+		if (!memcmp(id, n->peer_id, 16) &&
+		    kref_get_unless_zero(&n->kref))
+			found = true;
+		read_unlock_bh(&n->lock);
+		if (found)
+			break;
+	}
+	rcu_read_unlock();
+	return found ?
n : NULL;
+}
+
+/* Thin wrappers around the node rwlock, always in the _bh flavour */
+static void tipc_node_read_lock(struct tipc_node *n)
+{
+	read_lock_bh(&n->lock);
+}
+
+static void tipc_node_read_unlock(struct tipc_node *n)
+{
+	read_unlock_bh(&n->lock);
+}
+
+static void tipc_node_write_lock(struct tipc_node *n)
+{
+	write_lock_bh(&n->lock);
+}
+
+/* Release the write lock without looking at the pending action flags */
+static void tipc_node_write_unlock_fast(struct tipc_node *n)
+{
+	write_unlock_bh(&n->lock);
+}
+
+/* Release the write lock and then, outside the lock, carry out the
+ * notifications requested through n->action_flags while it was held.
+ */
+static void tipc_node_write_unlock(struct tipc_node *n)
+{
+	struct list_head *publ_list;
+	u32 flags = n->action_flags;
+	struct net *net = n->net;
+	u32 bearer_id;
+	u32 link_id;
+	u32 addr;
+
+	/* Common case: nothing was requested */
+	if (likely(!flags)) {
+		write_unlock_bh(&n->lock);
+		return;
+	}
+
+	/* Snapshot everything the notifications need while still locked */
+	addr = n->addr;
+	link_id = n->link_id;
+	bearer_id = link_id & 0xffff;
+	publ_list = &n->publ_list;
+
+	n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
+			     TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP);
+
+	write_unlock_bh(&n->lock);
+
+	if (flags & TIPC_NOTIFY_NODE_DOWN)
+		tipc_publ_notify(net, publ_list, addr);
+
+	if (flags & TIPC_NOTIFY_NODE_UP)
+		tipc_named_node_up(net, addr);
+
+	if (flags & TIPC_NOTIFY_LINK_UP) {
+		tipc_mon_peer_up(net, addr, bearer_id);
+		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
+				     TIPC_NODE_SCOPE, link_id, link_id);
+	}
+
+	if (flags & TIPC_NOTIFY_LINK_DOWN) {
+		tipc_mon_peer_down(net, addr, bearer_id);
+		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
+				      addr, link_id);
+	}
+}
+
+static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
+					  u8 *peer_id, u16 capabilities)
+{
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_node *n, *temp_node;
+	struct tipc_link *l;
+	int bearer_id;
+	int i;
+
+	spin_lock_bh(&tn->node_list_lock);
+	n = tipc_node_find(net, addr);
+	if (n) {
+		if (n->capabilities == capabilities)
+			goto exit;
+		/* Same node may come back with new capabilities */
+		write_lock_bh(&n->lock);
+		n->capabilities = capabilities;
+		for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+			l = n->links[bearer_id].link;
+			if
(l)
+				tipc_link_update_caps(l, capabilities);
+		}
+		write_unlock_bh(&n->lock);
+		goto exit;
+	}
+	/* Unknown address: build a new node (atomic context, lock is held) */
+	n = kzalloc(sizeof(*n), GFP_ATOMIC);
+	if (!n) {
+		pr_warn("Node creation failed, no memory\n");
+		goto exit;
+	}
+	n->addr = addr;
+	memcpy(&n->peer_id, peer_id, 16);
+	n->net = net;
+	n->capabilities = capabilities;
+	kref_init(&n->kref);
+	rwlock_init(&n->lock);
+	INIT_HLIST_NODE(&n->hash);
+	INIT_LIST_HEAD(&n->list);
+	INIT_LIST_HEAD(&n->publ_list);
+	INIT_LIST_HEAD(&n->conn_sks);
+	skb_queue_head_init(&n->bc_entry.namedq);
+	skb_queue_head_init(&n->bc_entry.inputq1);
+	__skb_queue_head_init(&n->bc_entry.arrvq);
+	skb_queue_head_init(&n->bc_entry.inputq2);
+	for (i = 0; i < MAX_BEARERS; i++)
+		spin_lock_init(&n->links[i].lock);
+	/* A new node starts without contact and without any active link */
+	n->state = SELF_DOWN_PEER_LEAVING;
+	n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
+	n->signature = INVALID_NODE_SIG;
+	n->active_links[0] = INVALID_BEARER_ID;
+	n->active_links[1] = INVALID_BEARER_ID;
+	if (!tipc_link_bc_create(net, tipc_own_addr(net),
+				 addr, U16_MAX,
+				 tipc_link_window(tipc_bc_sndlink(net)),
+				 n->capabilities,
+				 &n->bc_entry.inputq1,
+				 &n->bc_entry.namedq,
+				 tipc_bc_sndlink(net),
+				 &n->bc_entry.link)) {
+		pr_warn("Broadcast rcv link creation failed, no memory\n");
+		/* Not yet visible to anyone, so a plain kfree() is enough */
+		kfree(n);
+		n = NULL;
+		goto exit;
+	}
+	/* Second reference on top of the one from kref_init().
+	 * NOTE(review): presumably one is for list membership and one for
+	 * the caller - confirm.
+	 */
+	tipc_node_get(n);
+	timer_setup(&n->timer, tipc_node_timeout, 0);
+	n->keepalive_intv = U32_MAX;
+	hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
+	/* Keep node_list sorted by ascending address: insert in front of the
+	 * first node with a larger address, or at the tail if there is none.
+	 */
+	list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
+		if (n->addr < temp_node->addr)
+			break;
+	}
+	list_add_tail_rcu(&n->list, &temp_node->list);
+exit:
+	spin_unlock_bh(&tn->node_list_lock);
+	return n;
+}
+
+static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
+{
+	unsigned long tol = tipc_link_tolerance(l);
+	unsigned long intv = ((tol / 4) > 500) ?
500 : tol / 4;
+
+	/* Link with lowest tolerance determines timer interval */
+	if (intv < n->keepalive_intv)
+		n->keepalive_intv = intv;
+
+	/* Ensure link's abort limit corresponds to current tolerance */
+	tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
+}
+
+/* Unlink @node from the node list and the hash table, then drop one
+ * reference.
+ */
+static void tipc_node_delete_from_list(struct tipc_node *node)
+{
+	list_del_rcu(&node->list);
+	hlist_del_rcu(&node->hash);
+	tipc_node_put(node);
+}
+
+/* Unlink @node, stop its timer synchronously and drop a second reference */
+static void tipc_node_delete(struct tipc_node *node)
+{
+	tipc_node_delete_from_list(node);
+
+	del_timer_sync(&node->timer);
+	tipc_node_put(node);
+}
+
+/* Delete every node of @net while holding the node list lock */
+void tipc_node_stop(struct net *net)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_node *node, *next;
+
+	spin_lock_bh(&tn->node_list_lock);
+	list_for_each_entry_safe(node, next, &tn->node_list, list)
+		tipc_node_delete(node);
+	spin_unlock_bh(&tn->node_list_lock);
+}
+
+/* Append @subscr to the publication list of peer node @addr.
+ * Nothing is done for the own node; an unknown node is only logged.
+ */
+void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr)
+{
+	struct tipc_node *peer;
+
+	if (in_own_node(net, addr))
+		return;
+
+	peer = tipc_node_find(net, addr);
+	if (peer) {
+		tipc_node_write_lock(peer);
+		list_add_tail(subscr, &peer->publ_list);
+		tipc_node_write_unlock_fast(peer);
+		tipc_node_put(peer);
+		return;
+	}
+	pr_warn("Node subscribe rejected, unknown node 0x%x\n", addr);
+}
+
+/* Unlink @subscr from the publication list of peer node @addr.
+ * Nothing is done for the own node; an unknown node is only logged.
+ */
+void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr)
+{
+	struct tipc_node *peer;
+
+	if (in_own_node(net, addr))
+		return;
+
+	peer = tipc_node_find(net, addr);
+	if (peer) {
+		tipc_node_write_lock(peer);
+		list_del_init(subscr);
+		tipc_node_write_unlock_fast(peer);
+		tipc_node_put(peer);
+		return;
+	}
+	pr_warn("Node unsubscribe rejected, unknown node 0x%x\n", addr);
+}
+
+int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port)
+{
+	struct tipc_node *node;
+	struct tipc_sock_conn *conn;
+	int err = 0;
+
+	if (in_own_node(net, dnode))
+		return 0;
+
+	node = tipc_node_find(net, dnode);
+	if (!node) {
+		pr_warn("Connecting sock to node 0x%x failed\n", dnode);
+		return -EHOSTUNREACH;
+	}
+	conn =
kmalloc(sizeof(*conn), GFP_ATOMIC);
+	if (!conn) {
+		/* Allocation failure is reported as an unreachable host */
+		err = -EHOSTUNREACH;
+		goto exit;
+	}
+	conn->peer_node = dnode;
+	conn->port = port;
+	conn->peer_port = peer_port;
+
+	tipc_node_write_lock(node);
+	list_add_tail(&conn->list, &node->conn_sks);
+	tipc_node_write_unlock(node);
+exit:
+	tipc_node_put(node);
+	return err;
+}
+
+/* Remove and free every connection record of local port @port that is
+ * registered on peer node @dnode. Own node and unknown nodes are ignored.
+ */
+void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
+{
+	struct tipc_node *node;
+	struct tipc_sock_conn *conn, *safe;
+
+	if (in_own_node(net, dnode))
+		return;
+
+	node = tipc_node_find(net, dnode);
+	if (!node)
+		return;
+
+	tipc_node_write_lock(node);
+	list_for_each_entry_safe(conn, safe, &node->conn_sks, list) {
+		if (port != conn->port)
+			continue;
+		list_del(&conn->list);
+		kfree(conn);
+	}
+	tipc_node_write_unlock(node);
+	tipc_node_put(node);
+}
+
+/* Free every link object still attached to @node and adjust link_cnt */
+static void tipc_node_clear_links(struct tipc_node *node)
+{
+	int i;
+
+	for (i = 0; i < MAX_BEARERS; i++) {
+		struct tipc_link_entry *le = &node->links[i];
+
+		if (le->link) {
+			kfree(le->link);
+			le->link = NULL;
+			node->link_cnt--;
+		}
+	}
+}
+
+/* tipc_node_cleanup - delete a node that has not had
+ * active links for NODE_CLEANUP_AFTER time
+ *
+ * Returns true if the node was removed from the node list.
+ */
+static bool tipc_node_cleanup(struct tipc_node *peer)
+{
+	struct tipc_net *tn = tipc_net(peer->net);
+	bool deleted = false;
+
+	/* If lock held by tipc_node_stop() the node will be deleted anyway */
+	if (!spin_trylock_bh(&tn->node_list_lock))
+		return false;
+
+	tipc_node_write_lock(peer);
+
+	/* Re-check under the node lock that it is still down and expired */
+	if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
+		tipc_node_clear_links(peer);
+		tipc_node_delete_from_list(peer);
+		deleted = true;
+	}
+	tipc_node_write_unlock(peer);
+	spin_unlock_bh(&tn->node_list_lock);
+	return deleted;
+}
+
+/* tipc_node_timeout - handle expiration of node timer
+ */
+static void tipc_node_timeout(struct timer_list *t)
+{
+	struct tipc_node *n = from_timer(n, t, timer);
+	struct tipc_link_entry *le;
+	struct sk_buff_head xmitq;
+	int remains = n->link_cnt;
+	int bearer_id;
+	int rc = 0;
+
+	
if (!node_is_up(n) && tipc_node_cleanup(n)) {
+		/* Node was deleted: drop the reference held for the timer */
+		tipc_node_put(n);
+		return;
+	}
+
+	__skb_queue_head_init(&xmitq);
+
+	/* Start from a large interval (10 seconds); it is then lowered to
+	 * match the link with the lowest tolerance.
+	 * NOTE(review): the field is written while only the read lock is
+	 * held - confirm this is intended.
+	 */
+	tipc_node_read_lock(n);
+	n->keepalive_intv = 10000;
+	tipc_node_read_unlock(n);
+	/* Visit bearers until all link_cnt links have been handled */
+	for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
+		tipc_node_read_lock(n);
+		le = &n->links[bearer_id];
+		if (le->link) {
+			spin_lock_bh(&le->lock);
+			/* Link tolerance may change asynchronously: */
+			tipc_node_calculate_timer(n, le->link);
+			rc = tipc_link_timeout(le->link, &xmitq);
+			spin_unlock_bh(&le->lock);
+			remains--;
+		}
+		tipc_node_read_unlock(n);
+		tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
+		/* rc is not cleared per iteration; for a bearer without a
+		 * link tipc_node_link_down() returns immediately.
+		 */
+		if (rc & TIPC_LINK_DOWN_EVT)
+			tipc_node_link_down(n, bearer_id, false);
+	}
+	/* Re-arm with the interval computed above (milliseconds) */
+	mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv));
+}
+
+/**
+ * __tipc_node_link_up - handle addition of link
+ * Node lock must be held by caller
+ * Link becomes active (alone or shared) or standby, depending on its priority.
+ */
+static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
+				struct sk_buff_head *xmitq)
+{
+	int *slot0 = &n->active_links[0];
+	int *slot1 = &n->active_links[1];
+	struct tipc_link *ol = node_active_link(n, 0);
+	struct tipc_link *nl = n->links[bearer_id].link;
+
+	/* Nothing to do without a link, or if it is already up */
+	if (!nl || tipc_link_is_up(nl))
+		return;
+
+	/* The link FSM decides whether the link really comes up now */
+	tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT);
+	if (!tipc_link_is_up(nl))
+		return;
+
+	/* Bookkeeping; the notification is sent on write unlock */
+	n->working_links++;
+	n->action_flags |= TIPC_NOTIFY_LINK_UP;
+	n->link_id = tipc_link_id(nl);
+
+	/* Leave room for tunnel header when returning 'mtu' to users: */
+	n->links[bearer_id].mtu = tipc_link_mtu(nl) - INT_H_SIZE;
+
+	tipc_bearer_add_dest(n->net, bearer_id, n->addr);
+	tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id);
+
+	pr_debug("Established link <%s> on network plane %c\n",
+		 tipc_link_name(nl), tipc_link_plane(nl));
+
+	/* Ensure that a STATE message goes first */
+	tipc_link_build_state_msg(nl, xmitq);
+
+	/* First link? => give it both slots */
+	if (!ol) {
+		*slot0 = bearer_id;
+		*slot1 = bearer_id;
+		tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT);
+		n->failover_sent = false;
+		n->action_flags |= TIPC_NOTIFY_NODE_UP;
+		tipc_link_set_active(nl, true);
+		tipc_bcast_add_peer(n->net, nl, xmitq);
+		return;
+	}
+
+	/* Second link => redistribute slots:
+	 * higher priority takes both slots, equal priority shares them,
+	 * lower priority leaves the slots untouched (standby).
+	 */
+	if (tipc_link_prio(nl) > tipc_link_prio(ol)) {
+		pr_debug("Old link <%s> becomes standby\n", tipc_link_name(ol));
+		*slot0 = bearer_id;
+		*slot1 = bearer_id;
+		tipc_link_set_active(nl, true);
+		tipc_link_set_active(ol, false);
+	} else if (tipc_link_prio(nl) == tipc_link_prio(ol)) {
+		tipc_link_set_active(nl, true);
+		*slot1 = bearer_id;
+	} else {
+		pr_debug("New link <%s> is standby\n", tipc_link_name(nl));
+	}
+
+	/* Prepare synchronization with first link */
+	tipc_link_tnl_prepare(ol, nl, SYNCH_MSG, xmitq);
+}
+
+/**
+ * tipc_node_link_up - handle addition of link
+ *
+ * Link becomes active (alone or shared) or standby, depending on its priority.
+ */
+static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
+			      struct sk_buff_head *xmitq)
+{
+	struct tipc_media_addr *maddr;
+
+	/* Locked wrapper: bring the link up, then send what was queued */
+	tipc_node_write_lock(n);
+	__tipc_node_link_up(n, bearer_id, xmitq);
+	maddr = &n->links[bearer_id].maddr;
+	tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr);
+	tipc_node_write_unlock(n);
+}
+
+/**
+ * __tipc_node_link_down - handle loss of link
+ *
+ * On return *bearer_id and *maddr identify where the messages queued on
+ * @xmitq are to be sent; they are left untouched on the early return.
+ */
+static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
+				  struct sk_buff_head *xmitq,
+				  struct tipc_media_addr **maddr)
+{
+	struct tipc_link_entry *le = &n->links[*bearer_id];
+	int *slot0 = &n->active_links[0];
+	int *slot1 = &n->active_links[1];
+	int i, highest = 0, prio;
+	struct tipc_link *l, *_l, *tnl;
+
+	l = n->links[*bearer_id].link;
+	if (!l || tipc_link_is_reset(l))
+		return;
+
+	n->working_links--;
+	n->action_flags |= TIPC_NOTIFY_LINK_DOWN;
+	n->link_id = tipc_link_id(l);
+
+	tipc_bearer_remove_dest(n->net, *bearer_id, n->addr);
+
+	pr_debug("Lost link <%s> on network plane %c\n",
+		 tipc_link_name(l), tipc_link_plane(l));
+
+	/* Select new active link if any available.
+	 * NOTE(review): 'highest' starts at 0, so a single remaining link
+	 * with priority 0 only sets slot1 and leaves slot0 invalid, which
+	 * makes node_is_up() report false - confirm priority 0 cannot occur.
+	 */
+	*slot0 = INVALID_BEARER_ID;
+	*slot1 = INVALID_BEARER_ID;
+	for (i = 0; i < MAX_BEARERS; i++) {
+		_l = n->links[i].link;
+		if (!_l || !tipc_link_is_up(_l))
+			continue;
+		if (_l == l)
+			continue;
+		prio = tipc_link_prio(_l);
+		if (prio < highest)
+			continue;
+		if (prio > highest) {
+			highest = prio;
+			*slot0 = i;
+			*slot1 = i;
+			continue;
+		}
+		*slot1 = i;
+	}
+
+	/* That was the last working link: contact with the peer is lost */
+	if (!node_is_up(n)) {
+		if (tipc_link_peer_is_down(l))
+			tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+		tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
+		tipc_link_fsm_evt(l, LINK_RESET_EVT);
+		tipc_link_reset(l);
+		tipc_link_build_reset_msg(l, xmitq);
+		*maddr = &n->links[*bearer_id].maddr;
+		node_lost_contact(n, &le->inputq);
+		tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
+		return;
+	}
+	tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id);
+
+	/* There is still a working link => initiate failover */
+	*bearer_id =
n->active_links[0];
+	tnl = n->links[*bearer_id].link;
+	tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT);
+	tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+	n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
+	tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
+	tipc_link_reset(l);
+	tipc_link_fsm_evt(l, LINK_RESET_EVT);
+	tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+	tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
+	*maddr = &n->links[*bearer_id].maddr;
+}
+
+/* Take the link on @bearer_id down under the node write lock and, when
+ * @delete is set, free the link object as well. Messages produced while
+ * locked are transmitted, and the entry's input queue is delivered,
+ * after the lock has been released.
+ */
+static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
+{
+	struct tipc_link_entry *le = &n->links[bearer_id];
+	struct tipc_media_addr *maddr = NULL;
+	struct tipc_link *l = le->link;
+	int old_bearer_id = bearer_id;
+	struct sk_buff_head xmitq;
+
+	/* NOTE(review): le->link is sampled before the node lock is taken */
+	if (!l)
+		return;
+
+	__skb_queue_head_init(&xmitq);
+
+	tipc_node_write_lock(n);
+	if (!tipc_link_is_establishing(l)) {
+		/* May redirect bearer_id/maddr to the failover link */
+		__tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
+		if (delete) {
+			kfree(l);
+			le->link = NULL;
+			n->link_cnt--;
+		}
+	} else {
+		/* Defuse pending tipc_node_link_up() */
+		tipc_link_fsm_evt(l, LINK_RESET_EVT);
+	}
+	tipc_node_write_unlock(n);
+	/* The monitor is told about the bearer that was originally passed */
+	if (delete)
+		tipc_mon_remove_peer(n->net, n->addr, old_bearer_id);
+	if (!skb_queue_empty(&xmitq))
+		tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+	tipc_sk_rcv(n->net, &le->inputq);
+}
+
+/* A node counts as up while active link slot 0 holds a valid bearer id */
+static bool node_is_up(struct tipc_node *n)
+{
+	return n->active_links[0] != INVALID_BEARER_ID;
+}
+
+/* Return true for the own node, or for a known peer that is up */
+bool tipc_node_is_up(struct net *net, u32 addr)
+{
+	struct tipc_node *n;
+	bool retval = false;
+
+	if (in_own_node(net, addr))
+		return true;
+
+	n = tipc_node_find(net, addr);
+	if (!n)
+		return false;
+	retval = node_is_up(n);
+	tipc_node_put(n);
+	return retval;
+}
+
+/* Derive a candidate from @addr by XOR with the per-net random value, then
+ * count upwards until an address is found that no known node uses.
+ */
+static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
+{
+	struct tipc_node *n;
+
+	addr ^= tipc_net(net)->random;
+	while ((n = tipc_node_find(net, addr))) {
+		tipc_node_put(n);
+		addr++;
+	}
+	return addr;
+}
+
+/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ *
Returns suggested address if any, otherwise 0
+ */
+u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
+{
+	struct tipc_net *tn = tipc_net(net);
+	struct tipc_node *n;
+
+	/* Suggest new address if some other peer is using this one */
+	n = tipc_node_find(net, addr);
+	if (n) {
+		/* Same node id: the address already belongs to this peer */
+		if (!memcmp(n->peer_id, id, NODE_ID_LEN))
+			addr = 0;
+		tipc_node_put(n);
+		if (!addr)
+			return 0;
+		return tipc_node_suggest_addr(net, addr);
+	}
+
+	/* Suggest previously used address if peer is known */
+	n = tipc_node_find_by_id(net, id);
+	if (n) {
+		addr = n->addr;
+		tipc_node_put(n);
+		return addr;
+	}
+
+	/* Even this node may be in conflict */
+	if (tn->trial_addr == addr)
+		return tipc_node_suggest_addr(net, addr);
+
+	return 0;
+}
+
+/* tipc_node_check_dest - evaluate a discovery message from a peer
+ * Finds or creates the node for @addr, compares @signature and @maddr
+ * with what is stored for bearer @b and decides whether to respond
+ * (*respond), whether the address looks duplicated (*dupl_addr), whether
+ * to accept the new media address and whether the link must be reset.
+ */
+void tipc_node_check_dest(struct net *net, u32 addr,
+			  u8 *peer_id, struct tipc_bearer *b,
+			  u16 capabilities, u32 signature,
+			  struct tipc_media_addr *maddr,
+			  bool *respond, bool *dupl_addr)
+{
+	struct tipc_node *n;
+	struct tipc_link *l;
+	struct tipc_link_entry *le;
+	bool addr_match = false;
+	bool sign_match = false;
+	bool link_up = false;
+	bool accept_addr = false;
+	bool reset = true;
+	char *if_name;
+	unsigned long intv;
+	u16 session;
+
+	*dupl_addr = false;
+	*respond = false;
+
+	/* Returns with a reference that is dropped at the end */
+	n = tipc_node_create(net, addr, peer_id, capabilities);
+	if (!n)
+		return;
+
+	tipc_node_write_lock(n);
+
+	le = &n->links[b->identity];
+
+	/* Prepare to validate requesting node's signature and media address */
+	l = le->link;
+	link_up = l && tipc_link_is_up(l);
+	addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
+	sign_match = (signature == n->signature);
+
+	/* These three flags give us eight permutations: */
+
+	if (sign_match && addr_match && link_up) {
+		/* All is fine. Do nothing. */
+		reset = false;
+	} else if (sign_match && addr_match && !link_up) {
+		/* Respond. The link will come up in due time */
+		*respond = true;
+	} else if (sign_match && !addr_match && link_up) {
+		/* Peer has changed i/f address without rebooting.
+		 * If so, the link will reset soon, and the next
+		 * discovery will be accepted. So we can ignore it.
+		 * It may also be a cloned or malicious peer having
+		 * chosen the same node address and signature as an
+		 * existing one.
+		 * Ignore requests until the link goes down, if ever.
+		 */
+		*dupl_addr = true;
+	} else if (sign_match && !addr_match && !link_up) {
+		/* Peer link has changed i/f address without rebooting.
+		 * It may also be a cloned or malicious peer; we can't
+		 * distinguish between the two.
+		 * The signature is correct, so we must accept.
+		 */
+		accept_addr = true;
+		*respond = true;
+	} else if (!sign_match && addr_match && link_up) {
+		/* Peer node rebooted. Two possibilities:
+		 *  - Delayed re-discovery; this link endpoint has already
+		 *    reset and re-established contact with the peer, before
+		 *    receiving a discovery message from that node.
+		 *    (The peer happened to receive one from this node first).
+		 *  - The peer came back so fast that our side has not
+		 *    discovered it yet. Probing from this side will soon
+		 *    reset the link, since there can be no working link
+		 *    endpoint at the peer end, and the link will re-establish.
+		 *  Accept the signature, since it comes from a known peer.
+		 */
+		n->signature = signature;
+	} else if (!sign_match && addr_match && !link_up) {
+		/*  The peer node has rebooted.
+		 *  Accept signature, since it is a known peer.
+		 */
+		n->signature = signature;
+		*respond = true;
+	} else if (!sign_match && !addr_match && link_up) {
+		/* Peer rebooted with new address, or a new/duplicate peer.
+		 * Ignore until the link goes down, if ever.
+		 */
+		*dupl_addr = true;
+	} else if (!sign_match && !addr_match && !link_up) {
+		/* Peer rebooted with new address, or it is a new peer.
+		 * Accept signature and address.
+		 */
+		n->signature = signature;
+		accept_addr = true;
+		*respond = true;
+	}
+
+	if (!accept_addr)
+		goto exit;
+
+	/* Now create new link if not already existing */
+	if (!l) {
+		/* No more than two links are kept per node */
+		if (n->link_cnt == 2)
+			goto exit;
+
+		/* Interface name is the part of the bearer name after ':'.
+		 * NOTE(review): assumes b->name always contains a ':'.
+		 */
+		if_name = strchr(b->name, ':') + 1;
+		get_random_bytes(&session, sizeof(u16));
+		if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
+				      b->net_plane, b->mtu, b->priority,
+				      b->window, session,
+				      tipc_own_addr(net), addr, peer_id,
+				      n->capabilities,
+				      tipc_bc_sndlink(n->net), n->bc_entry.link,
+				      &le->inputq,
+				      &n->bc_entry.namedq, &l)) {
+			*respond = false;
+			goto exit;
+		}
+		tipc_link_reset(l);
+		tipc_link_fsm_evt(l, LINK_RESET_EVT);
+		if (n->state == NODE_FAILINGOVER)
+			tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+		le->link = l;
+		n->link_cnt++;
+		tipc_node_calculate_timer(n, l);
+		/* First link: start the node timer; a reference is taken
+		 * for it when mod_timer() returns 0.
+		 */
+		if (n->link_cnt == 1) {
+			intv = jiffies + msecs_to_jiffies(n->keepalive_intv);
+			if (!mod_timer(&n->timer, intv))
+				tipc_node_get(n);
+		}
+	}
+	memcpy(&le->maddr, maddr, sizeof(*maddr));
+exit:
+	tipc_node_write_unlock(n);
+	/* Every case except "all is fine" resets a link that is not reset */
+	if (reset && l && !tipc_link_is_reset(l))
+		tipc_node_link_down(n, b->identity, false);
+	tipc_node_put(n);
+}
+
+/* Take down and free the link on @bearer_id for every node of @net */
+void tipc_node_delete_links(struct net *net, int bearer_id)
+{
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_node *n;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(n, &tn->node_list, list) {
+		tipc_node_link_down(n, bearer_id, true);
+	}
+	rcu_read_unlock();
+}
+
+/* Take down, without freeing, the links of @n on all bearers */
+static void tipc_node_reset_links(struct tipc_node *n)
+{
+	int i;
+
+	pr_warn("Resetting all links to %x\n", n->addr);
+
+	for (i = 0; i < MAX_BEARERS; i++) {
+		tipc_node_link_down(n, i, false);
+	}
+}
+
+/* tipc_node_fsm_evt - node finite state machine
+ * Determines when contact is allowed with peer node
+ *
+ * An event that is legal in the current state updates n->state (possibly
+ * to the same value); an illegal event is logged and the state is kept.
+ */
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
+{
+	int state = n->state;
+
+	switch (state) {
+	case SELF_DOWN_PEER_DOWN:
+		switch (evt) {
+		case SELF_ESTABL_CONTACT_EVT:
+			state = SELF_UP_PEER_COMING;
+			break;
+		case
PEER_ESTABL_CONTACT_EVT:
+			state = SELF_COMING_PEER_UP;
+			break;
+		case SELF_LOST_CONTACT_EVT:
+		case PEER_LOST_CONTACT_EVT:
+			break;
+		case NODE_SYNCH_END_EVT:
+		case NODE_SYNCH_BEGIN_EVT:
+		case NODE_FAILOVER_BEGIN_EVT:
+		case NODE_FAILOVER_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		break;
+	case SELF_UP_PEER_UP:
+		/* Only state from which synching or failover can start */
+		switch (evt) {
+		case SELF_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_LEAVING;
+			break;
+		case PEER_LOST_CONTACT_EVT:
+			state = SELF_LEAVING_PEER_DOWN;
+			break;
+		case NODE_SYNCH_BEGIN_EVT:
+			state = NODE_SYNCHING;
+			break;
+		case NODE_FAILOVER_BEGIN_EVT:
+			state = NODE_FAILINGOVER;
+			break;
+		case SELF_ESTABL_CONTACT_EVT:
+		case PEER_ESTABL_CONTACT_EVT:
+		case NODE_SYNCH_END_EVT:
+		case NODE_FAILOVER_END_EVT:
+			break;
+		default:
+			goto illegal_evt;
+		}
+		break;
+	case SELF_DOWN_PEER_LEAVING:
+		switch (evt) {
+		case PEER_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_DOWN;
+			break;
+		case SELF_ESTABL_CONTACT_EVT:
+		case PEER_ESTABL_CONTACT_EVT:
+		case SELF_LOST_CONTACT_EVT:
+			break;
+		case NODE_SYNCH_END_EVT:
+		case NODE_SYNCH_BEGIN_EVT:
+		case NODE_FAILOVER_BEGIN_EVT:
+		case NODE_FAILOVER_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		break;
+	case SELF_UP_PEER_COMING:
+		switch (evt) {
+		case PEER_ESTABL_CONTACT_EVT:
+			state = SELF_UP_PEER_UP;
+			break;
+		case SELF_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_DOWN;
+			break;
+		case SELF_ESTABL_CONTACT_EVT:
+		case PEER_LOST_CONTACT_EVT:
+		case NODE_SYNCH_END_EVT:
+		case NODE_FAILOVER_BEGIN_EVT:
+			break;
+		case NODE_SYNCH_BEGIN_EVT:
+		case NODE_FAILOVER_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		break;
+	case SELF_COMING_PEER_UP:
+		switch (evt) {
+		case SELF_ESTABL_CONTACT_EVT:
+			state = SELF_UP_PEER_UP;
+			break;
+		case PEER_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_DOWN;
+			break;
+		case SELF_LOST_CONTACT_EVT:
+		case PEER_ESTABL_CONTACT_EVT:
+			break;
+		case NODE_SYNCH_END_EVT:
+		case NODE_SYNCH_BEGIN_EVT:
+		case NODE_FAILOVER_BEGIN_EVT:
+		case NODE_FAILOVER_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		
break;
+	case SELF_LEAVING_PEER_DOWN:
+		switch (evt) {
+		case SELF_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_DOWN;
+			break;
+		case SELF_ESTABL_CONTACT_EVT:
+		case PEER_ESTABL_CONTACT_EVT:
+		case PEER_LOST_CONTACT_EVT:
+			break;
+		case NODE_SYNCH_END_EVT:
+		case NODE_SYNCH_BEGIN_EVT:
+		case NODE_FAILOVER_BEGIN_EVT:
+		case NODE_FAILOVER_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		break;
+	case NODE_FAILINGOVER:
+		switch (evt) {
+		case SELF_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_LEAVING;
+			break;
+		case PEER_LOST_CONTACT_EVT:
+			state = SELF_LEAVING_PEER_DOWN;
+			break;
+		case NODE_FAILOVER_END_EVT:
+			state = SELF_UP_PEER_UP;
+			break;
+		case NODE_FAILOVER_BEGIN_EVT:
+		case SELF_ESTABL_CONTACT_EVT:
+		case PEER_ESTABL_CONTACT_EVT:
+			break;
+		case NODE_SYNCH_BEGIN_EVT:
+		case NODE_SYNCH_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		break;
+	case NODE_SYNCHING:
+		/* Synching may be pre-empted by a failover */
+		switch (evt) {
+		case SELF_LOST_CONTACT_EVT:
+			state = SELF_DOWN_PEER_LEAVING;
+			break;
+		case PEER_LOST_CONTACT_EVT:
+			state = SELF_LEAVING_PEER_DOWN;
+			break;
+		case NODE_SYNCH_END_EVT:
+			state = SELF_UP_PEER_UP;
+			break;
+		case NODE_FAILOVER_BEGIN_EVT:
+			state = NODE_FAILINGOVER;
+			break;
+		case NODE_SYNCH_BEGIN_EVT:
+		case SELF_ESTABL_CONTACT_EVT:
+		case PEER_ESTABL_CONTACT_EVT:
+			break;
+		case NODE_FAILOVER_END_EVT:
+		default:
+			goto illegal_evt;
+		}
+		break;
+	default:
+		pr_err("Unknown node fsm state %x\n", state);
+		break;
+	}
+	n->state = state;
+	return;
+
+illegal_evt:
+	/* n->state is deliberately left unchanged here */
+	pr_err("Illegal node fsm evt %x in state %x\n", evt, state);
+}
+
+/* node_lost_contact - clean up after the last link to a peer went down
+ * Restarts the cleanup deadline, removes the peer from broadcast, ends
+ * any link failover, requests a NODE_DOWN notification and queues a
+ * TIPC_ERR_NO_NODE message on @inputq for every connected socket.
+ */
+static void node_lost_contact(struct tipc_node *n,
+			      struct sk_buff_head *inputq)
+{
+	struct tipc_sock_conn *conn, *safe;
+	struct tipc_link *l;
+	struct list_head *conns = &n->conn_sks;
+	struct sk_buff *skb;
+	uint i;
+
+	pr_debug("Lost contact with %x\n", n->addr);
+	n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
+
+	/* Clean up broadcast state */
+	tipc_bcast_remove_peer(n->net, n->bc_entry.link);
+
+	/* Abort any ongoing link failover */
+	for
(i = 0; i < MAX_BEARERS; i++) {
+		l = n->links[i].link;
+		if (l)
+			tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
+	}
+
+	/* Notify publications from this node */
+	n->action_flags |= TIPC_NOTIFY_NODE_DOWN;
+
+	/* Notify sockets connected to node */
+	list_for_each_entry_safe(conn, safe, conns, list) {
+		skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+				      SHORT_H_SIZE, 0, tipc_own_addr(n->net),
+				      conn->peer_node, conn->port,
+				      conn->peer_port, TIPC_ERR_NO_NODE);
+		if (likely(skb))
+			skb_queue_tail(inputq, skb);
+		list_del(&conn->list);
+		kfree(conn);
+	}
+}
+
+/**
+ * tipc_node_get_linkname - get the name of a link
+ *
+ * @net: the applicable net namespace
+ * @bearer_id: id of the bearer
+ * @addr: peer node address
+ * @linkname: link name output buffer
+ * @len: size of the output buffer
+ *
+ * The name is truncated if it does not fit; for a non-zero @len the
+ * buffer is always NUL-terminated.
+ *
+ * Returns 0 on success, -EINVAL if the node is unknown, @bearer_id is out
+ * of range or there is no link on that bearer.
+ */
+int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
+			   char *linkname, size_t len)
+{
+	struct tipc_link *link;
+	int err = -EINVAL;
+	struct tipc_node *node = tipc_node_find(net, addr);
+
+	if (!node)
+		return err;
+
+	if (bearer_id >= MAX_BEARERS)
+		goto exit;
+
+	tipc_node_read_lock(node);
+	link = node->links[bearer_id].link;
+	if (link) {
+		strncpy(linkname, tipc_link_name(link), len);
+		/* strncpy() leaves the buffer unterminated when the name
+		 * fills it completely, so terminate it explicitly.
+		 */
+		if (len)
+			linkname[len - 1] = '\0';
+		err = 0;
+	}
+	tipc_node_read_unlock(node);
+exit:
+	tipc_node_put(node);
+	return err;
+}
+
+/* Caller should hold node lock for the passed node */
+static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
+{
+	void *hdr;
+	struct nlattr *attrs;
+
+	hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+			  NLM_F_MULTI, TIPC_NL_NODE_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	attrs = nla_nest_start(msg->skb, TIPC_NLA_NODE);
+	if (!attrs)
+		goto msg_full;
+
+	if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
+		goto attr_msg_full;
+	if (node_is_up(node))
+		if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
+			goto attr_msg_full;
+
+	nla_nest_end(msg->skb, attrs);
+	genlmsg_end(msg->skb, hdr);
+
+	return 0;
+
+attr_msg_full:
+	nla_nest_cancel(msg->skb, attrs);
+msg_full:
+	genlmsg_cancel(msg->skb, hdr);
+
+	return -EMSGSIZE;
+}
+
+/**
+ * tipc_node_xmit() is the general link level function for message sending
+ * @net: the applicable net namespace
+ * @list: chain of buffers containing message
+ * @dnode: address of destination node
+ * @selector: a number used for deterministic link selection
+ * Consumes the buffer chain.
+ * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUFS
+ */
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
+		   u32 dnode, int selector)
+{
+	struct tipc_link_entry *le = NULL;
+	struct tipc_node *n;
+	struct sk_buff_head xmitq;
+	int bearer_id;
+	int rc;
+
+	/* Destination is this node: hand the chain to the socket layer */
+	if (in_own_node(net, dnode)) {
+		spin_lock_init(&list->lock);
+		tipc_sk_rcv(net, list);
+		return 0;
+	}
+
+	n = tipc_node_find(net, dnode);
+	if (unlikely(!n)) {
+		__skb_queue_purge(list);
+		return -EHOSTUNREACH;
+	}
+
+	/* The low bit of the selector picks one of the two active links */
+	tipc_node_read_lock(n);
+	bearer_id = n->active_links[selector & 1];
+	if (unlikely(bearer_id == INVALID_BEARER_ID)) {
+		tipc_node_read_unlock(n);
+		tipc_node_put(n);
+		__skb_queue_purge(list);
+		return -EHOSTUNREACH;
+	}
+
+	__skb_queue_head_init(&xmitq);
+	le = &n->links[bearer_id];
+	spin_lock_bh(&le->lock);
+	rc = tipc_link_xmit(le->link, list, &xmitq);
+	spin_unlock_bh(&le->lock);
+	tipc_node_read_unlock(n);
+
+	/* -ENOBUFS takes the link down instead of transmitting */
+	if (unlikely(rc == -ENOBUFS))
+		tipc_node_link_down(n, bearer_id, false);
+	else
+		tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+
+	tipc_node_put(n);
+
+	return rc;
+}
+
+/* tipc_node_xmit_skb(): send single buffer to destination
+ * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE
+ * messages, which will not be rejected
+ * The only exception is datagram messages rerouted after secondary
+ * lookup, which are rare and safe to dispose of anyway.
+ */
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
+		       u32 selector)
+{
+	struct sk_buff_head chain;
+
+	/* Wrap the buffer in a one-element chain; the result of the send
+	 * is intentionally not propagated.
+	 */
+	__skb_queue_head_init(&chain);
+	__skb_queue_tail(&chain, skb);
+	tipc_node_xmit(net, &chain, dnode, selector);
+	return 0;
+}
+
+/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
+ * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
+ */
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
+{
+	struct sk_buff *skb;
+	u32 selector, dnode;
+
+	/* Each message names its own destination; origin port is selector */
+	for (skb = __skb_dequeue(xmitq); skb; skb = __skb_dequeue(xmitq)) {
+		selector = msg_origport(buf_msg(skb));
+		dnode = msg_destnode(buf_msg(skb));
+		tipc_node_xmit_skb(net, skb, dnode, selector);
+	}
+	return 0;
+}
+
+/* Send a private copy of @skb to every peer node that is up, then free
+ * the original. Stops at the first failed copy.
+ */
+void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
+{
+	struct tipc_node *peer;
+	struct sk_buff *copy;
+	u32 dst;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(peer, tipc_nodes(net), list) {
+		dst = peer->addr;
+		if (in_own_node(net, dst) || !node_is_up(peer))
+			continue;
+		copy = pskb_copy(skb, GFP_ATOMIC);
+		if (!copy)
+			break;
+		msg_set_destnode(buf_msg(copy), dst);
+		tipc_node_xmit_skb(net, copy, dst, 0);
+	}
+	rcu_read_unlock();
+
+	kfree_skb(skb);
+}
+
+/* Move what arrived on inputq1 to the arrival queue and let the socket
+ * layer deliver it.
+ */
+static void tipc_node_mcast_rcv(struct tipc_node *n)
+{
+	struct tipc_bclink_entry *bce = &n->bc_entry;
+
+	/* 'arrvq' is under inputq2's lock protection */
+	spin_lock_bh(&bce->inputq2.lock);
+	spin_lock_bh(&bce->inputq1.lock);
+	skb_queue_splice_tail_init(&bce->inputq1, &bce->arrvq);
+	spin_unlock_bh(&bce->inputq1.lock);
+	spin_unlock_bh(&bce->inputq2.lock);
+	tipc_sk_mcast_rcv(n->net, &bce->arrvq, &bce->inputq2);
+}
+
+static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
+				  int bearer_id, struct sk_buff_head *xmitq)
+{
+	struct tipc_link *ucl;
+	int rc;
+
+	rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+
+	if (rc & TIPC_LINK_DOWN_EVT) {
+		tipc_node_reset_links(n);
+		return;
+	}
+
+	if (!(rc & TIPC_LINK_SND_STATE))
+		return;
+
+	/* If probe
message, a STATE response will be sent anyway */ + if (msg_probe(hdr)) + return; + + /* Produce a STATE message carrying broadcast NACK */ + tipc_node_read_lock(n); + ucl = n->links[bearer_id].link; + if (ucl) + tipc_link_build_state_msg(ucl, xmitq); + tipc_node_read_unlock(n); +} + +/** + * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer_id: id of bearer message arrived on + * + * Invoked with no locks held. + */ +static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) +{ + int rc; + struct sk_buff_head xmitq; + struct tipc_bclink_entry *be; + struct tipc_link_entry *le; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + u32 dnode = msg_destnode(hdr); + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + /* If NACK for other node, let rcv link for that node peek into it */ + if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) + n = tipc_node_find(net, dnode); + else + n = tipc_node_find(net, msg_prevnode(hdr)); + if (!n) { + kfree_skb(skb); + return; + } + be = &n->bc_entry; + le = &n->links[bearer_id]; + + rc = tipc_bcast_rcv(net, be->link, skb); + + /* Broadcast ACKs are sent on a unicast link */ + if (rc & TIPC_LINK_SND_STATE) { + tipc_node_read_lock(n); + tipc_link_build_state_msg(le->link, &xmitq); + tipc_node_read_unlock(n); + } + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + + if (!skb_queue_empty(&be->inputq1)) + tipc_node_mcast_rcv(n); + + /* If reassembly or retransmission failure => reset all links to peer */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); + + tipc_node_put(n); +} + +/** + * tipc_node_check_state - check and if necessary update node state + * @skb: TIPC packet + * @bearer_id: identity of bearer delivering the packet + * Returns true if state and msg are ok, otherwise false + */ +static bool tipc_node_check_state(struct 
tipc_node *n, struct sk_buff *skb, + int bearer_id, struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + int mtyp = msg_type(hdr); + u16 oseqno = msg_seqno(hdr); + u16 iseqno = msg_seqno(msg_get_wrapped(hdr)); + u16 exp_pkts = msg_msgcnt(hdr); + u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; + int state = n->state; + struct tipc_link *l, *tnl, *pl = NULL; + struct tipc_media_addr *maddr; + int pb_id; + + l = n->links[bearer_id].link; + if (!l) + return false; + rcv_nxt = tipc_link_rcv_nxt(l); + + + if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) + return true; + + /* Find parallel link, if any */ + for (pb_id = 0; pb_id < MAX_BEARERS; pb_id++) { + if ((pb_id != bearer_id) && n->links[pb_id].link) { + pl = n->links[pb_id].link; + break; + } + } + + if (!tipc_link_validate_msg(l, hdr)) + return false; + + /* Check and update node accessibility if applicable */ + if (state == SELF_UP_PEER_COMING) { + if (!tipc_link_is_up(l)) + return true; + if (!msg_peer_link_is_up(hdr)) + return true; + tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT); + } + + if (state == SELF_DOWN_PEER_LEAVING) { + if (msg_peer_node_is_up(hdr)) + return false; + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + return true; + } + + if (state == SELF_LEAVING_PEER_DOWN) + return false; + + /* Ignore duplicate packets */ + if ((usr != LINK_PROTOCOL) && less(oseqno, rcv_nxt)) + return true; + + /* Initiate or update failover mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) { + syncpt = oseqno + exp_pkts - 1; + if (pl && tipc_link_is_up(pl)) { + __tipc_node_link_down(n, &pb_id, xmitq, &maddr); + tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), + tipc_link_inputq(l)); + } + /* If parallel link was already down, and this happened before + * the tunnel link came up, FAILOVER was never sent. Ensure that + * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. 
+ */ + if (n->state != NODE_FAILINGOVER && !n->failover_sent) { + tipc_link_create_dummy_tnl_msg(l, xmitq); + n->failover_sent = true; + } + /* If pkts arrive out of order, use lowest calculated syncpt */ + if (less(syncpt, n->sync_point)) + n->sync_point = syncpt; + } + + /* Open parallel link when tunnel link reaches synch point */ + if ((n->state == NODE_FAILINGOVER) && tipc_link_is_up(l)) { + if (!more(rcv_nxt, n->sync_point)) + return true; + tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT); + if (pl) + tipc_link_fsm_evt(pl, LINK_FAILOVER_END_EVT); + return true; + } + + /* No synching needed if only one link */ + if (!pl || !tipc_link_is_up(pl)) + return true; + + /* Initiate synch mode if applicable */ + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { + syncpt = iseqno + exp_pkts - 1; + if (!tipc_link_is_up(l)) + __tipc_node_link_up(n, bearer_id, xmitq); + if (n->state == SELF_UP_PEER_UP) { + n->sync_point = syncpt; + tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT); + } + } + + /* Open tunnel link when parallel link reaches synch point */ + if (n->state == NODE_SYNCHING) { + if (tipc_link_is_synching(l)) { + tnl = l; + } else { + tnl = pl; + pl = l; + } + inputq_len = skb_queue_len(tipc_link_inputq(pl)); + dlv_nxt = tipc_link_rcv_nxt(pl) - inputq_len; + if (more(dlv_nxt, n->sync_point)) { + tipc_link_fsm_evt(tnl, LINK_SYNCH_END_EVT); + tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT); + return true; + } + if (l == pl) + return true; + if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) + return true; + if (usr == LINK_PROTOCOL) + return true; + return false; + } + return true; +} + +/** + * tipc_rcv - process TIPC packets/messages arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer: pointer to bearer message arrived on + * + * Invoked with no locks held. Bearer pointer must point to a valid bearer + * structure (i.e. cannot be NULL), but bearer can be inactive. 
+ */ +void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) +{ + struct sk_buff_head xmitq; + struct tipc_node *n; + struct tipc_msg *hdr; + int bearer_id = b->identity; + struct tipc_link_entry *le; + u32 self = tipc_own_addr(net); + int usr, rc = 0; + u16 bc_ack; + + __skb_queue_head_init(&xmitq); + + /* Ensure message is well-formed before touching the header */ + if (unlikely(!tipc_msg_validate(&skb))) + goto discard; + hdr = buf_msg(skb); + usr = msg_user(hdr); + bc_ack = msg_bcast_ack(hdr); + + /* Handle arrival of discovery or broadcast packet */ + if (unlikely(msg_non_seq(hdr))) { + if (unlikely(usr == LINK_CONFIG)) + return tipc_disc_rcv(net, skb, b); + else + return tipc_node_bc_rcv(net, skb, bearer_id); + } + + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + + /* Locate neighboring node that sent packet */ + n = tipc_node_find(net, msg_prevnode(hdr)); + if (unlikely(!n)) + goto discard; + le = &n->links[bearer_id]; + + /* Ensure broadcast reception is in synch with peer's send state */ + if (unlikely(usr == LINK_PROTOCOL)) + tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); + else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) + tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); + + /* Receive packet directly if conditions permit */ + tipc_node_read_lock(n); + if (likely((n->state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL))) { + spin_lock_bh(&le->lock); + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + spin_unlock_bh(&le->lock); + } + tipc_node_read_unlock(n); + + /* Check/update node state before receiving */ + if (unlikely(skb)) { + /* The node found by tipc_node_find() is still held here, so + * leave through the label that also does tipc_node_put() + */ + if (unlikely(skb_linearize(skb))) + goto out_node_put; + tipc_node_write_lock(n); + if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { + if (le->link) { + rc = tipc_link_rcv(le->link, skb, &xmitq); + skb = NULL; + } + } + tipc_node_write_unlock(n); + } + + if 
(unlikely(rc & TIPC_LINK_UP_EVT)) + tipc_node_link_up(n, bearer_id, &xmitq); + + if (unlikely(rc & TIPC_LINK_DOWN_EVT)) + tipc_node_link_down(n, bearer_id, false); + + if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) + tipc_named_rcv(net, &n->bc_entry.namedq); + + if (unlikely(!skb_queue_empty(&n->bc_entry.inputq1))) + tipc_node_mcast_rcv(n); + + if (!skb_queue_empty(&le->inputq)) + tipc_sk_rcv(net, &le->inputq); + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + +out_node_put: + tipc_node_put(n); +discard: + kfree_skb(skb); +} + +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, + int prop) +{ + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; + struct sk_buff_head xmitq; + struct tipc_link_entry *e; + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + rcu_read_lock(); + + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_write_lock(n); + e = &n->links[bearer_id]; + if (e->link) { + if (prop == TIPC_NLA_PROP_TOL) + tipc_link_set_tolerance(e->link, b->tolerance, + &xmitq); + else if (prop == TIPC_NLA_PROP_MTU) + tipc_link_set_mtu(e->link, b->mtu); + } + tipc_node_write_unlock(n); + tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr); + } + + rcu_read_unlock(); +} + +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct tipc_node *peer; + u32 addr; + int err; + + /* We identify the peer by its net */ + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], tipc_nl_net_policy, + info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_NET_ADDR]) + return -EINVAL; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + + if (in_own_node(net, addr)) + return -ENOTSUPP; + + spin_lock_bh(&tn->node_list_lock); + peer = tipc_node_find(net, addr); 
+ if (!peer) { + spin_unlock_bh(&tn->node_list_lock); + return -ENXIO; + } + + tipc_node_write_lock(peer); + if (peer->state != SELF_DOWN_PEER_DOWN && + peer->state != SELF_DOWN_PEER_LEAVING) { + tipc_node_write_unlock(peer); + err = -EBUSY; + goto err_out; + } + + tipc_node_clear_links(peer); + tipc_node_write_unlock(peer); + tipc_node_delete(peer); + + err = 0; +err_out: + tipc_node_put(peer); + spin_unlock_bh(&tn->node_list_lock); + + return err; +} + +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + int done = cb->args[0]; + int last_addr = cb->args[1]; + struct tipc_node *node; + struct tipc_nl_msg msg; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (last_addr) { + node = tipc_node_find(net, last_addr); + if (!node) { + rcu_read_unlock(); + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the NLMSG_DONE message having + * the NLM_F_DUMP_INTR flag set if the node state + * changed while we released the lock. 
+ */ + cb->prev_seq = 1; + return -EPIPE; + } + tipc_node_put(node); + } + + list_for_each_entry_rcu(node, &tn->node_list, list) { + if (last_addr) { + if (node->addr == last_addr) + last_addr = 0; + else + continue; + } + + tipc_node_read_lock(node); + err = __tipc_nl_add_node(&msg, node); + if (err) { + last_addr = node->addr; + tipc_node_read_unlock(node); + goto out; + } + + tipc_node_read_unlock(node); + } + done = 1; +out: + cb->args[0] = done; + cb->args[1] = last_addr; + rcu_read_unlock(); + + return skb->len; +} + +/* tipc_node_find_by_name - locate owner node of link by link's name + * @net: the applicable net namespace + * @link_name: pointer to link name string + * @bearer_id: pointer to index in 'node->links' array where the link was found. + * Set to 0 when no link matches. + * + * Returns pointer to node owning the link, or NULL if no matching link is found. + * NOTE(review): the node is returned after rcu_read_unlock() and no reference + * is taken on it in this function - confirm that callers can rely on it. + */ +static struct tipc_node *tipc_node_find_by_name(struct net *net, + const char *link_name, + unsigned int *bearer_id) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l; + struct tipc_node *n; + struct tipc_node *found_node = NULL; + int i; + + *bearer_id = 0; + rcu_read_lock(); + list_for_each_entry_rcu(n, &tn->node_list, list) { + tipc_node_read_lock(n); + for (i = 0; i < MAX_BEARERS; i++) { + l = n->links[i].link; + if (l && !strcmp(tipc_link_name(l), link_name)) { + *bearer_id = i; + found_node = n; + break; + } + } + tipc_node_read_unlock(n); + if (found_node) + break; + } + rcu_read_unlock(); + + return found_node; +} + +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int res = 0; + int bearer_id; + char *name; + struct tipc_link *link; + struct tipc_node *node; + struct sk_buff_head xmitq; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + + __skb_queue_head_init(&xmitq); + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + 
tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(name, tipc_bclink_name) == 0) + return tipc_nl_bc_link_set(net, attrs); + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) + return -EINVAL; + + tipc_node_read_lock(node); + + link = node->links[bearer_id].link; + if (!link) { + res = -EINVAL; + goto out; + } + + if (attrs[TIPC_NLA_LINK_PROP]) { + struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; + + err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], + props); + if (err) { + res = err; + goto out; + } + + if (props[TIPC_NLA_PROP_TOL]) { + u32 tol; + + tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); + tipc_link_set_tolerance(link, tol, &xmitq); + } + if (props[TIPC_NLA_PROP_PRIO]) { + u32 prio; + + prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); + tipc_link_set_prio(link, prio, &xmitq); + } + if (props[TIPC_NLA_PROP_WIN]) { + u32 win; + + win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); + tipc_link_set_queue_limits(link, win); + } + } + +out: + tipc_node_read_unlock(node); + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr); + return res; +} + +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct tipc_nl_msg msg; + char *name; + int err; + + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto 
err_free; + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_node_find_by_name(net, name, &bearer_id); + if (!node) { + err = -EINVAL; + goto err_free; + } + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); + err = -EINVAL; + goto err_free; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); + if (err) + goto err_free; + } + + return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; +} + +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *link_name; + unsigned int bearer_id; + struct tipc_link *link; + struct tipc_node *node; + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_link_entry *le; + + if (!info->attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]); + + if (strcmp(link_name, tipc_bclink_name) == 0) { + err = tipc_bclink_reset_stats(net); + if (err) + return err; + return 0; + } + + node = tipc_node_find_by_name(net, link_name, &bearer_id); + if (!node) + return -EINVAL; + + le = &node->links[bearer_id]; + tipc_node_read_lock(node); + spin_lock_bh(&le->lock); + link = node->links[bearer_id].link; + if (!link) { + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return -EINVAL; + } + tipc_link_reset_stats(link); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(node); + return 0; +} + +/* Caller should hold node lock */ +static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg, + struct tipc_node *node, u32 *prev_link) +{ + u32 i; + int err; + + for (i = *prev_link; i < MAX_BEARERS; i++) { + *prev_link = i; + + if 
(!node->links[i].link) + continue; + + err = __tipc_nl_add_link(net, msg, + node->links[i].link, NLM_F_MULTI); + if (err) + return err; + } + *prev_link = 0; + + return 0; +} + +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_node *node; + struct tipc_nl_msg msg; + u32 prev_node = cb->args[0]; + u32 prev_link = cb->args[1]; + int done = cb->args[2]; + int err; + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + if (prev_node) { + node = tipc_node_find(net, prev_node); + if (!node) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + goto out; + } + tipc_node_put(node); + + list_for_each_entry_continue_rcu(node, &tn->node_list, + list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } else { + err = tipc_nl_add_bc_link(net, &msg); + if (err) + goto out; + + list_for_each_entry_rcu(node, &tn->node_list, list) { + tipc_node_read_lock(node); + err = __tipc_nl_add_node_links(net, &msg, node, + &prev_link); + tipc_node_read_unlock(node); + if (err) + goto out; + + prev_node = node->addr; + } + } + done = 1; +out: + rcu_read_unlock(); + + cb->args[0] = prev_node; + cb->args[1] = prev_link; + cb->args[2] = done; + + return skb->len; +} + +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_MON_MAX + 1]; + struct net *net = sock_net(skb->sk); + int err; + + if (!info->attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = 
nla_parse_nested(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, info->extack); + if (err) + return err; + + if (attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]) { + u32 val; + + val = nla_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]); + err = tipc_nl_monitor_set_threshold(net, val); + if (err) + return err; + } + + return 0; +} + +static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) +{ + struct nlattr *attrs; + void *hdr; + u32 val; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + val = tipc_nl_monitor_get_threshold(net); + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + int err; + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg.skb) + return -ENOMEM; + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_add_monitor_prop(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_bearer = cb->args[0]; + struct tipc_nl_msg msg; + int bearer_id; + int err; + + if (prev_bearer == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) { + err = __tipc_nl_add_monitor(net, &msg, bearer_id); + if 
(err) + break; + } + rtnl_unlock(); + cb->args[0] = bearer_id; + + return skb->len; +} + +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_node = cb->args[1]; + u32 bearer_id = cb->args[2]; + int done = cb->args[0]; + struct tipc_nl_msg msg; + int err; + + if (!prev_node) { + struct nlattr **attrs; + struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy, NULL); + if (err) + return err; + + if (!mon[TIPC_NLA_MON_REF]) + return -EINVAL; + + bearer_id = nla_get_u32(mon[TIPC_NLA_MON_REF]); + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + } + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + err = tipc_nl_add_monitor_peer(net, &msg, bearer_id, &prev_node); + if (!err) + done = 1; + + rtnl_unlock(); + cb->args[0] = done; + cb->args[1] = prev_node; + cb->args[2] = bearer_id; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h new file mode 100644 index 000000000..48b3298a2 --- /dev/null +++ b/net/tipc/node.h @@ -0,0 +1,101 @@ +/* + * net/tipc/node.h: Include file for TIPC node management routines + * + * Copyright (c) 2000-2006, 2014-2016, Ericsson AB + * Copyright (c) 2005, 2010-2014, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_NODE_H +#define _TIPC_NODE_H + +#include "addr.h" +#include "net.h" +#include "bearer.h" +#include "msg.h" + +/* Optional capabilities supported by this code version + */ +enum { + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BCAST_STATE_NACK = (1 << 2), + TIPC_BLOCK_FLOWCTL = (1 << 3), + TIPC_BCAST_RCAST = (1 << 4), + TIPC_NODE_ID128 = (1 << 5), + TIPC_LINK_PROTO_SEQNO = (1 << 6) +}; + +#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ + TIPC_BCAST_STATE_NACK | \ + TIPC_BCAST_RCAST | \ + TIPC_BLOCK_FLOWCTL | \ + TIPC_NODE_ID128 | \ + TIPC_LINK_PROTO_SEQNO) +#define INVALID_BEARER_ID -1 + +void tipc_node_stop(struct net *net); +bool tipc_node_get_id(struct net *net, u32 addr, u8 *id); +u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr); +void tipc_node_check_dest(struct net *net, u32 onode, u8 *peer_id128, + struct tipc_bearer *bearer, + u16 capabilities, u32 signature, + struct tipc_media_addr *maddr, + bool *respond, bool *dupl_addr); +void tipc_node_delete_links(struct net *net, int bearer_id); +void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, int prop); +int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, + char *linkname, size_t len); +int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, + int selector); +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list); +int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, + u32 selector); +void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr); +void tipc_node_broadcast(struct net *net, struct sk_buff *skb); +int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); +void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); +int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +bool tipc_node_is_up(struct net *net, u32 addr); +u16 tipc_node_get_capabilities(struct net 
*net, u32 addr); +int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info); + +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb); +#endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c new file mode 100644 index 000000000..6c18b4565 --- /dev/null +++ b/net/tipc/socket.c @@ -0,0 +1,3569 @@ +/* + * net/tipc/socket.c: TIPC socket API + * + * Copyright (c) 2001-2007, 2012-2017, Ericsson AB + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/rhashtable.h> +#include <linux/sched/signal.h> + +#include "core.h" +#include "name_table.h" +#include "node.h" +#include "link.h" +#include "name_distr.h" +#include "socket.h" +#include "bcast.h" +#include "netlink.h" +#include "group.h" + +#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ +#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define TIPC_FWD_MSG 1 +#define TIPC_MAX_PORT 0xffffffff +#define TIPC_MIN_PORT 1 +#define TIPC_ACK_RATE 4 /* ACK at 1/4 of rcv window size */ + +enum { + TIPC_LISTEN = TCP_LISTEN, + TIPC_ESTABLISHED = TCP_ESTABLISHED, + TIPC_OPEN = TCP_CLOSE, + TIPC_DISCONNECTING = TCP_CLOSE_WAIT, + TIPC_CONNECTING = TCP_SYN_SENT, +}; + +struct sockaddr_pair { + struct sockaddr_tipc sock; + struct sockaddr_tipc member; +}; + +/** + * struct tipc_sock - TIPC socket structure + * @sk: socket - interacts with 'port' and with user via the socket API + * @conn_type: TIPC type used when connection was established + * 
@conn_instance: TIPC instance used when connection was established + * @published: non-zero if port has one or more associated names + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @portid: unique port identity in TIPC socket hash table + * @phdr: preformatted message header used when sending messages + * @cong_links: list of congested links + * @publications: list of publications for port + * @pub_count: total # of publications port has made during its lifetime + * @conn_timeout: the time we can wait for an unresponded setup request + * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue + * @probe_unacked: presumably set while a connection probe is unanswered - TODO confirm + * @cong_link_cnt: number of congested links + * @snt_unacked: # messages sent by socket, and not yet acked by peer + * @snd_win: send window; tsk_conn_cong() is true when @snt_unacked exceeds it + * @peer_caps: capability flags of the peer, tested against TIPC_BLOCK_FLOWCTL + * @rcv_unacked: # messages read by user, but not yet acked back to peer + * @rcv_win: receive window - NOTE(review): usage not visible here, confirm + * @peer: 'connected' peer for dgram/rdm + * @node: hash table node + * @mc_method: cookie for use between socket and broadcast layer + * @rcu: rcu struct for tipc_sock + * @group: communication group associated with the socket - TODO confirm + * @group_is_open: NOTE(review): exact meaning not visible here, confirm + */ +struct tipc_sock { + struct sock sk; + u32 conn_type; + u32 conn_instance; + int published; + u32 max_pkt; + u32 portid; + struct tipc_msg phdr; + struct list_head cong_links; + struct list_head publications; + u32 pub_count; + uint conn_timeout; + atomic_t dupl_rcvcnt; + bool probe_unacked; + u16 cong_link_cnt; + u16 snt_unacked; + u16 snd_win; + u16 peer_caps; + u16 rcv_unacked; + u16 rcv_win; + struct sockaddr_tipc peer; + struct rhash_head node; + struct tipc_mc_method mc_method; + struct rcu_head rcu; + struct tipc_group *group; + bool group_is_open; +}; + +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static void tipc_data_ready(struct sock *sk); +static void tipc_write_space(struct sock *sk); +static void tipc_sock_destruct(struct sock *sk); +static int tipc_release(struct socket *sock); +static int tipc_accept(struct socket *sock, 
struct socket *new_sock, int flags, + bool kern); +static void tipc_sk_timeout(struct timer_list *t); +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq); +static int tipc_sk_leave(struct tipc_sock *tsk); +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); +static int tipc_sk_insert(struct tipc_sock *tsk); +static void tipc_sk_remove(struct tipc_sock *tsk); +static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); + +static const struct proto_ops packet_ops; +static const struct proto_ops stream_ops; +static const struct proto_ops msg_ops; +static struct proto tipc_proto; +static const struct rhashtable_params tsk_rht_params; + +static u32 tsk_own_node(struct tipc_sock *tsk) +{ + return msg_prevnode(&tsk->phdr); +} + +static u32 tsk_peer_node(struct tipc_sock *tsk) +{ + return msg_destnode(&tsk->phdr); +} + +static u32 tsk_peer_port(struct tipc_sock *tsk) +{ + return msg_destport(&tsk->phdr); +} + +static bool tsk_unreliable(struct tipc_sock *tsk) +{ + return msg_src_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) +{ + msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); +} + +static bool tsk_unreturnable(struct tipc_sock *tsk) +{ + return msg_dest_droppable(&tsk->phdr) != 0; +} + +static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) +{ + msg_set_dest_droppable(&tsk->phdr, unreturnable ? 
1 : 0); +} + +static int tsk_importance(struct tipc_sock *tsk) +{ + return msg_importance(&tsk->phdr); +} + +static int tsk_set_importance(struct tipc_sock *tsk, int imp) +{ + if (imp > TIPC_CRITICAL_IMPORTANCE) + return -EINVAL; + msg_set_importance(&tsk->phdr, (u32)imp); + return 0; +} + +static struct tipc_sock *tipc_sk(const struct sock *sk) +{ + return container_of(sk, struct tipc_sock, sk); +} + +static bool tsk_conn_cong(struct tipc_sock *tsk) +{ + return tsk->snt_unacked > tsk->snd_win; +} + +static u16 tsk_blocks(int len) +{ + return ((len / FLOWCTL_BLK_SZ) + 1); +} + +/* tsk_blocks(): translate a buffer size in bytes to number of + * advertisable blocks, taking into account the ratio truesize(len)/len + * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ + */ +static u16 tsk_adv_blocks(int len) +{ + return len / FLOWCTL_BLK_SZ / 4; +} + +/* tsk_inc(): increment counter for sent or received data + * - If block based flow control is not supported by peer we + * fall back to message based ditto, incrementing the counter + */ +static u16 tsk_inc(struct tipc_sock *tsk, int msglen) +{ + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return ((msglen / FLOWCTL_BLK_SZ) + 1); + return 1; +} + +/** + * tsk_advance_rx_queue - discard first buffer in socket receive queue + * + * Caller must hold socket lock + */ +static void tsk_advance_rx_queue(struct sock *sk) +{ + kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); +} + +/* tipc_sk_respond() : send response message back to sender + */ +static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) +{ + u32 selector; + u32 dnode; + u32 onode = tipc_own_addr(sock_net(sk)); + + if (!tipc_msg_reverse(onode, &skb, err)) + return; + + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); +} + +/** + * tsk_rej_rx_queue - reject all buffers in socket receive queue + * + * Caller must hold socket lock + */ 
+static void tsk_rej_rx_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_receive_queue))) + tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); +} + +static bool tipc_sk_connected(struct sock *sk) +{ + return sk->sk_state == TIPC_ESTABLISHED; +} + +/* tipc_sk_type_connectionless - check if the socket is datagram socket + * @sk: socket + * + * Returns true if connection less, false otherwise + */ +static bool tipc_sk_type_connectionless(struct sock *sk) +{ + return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; +} + +/* tsk_peer_msg - verify if message was sent by connected port's peer + * + * Handles cases where the node's network address has changed from + * the default of <0.0.0> to its configured setting. + */ +static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) +{ + struct sock *sk = &tsk->sk; + u32 self = tipc_own_addr(sock_net(sk)); + u32 peer_port = tsk_peer_port(tsk); + u32 orig_node, peer_node; + + if (unlikely(!tipc_sk_connected(sk))) + return false; + + if (unlikely(msg_origport(msg) != peer_port)) + return false; + + orig_node = msg_orignode(msg); + peer_node = tsk_peer_node(tsk); + + if (likely(orig_node == peer_node)) + return true; + + if (!orig_node && peer_node == self) + return true; + + if (!peer_node && orig_node == self) + return true; + + return false; +} + +/* tipc_set_sk_state - set the sk_state of the socket + * @sk: socket + * + * Caller must hold socket lock + * + * Returns 0 on success, errno otherwise + */ +static int tipc_set_sk_state(struct sock *sk, int state) +{ + int oldsk_state = sk->sk_state; + int res = -EINVAL; + + switch (state) { + case TIPC_OPEN: + res = 0; + break; + case TIPC_LISTEN: + case TIPC_CONNECTING: + if (oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_ESTABLISHED: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == TIPC_OPEN) + res = 0; + break; + case TIPC_DISCONNECTING: + if (oldsk_state == TIPC_CONNECTING || + oldsk_state == 
TIPC_ESTABLISHED) + res = 0; + break; + } + + if (!res) + sk->sk_state = state; + + return res; +} + +static int tipc_sk_sock_err(struct socket *sock, long *timeout) +{ + struct sock *sk = sock->sk; + int err = sock_error(sk); + int typ = sock->type; + + if (err) + return err; + if (typ == SOCK_STREAM || typ == SOCK_SEQPACKET) { + if (sk->sk_state == TIPC_DISCONNECTING) + return -EPIPE; + else if (!tipc_sk_connected(sk)) + return -ENOTCONN; + } + if (!*timeout) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeout); + + return 0; +} + +#define tipc_wait_for_cond(sock_, timeo_, condition_) \ +({ \ + DEFINE_WAIT_FUNC(wait_, woken_wake_function); \ + struct sock *sk_; \ + int rc_; \ + \ + while ((rc_ = !(condition_))) { \ + /* coupled with smp_wmb() in tipc_sk_proto_rcv() */ \ + smp_rmb(); \ + sk_ = (sock_)->sk; \ + rc_ = tipc_sk_sock_err((sock_), timeo_); \ + if (rc_) \ + break; \ + prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \ + release_sock(sk_); \ + *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \ + sched_annotate_sleep(); \ + lock_sock(sk_); \ + remove_wait_queue(sk_sleep(sk_), &wait_); \ + } \ + rc_; \ +}) + +/** + * tipc_sk_create - create a TIPC socket + * @net: network namespace (must be default network) + * @sock: pre-allocated socket structure + * @protocol: protocol indicator (must be 0) + * @kern: caused by kernel or by userspace? + * + * This routine creates additional data structures used by the TIPC socket, + * initializes them, and links them together. 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_sk_create(struct net *net, struct socket *sock, + int protocol, int kern) +{ + const struct proto_ops *ops; + struct sock *sk; + struct tipc_sock *tsk; + struct tipc_msg *msg; + + /* Validate arguments */ + if (unlikely(protocol != 0)) + return -EPROTONOSUPPORT; + + switch (sock->type) { + case SOCK_STREAM: + ops = &stream_ops; + break; + case SOCK_SEQPACKET: + ops = &packet_ops; + break; + case SOCK_DGRAM: + case SOCK_RDM: + ops = &msg_ops; + break; + default: + return -EPROTOTYPE; + } + + /* Allocate socket's protocol area */ + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); + if (sk == NULL) + return -ENOMEM; + + tsk = tipc_sk(sk); + tsk->max_pkt = MAX_PKT_DEFAULT; + INIT_LIST_HEAD(&tsk->publications); + INIT_LIST_HEAD(&tsk->cong_links); + msg = &tsk->phdr; + + /* Finish initializing socket data structures */ + sock->ops = ops; + sock_init_data(sock, sk); + tipc_set_sk_state(sk, TIPC_OPEN); + if (tipc_sk_insert(tsk)) { + pr_warn("Socket create failed; port number exhausted\n"); + return -EINVAL; + } + + /* Ensure tsk is visible before we read own_addr. 
*/ + smp_mb(); + + tipc_msg_init(tipc_own_addr(net), msg, TIPC_LOW_IMPORTANCE, + TIPC_NAMED_MSG, NAMED_H_SIZE, 0); + + msg_set_origport(msg, tsk->portid); + timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); + sk->sk_shutdown = 0; + sk->sk_backlog_rcv = tipc_sk_backlog_rcv; + sk->sk_rcvbuf = sysctl_tipc_rmem[1]; + sk->sk_data_ready = tipc_data_ready; + sk->sk_write_space = tipc_write_space; + sk->sk_destruct = tipc_sock_destruct; + tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->group_is_open = true; + atomic_set(&tsk->dupl_rcvcnt, 0); + + /* Start out with safe limits until we receive an advertised window */ + tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); + tsk->rcv_win = tsk->snd_win; + + if (tipc_sk_type_connectionless(sk)) { + tsk_set_unreturnable(tsk, true); + if (sock->type == SOCK_DGRAM) + tsk_set_unreliable(tsk, true); + } + + return 0; +} + +static void tipc_sk_callback(struct rcu_head *head) +{ + struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); + + sock_put(&tsk->sk); +} + +/* Caller should hold socket lock for the socket. */ +static void __tipc_shutdown(struct socket *sock, int error) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); + u32 dnode = tsk_peer_node(tsk); + struct sk_buff *skb; + + /* Avoid that hi-prio shutdown msgs bypass msgs in link wakeup queue */ + tipc_wait_for_cond(sock, &timeout, (!tsk->cong_link_cnt && + !tsk_conn_cong(tsk))); + + /* Reject all unreceived messages, except on an active connection + * (which disconnects locally & sends a 'FIN+' to peer). 
+ */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (TIPC_SKB_CB(skb)->bytes_read) { + kfree_skb(skb); + continue; + } + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + } + tipc_sk_respond(sk, skb, error); + } + + if (tipc_sk_type_connectionless(sk)) + return; + + if (sk->sk_state != TIPC_DISCONNECTING) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, error); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + } +} + +/** + * tipc_release - destroy a TIPC socket + * @sock: socket to destroy + * + * This routine cleans up any messages that are still queued on the socket. + * For DGRAM and RDM socket types, all queued messages are rejected. + * For SEQPACKET and STREAM socket types, the first message is rejected + * and any others are discarded. (If the first message on a STREAM socket + * is partially-read, it is discarded and the next one is rejected instead.) + * + * NOTE: Rejected messages are not necessarily returned to the sender! They + * are returned or discarded according to the "destination droppable" setting + * specified for the message by the sender. 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk; + + /* + * Exit if socket isn't fully initialized (occurs when a failed accept() + * releases a pre-allocated child socket that was never used) + */ + if (sk == NULL) + return 0; + + tsk = tipc_sk(sk); + lock_sock(sk); + + __tipc_shutdown(sock, TIPC_ERR_NO_PORT); + sk->sk_shutdown = SHUTDOWN_MASK; + tipc_sk_leave(tsk); + tipc_sk_withdraw(tsk, 0, NULL); + sk_stop_timer(sk, &sk->sk_timer); + tipc_sk_remove(tsk); + + sock_orphan(sk); + /* Reject any messages that accumulated in backlog queue */ + release_sock(sk); + tipc_dest_list_purge(&tsk->cong_links); + tsk->cong_link_cnt = 0; + call_rcu(&tsk->rcu, tipc_sk_callback); + sock->sk = NULL; + + return 0; +} + +/** + * tipc_bind - associate or disassocate TIPC name(s) with a socket + * @sock: socket structure + * @uaddr: socket address describing name(s) and desired operation + * @uaddr_len: size of socket address data structure + * + * Name and name sequence binding is indicated using a positive scope value; + * a negative scope value unbinds the specified name. Specifying no name + * (i.e. a socket address length of 0) unbinds all names from the socket. + * + * Returns 0 on success, errno otherwise + * + * NOTE: This routine doesn't need to take the socket lock since it doesn't + * access any non-constant socket information. 
+ */ +static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, + int uaddr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; + struct tipc_sock *tsk = tipc_sk(sk); + int res = -EINVAL; + + lock_sock(sk); + if (unlikely(!uaddr_len)) { + res = tipc_sk_withdraw(tsk, 0, NULL); + goto exit; + } + if (tsk->group) { + res = -EACCES; + goto exit; + } + if (uaddr_len < sizeof(struct sockaddr_tipc)) { + res = -EINVAL; + goto exit; + } + if (addr->family != AF_TIPC) { + res = -EAFNOSUPPORT; + goto exit; + } + + if (addr->addrtype == TIPC_ADDR_NAME) + addr->addr.nameseq.upper = addr->addr.nameseq.lower; + else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { + res = -EAFNOSUPPORT; + goto exit; + } + + if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && + (addr->addr.nameseq.type != TIPC_TOP_SRV) && + (addr->addr.nameseq.type != TIPC_CFG_SRV)) { + res = -EACCES; + goto exit; + } + + res = (addr->scope >= 0) ? + tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : + tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); +exit: + release_sock(sk); + return res; +} + +/** + * tipc_getname - get port ID of socket or peer socket + * @sock: socket structure + * @uaddr: area for returned socket address + * @uaddr_len: area for returned length of socket address + * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID + * + * Returns 0 on success, errno otherwise + * + * NOTE: This routine doesn't need to take the socket lock since it only + * accesses socket information that is unchanging (or which changes in + * a completely predictable manner). 
+ */ +static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, + int peer) +{ + struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + + memset(addr, 0, sizeof(*addr)); + if (peer) { + if ((!tipc_sk_connected(sk)) && + ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) + return -ENOTCONN; + addr->addr.id.ref = tsk_peer_port(tsk); + addr->addr.id.node = tsk_peer_node(tsk); + } else { + addr->addr.id.ref = tsk->portid; + addr->addr.id.node = tipc_own_addr(sock_net(sk)); + } + + addr->addrtype = TIPC_ADDR_ID; + addr->family = AF_TIPC; + addr->scope = 0; + addr->addr.name.domain = 0; + + return sizeof(*addr); +} + +/** + * tipc_poll - read and possibly block on pollmask + * @file: file structure associated with the socket + * @sock: socket for which to calculate the poll bits + * @wait: ??? + * + * Returns pollmask value + * + * COMMENTARY: + * It appears that the usual socket locking mechanisms are not useful here + * since the pollmask info is potentially out-of-date the moment this routine + * exits. TCP and other protocols seem to rely on higher level poll routines + * to handle any preventable race conditions, so TIPC will do the same ... + * + * IMPORTANT: The fact that a read or write operation is indicated does NOT + * imply that the operation will succeed, merely that it should be performed + * and will not block. 
+ */ +static __poll_t tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + __poll_t revents = 0; + + sock_poll_wait(file, sock, wait); + + if (sk->sk_shutdown & RCV_SHUTDOWN) + revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; + if (sk->sk_shutdown == SHUTDOWN_MASK) + revents |= EPOLLHUP; + + switch (sk->sk_state) { + case TIPC_ESTABLISHED: + if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) + revents |= EPOLLOUT; + /* fall thru' */ + case TIPC_LISTEN: + case TIPC_CONNECTING: + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + revents |= EPOLLIN | EPOLLRDNORM; + break; + case TIPC_OPEN: + if (tsk->group_is_open && !tsk->cong_link_cnt) + revents |= EPOLLOUT; + if (!tipc_sk_type_connectionless(sk)) + break; + if (skb_queue_empty_lockless(&sk->sk_receive_queue)) + break; + revents |= EPOLLIN | EPOLLRDNORM; + break; + case TIPC_DISCONNECTING: + revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP; + break; + } + return revents; +} + +/** + * tipc_sendmcast - send multicast message + * @sock: socket structure + * @seq: destination address + * @msg: message to send + * @dlen: length of data to send + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, + struct msghdr *msg, size_t dlen, long timeout) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + int mtu = tipc_bcast_get_mtu(net); + struct tipc_mc_method *method = &tsk->mc_method; + struct sk_buff_head pkts; + struct tipc_nlist dsts; + int rc; + + if (tsk->group) + return -EACCES; + + /* Block or return if any destination link is congested */ + rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* 
Lookup destination nodes */ + tipc_nlist_init(&dsts, tipc_own_addr(net)); + tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, + seq->upper, &dsts); + if (!dsts.local && !dsts.remote) + return -EHOSTUNREACH; + + /* Build message header */ + msg_set_type(hdr, TIPC_MCAST_MSG); + msg_set_hdr_sz(hdr, MCAST_H_SIZE); + msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_nametype(hdr, seq->type); + msg_set_namelower(hdr, seq->lower); + msg_set_nameupper(hdr, seq->upper); + + /* Build message as chain of buffers */ + __skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts); + + /* Send message if build was successful */ + if (unlikely(rc == dlen)) + rc = tipc_mcast_xmit(net, &pkts, method, &dsts, + &tsk->cong_link_cnt); + + tipc_nlist_purge(&dsts); + + return rc ? rc : dlen; +} + +/** + * tipc_send_group_msg - send a message to a member in the group + * @net: network namespace + * @m: message to send + * @mb: group member + * @dnode: destination node + * @dport: destination port + * @dlen: total length of message data + */ +static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, + struct msghdr *m, struct tipc_member *mb, + u32 dnode, u32 dport, int dlen) +{ + u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); + struct tipc_mc_method *method = &tsk->mc_method; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct sk_buff_head pkts; + int mtu, rc; + + /* Complete message header */ + msg_set_type(hdr, TIPC_GRP_UCAST_MSG); + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, dport); + msg_set_destnode(hdr, dnode); + msg_set_grp_bc_seqno(hdr, bc_snd_nxt); + + /* Build message as chain of buffers */ + __skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_node_xmit(net, 
&pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tipc_dest_push(&tsk->cong_links, dnode, 0); + tsk->cong_link_cnt++; + } + + /* Update send window */ + tipc_group_update_member(mb, blks); + + /* A broadcast sent within next EXPIRE period must follow same path */ + method->rcast = true; + method->mandatory = true; + return dlen; +} + +/** + * tipc_send_group_unicast - send message to a member in the group + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + struct tipc_member *mb = NULL; + u32 node, port; + int rc; + + node = dest->addr.id.node; + port = dest->addr.id.ref; + if (!port && !node) + return -EHOSTUNREACH; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(&tsk->cong_links, node, 0) && + tsk->group && + !tipc_group_cong(tsk->group, node, port, blks, + &mb)); + if (unlikely(rc)) + return rc; + + if (unlikely(!mb)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen); + + return rc ? 
rc : dlen; +} + +/** + * tipc_send_group_anycast - send message to any member with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct list_head *cong_links = &tsk->cong_links; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_member *first = NULL; + struct tipc_member *mbr = NULL; + struct net *net = sock_net(sk); + u32 node, port, exclude; + struct list_head dsts; + u32 type, inst, scope; + int lookups = 0; + int dstcnt, rc; + bool cong; + + INIT_LIST_HEAD(&dsts); + + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); + + while (++lookups < 4) { + exclude = tipc_group_exclude(tsk->group); + + first = NULL; + + /* Look for a non-congested destination member, if any */ + while (1) { + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, false)) + return -EHOSTUNREACH; + tipc_dest_pop(&dsts, &node, &port); + cong = tipc_group_cong(tsk->group, node, port, blks, + &mbr); + if (!cong) + break; + if (mbr == first) + break; + if (!first) + first = mbr; + } + + /* Start over if destination was not in member list */ + if (unlikely(!mbr)) + continue; + + if (likely(!cong && !tipc_dest_find(cong_links, node, 0))) + break; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(cong_links, node, 0) && + tsk->group && + !tipc_group_cong(tsk->group, node, port, + blks, &mbr)); + if (unlikely(rc)) + return rc; + + /* 
Send, unless destination disappeared while waiting */ + if (likely(mbr)) + break; + } + + if (unlikely(lookups >= 4)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen); + + return rc ? rc : dlen; +} + +/** + * tipc_send_group_bcast - send message to all members in communication group + * @sk: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_nlist *dsts; + struct tipc_mc_method *method = &tsk->mc_method; + bool ack = method->mandatory && method->rcast; + int blks = tsk_blocks(MCAST_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + int mtu = tipc_bcast_get_mtu(net); + struct sk_buff_head pkts; + int rc = -EHOSTUNREACH; + + /* Block or return if any destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tsk->cong_link_cnt && tsk->group && + !tipc_group_bc_cong(tsk->group, blks)); + if (unlikely(rc)) + return rc; + + dsts = tipc_group_dests(tsk->group); + if (!dsts->local && !dsts->remote) + return -EHOSTUNREACH; + + /* Complete message header */ + if (dest) { + msg_set_type(hdr, TIPC_GRP_MCAST_MSG); + msg_set_nameinst(hdr, dest->addr.name.name.instance); + } else { + msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + msg_set_nameinst(hdr, 0); + } + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(tsk->group)); + + /* Avoid getting stuck with repeated forced replicasts */ + msg_set_grp_bc_ack_req(hdr, ack); + + /* Build 
message as chain of buffers */ + __skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Update broadcast sequence number and send windows */ + tipc_group_update_bc_members(tsk->group, blks, ack); + + /* Broadcast link is now free to choose method for next broadcast */ + method->mandatory = false; + method->expires = jiffies; + + return dlen; +} + +/** + * tipc_send_group_mcast - send message to all members with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + u32 type, inst, scope, exclude; + struct list_head dsts; + u32 dstcnt; + + INIT_LIST_HEAD(&dsts); + + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); + exclude = tipc_group_exclude(grp); + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) + return -EHOSTUNREACH; + + if (dstcnt == 1) { + tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + return tipc_send_group_unicast(sock, m, dlen, timeout); + } + + tipc_dest_list_purge(&dsts); + return tipc_send_group_bcast(sock, m, dlen, timeout); +} + +/** + * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets + * @arrvq: queue with arriving messages, to be cloned after 
destination lookup + * @inputq: queue with cloned messages, delivered to socket after dest lookup + * + * Multi-threaded: parallel calls with reference to same queues may occur + */ +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq) +{ + u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; + struct sk_buff *skb, *_skb; + u32 portid, onode; + struct sk_buff_head tmpq; + struct list_head dports; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact; + + __skb_queue_head_init(&tmpq); + INIT_LIST_HEAD(&dports); + + skb = tipc_skb_peek(arrvq, &inputq->lock); + for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + __skb_dequeue(arrvq); + __skb_queue_tail(inputq, skb); + } + kfree_skb(skb); + spin_unlock_bh(&inputq->lock); + continue; + } + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); + } + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ + while (tipc_dest_pop(&dports, NULL, &portid)) { + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); + if (_skb) { + msg_set_destport(buf_msg(_skb), portid); + __skb_queue_tail(&tmpq, _skb); + continue; + } + pr_warn("Failed to clone mcast rcv buffer\n"); + } + /* Append to inputq if not already done by other thread */ + 
spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + skb_queue_splice_tail_init(&tmpq, inputq); + /* Decrease the skb's refcnt as increasing in the + * function tipc_skb_peek + */ + kfree_skb(__skb_dequeue(arrvq)); + } + spin_unlock_bh(&inputq->lock); + __skb_queue_purge(&tmpq); + kfree_skb(skb); + } + tipc_sk_rcv(net, inputq); +} + +/** + * tipc_sk_conn_proto_rcv - receive a connection mng protocol message + * @tsk: receiving socket + * @skb: pointer to message buffer. + */ +static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u32 onode = tsk_own_node(tsk); + struct sock *sk = &tsk->sk; + int mtyp = msg_type(hdr); + bool conn_cong; + + /* Ignore if connection cannot be validated: */ + if (!tsk_peer_msg(tsk, hdr)) + goto exit; + + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + + /* State change is ignored if socket already awake, + * - convert msg to abort msg and add to inqueue + */ + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_CONN_MSG); + msg_set_size(hdr, BASIC_H_SIZE); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + __skb_queue_tail(inputq, skb); + return; + } + + tsk->probe_unacked = false; + + if (mtyp == CONN_PROBE) { + msg_set_type(hdr, CONN_PROBE_REPLY); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); + return; + } else if (mtyp == CONN_ACK) { + conn_cong = tsk_conn_cong(tsk); + tsk->snt_unacked -= msg_conn_ack(hdr); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + tsk->snd_win = msg_adv_win(hdr); + if (conn_cong) + sk->sk_write_space(sk); + } else if (mtyp != CONN_PROBE_REPLY) { + pr_warn("Received unknown CONN_PROTO msg\n"); + } +exit: + kfree_skb(skb); +} + +/** + * tipc_sendmsg - send message in connectionless manner + * @sock: 
socket structure + * @m: message to send + * @dsz: amount of user data to be sent + * + * Message must have an destination specified explicitly. + * Used for SOCK_RDM and SOCK_DGRAM messages, + * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. + * (Note: 'SYN+' is prohibited on SOCK_STREAM.) + * + * Returns the number of bytes sent on success, or errno otherwise + */ +static int tipc_sendmsg(struct socket *sock, + struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_sendmsg(sock, m, dsz); + release_sock(sk); + + return ret; +} + +static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) +{ + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + struct list_head *clinks = &tsk->cong_links; + bool syn = !tipc_sk_type_connectionless(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_name_seq *seq; + struct sk_buff_head pkts; + u32 dport, dnode = 0; + u32 type, inst; + int mtu, rc; + + if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) + return -EMSGSIZE; + + if (likely(dest)) { + if (unlikely(m->msg_namelen < sizeof(*dest))) + return -EINVAL; + if (unlikely(dest->family != AF_TIPC)) + return -EINVAL; + } + + if (grp) { + if (!dest) + return tipc_send_group_bcast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_NAME) + return tipc_send_group_anycast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_ID) + return tipc_send_group_unicast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_MCAST) + return tipc_send_group_mcast(sock, m, dlen, timeout); + return -EINVAL; + } + + if (unlikely(!dest)) { + dest = &tsk->peer; + if (!syn && dest->family != AF_TIPC) + return -EDESTADDRREQ; + } + + if (unlikely(syn)) { + if (sk->sk_state == 
TIPC_LISTEN) + return -EPIPE; + if (sk->sk_state != TIPC_OPEN) + return -EISCONN; + if (tsk->published) + return -EOPNOTSUPP; + if (dest->addrtype == TIPC_ADDR_NAME) { /* remember the name that was connected to */ + tsk->conn_type = dest->addr.name.name.type; + tsk->conn_instance = dest->addr.name.name.instance; + } + } + + seq = &dest->addr.nameseq; + if (dest->addrtype == TIPC_ADDR_MCAST) + return tipc_sendmcast(sock, seq, m, dlen, timeout); + + if (dest->addrtype == TIPC_ADDR_NAME) { + type = dest->addr.name.name.type; + inst = dest->addr.name.name.instance; + dnode = dest->addr.name.domain; + msg_set_type(hdr, TIPC_NAMED_MSG); + msg_set_hdr_sz(hdr, NAMED_H_SIZE); + msg_set_nametype(hdr, type); + msg_set_nameinst(hdr, inst); + msg_set_lookup_scope(hdr, tipc_node2scope(dnode)); + dport = tipc_nametbl_translate(net, type, inst, &dnode); + msg_set_destnode(hdr, dnode); + msg_set_destport(hdr, dport); + if (unlikely(!dport && !dnode)) /* lookup produced neither a port nor a node */ + return -EHOSTUNREACH; + } else if (dest->addrtype == TIPC_ADDR_ID) { + dnode = dest->addr.id.node; + msg_set_type(hdr, TIPC_DIRECT_MSG); + msg_set_lookup_scope(hdr, 0); + msg_set_destnode(hdr, dnode); + msg_set_destport(hdr, dest->addr.id.ref); + msg_set_hdr_sz(hdr, BASIC_H_SIZE); + } else { + return -EINVAL; + } + + /* Block or return if destination link is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(clinks, dnode, 0)); + if (unlikely(rc)) + return rc; + + __skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { /* reported as success; the node is recorded in cong_links so that later sends wait above */ + tipc_dest_push(clinks, dnode, 0); + tsk->cong_link_cnt++; + rc = 0; + } + + if (unlikely(syn && !rc)) + tipc_set_sk_state(sk, TIPC_CONNECTING); + + return rc ? 
rc : dlen; +} + +/** + * tipc_sendstream - send stream-oriented data + * @sock: socket structure + * @m: data to send + * @dsz: total length of data to be transmitted + * + * Used for SOCK_STREAM data; tipc_send_packet() also ends up here. + * Takes the socket lock around the call to __tipc_sendstream(). + * + * Returns the number of bytes sent on success (or partial success), + * or errno if no data sent + */ +static int tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz) +{ + struct sock *sk = sock->sk; + int ret; + + lock_sock(sk); + ret = __tipc_sendstream(sock, m, dsz); + release_sock(sk); + + return ret; +} +/* __tipc_sendstream - worker for tipc_sendstream(); sends @dlen bytes in + * chunks of at most TIPC_MAX_USER_MSG_SIZE, waiting between chunks while the + * link or the connection is congested or the socket is not connected + */ +static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + struct sk_buff_head pkts; + u32 dnode = tsk_peer_node(tsk); + int send, sent = 0; + int rc = 0; + + __skb_queue_head_init(&pkts); + + if (unlikely(dlen > INT_MAX)) + return -EMSGSIZE; + + /* Handle implicit connection setup: an address in @m means the whole + * send is delegated to __tipc_sendmsg() + */ + if (unlikely(dest)) { + rc = __tipc_sendmsg(sock, m, dlen); + if (dlen && dlen == rc) { + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); + tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } + return rc; + } + + do { + rc = tipc_wait_for_cond(sock, &timeout, + (!tsk->cong_link_cnt && + !tsk_conn_cong(tsk) && + tipc_sk_connected(sk))); + if (unlikely(rc)) + break; + + send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE); + rc = tipc_msg_build(hdr, m, sent, send, tsk->max_pkt, &pkts); + if (unlikely(rc != send)) + break; + + rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { /* counted as sent; the flag makes the next iteration wait */ + tsk->cong_link_cnt = 1; + rc = 0; + } + if (likely(!rc)) { + tsk->snt_unacked += tsk_inc(tsk, send + MIN_H_SIZE); + sent += send; + } + } while (sent < dlen && !rc); + + return sent ? 
sent : rc; +} + +/** + * tipc_send_packet - send a connection-oriented message + * @sock: socket structure + * @m: message to send + * @dsz: length of data to be transmitted + * + * Used for SOCK_SEQPACKET messages. Messages longer than + * TIPC_MAX_USER_MSG_SIZE are refused with -EMSGSIZE; everything else is + * handed to tipc_sendstream(). + * + * Returns the number of bytes sent on success, or errno otherwise + */ +static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) +{ + if (dsz > TIPC_MAX_USER_MSG_SIZE) + return -EMSGSIZE; + + return tipc_sendstream(sock, m, dsz); +} + +/* tipc_sk_finish_conn - complete the setup of a connection: point the + * socket's header template (tsk->phdr) at the peer, arm sk_timer with + * CONN_PROBING_INTV, enter TIPC_ESTABLISHED, register the connection with + * the peer node and select the flow control mode + */ +static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, + u32 peer_node) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_msg *msg = &tsk->phdr; + + msg_set_destnode(msg, peer_node); + msg_set_destport(msg, peer_port); + msg_set_type(msg, TIPC_CONN_MSG); + msg_set_lookup_scope(msg, 0); + msg_set_hdr_sz(msg, SHORT_H_SIZE); + + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); + tipc_set_sk_state(sk, TIPC_ESTABLISHED); + tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); + tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); + tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) /* windows are left untouched in this case */ + return; + + /* Fall back to message based flow control */ + tsk->rcv_win = FLOWCTL_MSG_WIN; + tsk->snd_win = FLOWCTL_MSG_WIN; +} + +/** + * tipc_sk_set_orig_addr - capture sender's address for received message + * @m: descriptor for message info + * @skb: received message buffer + * + * Note: Address is not captured if not requested by receiver, i.e. when + * no name buffer is supplied in @m->msg_name. For a message sent in a group, + * a second address describing the sending member is filled in as well and + * @m->msg_namelen is set to cover both. 
+ */ +static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) +{ + DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name); + struct tipc_msg *hdr = buf_msg(skb); + + if (!srcaddr) + return; + + srcaddr->sock.family = AF_TIPC; + srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.scope = 0; + srcaddr->sock.addr.id.ref = msg_origport(hdr); + srcaddr->sock.addr.id.node = msg_orignode(hdr); + srcaddr->sock.addr.name.domain = 0; + m->msg_namelen = sizeof(struct sockaddr_tipc); + + if (!msg_in_group(hdr)) + return; + + /* Group message users may also want to know sending member's id */ + srcaddr->member.family = AF_TIPC; + srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.scope = 0; + srcaddr->member.addr.name.name.type = msg_nametype(hdr); + srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; + srcaddr->member.addr.name.domain = 0; + m->msg_namelen = sizeof(*srcaddr); +} + +/** + * tipc_sk_anc_data_recv - optionally capture ancillary data for received message + * @m: descriptor for message info + * @skb: received message buffer + * @tsk: TIPC port associated with message + * + * Note: Ancillary data is not captured if not requested by receiver, + * i.e. when @m->msg_controllen is zero. + * + * Returns 0 if successful, otherwise errno + */ +static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, + struct tipc_sock *tsk) +{ + struct tipc_msg *msg; + u32 anc_data[3]; + u32 err; + u32 dest_type; + int has_name; + int res; + + if (likely(m->msg_controllen == 0)) + return 0; + msg = buf_msg(skb); + + /* Optionally capture errored message object(s) */ + err = msg ? 
msg_errcode(msg) : 0; + if (unlikely(err)) { + anc_data[0] = err; + anc_data[1] = msg_data_sz(msg); + res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data); /* 8 bytes: error code and data size */ + if (res) + return res; + if (anc_data[1]) { + if (skb_linearize(skb)) + return -ENOMEM; + msg = buf_msg(skb); /* re-read the header pointer after linearizing */ + res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], + msg_data(msg)); + if (res) + return res; + } + } + + /* Optionally capture message destination object */ + dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG; + switch (dest_type) { + case TIPC_NAMED_MSG: + has_name = 1; + anc_data[0] = msg_nametype(msg); + anc_data[1] = msg_namelower(msg); + anc_data[2] = msg_namelower(msg); /* lower and upper bound carry the same value here */ + break; + case TIPC_MCAST_MSG: + has_name = 1; + anc_data[0] = msg_nametype(msg); + anc_data[1] = msg_namelower(msg); + anc_data[2] = msg_nameupper(msg); + break; + case TIPC_CONN_MSG: + has_name = (tsk->conn_type != 0); + anc_data[0] = tsk->conn_type; + anc_data[1] = tsk->conn_instance; + anc_data[2] = tsk->conn_instance; + break; + default: + has_name = 0; + } + if (has_name) { + res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data); /* 12 bytes: all three anc_data words */ + if (res) + return res; + } + + return 0; +} +/* tipc_sk_send_ack - send a CONN_ACK carrying rcv_unacked to the peer of a + * connected socket and reset the counter; does nothing if the socket is not + * connected or the message cannot be created + */ +static void tipc_sk_send_ack(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct sk_buff *skb = NULL; + struct tipc_msg *msg; + u32 peer_port = tsk_peer_port(tsk); + u32 dnode = tsk_peer_node(tsk); + + if (!tipc_sk_connected(sk)) + return; + skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, + dnode, tsk_own_node(tsk), peer_port, + tsk->portid, TIPC_OK); + if (!skb) + return; + msg = buf_msg(skb); + msg_set_conn_ack(msg, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + + /* Adjust to and advertise the correct window limit */ + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) { + tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); + msg_set_adv_win(msg, tsk->rcv_win); + } + tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); +} +/* tipc_wait_for_rcvmsg - wait until the receive queue is non-empty, the + * time in *timeop is used up, a signal is pending or a socket error is + * reported. The socket lock is released while sleeping and the remaining + * time is written back to *timeop. Returns 0 when a message is queued, + * otherwise an errno. + */ +static int tipc_wait_for_rcvmsg(struct socket *sock, long 
*timeop) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + long timeo = *timeop; + int err = sock_error(sk); + + if (err) + return err; + + for (;;) { + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = -ENOTCONN; + break; + } + release_sock(sk); /* sleep without holding the socket lock */ + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EAGAIN; + if (!timeo) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + + err = sock_error(sk); + if (err) + break; + } + finish_wait(sk_sleep(sk), &wait); + *timeop = timeo; + return err; +} + +/** + * tipc_recvmsg - receive packet-oriented message + * @sock: socket structure + * @m: descriptor for message info + * @buflen: length of user buffer area + * @flags: receive flags + * + * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages. + * If the complete message doesn't fit in user area, truncate it, unless + * MSG_EOR is set in @flags: then the unread remainder is kept for the + * next call. 
+ * + * Returns size of returned message data, errno otherwise + */ +static int tipc_recvmsg(struct socket *sock, struct msghdr *m, + size_t buflen, int flags) +{ + struct sock *sk = sock->sk; + bool connected = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); + int rc, err, hlen, dlen, copy; + struct tipc_skb_cb *skb_cb; + struct sk_buff_head xmitq; + struct tipc_msg *hdr; + struct sk_buff *skb; + bool grp_evt; + long timeout; + + /* Catch invalid receive requests */ + if (unlikely(!buflen)) + return -EINVAL; + + lock_sock(sk); + if (unlikely(connected && sk->sk_state == TIPC_OPEN)) { + rc = -ENOTCONN; + goto exit; + } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + /* Step rcv queue to first msg with data or error; wait if necessary */ + do { + rc = tipc_wait_for_rcvmsg(sock, &timeout); + if (unlikely(rc)) + goto exit; + skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + hlen = msg_hdr_sz(hdr); + err = msg_errcode(hdr); + grp_evt = msg_is_grp_evt(hdr); + if (likely(dlen || err)) + break; + tsk_advance_rx_queue(sk); + } while (1); + + /* Collect msg meta data, including error code and rejected data */ + tipc_sk_set_orig_addr(m, skb); + rc = tipc_sk_anc_data_recv(m, skb, tsk); + if (unlikely(rc)) + goto exit; + hdr = buf_msg(skb); /* re-read: tipc_sk_anc_data_recv() may have linearized the skb */ + + /* Capture data if non-error msg, otherwise just set return value */ + if (likely(!err)) { + int offset = skb_cb->bytes_read; + + copy = min_t(int, dlen - offset, buflen); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + goto exit; + if (unlikely(offset + copy < dlen)) { /* user buffer too small for the rest of the message */ + if (flags & MSG_EOR) { + if (!(flags & MSG_PEEK)) + skb_cb->bytes_read = offset + copy; + } else { + m->msg_flags |= MSG_TRUNC; + skb_cb->bytes_read = 0; + } + } else { + if (flags & MSG_EOR) + m->msg_flags |= MSG_EOR; + skb_cb->bytes_read = 0; + } + } else { + copy = 0; + rc = 0; + if (err != TIPC_CONN_SHUTDOWN && connected && 
!m->msg_control) { + rc = -ECONNRESET; + goto exit; + } + } + + /* Mark message as group event if applicable */ + if (unlikely(grp_evt)) { + if (msg_grp_evt(hdr) == TIPC_WITHDRAWN) + m->msg_flags |= MSG_EOR; + m->msg_flags |= MSG_OOB; + copy = 0; + } + + /* Capture of data or error code/rejected data was successful */ + if (unlikely(flags & MSG_PEEK)) + goto exit; + + /* Send group flow control advertisement when applicable */ + if (tsk->group && msg_in_group(hdr) && !grp_evt) { + __skb_queue_head_init(&xmitq); + tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), + msg_orignode(hdr), msg_origport(hdr), + &xmitq); + tipc_node_distr_xmit(sock_net(sk), &xmitq); + } + + if (skb_cb->bytes_read) /* partially read message stays at the head of the queue */ + goto exit; + + tsk_advance_rx_queue(sk); + + if (likely(!connected)) + goto exit; + + /* Send connection flow control advertisement when applicable */ + tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) + tipc_sk_send_ack(tsk); +exit: + release_sock(sk); + return rc ? rc : copy; +} + +/** + * tipc_recvstream - receive stream-oriented data + * @sock: socket structure + * @m: descriptor for message info + * @buflen: total size of user buffer area + * @flags: receive flags + * + * Used for SOCK_STREAM messages only. If not enough data is available + * will optionally wait for more; never truncates data. 
+ * + * Returns size of returned message data, errno otherwise. Once some data + * has been copied, that byte count is returned even if an error was met + * afterwards. + */ +static int tipc_recvstream(struct socket *sock, struct msghdr *m, + size_t buflen, int flags) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct sk_buff *skb; + struct tipc_msg *hdr; + struct tipc_skb_cb *skb_cb; + bool peek = flags & MSG_PEEK; + int offset, required, copy, copied = 0; + int hlen, dlen, err, rc; + long timeout; + + /* Catch invalid receive attempts */ + if (unlikely(!buflen)) + return -EINVAL; + + lock_sock(sk); + + if (unlikely(sk->sk_state == TIPC_OPEN)) { + rc = -ENOTCONN; + goto exit; + } + required = sock_rcvlowat(sk, flags & MSG_WAITALL, buflen); + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + /* Look at first msg in receive queue; wait if necessary */ + rc = tipc_wait_for_rcvmsg(sock, &timeout); + if (unlikely(rc)) + break; + skb = skb_peek(&sk->sk_receive_queue); + skb_cb = TIPC_SKB_CB(skb); + hdr = buf_msg(skb); + dlen = msg_data_sz(hdr); + hlen = msg_hdr_sz(hdr); + err = msg_errcode(hdr); + + /* Discard any empty non-errored (SYN-) message */ + if (unlikely(!dlen && !err)) { + tsk_advance_rx_queue(sk); + continue; + } + + /* Collect msg meta data, incl. 
error code and rejected data */ + if (!copied) { + tipc_sk_set_orig_addr(m, skb); + rc = tipc_sk_anc_data_recv(m, skb, tsk); + if (rc) + break; + hdr = buf_msg(skb); /* re-read: tipc_sk_anc_data_recv() may have linearized the skb */ + } + + /* Copy data if msg ok, otherwise return error/partial data */ + if (likely(!err)) { + offset = skb_cb->bytes_read; + copy = min_t(int, dlen - offset, buflen - copied); + rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); + if (unlikely(rc)) + break; + copied += copy; + offset += copy; + if (unlikely(offset < dlen)) { /* user buffer is full before the message is used up */ + if (!peek) + skb_cb->bytes_read = offset; + break; + } + } else { + rc = 0; + if ((err != TIPC_CONN_SHUTDOWN) && !m->msg_control) + rc = -ECONNRESET; + if (copied || rc) + break; + } + + if (unlikely(peek)) + break; + + tsk_advance_rx_queue(sk); + + /* Send connection flow control advertisement when applicable */ + tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); + if (unlikely(tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)) + tipc_sk_send_ack(tsk); + + /* Exit if all requested data or FIN/error received */ + if (copied == buflen || err) + break; + + } while (!skb_queue_empty(&sk->sk_receive_queue) || copied < required); +exit: + release_sock(sk); + return copied ? 
copied : rc; +} + +/** + * tipc_write_space - wake up thread if port congestion is released + * @sk: socket + * + * Wakes sleepers on the socket wait queue with the EPOLLOUT family of events. + */ +static void tipc_write_space(struct sock *sk) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); + rcu_read_unlock(); +} + +/** + * tipc_data_ready - wake up threads to indicate messages have been received + * @sk: socket + * + * Wakes sleepers on the socket wait queue with the EPOLLIN family of events. + */ +static void tipc_data_ready(struct sock *sk) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (skwq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | + EPOLLRDNORM | EPOLLRDBAND); + rcu_read_unlock(); +} +/* tipc_sock_destruct - purge whatever is left on the socket receive queue */ +static void tipc_sock_destruct(struct sock *sk) +{ + __skb_queue_purge(&sk->sk_receive_queue); +} +/* tipc_sk_proto_rcv - take the first buffer off @inputq and dispatch on its + * message user. CONN_MANAGER messages are handed over to + * tipc_sk_conn_proto_rcv(); in all other cases the buffer is freed here + * after handling. + */ +static void tipc_sk_proto_rcv(struct sock *sk, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_group *grp = tsk->group; + bool wakeup = false; + + switch (msg_user(hdr)) { + case CONN_MANAGER: + tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq); + return; + case SOCK_WAKEUP: + tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); + /* coupled with smp_rmb() in tipc_wait_for_cond() */ + smp_wmb(); + tsk->cong_link_cnt--; + wakeup = true; + break; + case GROUP_PROTOCOL: + tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); + break; + case TOP_SRV: + tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, + hdr, inputq, xmitq); + break; + default: + break; + } + + if (wakeup) + sk->sk_write_space(sk); + + kfree_skb(skb); +} + +/** + * tipc_sk_filter_connect - Handle incoming message for a connection-based socket + * @tsk: TIPC socket + * @skb: pointer to message buffer. 
The buffer is only inspected here + * + * May change the socket state and set sk_err as a side effect. + * + * Returns true if the caller may queue the message, false if the caller + * should hand it to its reject path instead + */ +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_msg *hdr = buf_msg(skb); + u32 pport = msg_origport(hdr); + u32 pnode = msg_orignode(hdr); + + if (unlikely(msg_mcast(hdr))) + return false; + + switch (sk->sk_state) { + case TIPC_CONNECTING: + /* Accept only ACK or NACK message */ + if (unlikely(!msg_connected(hdr))) { + if (pport != tsk_peer_port(tsk) || + pnode != tsk_peer_node(tsk)) + return false; + + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } + + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } + + if (unlikely(!msg_isdata(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = EINVAL; + sk->sk_state_change(sk); + return true; + } + + tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr)); + msg_set_importance(&tsk->phdr, msg_importance(hdr)); + + /* If 'ACK+' message, add to socket receive queue */ + if (msg_data_sz(hdr)) + return true; + + /* If empty 'ACK-' message, wake up sleeping connect() */ + sk->sk_state_change(sk); + + /* 'ACK-' message is neither accepted nor rejected: */ + msg_set_dest_droppable(hdr, 1); + return false; + + case TIPC_OPEN: + case TIPC_DISCONNECTING: + break; + case TIPC_LISTEN: + /* Accept only SYN message */ + if (!msg_connected(hdr) && !(msg_errcode(hdr))) + return true; + break; + case TIPC_ESTABLISHED: + /* Accept only connection-based messages sent by peer */ + if (unlikely(!tsk_peer_msg(tsk, hdr))) + return false; + + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + /* Let timer expire on its own */ + tipc_node_remove_conn(net, tsk_peer_node(tsk), + tsk->portid); + sk->sk_state_change(sk); + } 
+ return true; + default: + pr_err("Unknown sk_state %u\n", sk->sk_state); + } + + return false; +} + +/** + * rcvbuf_limit - get proper overload limit of socket receive queue + * @sk: socket + * @skb: message + * + * For connection oriented messages, irrespective of importance, + * default queue limit is 2 MB. + * + * For connectionless messages, queue limits are based on message + * importance as follows: + * + * TIPC_LOW_IMPORTANCE (2 MB) + * TIPC_MEDIUM_IMPORTANCE (4 MB) + * TIPC_HIGH_IMPORTANCE (8 MB) + * TIPC_CRITICAL_IMPORTANCE (16 MB) + * + * NOTE(review): the MB figures presumably assume the default sk_rcvbuf; the + * code returns sk->sk_rcvbuf (shifted left by the importance level for + * connectionless messages), or FLOWCTL_MSG_LIM for a connection whose peer + * lacks TIPC_BLOCK_FLOWCTL. Messages sent in a group always get sk->sk_rcvbuf. + * + * Returns overload limit according to corresponding message importance + */ +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) +{ + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + + if (unlikely(msg_in_group(hdr))) + return sk->sk_rcvbuf; + + if (unlikely(!msg_connected(hdr))) + return sk->sk_rcvbuf << msg_importance(hdr); + + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return sk->sk_rcvbuf; + + return FLOWCTL_MSG_LIM; +} + +/** + * tipc_sk_filter_rcv - validate incoming message + * @sk: socket + * @skb: pointer to message. + * @xmitq: queue that collects rejected messages and protocol responses + * + * Enqueues message on receive queue if acceptable; optionally handles + * disconnect indication for a connected socket. 
+ * + * Called with socket lock already taken + * + */ +static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + bool sk_conn = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = buf_msg(skb); + struct net *net = sock_net(sk); + struct sk_buff_head inputq; + int limit, err = TIPC_OK; + + TIPC_SKB_CB(skb)->bytes_read = 0; + __skb_queue_head_init(&inputq); + __skb_queue_tail(&inputq, skb); + + if (unlikely(!msg_isdata(hdr))) + tipc_sk_proto_rcv(sk, &inputq, xmitq); + + if (unlikely(grp)) + tipc_group_filter_msg(grp, &inputq, xmitq); + + /* Validate and add to receive buffer if there is space */ + while ((skb = __skb_dequeue(&inputq))) { + hdr = buf_msg(skb); + limit = rcvbuf_limit(sk, skb); + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + (!sk_conn && msg_connected(hdr)) || + (!grp && msg_in_group(hdr))) + err = TIPC_ERR_NO_PORT; + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) { + atomic_inc(&sk->sk_drops); + err = TIPC_ERR_OVERLOAD; + } + + if (unlikely(err)) { + tipc_skb_reject(net, err, skb, xmitq); + err = TIPC_OK; /* reset for the next buffer on inputq */ + continue; + } + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk); + } +} + +/** + * tipc_sk_backlog_rcv - handle incoming message from backlog queue + * @sk: socket + * @skb: message + * + * The growth of the receive memory count caused by this message is added + * to dupl_rcvcnt, which tipc_sk_enqueue() adds to its backlog limit. + * + * Caller must hold socket lock + */ +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + unsigned int before = sk_rmem_alloc_get(sk); + struct sk_buff_head xmitq; + unsigned int added; + + __skb_queue_head_init(&xmitq); + + tipc_sk_filter_rcv(sk, skb, &xmitq); + added = sk_rmem_alloc_get(sk) - before; + atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); + + /* Send pending response/rejected messages, if any */ + tipc_node_distr_xmit(sock_net(sk), &xmitq); + return 0; +} + +/** + * tipc_sk_enqueue - extract all buffers with destination 'dport' from + 
* inputq and try adding them to socket or backlog queue + * @inputq: list of incoming buffers with potentially different destinations + * @sk: socket where the buffers should be enqueued + * @dport: port number for the socket + * @xmitq: queue for messages reversed towards the sender on overload; also + * handed on to tipc_sk_filter_rcv() + * + * Caller must hold socket lock (tipc_sk_rcv() takes sk->sk_lock.slock) + */ +static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, + u32 dport, struct sk_buff_head *xmitq) +{ + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); /* 20000 us = 20 ms budget for this call */ + struct sk_buff *skb; + unsigned int lim; + atomic_t *dcnt; + u32 onode; + + while (skb_queue_len(inputq)) { + if (unlikely(time_after_eq(jiffies, time_limit))) + return; + + skb = tipc_skb_dequeue(inputq, dport); + if (unlikely(!skb)) + return; + + /* Add message directly to receive queue if possible */ + if (!sock_owned_by_user(sk)) { + tipc_sk_filter_rcv(sk, skb, xmitq); + continue; + } + + /* Try backlog, compensating for double-counted bytes */ + dcnt = &tipc_sk(sk)->dupl_rcvcnt; + if (!sk->sk_backlog.len) + atomic_set(dcnt, 0); + lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); + if (likely(!sk_add_backlog(sk, skb, lim))) + continue; + + /* Overload => reject message back to sender */ + onode = tipc_own_addr(sock_net(sk)); + atomic_inc(&sk->sk_drops); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) + __skb_queue_tail(xmitq, skb); + break; + } +} + +/** + * tipc_sk_rcv - handle a chain of incoming buffers + * @net: the struct net used for socket lookup and for transmitting replies + * @inputq: buffer list containing the buffers + * Consumes all buffers in list until inputq is empty + * Note: may be called in multiple threads referring to the same queue + */ +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) +{ + struct sk_buff_head xmitq; + u32 dnode, dport = 0; + int err; + struct tipc_sock *tsk; + struct sock *sk; + struct sk_buff *skb; + + __skb_queue_head_init(&xmitq); + while (skb_queue_len(inputq)) { + dport = tipc_skb_peek_port(inputq, dport); + tsk = tipc_sk_lookup(net, dport); + + if (likely(tsk)) { + sk = &tsk->sk; + if 
(likely(spin_trylock_bh(&sk->sk_lock.slock))) { + tipc_sk_enqueue(inputq, sk, dport, &xmitq); + spin_unlock_bh(&sk->sk_lock.slock); + } + /* Send pending response/rejected messages, if any */ + tipc_node_distr_xmit(sock_net(sk), &xmitq); + sock_put(sk); /* drop the reference taken by tipc_sk_lookup() */ + continue; + } + /* No destination socket => dequeue skb if still there */ + skb = tipc_skb_dequeue(inputq, dport); + if (!skb) + return; + + /* Try secondary lookup if unresolved named message */ + err = TIPC_ERR_NO_PORT; + if (tipc_msg_lookup_dest(net, skb, &err)) + goto xmit; + + /* Prepare for message rejection */ + if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + continue; +xmit: + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } +} +/* tipc_wait_for_connect - sleep until the socket has left TIPC_CONNECTING. + * Returns 0 once the state has changed, a pending socket error, -ETIMEDOUT + * when *timeo_p has reached zero, or the sock_intr_errno() value if a signal + * is pending. + */ +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct sock *sk = sock->sk; + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (!*timeo_p) + return -ETIMEDOUT; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + add_wait_queue(sk_sleep(sk), &wait); + done = sk_wait_event(sk, timeo_p, + sk->sk_state != TIPC_CONNECTING, &wait); + remove_wait_queue(sk_sleep(sk), &wait); + } while (!done); + return 0; +} +/* tipc_sockaddr_is_sane - accept only AF_TIPC addresses whose type is + * TIPC_SERVICE_ADDR, TIPC_SOCKET_ADDR, or TIPC_SERVICE_RANGE with + * lower <= upper + */ +static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) +{ + if (addr->family != AF_TIPC) + return false; + if (addr->addrtype == TIPC_SERVICE_RANGE) + return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); + return (addr->addrtype == TIPC_SERVICE_ADDR || + addr->addrtype == TIPC_SOCKET_ADDR); +} + +/** + * tipc_connect - establish a connection to another TIPC port + * @sock: socket structure + * @dest: socket address for destination port + * @destlen: size of socket address data structure + * @flags: file-related flags associated with socket + * + * On a connectionless socket this only stores @dest as the default + * destination in tsk->peer. An address with family AF_UNSPEC clears the + * stored destination. With O_NONBLOCK set in @flags the function does not + * wait for the connection to be established. + * + * Returns 0 on success, errno otherwise + */ +static int tipc_connect(struct socket *sock, struct sockaddr *dest, + int destlen, int flags) +{ + struct sock 
*sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; + struct msghdr m = {NULL,}; + long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout; + int previous; + int res = 0; + + if (destlen != sizeof(struct sockaddr_tipc)) + return -EINVAL; + + lock_sock(sk); + + if (tsk->group) { + res = -EINVAL; + goto exit; + } + + if (dst->family == AF_UNSPEC) { /* clears the stored peer address; connection-oriented sockets additionally get -EINVAL */ + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); + if (!tipc_sk_type_connectionless(sk)) + res = -EINVAL; + goto exit; + } + if (!tipc_sockaddr_is_sane(dst)) { + res = -EINVAL; + goto exit; + } + /* DGRAM/RDM connect(), just save the destaddr */ + if (tipc_sk_type_connectionless(sk)) { + memcpy(&tsk->peer, dest, destlen); + goto exit; + } else if (dst->addrtype == TIPC_SERVICE_RANGE) { + res = -EINVAL; + goto exit; + } + + previous = sk->sk_state; + + switch (sk->sk_state) { + case TIPC_OPEN: + /* Send a 'SYN-' to destination */ + m.msg_name = dest; + m.msg_namelen = destlen; + + /* For a non-blocking connect, set MSG_DONTWAIT so that + * __tipc_sendmsg() does not block. + */ + if (!timeout) + m.msg_flags = MSG_DONTWAIT; + + res = __tipc_sendmsg(sock, &m, 0); + if ((res < 0) && (res != -EWOULDBLOCK)) + goto exit; + + /* Just entered TIPC_CONNECTING state; the only + * difference is that return value in non-blocking + * case is EINPROGRESS, rather than EALREADY. 
+ */ + res = -EINPROGRESS; + /* fall thru' */ + case TIPC_CONNECTING: + if (!timeout) { + if (previous == TIPC_CONNECTING) + res = -EALREADY; + goto exit; + } + timeout = msecs_to_jiffies(timeout); /* tsk->conn_timeout is treated as milliseconds */ + /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ + res = tipc_wait_for_connect(sock, &timeout); + break; + case TIPC_ESTABLISHED: + res = -EISCONN; + break; + default: + res = -EINVAL; + } + +exit: + release_sock(sk); + return res; +} + +/** + * tipc_listen - allow socket to listen for incoming connections + * @sock: socket structure + * @len: (unused) + * + * The state change is delegated to tipc_set_sk_state(), whose result is + * returned. + * + * Returns 0 on success, errno otherwise + */ +static int tipc_listen(struct socket *sock, int len) +{ + struct sock *sk = sock->sk; + int res; + + lock_sock(sk); + res = tipc_set_sk_state(sk, TIPC_LISTEN); + release_sock(sk); + + return res; +} +/* tipc_wait_for_accept - wait, with the socket lock released while sleeping, + * until the receive queue of the listening socket is non-empty. Returns 0 + * when a message is queued, -EAGAIN when @timeo is used up, or the + * sock_intr_errno() value if a signal is pending. + */ +static int tipc_wait_for_accept(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int err; + + /* True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. 
+ */ + for (;;) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); + release_sock(sk); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EAGAIN; + if (!timeo) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + } + return err; +} + +/** + * tipc_accept - wait for connection request + * @sock: listening socket + * @new_sock: new socket that is to be connected + * @flags: file-related flags associated with socket + * @kern: passed on to tipc_sk_create() for the new socket + * + * Returns 0 on success, errno otherwise (-EINVAL if @sock is not in + * TIPC_LISTEN state) + */ +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern) +{ + struct sock *new_sk, *sk = sock->sk; + struct sk_buff *buf; + struct tipc_sock *new_tsock; + struct tipc_msg *msg; + long timeo; + int res; + + lock_sock(sk); + + if (sk->sk_state != TIPC_LISTEN) { + res = -EINVAL; + goto exit; + } + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + res = tipc_wait_for_accept(sock, timeo); + if (res) + goto exit; + + buf = skb_peek(&sk->sk_receive_queue); /* the request stays queued on @sock until it is handled below */ + + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); + if (res) + goto exit; + security_sk_clone(sock->sk, new_sock->sk); + + new_sk = new_sock->sk; + new_tsock = tipc_sk(new_sk); + msg = buf_msg(buf); + + /* we lock on new_sk; but lockdep sees the lock on sk */ + lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING); + + /* + * Reject any stray messages received by new socket + * before the socket lock was taken (very, very unlikely) + */ + tsk_rej_rx_queue(new_sk); + + /* Connect new socket to its peer */ + tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg)); + + tsk_set_importance(new_tsock, msg_importance(msg)); + if (msg_named(msg)) { + new_tsock->conn_type = msg_nametype(msg); + new_tsock->conn_instance = msg_nameinst(msg); + } + + /* + * Respond to 'SYN-' by discarding it & 
returning 'ACK-'. + * Respond to 'SYN+' by queuing it on new socket. + */ + if (!msg_data_sz(msg)) { + struct msghdr m = {NULL,}; + + tsk_advance_rx_queue(sk); + __tipc_sendstream(new_sock, &m, 0); + } else { + __skb_dequeue(&sk->sk_receive_queue); + __skb_queue_head(&new_sk->sk_receive_queue, buf); + skb_set_owner_r(buf, new_sk); + } + release_sock(new_sk); +exit: + release_sock(sk); + return res; +} + +/** + * tipc_shutdown - shutdown socket connection + * @sock: socket structure + * @how: direction to close (must be SHUT_RDWR) + * + * Terminates connection (if necessary), then purges socket's receive queue. + * + * Returns 0 on success, errno otherwise (-EINVAL if @how is not SHUT_RDWR, + * -ENOTCONN if the socket is not in TIPC_DISCONNECTING state after + * __tipc_shutdown()) + */ +static int tipc_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + int res; + + if (how != SHUT_RDWR) + return -EINVAL; + + lock_sock(sk); + + __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); + sk->sk_shutdown = SHUTDOWN_MASK; + + if (sk->sk_state == TIPC_DISCONNECTING) { + /* Discard any unreceived messages */ + __skb_queue_purge(&sk->sk_receive_queue); + + res = 0; + } else { + res = -ENOTCONN; + } + /* Wake up anyone sleeping in poll. 
*/ + sk->sk_state_change(sk); + + release_sock(sk); + return res; +} + +static void tipc_sk_timeout(struct timer_list *t) +{ + struct sock *sk = from_timer(sk, t, sk_timer); + struct tipc_sock *tsk = tipc_sk(sk); + u32 peer_port = tsk_peer_port(tsk); + u32 peer_node = tsk_peer_node(tsk); + u32 own_node = tsk_own_node(tsk); + u32 own_port = tsk->portid; + struct net *net = sock_net(sk); + struct sk_buff *skb = NULL; + + bh_lock_sock(sk); + if (!tipc_sk_connected(sk)) + goto exit; + + /* Try again later if socket is busy */ + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); + goto exit; + } + + if (tsk->probe_unacked) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, peer_node, peer_port); + sk->sk_state_change(sk); + goto exit; + } + /* Send new probe */ + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, + peer_node, own_node, peer_port, own_port, + TIPC_OK); + tsk->probe_unacked = true; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); +exit: + bh_unlock_sock(sk); + if (skb) + tipc_node_xmit_skb(net, skb, peer_node, own_port); + sock_put(sk); +} + +static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct publication *publ; + u32 key; + + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + + if (tipc_sk_connected(sk)) + return -EINVAL; + key = tsk->portid + tsk->pub_count + 1; + if (key == tsk->portid) + return -EADDRINUSE; + + publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, + scope, tsk->portid, key); + if (unlikely(!publ)) + return -EINVAL; + + list_add(&publ->binding_sock, &tsk->publications); + tsk->pub_count++; + tsk->published = 1; + return 0; +} + +static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, + struct tipc_name_seq const *seq) +{ + struct net *net = sock_net(&tsk->sk); + struct publication *publ; + struct 
publication *safe; + int rc = -EINVAL; + + if (scope != TIPC_NODE_SCOPE) + scope = TIPC_CLUSTER_SCOPE; + + list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) { + if (seq) { + if (publ->scope != scope) + continue; + if (publ->type != seq->type) + continue; + if (publ->lower != seq->lower) + continue; + if (publ->upper != seq->upper) + break; + tipc_nametbl_withdraw(net, publ->type, publ->lower, + publ->upper, publ->key); + rc = 0; + break; + } + tipc_nametbl_withdraw(net, publ->type, publ->lower, + publ->upper, publ->key); + rc = 0; + } + if (list_empty(&tsk->publications)) + tsk->published = 0; + return rc; +} + +/* tipc_sk_reinit: set non-zero address in all existing sockets + * when we go from standalone to network mode. + */ +void tipc_sk_reinit(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct rhashtable_iter iter; + struct tipc_sock *tsk; + struct tipc_msg *msg; + + rhashtable_walk_enter(&tn->sk_rht, &iter); + + do { + rhashtable_walk_start(&iter); + + while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { + sock_hold(&tsk->sk); + rhashtable_walk_stop(&iter); + lock_sock(&tsk->sk); + msg = &tsk->phdr; + msg_set_prevnode(msg, tipc_own_addr(net)); + msg_set_orignode(msg, tipc_own_addr(net)); + release_sock(&tsk->sk); + rhashtable_walk_start(&iter); + sock_put(&tsk->sk); + } + + rhashtable_walk_stop(&iter); + } while (tsk == ERR_PTR(-EAGAIN)); + + rhashtable_walk_exit(&iter); +} + +static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_sock *tsk; + + rcu_read_lock(); + tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); + if (tsk) + sock_hold(&tsk->sk); + rcu_read_unlock(); + + return tsk; +} + +static int tipc_sk_insert(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + u32 remaining = (TIPC_MAX_PORT - 
TIPC_MIN_PORT) + 1; + u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; + + while (remaining--) { + portid++; + if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) + portid = TIPC_MIN_PORT; + tsk->portid = portid; + sock_hold(&tsk->sk); + if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, + tsk_rht_params)) + return 0; + sock_put(&tsk->sk); + } + + return -1; +} + +static void tipc_sk_remove(struct tipc_sock *tsk) +{ + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); + + if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { + WARN_ON(refcount_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } +} + +static const struct rhashtable_params tsk_rht_params = { + .nelem_hint = 192, + .head_offset = offsetof(struct tipc_sock, node), + .key_offset = offsetof(struct tipc_sock, portid), + .key_len = sizeof(u32), /* portid */ + .max_size = 1048576, + .min_size = 256, + .automatic_shrinking = true, +}; + +int tipc_sk_rht_init(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + return rhashtable_init(&tn->sk_rht, &tsk_rht_params); +} + +void tipc_sk_rht_destroy(struct net *net) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + + /* Wait for socket readers to complete */ + synchronize_net(); + + rhashtable_destroy(&tn->sk_rht); +} + +static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_name_seq seq; + int rc; + + if (mreq->type < TIPC_RESERVED_TYPES) + return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; + if (grp) + return -EACCES; + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); + if (!grp) + return -ENOMEM; + tsk->group = grp; + msg_set_lookup_scope(hdr, mreq->scope); + msg_set_nametype(hdr, mreq->type); + msg_set_dest_droppable(hdr, true); + seq.type = mreq->type; + 
seq.lower = mreq->instance; + seq.upper = seq.lower; + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); + rc = tipc_sk_publish(tsk, mreq->scope, &seq); + if (rc) { + tipc_group_delete(net, grp); + tsk->group = NULL; + return rc; + } + /* Eliminate any risk that a broadcast overtakes sent JOINs */ + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); + return rc; +} + +static int tipc_sk_leave(struct tipc_sock *tsk) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_name_seq seq; + int scope; + + if (!grp) + return -EINVAL; + tipc_group_self(grp, &seq, &scope); + tipc_group_delete(net, grp); + tsk->group = NULL; + tipc_sk_withdraw(tsk, scope, &seq); + return 0; +} + +/** + * tipc_setsockopt - set socket option + * @sock: socket structure + * @lvl: option level + * @opt: option identifier + * @ov: pointer to new option value + * @ol: length of option value + * + * For stream sockets only, accepts and ignores all IPPROTO_TCP options + * (to ease compatibility). 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_setsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, unsigned int ol) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group_req mreq; + u32 value = 0; + int res = 0; + + if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) + return 0; + if (lvl != SOL_TIPC) + return -ENOPROTOOPT; + + switch (opt) { + case TIPC_IMPORTANCE: + case TIPC_SRC_DROPPABLE: + case TIPC_DEST_DROPPABLE: + case TIPC_CONN_TIMEOUT: + if (ol < sizeof(value)) + return -EINVAL; + if (get_user(value, (u32 __user *)ov)) + return -EFAULT; + break; + case TIPC_GROUP_JOIN: + if (ol < sizeof(mreq)) + return -EINVAL; + if (copy_from_user(&mreq, ov, sizeof(mreq))) + return -EFAULT; + break; + default: + if (ov || ol) + return -EINVAL; + } + + lock_sock(sk); + + switch (opt) { + case TIPC_IMPORTANCE: + res = tsk_set_importance(tsk, value); + break; + case TIPC_SRC_DROPPABLE: + if (sock->type != SOCK_STREAM) + tsk_set_unreliable(tsk, value); + else + res = -ENOPROTOOPT; + break; + case TIPC_DEST_DROPPABLE: + tsk_set_unreturnable(tsk, value); + break; + case TIPC_CONN_TIMEOUT: + tipc_sk(sk)->conn_timeout = value; + break; + case TIPC_MCAST_BROADCAST: + tsk->mc_method.rcast = false; + tsk->mc_method.mandatory = true; + break; + case TIPC_MCAST_REPLICAST: + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; + break; + case TIPC_GROUP_JOIN: + res = tipc_sk_join(tsk, &mreq); + break; + case TIPC_GROUP_LEAVE: + res = tipc_sk_leave(tsk); + break; + default: + res = -EINVAL; + } + + release_sock(sk); + + return res; +} + +/** + * tipc_getsockopt - get socket option + * @sock: socket structure + * @lvl: option level + * @opt: option identifier + * @ov: receptacle for option value + * @ol: receptacle for length of option value + * + * For stream sockets only, returns 0 length result for all IPPROTO_TCP options + * (to ease compatibility). 
+ * + * Returns 0 on success, errno otherwise + */ +static int tipc_getsockopt(struct socket *sock, int lvl, int opt, + char __user *ov, int __user *ol) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_name_seq seq; + int len, scope; + u32 value; + int res; + + if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) + return put_user(0, ol); + if (lvl != SOL_TIPC) + return -ENOPROTOOPT; + res = get_user(len, ol); + if (res) + return res; + + lock_sock(sk); + + switch (opt) { + case TIPC_IMPORTANCE: + value = tsk_importance(tsk); + break; + case TIPC_SRC_DROPPABLE: + value = tsk_unreliable(tsk); + break; + case TIPC_DEST_DROPPABLE: + value = tsk_unreturnable(tsk); + break; + case TIPC_CONN_TIMEOUT: + value = tsk->conn_timeout; + /* no need to set "res", since already 0 at this point */ + break; + case TIPC_NODE_RECVQ_DEPTH: + value = 0; /* was tipc_queue_size, now obsolete */ + break; + case TIPC_SOCK_RECVQ_DEPTH: + value = skb_queue_len(&sk->sk_receive_queue); + break; + case TIPC_GROUP_JOIN: + seq.type = 0; + if (tsk->group) + tipc_group_self(tsk->group, &seq, &scope); + value = seq.type; + break; + default: + res = -EINVAL; + } + + release_sock(sk); + + if (res) + return res; /* "get" failed */ + + if (len < sizeof(value)) + return -EINVAL; + + if (copy_to_user(ov, &value, sizeof(value))) + return -EFAULT; + + return put_user(sizeof(value), ol); +} + +static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct net *net = sock_net(sock->sk); + struct tipc_sioc_nodeid_req nr = {0}; + struct tipc_sioc_ln_req lnr; + void __user *argp = (void __user *)arg; + + switch (cmd) { + case SIOCGETLINKNAME: + if (copy_from_user(&lnr, argp, sizeof(lnr))) + return -EFAULT; + if (!tipc_node_get_linkname(net, + lnr.bearer_id & 0xffff, lnr.peer, + lnr.linkname, TIPC_MAX_LINK_NAME)) { + if (copy_to_user(argp, &lnr, sizeof(lnr))) + return -EFAULT; + return 0; + } + return -EADDRNOTAVAIL; + case SIOCGETNODEID: + if 
(copy_from_user(&nr, argp, sizeof(nr))) + return -EFAULT; + if (!tipc_node_get_id(net, nr.peer, nr.node_id)) + return -EADDRNOTAVAIL; + if (copy_to_user(argp, &nr, sizeof(nr))) + return -EFAULT; + return 0; + default: + return -ENOIOCTLCMD; + } +} + +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); + return 0; +} + +/* Protocol switches for the various types of TIPC sockets */ + +static const struct proto_ops msg_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = tipc_socketpair, + .accept = sock_no_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = sock_no_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_sendmsg, + .recvmsg = tipc_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage +}; + +static const struct proto_ops packet_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = tipc_socketpair, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_send_packet, 
+ .recvmsg = tipc_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage +}; + +static const struct proto_ops stream_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .release = tipc_release, + .bind = tipc_bind, + .connect = tipc_connect, + .socketpair = tipc_socketpair, + .accept = tipc_accept, + .getname = tipc_getname, + .poll = tipc_poll, + .ioctl = tipc_ioctl, + .listen = tipc_listen, + .shutdown = tipc_shutdown, + .setsockopt = tipc_setsockopt, + .getsockopt = tipc_getsockopt, + .sendmsg = tipc_sendstream, + .recvmsg = tipc_recvstream, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage +}; + +static const struct net_proto_family tipc_family_ops = { + .owner = THIS_MODULE, + .family = AF_TIPC, + .create = tipc_sk_create +}; + +static struct proto tipc_proto = { + .name = "TIPC", + .owner = THIS_MODULE, + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem +}; + +/** + * tipc_socket_init - initialize TIPC socket interface + * + * Returns 0 on success, errno otherwise + */ +int tipc_socket_init(void) +{ + int res; + + res = proto_register(&tipc_proto, 1); + if (res) { + pr_err("Failed to register TIPC protocol type\n"); + goto out; + } + + res = sock_register(&tipc_family_ops); + if (res) { + pr_err("Failed to register TIPC socket type\n"); + proto_unregister(&tipc_proto); + goto out; + } + out: + return res; +} + +/** + * tipc_socket_stop - stop TIPC socket interface + */ +void tipc_socket_stop(void) +{ + sock_unregister(tipc_family_ops.family); + proto_unregister(&tipc_proto); +} + +/* Caller should hold socket lock for the passed tipc socket. 
*/ +static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) +{ + u32 peer_node; + u32 peer_port; + struct nlattr *nest; + + peer_node = tsk_peer_node(tsk); + peer_port = tsk_peer_port(tsk); + + /* Emit the peer node/port (and, for named connections, type/instance) + * as one nested TIPC_NLA_SOCK_CON attribute. + */ + nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + /* nla_nest_start() returns NULL on failure; bail out before the + * pointer reaches nla_nest_end(), as the other dump helpers do. + */ + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) + goto msg_full; + + if (tsk->conn_type != 0) { + if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) + goto msg_full; + if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) + goto msg_full; + } + nla_nest_end(skb, nest); + + return 0; + +msg_full: + /* Drop the partially written nest */ + nla_nest_cancel(skb, nest); + + return -EMSGSIZE; +} + +/* Add the socket's port id and own node address to @skb, followed by either + * the connection attributes (connected socket) or a "has publications" flag. + * Returns 0 on success, -EMSGSIZE if an attribute could not be added. + */ +static int __tipc_nl_add_sk_info(struct sk_buff *skb, struct tipc_sock + *tsk) +{ + struct net *net = sock_net(skb->sk); + struct sock *sk = &tsk->sk; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid) || + nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tipc_own_addr(net))) + return -EMSGSIZE; + + if (tipc_sk_connected(sk)) { + if (__tipc_nl_add_sk_con(skb, tsk)) + return -EMSGSIZE; + } else if (!list_empty(&tsk->publications)) { + if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) + return -EMSGSIZE; + } + return 0; +} + +/* Caller should hold socket lock for the passed tipc socket. 
*/ +static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk) +{ + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + if (!attrs) + goto genlmsg_cancel; + + if (__tipc_nl_add_sk_info(skb, tsk)) + goto attr_msg_cancel; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)) +{ + struct rhashtable_iter *iter = (void *)cb->args[4]; + struct tipc_sock *tsk; + int err; + + rhashtable_walk_start(iter); + while ((tsk = rhashtable_walk_next(iter)) != NULL) { + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + if (err == -EAGAIN) { + err = 0; + continue; + } + break; + } + + sock_hold(&tsk->sk); + rhashtable_walk_stop(iter); + lock_sock(&tsk->sk); + err = skb_handler(skb, cb, tsk); + if (err) { + release_sock(&tsk->sk); + sock_put(&tsk->sk); + goto out; + } + release_sock(&tsk->sk); + rhashtable_walk_start(iter); + sock_put(&tsk->sk); + } + rhashtable_walk_stop(iter); +out: + return skb->len; +} +EXPORT_SYMBOL(tipc_nl_sk_walk); + +int tipc_dump_start(struct netlink_callback *cb) +{ + return __tipc_dump_start(cb, sock_net(cb->skb->sk)); +} +EXPORT_SYMBOL(tipc_dump_start); + +int __tipc_dump_start(struct netlink_callback *cb, struct net *net) +{ + /* tipc_nl_name_table_dump() uses cb->args[0...3]. 
*/ + struct rhashtable_iter *iter = (void *)cb->args[4]; + struct tipc_net *tn = tipc_net(net); + + if (!iter) { + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + + cb->args[4] = (long)iter; + } + + rhashtable_walk_enter(&tn->sk_rht, iter); + return 0; +} + +int tipc_dump_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *hti = (void *)cb->args[4]; + + rhashtable_walk_exit(hti); + kfree(hti); + return 0; +} +EXPORT_SYMBOL(tipc_dump_done); + +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk, u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)) +{ + struct sock *sk = &tsk->sk; + struct nlattr *attrs; + struct nlattr *stat; + + /*filter response w.r.t sk_state*/ + if (!(sk_filter_state & (1 << sk->sk_state))) + return 0; + + attrs = nla_nest_start(skb, TIPC_NLA_SOCK); + if (!attrs) + goto msg_cancel; + + if (__tipc_nl_add_sk_info(skb, tsk)) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_TYPE, (u32)sk->sk_type) || + nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) || + nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) || + nla_put_u32(skb, TIPC_NLA_SOCK_UID, + from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk), + sock_i_uid(sk))) || + nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE, + tipc_diag_gen_cookie(sk), + TIPC_NLA_SOCK_PAD)) + goto attr_msg_cancel; + + stat = nla_nest_start(skb, TIPC_NLA_SOCK_STAT); + if (!stat) + goto attr_msg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_SOCK_STAT_RCVQ, + skb_queue_len(&sk->sk_receive_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_SENDQ, + skb_queue_len(&sk->sk_write_queue)) || + nla_put_u32(skb, TIPC_NLA_SOCK_STAT_DROP, + atomic_read(&sk->sk_drops))) + goto stat_msg_cancel; + + if (tsk->cong_link_cnt && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_LINK_CONG)) + goto stat_msg_cancel; + + if (tsk_conn_cong(tsk) && + nla_put_flag(skb, TIPC_NLA_SOCK_STAT_CONN_CONG)) + goto stat_msg_cancel; + + 
nla_nest_end(skb, stat); + + if (tsk->group) + if (tipc_group_fill_sock_diag(tsk->group, skb)) + goto stat_msg_cancel; + + nla_nest_end(skb, attrs); + + return 0; + +stat_msg_cancel: + nla_nest_cancel(skb, stat); +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +msg_cancel: + return -EMSGSIZE; +} +EXPORT_SYMBOL(tipc_sk_fill_sock_diag); + +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return tipc_nl_sk_walk(skb, cb, __tipc_nl_add_sk); +} + +/* Caller should hold socket lock for the passed tipc socket. */ +static int __tipc_nl_add_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct publication *publ) +{ + void *hdr; + struct nlattr *attrs; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) + goto msg_cancel; + + attrs = nla_nest_start(skb, TIPC_NLA_PUBL); + if (!attrs) + goto genlmsg_cancel; + + if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) + goto attr_msg_cancel; + if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) + goto attr_msg_cancel; + + nla_nest_end(skb, attrs); + genlmsg_end(skb, hdr); + + return 0; + +attr_msg_cancel: + nla_nest_cancel(skb, attrs); +genlmsg_cancel: + genlmsg_cancel(skb, hdr); +msg_cancel: + return -EMSGSIZE; +} + +/* Caller should hold socket lock for the passed tipc socket. 
*/ +static int __tipc_nl_list_sk_publ(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk, u32 *last_publ) +{ + int err; + struct publication *p; + + if (*last_publ) { + list_for_each_entry(p, &tsk->publications, binding_sock) { + if (p->key == *last_publ) + break; + } + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { + /* We never set seq or call nl_dump_check_consistent() + * this means that setting prev_seq here will cause the + * consistence check to fail in the netlink callback + * handler. Resulting in the last NLMSG_DONE message + * having the NLM_F_DUMP_INTR flag set. + */ + cb->prev_seq = 1; + *last_publ = 0; + return -EPIPE; + } + } else { + p = list_first_entry(&tsk->publications, struct publication, + binding_sock); + } + + list_for_each_entry_from(p, &tsk->publications, binding_sock) { + err = __tipc_nl_add_sk_publ(skb, cb, p); + if (err) { + *last_publ = p->key; + return err; + } + } + *last_publ = 0; + + return 0; +} + +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int err; + u32 tsk_portid = cb->args[0]; + u32 last_publ = cb->args[1]; + u32 done = cb->args[2]; + struct net *net = sock_net(skb->sk); + struct tipc_sock *tsk; + + if (!tsk_portid) { + struct nlattr **attrs; + struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, + attrs[TIPC_NLA_SOCK], + tipc_nl_sock_policy, NULL); + if (err) + return err; + + if (!sock[TIPC_NLA_SOCK_REF]) + return -EINVAL; + + tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); + } + + if (done) + return 0; + + tsk = tipc_sk_lookup(net, tsk_portid); + if (!tsk) + return -EINVAL; + + lock_sock(&tsk->sk); + err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); + if (!err) + done = 1; + release_sock(&tsk->sk); + sock_put(&tsk->sk); + + cb->args[0] = tsk_portid; + cb->args[1] = last_publ; + 
cb->args[2] = done; + + return skb->len; +} diff --git a/net/tipc/socket.h b/net/tipc/socket.h new file mode 100644 index 000000000..5e575f205 --- /dev/null +++ b/net/tipc/socket.h @@ -0,0 +1,74 @@ +/* net/tipc/socket.h: Include file for TIPC socket code + * + * Copyright (c) 2014-2016, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_SOCK_H +#define _TIPC_SOCK_H + +#include <net/sock.h> +#include <net/genetlink.h> + +/* Compatibility values for deprecated message based flow control */ +#define FLOWCTL_MSG_WIN 512 +#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE)) + +#define FLOWCTL_BLK_SZ 1024 + +/* Socket receive buffer sizes */ +#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512) +#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2) +#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16) + +struct tipc_sock; + +int tipc_socket_init(void); +void tipc_socket_stop(void); +void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); +void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, + struct sk_buff_head *inputq); +void tipc_sk_reinit(struct net *net); +int tipc_sk_rht_init(struct net *net); +void tipc_sk_rht_destroy(struct net *net); +int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb, + struct tipc_sock *tsk, u32 sk_filter_state, + u64 (*tipc_diag_gen_cookie)(struct sock *sk)); +int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb, + int (*skb_handler)(struct sk_buff *skb, + struct netlink_callback *cb, + struct tipc_sock *tsk)); +int tipc_dump_start(struct netlink_callback *cb); +int __tipc_dump_start(struct netlink_callback 
*cb, struct net *net); +int tipc_dump_done(struct netlink_callback *cb); +#endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c new file mode 100644 index 000000000..f340e53da --- /dev/null +++ b/net/tipc/subscr.c @@ -0,0 +1,174 @@ +/* + * net/tipc/subscr.c: TIPC network topology service + * + * Copyright (c) 2000-2017, Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "name_table.h" +#include "subscr.h" + +static void tipc_sub_send_event(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node) +{ + struct tipc_event *evt = &sub->evt; + + if (sub->inactive) + return; + tipc_evt_write(evt, event, event); + tipc_evt_write(evt, found_lower, found_lower); + tipc_evt_write(evt, found_upper, found_upper); + tipc_evt_write(evt, port.ref, port); + tipc_evt_write(evt, port.node, node); + tipc_topsrv_queue_evt(sub->net, sub->conid, event, evt); +} + +/** + * tipc_sub_check_overlap - test for subscription overlap with the + * given values + * + * Returns 1 if there is overlap, otherwise 0. 
+ */ +int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, + u32 found_upper) +{ + if (found_lower < seq->lower) + found_lower = seq->lower; + if (found_upper > seq->upper) + found_upper = seq->upper; + if (found_lower > found_upper) + return 0; + return 1; +} + +void tipc_sub_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node, + u32 scope, int must) +{ + struct tipc_subscr *s = &sub->evt.s; + u32 filter = tipc_sub_read(s, filter); + struct tipc_name_seq seq; + + seq.type = tipc_sub_read(s, seq.type); + seq.lower = tipc_sub_read(s, seq.lower); + seq.upper = tipc_sub_read(s, seq.upper); + + if (!tipc_sub_check_overlap(&seq, found_lower, found_upper)) + return; + + if (!must && !(filter & TIPC_SUB_PORTS)) + return; + if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + return; + if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) + return; + spin_lock(&sub->lock); + tipc_sub_send_event(sub, found_lower, found_upper, + event, port, node); + spin_unlock(&sub->lock); +} + +static void tipc_sub_timeout(struct timer_list *t) +{ + struct tipc_subscription *sub = from_timer(sub, t, timer); + struct tipc_subscr *s = &sub->evt.s; + + spin_lock(&sub->lock); + tipc_sub_send_event(sub, s->seq.lower, s->seq.upper, + TIPC_SUBSCR_TIMEOUT, 0, 0); + sub->inactive = true; + spin_unlock(&sub->lock); +} + +static void tipc_sub_kref_release(struct kref *kref) +{ + kfree(container_of(kref, struct tipc_subscription, kref)); +} + +void tipc_sub_put(struct tipc_subscription *subscription) +{ + kref_put(&subscription->kref, tipc_sub_kref_release); +} + +void tipc_sub_get(struct tipc_subscription *subscription) +{ + kref_get(&subscription->kref); +} + +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid) +{ + u32 filter = tipc_sub_read(s, filter); + struct tipc_subscription *sub; + u32 timeout; + + if ((filter & TIPC_SUB_PORTS && filter & 
TIPC_SUB_SERVICE) || + (tipc_sub_read(s, seq.lower) > tipc_sub_read(s, seq.upper))) { + pr_warn("Subscription rejected, illegal request\n"); + return NULL; + } + sub = kmalloc(sizeof(*sub), GFP_ATOMIC); + if (!sub) { + pr_warn("Subscription rejected, no memory\n"); + return NULL; + } + INIT_LIST_HEAD(&sub->service_list); + INIT_LIST_HEAD(&sub->sub_list); + sub->net = net; + sub->conid = conid; + sub->inactive = false; + memcpy(&sub->evt.s, s, sizeof(*s)); + spin_lock_init(&sub->lock); + kref_init(&sub->kref); + if (!tipc_nametbl_subscribe(sub)) { + kfree(sub); + return NULL; + } + timer_setup(&sub->timer, tipc_sub_timeout, 0); + timeout = tipc_sub_read(&sub->evt.s, timeout); + if (timeout != TIPC_WAIT_FOREVER) + mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); + return sub; +} + +void tipc_sub_unsubscribe(struct tipc_subscription *sub) +{ + tipc_nametbl_unsubscribe(sub); + if (sub->evt.s.timeout != TIPC_WAIT_FOREVER) + del_timer_sync(&sub->timer); + list_del(&sub->sub_list); + tipc_sub_put(sub); +} diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h new file mode 100644 index 000000000..aa015c233 --- /dev/null +++ b/net/tipc/subscr.h @@ -0,0 +1,109 @@ +/* + * net/tipc/subscr.h: Include file for TIPC network topology service + * + * Copyright (c) 2003-2017, Ericsson AB + * Copyright (c) 2005-2007, 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_SUBSCR_H +#define _TIPC_SUBSCR_H + +#include "topsrv.h" + +#define TIPC_MAX_SUBSCR 65535 +#define TIPC_MAX_PUBL 65535 + +struct tipc_subscription; +struct tipc_conn; + +/** + * struct tipc_subscription - TIPC network topology subscription object + * @subscriber: pointer to its subscriber + * @seq: name sequence associated with subscription + * @timer: timer governing subscription duration (optional) + * @nameseq_list: adjacent subscriptions in name sequence's subscription list + * @sub_list: adjacent subscriptions in subscriber's subscription list + * @evt: template for events generated by subscription + */ +struct tipc_subscription { + struct kref kref; + struct net *net; + struct timer_list timer; + struct list_head service_list; + struct list_head sub_list; + struct tipc_event evt; + int conid; + bool inactive; + spinlock_t lock; /* serialize up/down and timer events */ +}; + +struct tipc_subscription *tipc_sub_subscribe(struct net *net, + struct tipc_subscr *s, + int conid); +void tipc_sub_unsubscribe(struct tipc_subscription *sub); + +int tipc_sub_check_overlap(struct tipc_name_seq *seq, u32 found_lower, + u32 found_upper); +void tipc_sub_report_overlap(struct tipc_subscription *sub, + u32 found_lower, u32 found_upper, + u32 event, u32 port, u32 node, + u32 scope, int must); + +int __net_init tipc_topsrv_init_net(struct net *net); +void __net_exit tipc_topsrv_exit_net(struct net *net); + +void tipc_sub_put(struct tipc_subscription *subscription); +void tipc_sub_get(struct tipc_subscription *subscription); + +#define TIPC_FILTER_MASK (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | TIPC_SUB_CANCEL) + +/* tipc_sub_read - return field_ of struct sub_ in host endian format + */ +#define tipc_sub_read(sub_, field_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = (sub__)->field_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (swap_ ? 
swab32(val__) : val__); \ + }) + +/* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format + */ +#define tipc_evt_write(evt_, field_, val_) \ + ({ \ + struct tipc_event *evt__ = evt_; \ + u32 val__ = val_; \ + int swap_ = !((evt__)->s.filter & (TIPC_FILTER_MASK)); \ + (evt__)->field_ = swap_ ? swab32(val__) : val__; \ + }) + +#endif diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c new file mode 100644 index 000000000..40f6d8208 --- /dev/null +++ b/net/tipc/sysctl.c @@ -0,0 +1,75 @@ +/* + * net/tipc/sysctl.c: sysctl interface to TIPC subsystem + * + * Copyright (c) 2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" + +#include <linux/sysctl.h> + +static int zero; +static int one = 1; +static struct ctl_table_header *tipc_ctl_hdr; + +static struct ctl_table tipc_table[] = { + { + .procname = "tipc_rmem", + .data = &sysctl_tipc_rmem, + .maxlen = sizeof(sysctl_tipc_rmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, + { + .procname = "named_timeout", + .data = &sysctl_tipc_named_timeout, + .maxlen = sizeof(sysctl_tipc_named_timeout), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + }, + {} +}; + +int tipc_register_sysctl(void) +{ + tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); + if (tipc_ctl_hdr == NULL) + return -ENOMEM; + return 0; +} + +void tipc_unregister_sysctl(void) +{ + unregister_net_sysctl_table(tipc_ctl_hdr); +} diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c new file mode 100644 index 000000000..1c4733153 --- /dev/null +++ b/net/tipc/topsrv.c @@ -0,0 +1,723 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017-2018, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "subscr.h" +#include "topsrv.h" +#include "core.h" +#include "socket.h" +#include "addr.h" +#include "msg.h" +#include <net/sock.h> +#include <linux/module.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 +#define CF_SERVER 2 + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_topsrv - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @net: network namespace instance + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_release: callback will be called before releasing the connection + * @tipc_conn_recvmsg: callback will be called when message arrives + * @name: server name + * @imp: message importance + * @type: socket type + */ +struct tipc_topsrv { + struct idr conn_idr; + spinlock_t idr_lock; /* for idr list */ + int idr_in_use; + struct net *net; + struct work_struct awork; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + struct socket *listener; + char name[TIPC_SERVER_NAME_LEN]; +}; + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @sub_list: list of all pertaining subscriptions + * @sub_lock: lock protecting the subscription list + * @outqueue_lock: control access to the outqueue + * @rwork: receive work item + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + *
@outqueue_lock: control access to the outqueue + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_topsrv *server; + struct list_head sub_list; + spinlock_t sub_lock; /* for subscription list */ + struct work_struct rwork; + struct list_head outqueue; + spinlock_t outqueue_lock; /* for outqueue */ + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + bool inactive; + struct tipc_event evt; + struct list_head list; +}; + +static void tipc_conn_recv_work(struct work_struct *work); +static void tipc_conn_send_work(struct work_struct *work); +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt); +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s); + +static bool connected(struct tipc_conn *con) +{ + return con && test_bit(CF_CONNECTED, &con->flags); +} + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_topsrv *s = con->server; + struct outqueue_entry *e, *safe; + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + if (con->sock) + sock_release(con->sock); + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static void tipc_conn_close(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + bool disconnect = false; + + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + + if (disconnect) { + sk->sk_user_data = NULL; + tipc_conn_delete_sub(con, NULL); + } + 
write_unlock_bh(&sk->sk_callback_lock); + + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; + + /* Don't flush pending works, -just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); +} + +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(*con), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + INIT_LIST_HEAD(&con->sub_list); + spin_lock_init(&con->outqueue_lock); + spin_lock_init(&con->sub_lock); + INIT_WORK(&con->swork, tipc_conn_send_work); + INIT_WORK(&con->rwork, tipc_conn_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_topsrv *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (!connected(con) || !kref_get_unless_zero(&con->kref)) + con = NULL; + spin_unlock_bh(&s->idr_lock); + return con; +} + +/* tipc_conn_delete_sub - delete a specific or all subscriptions + * for a given subscriber + */ +static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(con->server->net); + struct list_head *sub_list = &con->sub_list; + struct tipc_subscription *sub, *tmp; + + spin_lock_bh(&con->sub_lock); + list_for_each_entry_safe(sub, tmp, sub_list, sub_list) { + if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { + tipc_sub_unsubscribe(sub); + atomic_dec(&tn->subscription_count); + } else if (s) { + break; + } + } + spin_unlock_bh(&con->sub_lock); +} + +static void tipc_conn_send_to_sock(struct 
tipc_conn *con) +{ + struct list_head *queue = &con->outqueue; + struct tipc_topsrv *srv = con->server; + struct outqueue_entry *e; + struct tipc_event *evt; + struct msghdr msg; + struct kvec iov; + int count = 0; + int ret; + + spin_lock_bh(&con->outqueue_lock); + + while (!list_empty(queue)) { + e = list_first_entry(queue, struct outqueue_entry, list); + evt = &e->evt; + spin_unlock_bh(&con->outqueue_lock); + + if (e->inactive) + tipc_conn_delete_sub(con, &evt->s); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + iov.iov_base = evt; + iov.iov_len = sizeof(*evt); + msg.msg_name = NULL; + + if (con->sock) { + ret = kernel_sendmsg(con->sock, &msg, &iov, + 1, sizeof(*evt)); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + return; + } else if (ret < 0) { + return tipc_conn_close(con); + } + } else { + tipc_topsrv_kern_evt(srv->net, evt); + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + kfree(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +static void tipc_conn_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (connected(con)) + tipc_conn_send_to_sock(con); + + conn_put(con); +} + +/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance + * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock() + */ +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(srv, conid); + if (!con) + return; + + if (!connected(con)) + goto err; + + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (!e) + goto err; + e->inactive = (event == TIPC_SUBSCR_TIMEOUT); + memcpy(&e->evt, evt, sizeof(*evt)); + spin_lock_bh(&con->outqueue_lock); + 
list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (queue_work(srv->send_wq, &con->swork)) + return; +err: + conn_put(con); +} + +/* tipc_conn_write_space - interrupt callback after a sendmsg EAGAIN + * Indicates that there now is more space in the send buffer + * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock() + */ +static void tipc_conn_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, + struct tipc_conn *con, + struct tipc_subscr *s) +{ + struct tipc_net *tn = tipc_net(srv->net); + struct tipc_subscription *sub; + + if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL); + tipc_conn_delete_sub(con, s); + return 0; + } + if (atomic_read(&tn->subscription_count) >= TIPC_MAX_SUBSCR) { + pr_warn("Subscription rejected, max (%u)\n", TIPC_MAX_SUBSCR); + return -1; + } + sub = tipc_sub_subscribe(srv->net, s, con->conid); + if (!sub) + return -1; + atomic_inc(&tn->subscription_count); + spin_lock_bh(&con->sub_lock); + list_add(&sub->sub_list, &con->sub_list); + spin_unlock_bh(&con->sub_lock); + return 0; +} + +static int tipc_conn_rcv_from_sock(struct tipc_conn *con) +{ + struct tipc_topsrv *srv = con->server; + struct sock *sk = con->sock->sk; + struct msghdr msg = {}; + struct tipc_subscr s; + struct kvec iov; + int ret; + + iov.iov_base = &s; + iov.iov_len = sizeof(s); + msg.msg_name = NULL; + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); + if (ret == -EWOULDBLOCK) + return -EWOULDBLOCK; + if (ret == sizeof(s)) { + read_lock_bh(&sk->sk_callback_lock); + /* RACE: the connection can be closed in the
meantime */ + if (likely(connected(con))) + ret = tipc_conn_rcv_sub(srv, con, &s); + read_unlock_bh(&sk->sk_callback_lock); + if (!ret) + return 0; + } + + tipc_conn_close(con); + return ret; +} + +static void tipc_conn_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (connected(con)) { + if (tipc_conn_rcv_from_sock(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +/* tipc_conn_data_ready - interrupt callback indicating the socket has data + * The queued work is launched into tipc_conn_recv_work()->tipc_conn_rcv_from_sock() + */ +static void tipc_conn_data_ready(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock_bh(&sk->sk_callback_lock); + con = sk->sk_user_data; + if (connected(con)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_topsrv_accept(struct work_struct *work) +{ + struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork); + struct socket *lsock = srv->listener; + struct socket *newsock; + struct tipc_conn *con; + struct sock *newsk; + int ret; + + while (1) { + ret = kernel_accept(lsock, &newsock, O_NONBLOCK); + if (ret < 0) + return; + con = tipc_conn_alloc(srv); + if (IS_ERR(con)) { + ret = PTR_ERR(con); + sock_release(newsock); + return; + } + /* Register callbacks */ + newsk = newsock->sk; + write_lock_bh(&newsk->sk_callback_lock); + newsk->sk_data_ready = tipc_conn_data_ready; + newsk->sk_write_space = tipc_conn_write_space; + newsk->sk_user_data = con; + con->sock = newsock; + write_unlock_bh(&newsk->sk_callback_lock); + + /* Wake up receive process in case of 'SYN+' message */ + newsk->sk_data_ready(newsk); + } +} + +/* tipc_topsrv_listener_data_ready - interrupt callback with connection request + * The queued job is launched into
tipc_topsrv_accept() + */ +static void tipc_topsrv_listener_data_ready(struct sock *sk) +{ + struct tipc_topsrv *srv; + + read_lock_bh(&sk->sk_callback_lock); + srv = sk->sk_user_data; + if (srv->listener) + queue_work(srv->rcv_wq, &srv->awork); + read_unlock_bh(&sk->sk_callback_lock); +} + +static int tipc_topsrv_create_listener(struct tipc_topsrv *srv) +{ + int imp = TIPC_CRITICAL_IMPORTANCE; + struct socket *lsock = NULL; + struct sockaddr_tipc saddr; + struct sock *sk; + int rc; + + rc = sock_create_kern(srv->net, AF_TIPC, SOCK_SEQPACKET, 0, &lsock); + if (rc < 0) + return rc; + + srv->listener = lsock; + sk = lsock->sk; + write_lock_bh(&sk->sk_callback_lock); + sk->sk_data_ready = tipc_topsrv_listener_data_ready; + sk->sk_user_data = srv; + write_unlock_bh(&sk->sk_callback_lock); + + rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&imp, sizeof(imp)); + if (rc < 0) + goto err; + + saddr.family = AF_TIPC; + saddr.addrtype = TIPC_ADDR_NAMESEQ; + saddr.addr.nameseq.type = TIPC_TOP_SRV; + saddr.addr.nameseq.lower = TIPC_TOP_SRV; + saddr.addr.nameseq.upper = TIPC_TOP_SRV; + saddr.scope = TIPC_NODE_SCOPE; + + rc = kernel_bind(lsock, (struct sockaddr *)&saddr, sizeof(saddr)); + if (rc < 0) + goto err; + rc = kernel_listen(lsock, 0); + if (rc < 0) + goto err; + + /* As server's listening socket owner and creator is the same module, + * we have to decrease TIPC module reference count to guarantee that + * it remains zero after the server socket is created, otherwise, + * executing "rmmod" command is unable to make TIPC module deleted + * after TIPC module is inserted successfully. + * + * However, the reference count is ever increased twice in + * sock_create_kern(): one is to increase the reference count of owner + * of TIPC socket's proto_ops struct; another is to increment the + * reference count of owner of TIPC proto struct. 
Therefore, we must + * decrement the module reference count twice to ensure that it keeps + * zero after server's listening socket is created. Of course, we + * must bump the module reference count twice as well before the socket + * is closed. + */ + module_put(lsock->ops->owner); + module_put(sk->sk_prot_creator->owner); + + return 0; +err: + sock_release(lsock); + return -EINVAL; +} + +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) +{ + struct tipc_subscr sub; + struct tipc_conn *con; + int rc; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = filter; + *(u32 *)&sub.usr_handle = port; + + con = tipc_conn_alloc(tipc_topsrv(net)); + if (IS_ERR(con)) + return false; + + *conid = con->conid; + con->sock = NULL; + rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); + if (rc >= 0) + return true; + conn_put(con); + return false; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + + test_and_clear_bit(CF_CONNECTED, &con->flags); + tipc_conn_delete_sub(con, NULL); + conn_put(con); + conn_put(con); +} + +static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_sk_rcv(net, &evtq); +} + +static int tipc_topsrv_work_start(struct tipc_topsrv *s) +{ + s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + 
s->send_wq = alloc_ordered_workqueue("tipc_send", 0); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +static void tipc_topsrv_work_stop(struct tipc_topsrv *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_topsrv_start(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + const char name[] = "topology_server"; + struct tipc_topsrv *srv; + int ret; + + srv = kzalloc(sizeof(*srv), GFP_ATOMIC); + if (!srv) + return -ENOMEM; + + srv->net = net; + srv->max_rcvbuf_size = sizeof(struct tipc_subscr); + INIT_WORK(&srv->awork, tipc_topsrv_accept); + + strscpy(srv->name, name, sizeof(srv->name)); + tn->topsrv = srv; + atomic_set(&tn->subscription_count, 0); + + spin_lock_init(&srv->idr_lock); + idr_init(&srv->conn_idr); + srv->idr_in_use = 0; + + ret = tipc_topsrv_work_start(srv); + if (ret < 0) + goto err_start; + + ret = tipc_topsrv_create_listener(srv); + if (ret < 0) + goto err_create; + + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); + return ret; +} + +static void tipc_topsrv_stop(struct net *net) +{ + struct tipc_topsrv *srv = tipc_topsrv(net); + struct socket *lsock = srv->listener; + struct tipc_conn *con; + int id; + + spin_lock_bh(&srv->idr_lock); + for (id = 0; srv->idr_in_use; id++) { + con = idr_find(&srv->conn_idr, id); + if (con) { + spin_unlock_bh(&srv->idr_lock); + tipc_conn_close(con); + spin_lock_bh(&srv->idr_lock); + } + } + __module_get(lsock->ops->owner); + __module_get(lsock->sk->sk_prot_creator->owner); + srv->listener = NULL; + spin_unlock_bh(&srv->idr_lock); + sock_release(lsock); + tipc_topsrv_work_stop(srv); + idr_destroy(&srv->conn_idr); + kfree(srv); +} + +int __net_init tipc_topsrv_init_net(struct net *net) +{ + return tipc_topsrv_start(net); +} + +void __net_exit tipc_topsrv_exit_net(struct net *net) +{ + tipc_topsrv_stop(net); +} diff --git a/net/tipc/topsrv.h 
b/net/tipc/topsrv.h new file mode 100644 index 000000000..c7ea71293 --- /dev/null +++ b/net/tipc/topsrv.h @@ -0,0 +1,54 @@ +/* + * net/tipc/server.h: Include file for TIPC server code + * + * Copyright (c) 2012-2013, Wind River Systems + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + +#include "core.h" + +#define TIPC_SERVER_NAME_LEN 32 +#define TIPC_SUB_CLUSTER_SCOPE 0x20 +#define TIPC_SUB_NODE_SCOPE 0x40 +#define TIPC_SUB_NO_STATUS 0x80 + +void tipc_topsrv_queue_evt(struct net *net, int conid, + u32 event, struct tipc_event *evt); + +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid); +void tipc_topsrv_kern_unsubscr(struct net *net, int conid); + +#endif diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c new file mode 100644 index 000000000..1d6235479 --- /dev/null +++ b/net/tipc/udp_media.c @@ -0,0 +1,813 @@ +/* net/tipc/udp_media.c: IP bearer support for TIPC + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/socket.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/inet.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/list.h> +#include <net/sock.h> +#include <net/ip.h> +#include <net/udp_tunnel.h> +#include <net/addrconf.h> +#include <linux/tipc_netlink.h> +#include "core.h" +#include "addr.h" +#include "net.h" +#include "bearer.h" +#include "netlink.h" +#include "msg.h" + +/* IANA assigned UDP port */ +#define UDP_PORT_DEFAULT 6118 + +#define UDP_MIN_HEADROOM 48 + +/** + * struct udp_media_addr - IP/UDP addressing information + * + * This is the bearer level originating address used in neighbor discovery + * messages, and all fields should be in network byte order + */ +struct udp_media_addr { + __be16 proto; + __be16 port; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; +}; + +/* struct udp_replicast - container for UDP remote addresses */ +struct udp_replicast { + struct udp_media_addr addr; + struct rcu_head rcu; + struct list_head list; +}; + +/** + * struct udp_bearer - ip/udp bearer data structure + * @bearer: associated generic tipc bearer + * @ubsock: bearer associated socket + * @ifindex: local address scope + * @work: used to schedule deferred work on a bearer + */ +struct udp_bearer { + struct tipc_bearer __rcu *bearer; + struct socket *ubsock; + u32 ifindex; + struct work_struct work; + 
struct udp_replicast rcast; +}; + +static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) +{ + if (ntohs(addr->proto) == ETH_P_IP) + return ipv4_is_multicast(addr->ipv4.s_addr); +#if IS_ENABLED(CONFIG_IPV6) + else + return ipv6_addr_is_multicast(&addr->ipv6); +#endif + return 0; +} + +/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ +static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, + struct udp_media_addr *ua) +{ + memset(addr, 0, sizeof(struct tipc_media_addr)); + addr->media_id = TIPC_MEDIA_TYPE_UDP; + memcpy(addr->value, ua, sizeof(struct udp_media_addr)); + + if (tipc_udp_is_mcast_addr(ua)) + addr->broadcast = TIPC_BROADCAST_SUPPORT; +} + +/* tipc_udp_addr2str - convert ip/udp address to string */ +static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) +{ + struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; + + if (ntohs(ua->proto) == ETH_P_IP) + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port)); + else if (ntohs(ua->proto) == ETH_P_IPV6) + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port)); + else + pr_err("Invalid UDP media address\n"); + return 0; +} + +/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */ +static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a, + char *msg) +{ + struct udp_media_addr *ua; + + ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET); + if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP) + return -EINVAL; + tipc_udp_media_addr_set(a, ua); + return 0; +} + +/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */ +static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) +{ + memset(msg, 0, TIPC_MEDIA_INFO_SIZE); + msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP; + memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value, + sizeof(struct udp_media_addr)); + return 0; +} + +/* tipc_send_msg - enqueue a send request */ +static int tipc_udp_xmit(struct net 
*net, struct sk_buff *skb, + struct udp_bearer *ub, struct udp_media_addr *src, + struct udp_media_addr *dst) +{ + int ttl, err = 0; + struct rtable *rt; + + if (dst->proto == htons(ETH_P_IP)) { + struct flowi4 fl = { + .daddr = dst->ipv4.s_addr, + .saddr = src->ipv4.s_addr, + .flowi4_mark = skb->mark, + .flowi4_proto = IPPROTO_UDP + }; + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto tx_error; + } + + ttl = ip4_dst_hoplimit(&rt->dst); + udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, + dst->ipv4.s_addr, 0, ttl, 0, src->port, + dst->port, false, true); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct dst_entry *ndst; + struct flowi6 fl6 = { + .flowi6_oif = ub->ifindex, + .daddr = dst->ipv6, + .saddr = src->ipv6, + .flowi6_proto = IPPROTO_UDP + }; + ndst = ipv6_stub->ipv6_dst_lookup_flow(net, + ub->ubsock->sk, + &fl6, NULL); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + goto tx_error; + } + ttl = ip6_dst_hoplimit(ndst); + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false); +#endif + } + return err; + +tx_error: + kfree_skb(skb); + return err; +} + +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *addr) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; + struct udp_replicast *rcast; + struct udp_bearer *ub; + int err = 0; + + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto out; + } + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto out; + } + + if (addr->broadcast != TIPC_REPLICAST_SUPPORT) + return tipc_udp_xmit(net, skb, ub, src, dst); + + /* Replicast, send an skb to each configured IP address */ + 
list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { + struct sk_buff *_skb; + + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + err = -ENOMEM; + goto out; + } + + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); + if (err) + goto out; + } + err = 0; +out: + kfree_skb(skb); + return err; +} + +static bool tipc_udp_is_known_peer(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast, *tmp; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err_ratelimited("UDP bearer instance not found\n"); + return false; + } + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr))) + return true; + } + + return false; +} + +static int tipc_udp_rcast_add(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); + if (!rcast) + return -ENOMEM; + + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); + + if (ntohs(addr->proto) == ETH_P_IP) + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); +#if IS_ENABLED(CONFIG_IPV6) + else if (ntohs(addr->proto) == ETH_P_IPV6) + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); +#endif + b->bcast_addr.broadcast = TIPC_REPLICAST_SUPPORT; + list_add_rcu(&rcast->list, &ub->rcast.list); + return 0; +} + +static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb) +{ + struct udp_media_addr src = {0}; + struct udp_media_addr *dst; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) + return 0; + + src.port = udp_hdr(skb)->source; + + if (ip_hdr(skb)->version == 4) { + struct iphdr *iphdr = ip_hdr(skb); + + src.proto = htons(ETH_P_IP); + src.ipv4.s_addr = iphdr->saddr; + if (ipv4_is_multicast(iphdr->daddr)) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + 
} else if (ip_hdr(skb)->version == 6) { + struct ipv6hdr *iphdr = ipv6_hdr(skb); + + src.proto = htons(ETH_P_IPV6); + src.ipv6 = iphdr->saddr; + if (ipv6_addr_is_multicast(&iphdr->daddr)) + return 0; +#endif + } else { + return 0; + } + + if (likely(tipc_udp_is_known_peer(b, &src))) + return 0; + + return tipc_udp_rcast_add(b, &src); +} + +/* tipc_udp_recv - read data from bearer socket */ +static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) +{ + struct udp_bearer *ub; + struct tipc_bearer *b; + struct tipc_msg *hdr; + int err; + + ub = rcu_dereference_sk_user_data(sk); + if (!ub) { + pr_err_ratelimited("Failed to get UDP bearer reference"); + goto out; + } + skb_pull(skb, sizeof(struct udphdr)); + hdr = buf_msg(skb); + + rcu_read_lock(); + b = rcu_dereference_rtnl(ub->bearer); + if (!b) + goto rcu_out; + + if (b && test_bit(0, &b->up)) { + tipc_rcv(sock_net(sk), skb, b); + rcu_read_unlock(); + return 0; + } + + if (unlikely(msg_user(hdr) == LINK_CONFIG)) { + err = tipc_udp_rcast_disc(b, skb); + if (err) + goto rcu_out; + } + +rcu_out: + rcu_read_unlock(); +out: + kfree_skb(skb); + return 0; +} + +static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) +{ + int err = 0; + struct ip_mreqn mreqn; + struct sock *sk = ub->ubsock->sk; + + if (ntohs(remote->proto) == ETH_P_IP) { + mreqn.imr_multiaddr = remote->ipv4; + mreqn.imr_ifindex = ub->ifindex; + err = ip_mc_join_group(sk, &mreqn); +#if IS_ENABLED(CONFIG_IPV6) + } else { + err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, + &remote->ipv6); +#endif + } + return err; +} + +static int __tipc_nl_add_udp_addr(struct sk_buff *skb, + struct udp_media_addr *addr, int nla_t) +{ + if (ntohs(addr->proto) == ETH_P_IP) { + struct sockaddr_in ip4; + + memset(&ip4, 0, sizeof(ip4)); + ip4.sin_family = AF_INET; + ip4.sin_port = addr->port; + ip4.sin_addr.s_addr = addr->ipv4.s_addr; + if (nla_put(skb, nla_t, sizeof(ip4), &ip4)) + return -EMSGSIZE; + +#if IS_ENABLED(CONFIG_IPV6) + } else if 
(ntohs(addr->proto) == ETH_P_IPV6) { + struct sockaddr_in6 ip6; + + memset(&ip6, 0, sizeof(ip6)); + ip6.sin6_family = AF_INET6; + ip6.sin6_port = addr->port; + memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); + if (nla_put(skb, nla_t, sizeof(ip6), &ip6)) + return -EMSGSIZE; +#endif + } + + return 0; +} + +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) +{ + u32 bid = cb->args[0]; + u32 skip_cnt = cb->args[1]; + u32 portid = NETLINK_CB(cb->skb).portid; + struct udp_replicast *rcast, *tmp; + struct tipc_bearer *b; + struct udp_bearer *ub; + void *hdr; + int err; + int i; + + if (!bid && !skip_cnt) { + struct net *net = sock_net(skb->sk); + struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr **attrs; + char *bname; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy, NULL); + if (err) + return err; + + if (!battrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, bname); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + bid = b->identity; + } else { + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + rtnl_lock(); + b = rtnl_dereference(tn->bearer_list[bid]); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + } + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + rtnl_unlock(); + return -EINVAL; + } + + i = 0; + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (i < skip_cnt) + goto count; + + hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_BEARER_GET); + if (!hdr) + goto done; + + err = __tipc_nl_add_udp_addr(skb, &rcast->addr, + TIPC_NLA_UDP_REMOTE); + if (err) { + genlmsg_cancel(skb, hdr); + goto done; + } + genlmsg_end(skb, hdr); 
+count: + i++; + } +done: + rtnl_unlock(); + cb->args[0] = bid; + cb->args[1] = i; + + return skb->len; +} + +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst; + struct udp_bearer *ub; + struct nlattr *nest; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + if (!nest) + goto msg_full; + + if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL)) + goto msg_full; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE)) + goto msg_full; + + if (!list_empty(&ub->rcast.list)) { + if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP)) + goto msg_full; + } + + nla_nest_end(msg->skb, nest); + return 0; +msg_full: + nla_nest_cancel(msg->skb, nest); + return -EMSGSIZE; +} + +/** + * tipc_parse_udp_addr - build udp media address from netlink data + * @nlattr: netlink attribute containing sockaddr storage aligned address + * @addr: tipc media address to fill with address, port and protocol type + * @scope_id: IPv6 scope id pointer, not NULL indicates it's required + */ + +static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, + u32 *scope_id) +{ + struct sockaddr_storage sa; + + nla_memcpy(&sa, nla, sizeof(sa)); + if (sa.ss_family == AF_INET) { + struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa; + + addr->proto = htons(ETH_P_IP); + addr->port = ip4->sin_port; + addr->ipv4.s_addr = ip4->sin_addr.s_addr; + return 0; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sa.ss_family == AF_INET6) { + struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa; + + addr->proto = htons(ETH_P_IPV6); + addr->port = ip6->sin6_port; + memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); + + /* Scope ID is only interesting for local addresses */ + if (scope_id) { + int 
atype; + + atype = ipv6_addr_type(&ip6->sin6_addr); + if (__ipv6_addr_needs_scope_id(atype) && + !ip6->sin6_scope_id) { + return -EINVAL; + } + + *scope_id = ip6->sin6_scope_id ? : 0; + } + + return 0; +#endif + } + return -EADDRNOTAVAIL; +} + +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) +{ + int err; + struct udp_media_addr addr = {0}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct udp_media_addr *dst; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, + tipc_nl_udp_policy, NULL)) + return -EINVAL; + + if (!opts[TIPC_NLA_UDP_REMOTE]) + return -EINVAL; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); + if (err) + return err; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) { + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); + return -EINVAL; + } + + if (tipc_udp_is_known_peer(b, &addr)) + return 0; + + return tipc_udp_rcast_add(b, &addr); +} + +/** + * tipc_udp_enable - callback to create a new udp bearer instance + * @net: network namespace + * @b: pointer to generic tipc_bearer + * @attrs: netlink bearer configuration + * + * validate the bearer parameters and initialize the udp bearer + * rtnl_lock should be held + */ +static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, + struct nlattr *attrs[]) +{ + int err = -EINVAL; + struct udp_bearer *ub; + struct udp_media_addr remote = {0}; + struct udp_media_addr local = {0}; + struct udp_port_cfg udp_conf = {0}; + struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + u8 node_id[NODE_ID_LEN] = {0,}; + + ub = kzalloc(sizeof(*ub), GFP_ATOMIC); + if (!ub) + return -ENOMEM; + + INIT_LIST_HEAD(&ub->rcast.list); + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy, NULL)) + goto err; + + if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { + 
pr_err("Invalid UDP bearer configuration"); + err = -EINVAL; + goto err; + } + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, + &ub->ifindex); + if (err) + goto err; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); + if (err) + goto err; + + if (remote.proto != local.proto) { + err = -EINVAL; + goto err; + } + + /* Autoconfigure own node identity if needed */ + if (!tipc_own_id(net)) { + memcpy(node_id, local.ipv6.in6_u.u6_addr8, 16); + tipc_net_init(net, node_id, 0); + } + if (!tipc_own_id(net)) { + pr_warn("Failed to set node id, please configure manually\n"); + err = -EINVAL; + goto err; + } + + b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP; + b->bcast_addr.broadcast = TIPC_BROADCAST_SUPPORT; + rcu_assign_pointer(b->media_ptr, ub); + rcu_assign_pointer(ub->bearer, b); + tipc_udp_media_addr_set(&b->addr, &local); + if (local.proto == htons(ETH_P_IP)) { + struct net_device *dev; + + dev = __ip_dev_find(net, local.ipv4.s_addr, false); + if (!dev) { + err = -ENODEV; + goto err; + } + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.use_udp_checksums = false; + ub->ifindex = dev->ifindex; + if (tipc_mtu_bad(dev, sizeof(struct iphdr) + + sizeof(struct udphdr))) { + err = -EINVAL; + goto err; + } + b->mtu = b->media->mtu; +#if IS_ENABLED(CONFIG_IPV6) + } else if (local.proto == htons(ETH_P_IPV6)) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = true; + udp_conf.use_udp6_rx_checksums = true; + udp_conf.local_ip6 = in6addr_any; + b->mtu = 1280; +#endif + } else { + err = -EAFNOSUPPORT; + goto err; + } + udp_conf.local_udp_port = local.port; + err = udp_sock_create(net, &udp_conf, &ub->ubsock); + if (err) + goto err; + tuncfg.sk_user_data = ub; + tuncfg.encap_type = 1; + tuncfg.encap_rcv = tipc_udp_recv; + tuncfg.encap_destroy = NULL; + setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); + + /** + * The bcast media address port is used for all peers and the ip + * is used if it's a 
multicast address. + */ + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); + if (tipc_udp_is_mcast_addr(&remote)) + err = enable_mcast(ub, &remote); + else + err = tipc_udp_rcast_add(b, &remote); + if (err) + goto err; + + return 0; +err: + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + kfree(ub); + return err; +} + +/* cleanup_bearer - break the socket/bearer association */ +static void cleanup_bearer(struct work_struct *work) +{ + struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + list_del_rcu(&rcast->list); + kfree_rcu(rcast, rcu); + } + + if (ub->ubsock) + udp_tunnel_sock_release(ub->ubsock); + synchronize_net(); + kfree(ub); +} + +/* tipc_udp_disable - detach bearer from socket */ +static void tipc_udp_disable(struct tipc_bearer *b) +{ + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err("UDP bearer instance not found\n"); + return; + } + if (ub->ubsock) + sock_set_flag(ub->ubsock->sk, SOCK_DEAD); + RCU_INIT_POINTER(ub->bearer, NULL); + + /* sock_release need to be done outside of rtnl lock */ + INIT_WORK(&ub->work, cleanup_bearer); + schedule_work(&ub->work); +} + +struct tipc_media udp_media_info = { + .send_msg = tipc_udp_send_msg, + .enable_media = tipc_udp_enable, + .disable_media = tipc_udp_disable, + .addr2str = tipc_udp_addr2str, + .addr2msg = tipc_udp_addr2msg, + .msg2addr = tipc_udp_msg2addr, + .priority = TIPC_DEF_LINK_PRI, + .tolerance = TIPC_DEF_LINK_TOL, + .window = TIPC_DEF_LINK_WIN, + .mtu = TIPC_DEF_LINK_UDP_MTU, + .type_id = TIPC_MEDIA_TYPE_UDP, + .hwaddr_len = 0, + .name = "udp" +}; diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h new file mode 100644 index 000000000..e7455cc73 --- /dev/null +++ b/net/tipc/udp_media.h @@ -0,0 +1,60 @@ +/* + * net/tipc/udp_media.h: Include file for UDP bearer media + * + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * 
Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef CONFIG_TIPC_MEDIA_UDP +#ifndef _TIPC_UDP_MEDIA_H +#define _TIPC_UDP_MEDIA_H + +#include <linux/ip.h> +#include <linux/udp.h> + +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); + +/* check if configured MTU is too low for tipc headers */ +static inline bool tipc_udp_mtu_bad(u32 mtu) +{ + if (mtu >= (TIPC_MIN_BEARER_MTU + sizeof(struct iphdr) + + sizeof(struct udphdr))) + return false; + + pr_warn("MTU too low for tipc bearer\n"); + return true; +} + +#endif +#endif |