diff options
Diffstat (limited to 'drivers/net/bonding/bond_alb.c')
-rw-r--r-- | drivers/net/bonding/bond_alb.c | 1836 |
1 files changed, 1836 insertions, 0 deletions
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c new file mode 100644 index 000000000..fc5da5d77 --- /dev/null +++ b/drivers/net/bonding/bond_alb.c @@ -0,0 +1,1836 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. + */ + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/pkt_sched.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/if_arp.h> +#include <linux/if_ether.h> +#include <linux/if_bonding.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <net/arp.h> +#include <net/ipv6.h> +#include <net/ndisc.h> +#include <asm/byteorder.h> +#include <net/bonding.h> +#include <net/bond_alb.h> + +static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = { + 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 +}; +static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; + +#pragma pack(1) +struct learning_pkt { + u8 mac_dst[ETH_ALEN]; + u8 mac_src[ETH_ALEN]; + __be16 type; + u8 padding[ETH_ZLEN - ETH_HLEN]; +}; + +struct arp_pkt { + __be16 hw_addr_space; + __be16 prot_addr_space; + u8 hw_addr_len; + u8 prot_addr_len; + __be16 op_code; + u8 mac_src[ETH_ALEN]; /* sender hardware address */ + __be32 ip_src; /* sender IP address */ + u8 mac_dst[ETH_ALEN]; /* target hardware address */ + __be32 ip_dst; /* target IP address */ +}; +#pragma pack() + +/* Forward declaration */ +static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[], + bool strict_match); +static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); +static void rlb_src_unlink(struct bonding *bond, u32 index); +static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, + u32 ip_dst_hash); + +static inline u8 _simple_hash(const u8 *hash_start, int hash_size) +{ + int i; + u8 hash = 0; + + for (i = 0; i < hash_size; i++) 
+ hash ^= hash_start[i]; + + return hash; +} + +/*********************** tlb specific functions ***************************/ + +static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load) +{ + if (save_load) { + entry->load_history = 1 + entry->tx_bytes / + BOND_TLB_REBALANCE_INTERVAL; + entry->tx_bytes = 0; + } + + entry->tx_slave = NULL; + entry->next = TLB_NULL_INDEX; + entry->prev = TLB_NULL_INDEX; +} + +static inline void tlb_init_slave(struct slave *slave) +{ + SLAVE_TLB_INFO(slave).load = 0; + SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX; +} + +static void __tlb_clear_slave(struct bonding *bond, struct slave *slave, + int save_load) +{ + struct tlb_client_info *tx_hash_table; + u32 index; + + /* clear slave from tx_hashtbl */ + tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl; + + /* skip this if we've already freed the tx hash table */ + if (tx_hash_table) { + index = SLAVE_TLB_INFO(slave).head; + while (index != TLB_NULL_INDEX) { + u32 next_index = tx_hash_table[index].next; + + tlb_init_table_entry(&tx_hash_table[index], save_load); + index = next_index; + } + } + + tlb_init_slave(slave); +} + +static void tlb_clear_slave(struct bonding *bond, struct slave *slave, + int save_load) +{ + spin_lock_bh(&bond->mode_lock); + __tlb_clear_slave(bond, slave, save_load); + spin_unlock_bh(&bond->mode_lock); +} + +/* Must be called before starting the monitor timer */ +static int tlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info); + struct tlb_client_info *new_hashtbl; + int i; + + new_hashtbl = kzalloc(size, GFP_KERNEL); + if (!new_hashtbl) + return -ENOMEM; + + spin_lock_bh(&bond->mode_lock); + + bond_info->tx_hashtbl = new_hashtbl; + + for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) + tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0); + + spin_unlock_bh(&bond->mode_lock); + + return 0; +} + +/* Must be called only after all slaves 
have been released */ +static void tlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + spin_lock_bh(&bond->mode_lock); + + kfree(bond_info->tx_hashtbl); + bond_info->tx_hashtbl = NULL; + + spin_unlock_bh(&bond->mode_lock); +} + +static long long compute_gap(struct slave *slave) +{ + return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */ + (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */ +} + +static struct slave *tlb_get_least_loaded_slave(struct bonding *bond) +{ + struct slave *slave, *least_loaded; + struct list_head *iter; + long long max_gap; + + least_loaded = NULL; + max_gap = LLONG_MIN; + + /* Find the slave with the largest gap */ + bond_for_each_slave_rcu(bond, slave, iter) { + if (bond_slave_can_tx(slave)) { + long long gap = compute_gap(slave); + + if (max_gap < gap) { + least_loaded = slave; + max_gap = gap; + } + } + } + + return least_loaded; +} + +static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index, + u32 skb_len) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct tlb_client_info *hash_table; + struct slave *assigned_slave; + + hash_table = bond_info->tx_hashtbl; + assigned_slave = hash_table[hash_index].tx_slave; + if (!assigned_slave) { + assigned_slave = tlb_get_least_loaded_slave(bond); + + if (assigned_slave) { + struct tlb_slave_info *slave_info = + &(SLAVE_TLB_INFO(assigned_slave)); + u32 next_index = slave_info->head; + + hash_table[hash_index].tx_slave = assigned_slave; + hash_table[hash_index].next = next_index; + hash_table[hash_index].prev = TLB_NULL_INDEX; + + if (next_index != TLB_NULL_INDEX) + hash_table[next_index].prev = hash_index; + + slave_info->head = hash_index; + slave_info->load += + hash_table[hash_index].load_history; + } + } + + if (assigned_slave) + hash_table[hash_index].tx_bytes += skb_len; + + return assigned_slave; +} + +static struct slave *tlb_choose_channel(struct bonding *bond, u32 
hash_index, + u32 skb_len) +{ + struct slave *tx_slave; + + /* We don't need to disable softirq here, because + * tlb_choose_channel() is only called by bond_alb_xmit() + * which already has softirq disabled. + */ + spin_lock(&bond->mode_lock); + tx_slave = __tlb_choose_channel(bond, hash_index, skb_len); + spin_unlock(&bond->mode_lock); + + return tx_slave; +} + +/*********************** rlb specific functions ***************************/ + +/* when an ARP REPLY is received from a client update its info + * in the rx_hashtbl + */ +static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + u32 hash_index; + + spin_lock_bh(&bond->mode_lock); + + hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->assigned) && + (client_info->ip_src == arp->ip_dst) && + (client_info->ip_dst == arp->ip_src) && + (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) { + /* update the clients MAC address */ + ether_addr_copy(client_info->mac_dst, arp->mac_src); + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + + spin_unlock_bh(&bond->mode_lock); +} + +static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond, + struct slave *slave) +{ + struct arp_pkt *arp, _arp; + + if (skb->protocol != cpu_to_be16(ETH_P_ARP)) + goto out; + + arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp); + if (!arp) + goto out; + + /* We received an ARP from arp->ip_src. + * We might have used this IP address previously (on the bonding host + * itself or on a system that is bridged together with the bond). + * However, if arp->mac_src is different than what is stored in + * rx_hashtbl, some other host is now using the IP and we must prevent + * sending out client updates with this IP address and the old MAC + * address. 
+ * Clean up all hash table entries that have this address as ip_src but + * have a different mac_src. + */ + rlb_purge_src_ip(bond, arp); + + if (arp->op_code == htons(ARPOP_REPLY)) { + /* update rx hash table for this ARP */ + rlb_update_entry_from_arp(bond, arp); + slave_dbg(bond->dev, slave->dev, "Server received an ARP Reply from client\n"); + } +out: + return RX_HANDLER_ANOTHER; +} + +/* Caller must hold rcu_read_lock() */ +static struct slave *__rlb_next_rx_slave(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *before = NULL, *rx_slave = NULL, *slave; + struct list_head *iter; + bool found = false; + + bond_for_each_slave_rcu(bond, slave, iter) { + if (!bond_slave_can_tx(slave)) + continue; + if (!found) { + if (!before || before->speed < slave->speed) + before = slave; + } else { + if (!rx_slave || rx_slave->speed < slave->speed) + rx_slave = slave; + } + if (slave == bond_info->rx_slave) + found = true; + } + /* we didn't find anything after the current or we have something + * better before and up to the current slave + */ + if (!rx_slave || (before && rx_slave->speed < before->speed)) + rx_slave = before; + + if (rx_slave) + bond_info->rx_slave = rx_slave; + + return rx_slave; +} + +/* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */ +static struct slave *rlb_next_rx_slave(struct bonding *bond) +{ + struct slave *rx_slave; + + ASSERT_RTNL(); + + rcu_read_lock(); + rx_slave = __rlb_next_rx_slave(bond); + rcu_read_unlock(); + + return rx_slave; +} + +/* teach the switch the mac of a disabled slave + * on the primary for fault tolerance + * + * Caller must hold RTNL + */ +static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, + const u8 addr[]) +{ + struct slave *curr_active = rtnl_dereference(bond->curr_active_slave); + + if (!curr_active) + return; + + if (!bond->alb_info.primary_is_promisc) { + if (!dev_set_promiscuity(curr_active->dev, 1)) + 
bond->alb_info.primary_is_promisc = 1; + else + bond->alb_info.primary_is_promisc = 0; + } + + bond->alb_info.rlb_promisc_timeout_counter = 0; + + alb_send_learning_packets(curr_active, addr, true); +} + +/* slave being removed should not be active at this point + * + * Caller must hold rtnl. + */ +static void rlb_clear_slave(struct bonding *bond, struct slave *slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *rx_hash_table; + u32 index, next_index; + + /* clear slave from rx_hashtbl */ + spin_lock_bh(&bond->mode_lock); + + rx_hash_table = bond_info->rx_hashtbl; + index = bond_info->rx_hashtbl_used_head; + for (; index != RLB_NULL_INDEX; index = next_index) { + next_index = rx_hash_table[index].used_next; + if (rx_hash_table[index].slave == slave) { + struct slave *assigned_slave = rlb_next_rx_slave(bond); + + if (assigned_slave) { + rx_hash_table[index].slave = assigned_slave; + if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) { + bond_info->rx_hashtbl[index].ntt = 1; + bond_info->rx_ntt = 1; + /* A slave has been removed from the + * table because it is either disabled + * or being released. 
We must retry the + * update to avoid clients from not + * being updated & disconnecting when + * there is stress + */ + bond_info->rlb_update_retry_counter = + RLB_UPDATE_RETRY; + } + } else { /* there is no active slave */ + rx_hash_table[index].slave = NULL; + } + } + } + + spin_unlock_bh(&bond->mode_lock); + + if (slave != rtnl_dereference(bond->curr_active_slave)) + rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr); +} + +static void rlb_update_client(struct rlb_client_info *client_info) +{ + int i; + + if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) + return; + + for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { + struct sk_buff *skb; + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, + client_info->ip_dst, + client_info->slave->dev, + client_info->ip_src, + client_info->mac_dst, + client_info->slave->dev->dev_addr, + client_info->mac_dst); + if (!skb) { + slave_err(client_info->slave->bond->dev, + client_info->slave->dev, + "failed to create an ARP packet\n"); + continue; + } + + skb->dev = client_info->slave->dev; + + if (client_info->vlan_id) { + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + client_info->vlan_id); + } + + arp_xmit(skb); + } +} + +/* sends ARP REPLIES that update the clients that need updating */ +static void rlb_update_rx_clients(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + u32 hash_index; + + spin_lock_bh(&bond->mode_lock); + + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + if (client_info->ntt) { + rlb_update_client(client_info); + if (bond_info->rlb_update_retry_counter == 0) + client_info->ntt = 0; + } + } + + /* do not update the entries again until this counter is zero so that + * not to confuse the clients. 
+ */ + bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY; + + spin_unlock_bh(&bond->mode_lock); +} + +/* The slave was assigned a new mac address - update the clients */ +static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + int ntt = 0; + u32 hash_index; + + spin_lock_bh(&bond->mode_lock); + + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if ((client_info->slave == slave) && + is_valid_ether_addr(client_info->mac_dst)) { + client_info->ntt = 1; + ntt = 1; + } + } + + /* update the team's flag only after the whole iteration */ + if (ntt) { + bond_info->rx_ntt = 1; + /* fasten the change */ + bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY; + } + + spin_unlock_bh(&bond->mode_lock); +} + +/* mark all clients using src_ip to be updated */ +static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *client_info; + u32 hash_index; + + spin_lock(&bond->mode_lock); + + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (!client_info->slave) { + netdev_err(bond->dev, "found a client with no channel in the client's hash table\n"); + continue; + } + /* update all clients using this src_ip, that are not assigned + * to the team's address (curr_active_slave) and have a known + * unicast mac address. 
+ */ + if ((client_info->ip_src == src_ip) && + !ether_addr_equal_64bits(client_info->slave->dev->dev_addr, + bond->dev->dev_addr) && + is_valid_ether_addr(client_info->mac_dst)) { + client_info->ntt = 1; + bond_info->rx_ntt = 1; + } + } + + spin_unlock(&bond->mode_lock); +} + +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *assigned_slave, *curr_active_slave; + struct rlb_client_info *client_info; + u32 hash_index = 0; + + spin_lock(&bond->mode_lock); + + curr_active_slave = rcu_dereference(bond->curr_active_slave); + + hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst)); + client_info = &(bond_info->rx_hashtbl[hash_index]); + + if (client_info->assigned) { + if ((client_info->ip_src == arp->ip_src) && + (client_info->ip_dst == arp->ip_dst)) { + /* the entry is already assigned to this client */ + if (!is_broadcast_ether_addr(arp->mac_dst)) { + /* update mac address from arp */ + ether_addr_copy(client_info->mac_dst, arp->mac_dst); + } + ether_addr_copy(client_info->mac_src, arp->mac_src); + + assigned_slave = client_info->slave; + if (assigned_slave) { + spin_unlock(&bond->mode_lock); + return assigned_slave; + } + } else { + /* the entry is already assigned to some other client, + * move the old client to primary (curr_active_slave) so + * that the new client can be assigned to this entry. 
+ */ + if (curr_active_slave && + client_info->slave != curr_active_slave) { + client_info->slave = curr_active_slave; + rlb_update_client(client_info); + } + } + } + /* assign a new slave */ + assigned_slave = __rlb_next_rx_slave(bond); + + if (assigned_slave) { + if (!(client_info->assigned && + client_info->ip_src == arp->ip_src)) { + /* ip_src is going to be updated, + * fix the src hash list + */ + u32 hash_src = _simple_hash((u8 *)&arp->ip_src, + sizeof(arp->ip_src)); + rlb_src_unlink(bond, hash_index); + rlb_src_link(bond, hash_src, hash_index); + } + + client_info->ip_src = arp->ip_src; + client_info->ip_dst = arp->ip_dst; + /* arp->mac_dst is broadcast for arp requests. + * will be updated with clients actual unicast mac address + * upon receiving an arp reply. + */ + ether_addr_copy(client_info->mac_dst, arp->mac_dst); + ether_addr_copy(client_info->mac_src, arp->mac_src); + client_info->slave = assigned_slave; + + if (is_valid_ether_addr(client_info->mac_dst)) { + client_info->ntt = 1; + bond->alb_info.rx_ntt = 1; + } else { + client_info->ntt = 0; + } + + if (vlan_get_tag(skb, &client_info->vlan_id)) + client_info->vlan_id = 0; + + if (!client_info->assigned) { + u32 prev_tbl_head = bond_info->rx_hashtbl_used_head; + + bond_info->rx_hashtbl_used_head = hash_index; + client_info->used_next = prev_tbl_head; + if (prev_tbl_head != RLB_NULL_INDEX) { + bond_info->rx_hashtbl[prev_tbl_head].used_prev = + hash_index; + } + client_info->assigned = 1; + } + } + + spin_unlock(&bond->mode_lock); + + return assigned_slave; +} + +/* chooses (and returns) transmit channel for arp reply + * does not choose channel for other arp types since they are + * sent on the curr_active_slave + */ +static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) +{ + struct slave *tx_slave = NULL; + struct net_device *dev; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); + + /* 
Don't modify or load balance ARPs that do not originate + * from the bond itself or a VLAN directly above the bond. + */ + if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) + return NULL; + + dev = ip_dev_find(dev_net(bond->dev), arp->ip_src); + if (dev) { + if (netif_is_bridge_master(dev)) { + dev_put(dev); + return NULL; + } + dev_put(dev); + } + + if (arp->op_code == htons(ARPOP_REPLY)) { + /* the arp must be sent on the selected rx channel */ + tx_slave = rlb_choose_channel(skb, bond, arp); + if (tx_slave) + bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, + tx_slave->dev->addr_len); + netdev_dbg(bond->dev, "(slave %s): Server sent ARP Reply packet\n", + tx_slave ? tx_slave->dev->name : "NULL"); + } else if (arp->op_code == htons(ARPOP_REQUEST)) { + /* Create an entry in the rx_hashtbl for this client as a + * place holder. + * When the arp reply is received the entry will be updated + * with the correct unicast address of the client. + */ + tx_slave = rlb_choose_channel(skb, bond, arp); + + /* The ARP reply packets must be delayed so that + * they can cancel out the influence of the ARP request. + */ + bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY; + + /* arp requests are broadcast and are sent on the primary + * the arp request will collapse all clients on the subnet to + * the primary slave. We must register these clients to be + * updated with their assigned mac. + */ + rlb_req_update_subnet_clients(bond, arp->ip_src); + netdev_dbg(bond->dev, "(slave %s): Server sent ARP Request packet\n", + tx_slave ? 
tx_slave->dev->name : "NULL"); + } + + return tx_slave; +} + +static void rlb_rebalance(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct slave *assigned_slave; + struct rlb_client_info *client_info; + int ntt; + u32 hash_index; + + spin_lock_bh(&bond->mode_lock); + + ntt = 0; + hash_index = bond_info->rx_hashtbl_used_head; + for (; hash_index != RLB_NULL_INDEX; + hash_index = client_info->used_next) { + client_info = &(bond_info->rx_hashtbl[hash_index]); + assigned_slave = __rlb_next_rx_slave(bond); + if (assigned_slave && (client_info->slave != assigned_slave)) { + client_info->slave = assigned_slave; + if (!is_zero_ether_addr(client_info->mac_dst)) { + client_info->ntt = 1; + ntt = 1; + } + } + } + + /* update the team's flag only after the whole iteration */ + if (ntt) + bond_info->rx_ntt = 1; + spin_unlock_bh(&bond->mode_lock); +} + +/* Caller must hold mode_lock */ +static void rlb_init_table_entry_dst(struct rlb_client_info *entry) +{ + entry->used_next = RLB_NULL_INDEX; + entry->used_prev = RLB_NULL_INDEX; + entry->assigned = 0; + entry->slave = NULL; + entry->vlan_id = 0; +} +static void rlb_init_table_entry_src(struct rlb_client_info *entry) +{ + entry->src_first = RLB_NULL_INDEX; + entry->src_prev = RLB_NULL_INDEX; + entry->src_next = RLB_NULL_INDEX; +} + +static void rlb_init_table_entry(struct rlb_client_info *entry) +{ + memset(entry, 0, sizeof(struct rlb_client_info)); + rlb_init_table_entry_dst(entry); + rlb_init_table_entry_src(entry); +} + +static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 next_index = bond_info->rx_hashtbl[index].used_next; + u32 prev_index = bond_info->rx_hashtbl[index].used_prev; + + if (index == bond_info->rx_hashtbl_used_head) + bond_info->rx_hashtbl_used_head = next_index; + if (prev_index != RLB_NULL_INDEX) + bond_info->rx_hashtbl[prev_index].used_next = next_index; + if (next_index != 
RLB_NULL_INDEX) + bond_info->rx_hashtbl[next_index].used_prev = prev_index; +} + +/* unlink a rlb hash table entry from the src list */ +static void rlb_src_unlink(struct bonding *bond, u32 index) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 next_index = bond_info->rx_hashtbl[index].src_next; + u32 prev_index = bond_info->rx_hashtbl[index].src_prev; + + bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX; + bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX; + + if (next_index != RLB_NULL_INDEX) + bond_info->rx_hashtbl[next_index].src_prev = prev_index; + + if (prev_index == RLB_NULL_INDEX) + return; + + /* is prev_index pointing to the head of this list? */ + if (bond_info->rx_hashtbl[prev_index].src_first == index) + bond_info->rx_hashtbl[prev_index].src_first = next_index; + else + bond_info->rx_hashtbl[prev_index].src_next = next_index; + +} + +static void rlb_delete_table_entry(struct bonding *bond, u32 index) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); + + rlb_delete_table_entry_dst(bond, index); + rlb_init_table_entry_dst(entry); + + rlb_src_unlink(bond, index); +} + +/* add the rx_hashtbl[ip_dst_hash] entry to the list + * of entries with identical ip_src_hash + */ +static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 next; + + bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash; + next = bond_info->rx_hashtbl[ip_src_hash].src_first; + bond_info->rx_hashtbl[ip_dst_hash].src_next = next; + if (next != RLB_NULL_INDEX) + bond_info->rx_hashtbl[next].src_prev = ip_dst_hash; + bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash; +} + +/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does + * not match arp->mac_src + */ +static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp) +{ + struct alb_bond_info 
*bond_info = &(BOND_ALB_INFO(bond)); + u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src)); + u32 index; + + spin_lock_bh(&bond->mode_lock); + + index = bond_info->rx_hashtbl[ip_src_hash].src_first; + while (index != RLB_NULL_INDEX) { + struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]); + u32 next_index = entry->src_next; + + if (entry->ip_src == arp->ip_src && + !ether_addr_equal_64bits(arp->mac_src, entry->mac_src)) + rlb_delete_table_entry(bond, index); + index = next_index; + } + spin_unlock_bh(&bond->mode_lock); +} + +static int rlb_initialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct rlb_client_info *new_hashtbl; + int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info); + int i; + + new_hashtbl = kmalloc(size, GFP_KERNEL); + if (!new_hashtbl) + return -1; + + spin_lock_bh(&bond->mode_lock); + + bond_info->rx_hashtbl = new_hashtbl; + + bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; + + for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) + rlb_init_table_entry(bond_info->rx_hashtbl + i); + + spin_unlock_bh(&bond->mode_lock); + + /* register to receive ARPs */ + bond->recv_probe = rlb_arp_recv; + + return 0; +} + +static void rlb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + spin_lock_bh(&bond->mode_lock); + + kfree(bond_info->rx_hashtbl); + bond_info->rx_hashtbl = NULL; + bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX; + + spin_unlock_bh(&bond->mode_lock); +} + +static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + u32 curr_index; + + spin_lock_bh(&bond->mode_lock); + + curr_index = bond_info->rx_hashtbl_used_head; + while (curr_index != RLB_NULL_INDEX) { + struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]); + u32 next_index = bond_info->rx_hashtbl[curr_index].used_next; + + if (curr->vlan_id == vlan_id) + 
rlb_delete_table_entry(bond, curr_index); + + curr_index = next_index; + } + + spin_unlock_bh(&bond->mode_lock); +} + +/*********************** tlb/rlb shared functions *********************/ + +static void alb_send_lp_vid(struct slave *slave, const u8 mac_addr[], + __be16 vlan_proto, u16 vid) +{ + struct learning_pkt pkt; + struct sk_buff *skb; + int size = sizeof(struct learning_pkt); + + memset(&pkt, 0, size); + ether_addr_copy(pkt.mac_dst, mac_addr); + ether_addr_copy(pkt.mac_src, mac_addr); + pkt.type = cpu_to_be16(ETH_P_LOOPBACK); + + skb = dev_alloc_skb(size); + if (!skb) + return; + + skb_put_data(skb, &pkt, size); + + skb_reset_mac_header(skb); + skb->network_header = skb->mac_header + ETH_HLEN; + skb->protocol = pkt.type; + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; + + slave_dbg(slave->bond->dev, slave->dev, + "Send learning packet: mac %pM vlan %d\n", mac_addr, vid); + + if (vid) + __vlan_hwaccel_put_tag(skb, vlan_proto, vid); + + dev_queue_xmit(skb); +} + +struct alb_walk_data { + struct bonding *bond; + struct slave *slave; + const u8 *mac_addr; + bool strict_match; +}; + +static int alb_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) +{ + struct alb_walk_data *data = (struct alb_walk_data *)priv->data; + bool strict_match = data->strict_match; + const u8 *mac_addr = data->mac_addr; + struct bonding *bond = data->bond; + struct slave *slave = data->slave; + struct bond_vlan_tag *tags; + + if (is_vlan_dev(upper) && + bond->dev->lower_level == upper->lower_level - 1) { + if (upper->addr_assign_type == NET_ADDR_STOLEN) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } else { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } + } + + /* If this is a macvlan device, then only send updates + * when strict_match is turned off. 
+ */ + if (netif_is_macvlan(upper) && !strict_match) { + tags = bond_verify_device_path(bond->dev, upper, 0); + if (IS_ERR_OR_NULL(tags)) + BUG(); + alb_send_lp_vid(slave, upper->dev_addr, + tags[0].vlan_proto, tags[0].vlan_id); + kfree(tags); + } + + return 0; +} + +static void alb_send_learning_packets(struct slave *slave, const u8 mac_addr[], + bool strict_match) +{ + struct bonding *bond = bond_get_bond_by_slave(slave); + struct netdev_nested_priv priv; + struct alb_walk_data data = { + .strict_match = strict_match, + .mac_addr = mac_addr, + .slave = slave, + .bond = bond, + }; + + priv.data = (void *)&data; + /* send untagged */ + alb_send_lp_vid(slave, mac_addr, 0, 0); + + /* loop through all devices and see if we need to send a packet + * for that device. + */ + rcu_read_lock(); + netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv); + rcu_read_unlock(); +} + +static int alb_set_slave_mac_addr(struct slave *slave, const u8 addr[], + unsigned int len) +{ + struct net_device *dev = slave->dev; + struct sockaddr_storage ss; + + if (BOND_MODE(slave->bond) == BOND_MODE_TLB) { + __dev_addr_set(dev, addr, len); + return 0; + } + + /* for rlb each slave must have a unique hw mac addresses so that + * each slave will receive packets destined to a different mac + */ + memcpy(ss.__data, addr, len); + ss.ss_family = dev->type; + if (dev_set_mac_address(dev, (struct sockaddr *)&ss, NULL)) { + slave_err(slave->bond->dev, dev, "dev_set_mac_address on slave failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n"); + return -EOPNOTSUPP; + } + return 0; +} + +/* Swap MAC addresses between two slaves. + * + * Called with RTNL held, and no other locks. 
+ */ +static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2) +{ + u8 tmp_mac_addr[MAX_ADDR_LEN]; + + bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr, + slave1->dev->addr_len); + alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, + slave2->dev->addr_len); + alb_set_slave_mac_addr(slave2, tmp_mac_addr, + slave1->dev->addr_len); + +} + +/* Send learning packets after MAC address swap. + * + * Called with RTNL and no other locks + */ +static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, + struct slave *slave2) +{ + int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2)); + struct slave *disabled_slave = NULL; + + ASSERT_RTNL(); + + /* fasten the change in the switch */ + if (bond_slave_can_tx(slave1)) { + alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave1); + } + } else { + disabled_slave = slave1; + } + + if (bond_slave_can_tx(slave2)) { + alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); + if (bond->alb_info.rlb_enabled) { + /* inform the clients that the mac address + * has changed + */ + rlb_req_update_slave_clients(bond, slave2); + } + } else { + disabled_slave = slave2; + } + + if (bond->alb_info.rlb_enabled && slaves_state_differ) { + /* A disabled slave was assigned an active mac addr */ + rlb_teach_disabled_mac_on_primary(bond, + disabled_slave->dev->dev_addr); + } +} + +/** + * alb_change_hw_addr_on_detach + * @bond: bonding we're working on + * @slave: the slave that was just detached + * + * We assume that @slave was already detached from the slave list. + * + * If @slave's permanent hw address is different both from its current + * address and from @bond's address, then somewhere in the bond there's + * a slave that has @slave's permanet address as its current address. 
+ * We'll make sure that slave no longer uses @slave's permanent address. + * + * Caller must hold RTNL and no other locks + */ +static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave) +{ + int perm_curr_diff; + int perm_bond_diff; + struct slave *found_slave; + + perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, + slave->dev->dev_addr); + perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr, + bond->dev->dev_addr); + + if (perm_curr_diff && perm_bond_diff) { + found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr); + + if (found_slave) { + alb_swap_mac_addr(slave, found_slave); + alb_fasten_mac_swap(bond, slave, found_slave); + } + } +} + +/** + * alb_handle_addr_collision_on_attach + * @bond: bonding we're working on + * @slave: the slave that was just attached + * + * checks uniqueness of slave's mac address and handles the case the + * new slave uses the bonds mac address. + * + * If the permanent hw address of @slave is @bond's hw address, we need to + * find a different hw address to give @slave, that isn't in use by any other + * slave in the bond. This address must be, of course, one of the permanent + * addresses of the other slaves. + * + * We go over the slave list, and for each slave there we compare its + * permanent hw address with the current address of all the other slaves. + * If no match was found, then we've found a slave with a permanent address + * that isn't used by any other slave in the bond, so we can assign it to + * @slave. + * + * assumption: this function is called before @slave is attached to the + * bond slave list. 
+ */
+static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
+{
+	struct slave *bond_addr_user = rcu_access_pointer(bond->curr_active_slave);
+	struct slave *candidate, *donor = NULL;
+	struct list_head *iter;
+
+	/* this is the first slave, so there is nobody to collide with */
+	if (!bond_has_slaves(bond))
+		return 0;
+
+	if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) {
+		/* The slave's permanent address differs from the bond's:
+		 * it only needs to be unique among the other slaves.
+		 */
+		if (!bond_slave_has_mac(bond, slave->dev->dev_addr))
+			return 0;
+
+		/* Not unique: try the bond address and let the search
+		 * below pick a spare address for this slave.
+		 */
+		alb_set_slave_mac_addr(slave, bond->dev->dev_addr,
+				       bond->dev->addr_len);
+	}
+
+	/* The slave's address is equal to the address of the bond.
+	 * Search for a spare address in the bond for this slave.
+	 */
+	bond_for_each_slave(bond, candidate, iter) {
+		if (!bond_slave_has_mac(bond, candidate->perm_hwaddr)) {
+			/* no slave has candidate's perm addr as its curr addr */
+			donor = candidate;
+			break;
+		}
+
+		/* remember the first slave seen that uses the bond address */
+		if (!bond_addr_user &&
+		    ether_addr_equal_64bits(candidate->dev->dev_addr,
+					    bond->dev->dev_addr))
+			bond_addr_user = candidate;
+	}
+
+	if (donor) {
+		alb_set_slave_mac_addr(slave, donor->perm_hwaddr,
+				       donor->dev->addr_len);
+
+		slave_warn(bond->dev, slave->dev, "the slave hw address is in use by the bond; giving it the hw address of %s\n",
+			   donor->dev->name);
+		return 0;
+	}
+
+	if (!bond_addr_user)
+		return 0;
+
+	slave_err(bond->dev, slave->dev, "the slave hw address is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n");
+	return -EFAULT;
+}
+
+/**
+ * alb_set_mac_address
+ * @bond: bonding we're working on
+ * @addr: MAC address to set
+ *
+ * In TLB mode all slaves are configured to the bond's hw 
address, but set + * their dev_addr field to different addresses (based on their permanent hw + * addresses). + * + * For each slave, this function sets the interface to the new address and then + * changes its dev_addr field to its previous value. + * + * Unwinding assumes bond's mac address has not yet changed. + */ +static int alb_set_mac_address(struct bonding *bond, void *addr) +{ + struct slave *slave, *rollback_slave; + struct list_head *iter; + struct sockaddr_storage ss; + char tmp_addr[MAX_ADDR_LEN]; + int res; + + if (bond->alb_info.rlb_enabled) + return 0; + + bond_for_each_slave(bond, slave, iter) { + /* save net_device's current hw address */ + bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr, + slave->dev->addr_len); + + res = dev_set_mac_address(slave->dev, addr, NULL); + + /* restore net_device's hw address */ + dev_addr_set(slave->dev, tmp_addr); + + if (res) + goto unwind; + } + + return 0; + +unwind: + memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len); + ss.ss_family = bond->dev->type; + + /* unwind from head to the slave that failed */ + bond_for_each_slave(bond, rollback_slave, iter) { + if (rollback_slave == slave) + break; + bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr, + rollback_slave->dev->addr_len); + dev_set_mac_address(rollback_slave->dev, + (struct sockaddr *)&ss, NULL); + dev_addr_set(rollback_slave->dev, tmp_addr); + } + + return res; +} + +/* determine if the packet is NA or NS */ +static bool alb_determine_nd(struct sk_buff *skb, struct bonding *bond) +{ + struct ipv6hdr *ip6hdr; + struct icmp6hdr *hdr; + + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) + return true; + + ip6hdr = ipv6_hdr(skb); + if (ip6hdr->nexthdr != IPPROTO_ICMPV6) + return false; + + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr) + sizeof(*hdr))) + return true; + + hdr = icmp6_hdr(skb); + return hdr->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT || + hdr->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION; +} + +/************************ 
exported alb functions ************************/ + +int bond_alb_initialize(struct bonding *bond, int rlb_enabled) +{ + int res; + + res = tlb_initialize(bond); + if (res) + return res; + + if (rlb_enabled) { + res = rlb_initialize(bond); + if (res) { + tlb_deinitialize(bond); + return res; + } + bond->alb_info.rlb_enabled = 1; + } else { + bond->alb_info.rlb_enabled = 0; + } + + return 0; +} + +void bond_alb_deinitialize(struct bonding *bond) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + + tlb_deinitialize(bond); + + if (bond_info->rlb_enabled) + rlb_deinitialize(bond); +} + +static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, + struct slave *tx_slave) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct ethhdr *eth_data = eth_hdr(skb); + + if (!tx_slave) { + /* unbalanced or unassigned, send through primary */ + tx_slave = rcu_dereference(bond->curr_active_slave); + if (bond->params.tlb_dynamic_lb) + bond_info->unbalanced_load += skb->len; + } + + if (tx_slave && bond_slave_can_tx(tx_slave)) { + if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) { + ether_addr_copy(eth_data->h_source, + tx_slave->dev->dev_addr); + } + + return bond_dev_queue_xmit(bond, skb, tx_slave->dev); + } + + if (tx_slave && bond->params.tlb_dynamic_lb) { + spin_lock(&bond->mode_lock); + __tlb_clear_slave(bond, tx_slave, 0); + spin_unlock(&bond->mode_lock); + } + + /* no suitable interface, frame not sent */ + return bond_tx_drop(bond->dev, skb); +} + +struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ + struct slave *tx_slave = NULL; + struct ethhdr *eth_data; + u32 hash_index; + + skb_reset_mac_header(skb); + eth_data = eth_hdr(skb); + + /* Do not TX balance any multicast or broadcast */ + if (!is_multicast_ether_addr(eth_data->h_dest)) { + switch (skb->protocol) { + case htons(ETH_P_IPV6): + if (alb_determine_nd(skb, bond)) + break; + fallthrough; + case htons(ETH_P_IP): + 
hash_index = bond_xmit_hash(bond, skb); + if (bond->params.tlb_dynamic_lb) { + tx_slave = tlb_choose_channel(bond, + hash_index & 0xFF, + skb->len); + } else { + struct bond_up_slave *slaves; + unsigned int count; + + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (likely(count)) + tx_slave = slaves->arr[hash_index % + count]; + } + break; + } + } + return tx_slave; +} + +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave; + + tx_slave = bond_xmit_tlb_slave_get(bond, skb); + return bond_do_alb_xmit(skb, bond, tx_slave); +} + +struct slave *bond_xmit_alb_slave_get(struct bonding *bond, + struct sk_buff *skb) +{ + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + static const __be32 ip_bcast = htonl(0xffffffff); + struct slave *tx_slave = NULL; + const u8 *hash_start = NULL; + bool do_tx_balance = true; + struct ethhdr *eth_data; + u32 hash_index = 0; + int hash_size = 0; + + skb_reset_mac_header(skb); + eth_data = eth_hdr(skb); + + switch (ntohs(skb->protocol)) { + case ETH_P_IP: { + const struct iphdr *iph; + + if (is_broadcast_ether_addr(eth_data->h_dest) || + !pskb_network_may_pull(skb, sizeof(*iph))) { + do_tx_balance = false; + break; + } + iph = ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { + do_tx_balance = false; + break; + } + hash_start = (char *)&(iph->daddr); + hash_size = sizeof(iph->daddr); + break; + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + + /* IPv6 doesn't really use broadcast mac address, but leave + * that here just in case. + */ + if (is_broadcast_ether_addr(eth_data->h_dest)) { + do_tx_balance = false; + break; + } + + /* IPv6 uses all-nodes multicast as an equivalent to + * broadcasts in IPv4. 
+ */ + if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) { + do_tx_balance = false; + break; + } + + if (alb_determine_nd(skb, bond)) { + do_tx_balance = false; + break; + } + + /* The IPv6 header is pulled by alb_determine_nd */ + /* Additionally, DAD probes should not be tx-balanced as that + * will lead to false positives for duplicate addresses and + * prevent address configuration from working. + */ + ip6hdr = ipv6_hdr(skb); + if (ipv6_addr_any(&ip6hdr->saddr)) { + do_tx_balance = false; + break; + } + + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); + break; + } + case ETH_P_ARP: + do_tx_balance = false; + if (bond_info->rlb_enabled) + tx_slave = rlb_arp_xmit(skb, bond); + break; + default: + do_tx_balance = false; + break; + } + + if (do_tx_balance) { + if (bond->params.tlb_dynamic_lb) { + hash_index = _simple_hash(hash_start, hash_size); + tx_slave = tlb_choose_channel(bond, hash_index, skb->len); + } else { + /* + * do_tx_balance means we are free to select the tx_slave + * So we do exactly what tlb would do for hash selection + */ + + struct bond_up_slave *slaves; + unsigned int count; + + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? 
READ_ONCE(slaves->count) : 0; + if (likely(count)) + tx_slave = slaves->arr[bond_xmit_hash(bond, skb) % + count]; + } + } + return tx_slave; +} + +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *tx_slave = NULL; + + tx_slave = bond_xmit_alb_slave_get(bond, skb); + return bond_do_alb_xmit(skb, bond, tx_slave); +} + +void bond_alb_monitor(struct work_struct *work) +{ + struct bonding *bond = container_of(work, struct bonding, + alb_work.work); + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); + struct list_head *iter; + struct slave *slave; + + if (!bond_has_slaves(bond)) { + atomic_set(&bond_info->tx_rebalance_counter, 0); + bond_info->lp_counter = 0; + goto re_arm; + } + + rcu_read_lock(); + + atomic_inc(&bond_info->tx_rebalance_counter); + bond_info->lp_counter++; + + /* send learning packets */ + if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { + bool strict_match; + + bond_for_each_slave_rcu(bond, slave, iter) { + /* If updating current_active, use all currently + * user mac addresses (!strict_match). Otherwise, only + * use mac of the slave device. + * In RLB mode, we always use strict matches. 
+ */ + strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) || + bond_info->rlb_enabled); + alb_send_learning_packets(slave, slave->dev->dev_addr, + strict_match); + } + bond_info->lp_counter = 0; + } + + /* rebalance tx traffic */ + if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { + bond_for_each_slave_rcu(bond, slave, iter) { + tlb_clear_slave(bond, slave, 1); + if (slave == rcu_access_pointer(bond->curr_active_slave)) { + SLAVE_TLB_INFO(slave).load = + bond_info->unbalanced_load / + BOND_TLB_REBALANCE_INTERVAL; + bond_info->unbalanced_load = 0; + } + } + atomic_set(&bond_info->tx_rebalance_counter, 0); + } + + if (bond_info->rlb_enabled) { + if (bond_info->primary_is_promisc && + (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) { + + /* dev_set_promiscuity requires rtnl and + * nothing else. Avoid race with bond_close. + */ + rcu_read_unlock(); + if (!rtnl_trylock()) + goto re_arm; + + bond_info->rlb_promisc_timeout_counter = 0; + + /* If the primary was set to promiscuous mode + * because a slave was disabled then + * it can now leave promiscuous mode. 
+ */ + dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev, + -1); + bond_info->primary_is_promisc = 0; + + rtnl_unlock(); + rcu_read_lock(); + } + + if (bond_info->rlb_rebalance) { + bond_info->rlb_rebalance = 0; + rlb_rebalance(bond); + } + + /* check if clients need updating */ + if (bond_info->rx_ntt) { + if (bond_info->rlb_update_delay_counter) { + --bond_info->rlb_update_delay_counter; + } else { + rlb_update_rx_clients(bond); + if (bond_info->rlb_update_retry_counter) + --bond_info->rlb_update_retry_counter; + else + bond_info->rx_ntt = 0; + } + } + } + rcu_read_unlock(); +re_arm: + queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); +} + +/* assumption: called before the slave is attached to the bond + * and not locked by the bond lock + */ +int bond_alb_init_slave(struct bonding *bond, struct slave *slave) +{ + int res; + + res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, + slave->dev->addr_len); + if (res) + return res; + + res = alb_handle_addr_collision_on_attach(bond, slave); + if (res) + return res; + + tlb_init_slave(slave); + + /* order a rebalance ASAP */ + atomic_set(&bond->alb_info.tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); + + if (bond->alb_info.rlb_enabled) + bond->alb_info.rlb_rebalance = 1; + + return 0; +} + +/* Remove slave from tlb and rlb hash tables, and fix up MAC addresses + * if necessary. 
+ *
+ * Caller must hold RTNL and no other locks
+ */
+void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
+{
+	struct alb_bond_info *info = &bond->alb_info;
+
+	/* only fix up addresses while other slaves remain in the bond */
+	if (bond_has_slaves(bond))
+		alb_change_hw_addr_on_detach(bond, slave);
+
+	tlb_clear_slave(bond, slave, 0);
+
+	if (!info->rlb_enabled)
+		return;
+
+	info->rx_slave = NULL;
+	rlb_clear_slave(bond, slave);
+}
+
+/* React to a link state change of @slave.
+ *
+ * BOND_LINK_DOWN clears the slave's TLB (and, with RLB, RLB) state;
+ * BOND_LINK_UP requests a rebalance as soon as possible. Other link
+ * values only reach the slave-array refresh at the end.
+ */
+void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+
+	switch (link) {
+	case BOND_LINK_DOWN:
+		tlb_clear_slave(bond, slave, 0);
+		if (bond->alb_info.rlb_enabled)
+			rlb_clear_slave(bond, slave);
+		break;
+	case BOND_LINK_UP:
+		/* order a rebalance ASAP */
+		atomic_set(&bond_info->tx_rebalance_counter,
+			   BOND_TLB_REBALANCE_TICKS);
+
+		/* If the updelay module parameter is smaller than the
+		 * forwarding delay of the switch the rebalance will
+		 * not work because the rebalance arp replies will
+		 * not be forwarded to the clients..
+		 */
+		if (bond->alb_info.rlb_enabled)
+			bond->alb_info.rlb_rebalance = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* refresh the slave array when bond_is_nondyn_tlb() says so */
+	if (bond_is_nondyn_tlb(bond) && bond_update_slave_arr(bond, NULL))
+		pr_err("Failed to build slave-array for TLB mode.\n");
+}
+
+/**
+ * bond_alb_handle_active_change - assign new curr_active_slave
+ * @bond: our bonding struct
+ * @new_slave: new slave to assign
+ *
+ * Set the bond->curr_active_slave to @new_slave and handle
+ * mac address swapping and promiscuity changes as needed.
+ *
+ * Caller must hold RTNL
+ */
+void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
+{
+	struct alb_bond_info *info = &bond->alb_info;
+	struct slave *old_active = rtnl_dereference(bond->curr_active_slave);
+	struct slave *peer;
+
+	if (old_active == new_slave)
+		return;
+
+	/* the outgoing active slave leaves promiscuous mode */
+	if (old_active && info->primary_is_promisc) {
+		dev_set_promiscuity(old_active->dev, -1);
+		info->primary_is_promisc = 0;
+		info->rlb_promisc_timeout_counter = 0;
+	}
+
+	rcu_assign_pointer(bond->curr_active_slave, new_slave);
+
+	if (!new_slave)
+		return;
+	if (!bond_has_slaves(bond))
+		return;
+
+	/* set the new curr_active_slave to the bonds mac address
+	 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave
+	 */
+	peer = old_active;
+	if (!peer)
+		peer = bond_slave_has_mac(bond, bond->dev->dev_addr);
+
+	/* Arrange for peer and new_slave to temporarily be
+	 * ignored so we can mess with their MAC addresses without
+	 * fear of interference from transmit activity.
+	 */
+	if (peer)
+		tlb_clear_slave(bond, peer, 1);
+	tlb_clear_slave(bond, new_slave, 1);
+
+	/* in TLB mode, the slave might flip down/up with the old dev_addr,
+	 * and thus filter bond->dev_addr's packets, so force bond's mac
+	 */
+	if (BOND_MODE(bond) == BOND_MODE_TLB) {
+		struct sockaddr_storage bond_ss;
+		u8 saved_addr[MAX_ADDR_LEN];
+
+		bond_hw_addr_copy(saved_addr, new_slave->dev->dev_addr,
+				  new_slave->dev->addr_len);
+
+		bond_hw_addr_copy(bond_ss.__data, bond->dev->dev_addr,
+				  bond->dev->addr_len);
+		bond_ss.ss_family = bond->dev->type;
+
+		/* we don't care if it can't change its mac, best effort */
+		dev_set_mac_address(new_slave->dev, (struct sockaddr *)&bond_ss,
+				    NULL);
+
+		/* put the previous dev_addr value back */
+		dev_addr_set(new_slave->dev, saved_addr);
+	}
+
+	/* curr_active_slave must be set before calling alb_swap_mac_addr */
+	if (!peer) {
+		/* set the new_slave to the bond mac address */
+		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr,
+				       bond->dev->addr_len);
+		alb_send_learning_packets(new_slave, bond->dev->dev_addr,
+					  false);
+		return;
+	}
+
+	/* swap mac address */
+	alb_swap_mac_addr(peer, new_slave);
+	alb_fasten_mac_swap(bond, peer, new_slave);
+}
+
+/* Change the bond's MAC address to @addr and pass it on to the current
+ * active slave.
+ *
+ * Returns -EADDRNOTAVAIL for an invalid ethernet address, the error from
+ * alb_set_mac_address() if that fails, and 0 otherwise.
+ *
+ * Called with RTNL
+ */
+int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct sockaddr_storage *new_ss = addr;
+	struct slave *active;
+	struct slave *peer;
+	int err;
+
+	if (!is_valid_ether_addr(new_ss->__data))
+		return -EADDRNOTAVAIL;
+
+	err = alb_set_mac_address(bond, addr);
+	if (err)
+		return err;
+
+	dev_addr_set(bond_dev, new_ss->__data);
+
+	/* If there is no curr_active_slave there is nothing else to do.
+	 * Otherwise we'll need to pass the new address to it and handle
+	 * duplications.
+	 */
+	active = rtnl_dereference(bond->curr_active_slave);
+	if (!active)
+		return 0;
+
+	peer = bond_slave_has_mac(bond, bond_dev->dev_addr);
+	if (peer) {
+		/* another slave matched the new address: trade addresses */
+		alb_swap_mac_addr(peer, active);
+		alb_fasten_mac_swap(bond, peer, active);
+		return 0;
+	}
+
+	alb_set_slave_mac_addr(active, bond_dev->dev_addr, bond_dev->addr_len);
+	alb_send_learning_packets(active, bond_dev->dev_addr, false);
+
+	/* inform clients mac address has changed */
+	if (bond->alb_info.rlb_enabled)
+		rlb_req_update_slave_clients(bond, active);
+
+	return 0;
+}
+
+/* Forward a VLAN removal to RLB; does nothing unless RLB is enabled. */
+void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
+{
+	if (!bond->alb_info.rlb_enabled)
+		return;
+
+	rlb_clear_vlan(bond, vlan_id);
+}
+
|