diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/infiniband/hw/mlx4/alias_GUID.c | 905 |
1 files changed, 905 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c new file mode 100644 index 000000000..baab9afa9 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -0,0 +1,905 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + /***********************************************************/ +/*This file support the handling of the Alias GUID feature. */ +/***********************************************************/ +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_pack.h> +#include <linux/mlx4/cmd.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <rdma/ib_user_verbs.h> +#include <linux/delay.h> +#include "mlx4_ib.h" + +/* +The driver keeps the current state of all guids, as they are in the HW. +Whenever we receive an smp mad GUIDInfo record, the data will be cached. +*/ + +struct mlx4_alias_guid_work_context { + u8 port; + struct mlx4_ib_dev *dev ; + struct ib_sa_query *sa_query; + struct completion done; + int query_id; + struct list_head list; + int block_num; + ib_sa_comp_mask guid_indexes; + u8 method; +}; + +struct mlx4_next_alias_guid_work { + u8 port; + u8 block_num; + u8 method; + struct mlx4_sriov_alias_guid_info_rec_det rec_det; +}; + +static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port, + int *resched_delay_sec); + +void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, + u8 port_num, u8 *p_data) +{ + int i; + u64 guid_indexes; + int slave_id; + int port_index = port_num - 1; + + if (!mlx4_is_master(dev->dev)) + return; + + guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. + ports_guid[port_num - 1]. + all_rec_per_port[block_num].guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + /* The location of the specific index starts from bit number 4 + * until bit num 11 */ + if (test_bit(i + 4, (unsigned long *)&guid_indexes)) { + slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; + if (slave_id >= dev->dev->num_slaves) { + pr_debug("The last slave: %d\n", slave_id); + return; + } + + /* cache the guid: */ + memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id], + &p_data[i * GUID_REC_SIZE], + GUID_REC_SIZE); + } else + pr_debug("Guid number: %d in block: %d" + " was not updated\n", i, block_num); + } +} + +static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index) +{ + if (index >= NUM_ALIAS_GUID_PER_PORT) { + pr_err("%s: ERROR: asked for index:%d\n", __func__, index); + return (__force __be64) -1; + } + return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index]; +} + + +ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index) +{ + return IB_SA_COMP_MASK(4 + index); +} + +void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave, + int port, int slave_init) +{ + __be64 curr_guid, required_guid; + int record_num = slave / 8; + int index = slave % 8; + int port_index = port - 1; + unsigned long flags; + int do_work = 0; + + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); + if (dev->sriov.alias_guid.ports_guid[port_index].state_flags & + GUID_STATE_NEED_PORT_INIT) + goto unlock; + if (!slave_init) { + curr_guid = *(__be64 *)&dev->sriov. + alias_guid.ports_guid[port_index]. + all_rec_per_port[record_num]. + all_recs[GUID_REC_SIZE * index]; + if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) || + !curr_guid) + goto unlock; + required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL); + } else { + required_guid = mlx4_get_admin_guid(dev->dev, slave, port); + if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) + goto unlock; + } + *(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[record_num]. + all_recs[GUID_REC_SIZE * index] = required_guid; + dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[record_num].guid_indexes + |= mlx4_ib_get_aguid_comp_mask_from_ix(index); + dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[record_num].status + = MLX4_GUID_INFO_STATUS_IDLE; + /* set to run immediately */ + dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[record_num].time_to_run = 0; + dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[record_num]. + guids_retry_schedule[index] = 0; + do_work = 1; +unlock: + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + + if (do_work) + mlx4_ib_init_alias_guid_work(dev, port_index); +} + +/* + * Whenever new GUID is set/unset (guid table change) create event and + * notify the relevant slave (master also should be notified). + * If the GUID value is not as we have in the cache the slave will not be + * updated; in this case it waits for the smp_snoop or the port management + * event to call the function and to update the slave. + * block_number - the index of the block (16 blocks available) + * port_number - 1 or 2 + */ +void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, u8 port_num, + u8 *p_data) +{ + int i; + u64 guid_indexes; + int slave_id, slave_port; + enum slave_port_state new_state; + enum slave_port_state prev_state; + __be64 tmp_cur_ag, form_cache_ag; + enum slave_port_gen_event gen_event; + struct mlx4_sriov_alias_guid_info_rec_det *rec; + unsigned long flags; + __be64 required_value; + + if (!mlx4_is_master(dev->dev)) + return; + + rec = &dev->sriov.alias_guid.ports_guid[port_num - 1]. + all_rec_per_port[block_num]; + guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. + ports_guid[port_num - 1]. + all_rec_per_port[block_num].guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + + /*calculate the slaves and notify them*/ + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + /* the location of the specific index runs from bits 4..11 */ + if (!(test_bit(i + 4, (unsigned long *)&guid_indexes))) + continue; + + slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; + if (slave_id >= dev->dev->persist->num_vfs + 1) + return; + + slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num); + if (slave_port < 0) /* this port isn't available for the VF */ + continue; + + tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; + form_cache_ag = get_cached_alias_guid(dev, port_num, + (NUM_ALIAS_GUID_IN_REC * block_num) + i); + /* + * Check if guid is not the same as in the cache, + * If it is different, wait for the snoop_smp or the port mgmt + * change event to update the slave on its port state change + */ + if (tmp_cur_ag != form_cache_ag) + continue; + + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); + required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]; + + if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) + required_value = 0; + + if (tmp_cur_ag == required_value) { + rec->guid_indexes = rec->guid_indexes & + ~mlx4_ib_get_aguid_comp_mask_from_ix(i); + } else { + /* may notify port down if value is 0 */ + if (tmp_cur_ag != MLX4_NOT_SET_GUID) { + spin_unlock_irqrestore(&dev->sriov. + alias_guid.ag_work_lock, flags); + continue; + } + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, + flags); + mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num); + /*2 cases: Valid GUID, and Invalid Guid*/ + + if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/ + prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num); + new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num, + MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, + &gen_event); + pr_debug("slave: %d, port: %d prev_port_state: %d," + " new_port_state: %d, gen_event: %d\n", + slave_id, port_num, prev_state, new_state, gen_event); + if (gen_event == SLAVE_PORT_GEN_EVENT_UP) { + pr_debug("sending PORT_UP event to slave: %d, port: %d\n", + slave_id, port_num); + mlx4_gen_port_state_change_eqe(dev->dev, slave_id, + port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE); + } + } else { /* request to invalidate GUID */ + set_and_calc_slave_port_state(dev->dev, slave_id, port_num, + MLX4_PORT_STATE_IB_EVENT_GID_INVALID, + &gen_event); + if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) { + pr_debug("sending PORT DOWN event to slave: %d, port: %d\n", + slave_id, port_num); + mlx4_gen_port_state_change_eqe(dev->dev, + slave_id, + port_num, + MLX4_PORT_CHANGE_SUBTYPE_DOWN); + } + } + } +} + +static void aliasguid_query_handler(int status, + struct ib_sa_guidinfo_rec *guid_rec, + void *context) +{ + struct mlx4_ib_dev *dev; + struct mlx4_alias_guid_work_context *cb_ctx = context; + u8 port_index ; + int i; + struct mlx4_sriov_alias_guid_info_rec_det *rec; + unsigned long flags, flags1; + ib_sa_comp_mask declined_guid_indexes = 0; + ib_sa_comp_mask applied_guid_indexes = 0; + unsigned int resched_delay_sec = 0; + + if (!context) + return; + + dev = cb_ctx->dev; + port_index = cb_ctx->port - 1; + rec = &dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[cb_ctx->block_num]; + + if (status) { + pr_debug("(port: %d) failed: status = %d\n", + cb_ctx->port, status); + rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC; + goto out; + } + + if (guid_rec->block_num != cb_ctx->block_num) { + pr_err("block num mismatch: %d != %d\n", + cb_ctx->block_num, guid_rec->block_num); + goto out; + } + + pr_debug("lid/port: %d/%d, block_num: %d\n", + be16_to_cpu(guid_rec->lid), cb_ctx->port, + guid_rec->block_num); + + rec = &dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[guid_rec->block_num]; + + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); + for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { + __be64 sm_response, required_val; + + if (!(cb_ctx->guid_indexes & + mlx4_ib_get_aguid_comp_mask_from_ix(i))) + continue; + sm_response = *(__be64 *)&guid_rec->guid_info_list + [i * GUID_REC_SIZE]; + required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]; + if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) { + if (required_val == + cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) + goto next_entry; + + /* A new value was set till we got the response */ + pr_debug("need to set new value %llx, record num %d, block_num:%d\n", + be64_to_cpu(required_val), + i, guid_rec->block_num); + goto entry_declined; + } + + /* check if the SM didn't assign one of the records. + * if it didn't, re-ask for. + */ + if (sm_response == MLX4_NOT_SET_GUID) { + if (rec->guids_retry_schedule[i] == 0) + mlx4_ib_warn(&dev->ib_dev, + "%s:Record num %d in block_num: %d was declined by SM\n", + __func__, i, + guid_rec->block_num); + goto entry_declined; + } else { + /* properly assigned record. */ + /* We save the GUID we just got from the SM in the + * admin_guid in order to be persistent, and in the + * request from the sm the process will ask for the same GUID */ + if (required_val && + sm_response != required_val) { + /* Warn only on first retry */ + if (rec->guids_retry_schedule[i] == 0) + mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" + " admin guid after SysAdmin " + "configuration. " + "Record num %d in block_num:%d " + "was declined by SM, " + "new val(0x%llx) was kept, SM returned (0x%llx)\n", + __func__, i, + guid_rec->block_num, + be64_to_cpu(required_val), + be64_to_cpu(sm_response)); + goto entry_declined; + } else { + *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] = + sm_response; + if (required_val == 0) + mlx4_set_admin_guid(dev->dev, + sm_response, + (guid_rec->block_num + * NUM_ALIAS_GUID_IN_REC) + i, + cb_ctx->port); + goto next_entry; + } + } +entry_declined: + declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i); + rec->guids_retry_schedule[i] = + (rec->guids_retry_schedule[i] == 0) ? 1 : + min((unsigned int)60, + rec->guids_retry_schedule[i] * 2); + /* using the minimum value among all entries in that record */ + resched_delay_sec = (resched_delay_sec == 0) ? + rec->guids_retry_schedule[i] : + min(resched_delay_sec, + rec->guids_retry_schedule[i]); + continue; + +next_entry: + rec->guids_retry_schedule[i] = 0; + } + + applied_guid_indexes = cb_ctx->guid_indexes & ~declined_guid_indexes; + if (declined_guid_indexes || + rec->guid_indexes & ~(applied_guid_indexes)) { + pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n", + guid_rec->block_num, + be64_to_cpu((__force __be64)rec->guid_indexes), + be64_to_cpu((__force __be64)applied_guid_indexes), + be64_to_cpu((__force __be64)declined_guid_indexes)); + rec->time_to_run = ktime_get_boot_ns() + + resched_delay_sec * NSEC_PER_SEC; + } else { + rec->status = MLX4_GUID_INFO_STATUS_SET; + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + /* + The func is call here to close the cases when the + sm doesn't send smp, so in the sa response the driver + notifies the slave. + */ + mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num, + cb_ctx->port, + guid_rec->guid_info_list); +out: + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + if (!dev->sriov.is_going_down) { + get_low_record_time_index(dev, port_index, &resched_delay_sec); + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq, + &dev->sriov.alias_guid.ports_guid[port_index]. + alias_guid_work, + msecs_to_jiffies(resched_delay_sec * 1000)); + } + if (cb_ctx->sa_query) { + list_del(&cb_ctx->list); + kfree(cb_ctx); + } else + complete(&cb_ctx->done); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index) +{ + int i; + u64 cur_admin_val; + ib_sa_comp_mask comp_mask = 0; + + dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status + = MLX4_GUID_INFO_STATUS_SET; + + /* calculate the comp_mask for that record.*/ + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + cur_admin_val = + *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].all_recs[GUID_REC_SIZE * i]; + /* + check the admin value: if it's for delete (~00LL) or + it is the first guid of the first record (hw guid) or + the records is not in ownership of the sysadmin and the sm doesn't + need to assign GUIDs, then don't put it up for assignment. + */ + if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val || + (!index && !i)) + continue; + comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i); + } + dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].guid_indexes |= comp_mask; + if (dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].guid_indexes) + dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE; + +} + +static int set_guid_rec(struct ib_device *ibdev, + struct mlx4_next_alias_guid_work *rec) +{ + int err; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_sa_guidinfo_rec guid_info_rec; + ib_sa_comp_mask comp_mask; + struct ib_port_attr attr; + struct mlx4_alias_guid_work_context *callback_context; + unsigned long resched_delay, flags, flags1; + u8 port = rec->port + 1; + int index = rec->block_num; + struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det; + struct list_head *head = + &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; + + memset(&attr, 0, sizeof(attr)); + err = __mlx4_ib_query_port(ibdev, port, &attr, 1); + if (err) { + pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", + err, port); + return err; + } + /*check the port was configured by the sm, otherwise no need to send */ + if (attr.state != IB_PORT_ACTIVE) { + pr_debug("port %d not active...rescheduling\n", port); + resched_delay = 5 * HZ; + err = -EAGAIN; + goto new_schedule; + } + + callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL); + if (!callback_context) { + err = -ENOMEM; + resched_delay = HZ * 5; + goto new_schedule; + } + callback_context->port = port; + callback_context->dev = dev; + callback_context->block_num = index; + callback_context->guid_indexes = rec_det->guid_indexes; + callback_context->method = rec->method; + + memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); + + guid_info_rec.lid = ib_lid_be16(attr.lid); + guid_info_rec.block_num = index; + + memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, + GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC); + comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM | + rec_det->guid_indexes; + + init_completion(&callback_context->done); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + list_add_tail(&callback_context->list, head); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + + callback_context->query_id = + ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client, + ibdev, port, &guid_info_rec, + comp_mask, rec->method, 1000, + GFP_KERNEL, aliasguid_query_handler, + callback_context, + &callback_context->sa_query); + if (callback_context->query_id < 0) { + pr_debug("ib_sa_guid_info_rec_query failed, query_id: " + "%d. will reschedule to the next 1 sec.\n", + callback_context->query_id); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + list_del(&callback_context->list); + kfree(callback_context); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + resched_delay = 1 * HZ; + err = -EAGAIN; + goto new_schedule; + } + err = 0; + goto out; + +new_schedule: + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + invalidate_guid_record(dev, port, index); + if (!dev->sriov.is_going_down) { + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, + &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, + resched_delay); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); + +out: + return err; +} + +static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port) +{ + int j, k, entry; + __be64 guid; + + /*Check if the SM doesn't need to assign the GUIDs*/ + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) { + entry = j * NUM_ALIAS_GUID_IN_REC + k; + /* no request for the 0 entry (hw guid) */ + if (!entry || entry > dev->dev->persist->num_vfs || + !mlx4_is_slave_active(dev->dev, entry)) + continue; + guid = mlx4_get_admin_guid(dev->dev, entry, port); + *(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[j].all_recs + [GUID_REC_SIZE * k] = guid; + pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n", + entry, + be64_to_cpu(guid), + port); + } + } +} +void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) +{ + int i; + unsigned long flags, flags1; + + pr_debug("port %d\n", port); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + + if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags & + GUID_STATE_NEED_PORT_INIT) { + mlx4_ib_guid_port_init(dev, port); + dev->sriov.alias_guid.ports_guid[port - 1].state_flags &= + (~GUID_STATE_NEED_PORT_INIT); + } + for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++) + invalidate_guid_record(dev, port, i); + + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { + /* + make sure no work waits in the queue, if the work is already + queued(not on the timer) the cancel will fail. That is not a problem + because we just want the work started. + */ + cancel_delayed_work(&dev->sriov.alias_guid. + ports_guid[port - 1].alias_guid_work); + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, + &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, + 0); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static void set_required_record(struct mlx4_ib_dev *dev, u8 port, + struct mlx4_next_alias_guid_work *next_rec, + int record_index) +{ + int i; + int lowset_time_entry = -1; + int lowest_time = 0; + ib_sa_comp_mask delete_guid_indexes = 0; + ib_sa_comp_mask set_guid_indexes = 0; + struct mlx4_sriov_alias_guid_info_rec_det *rec = + &dev->sriov.alias_guid.ports_guid[port]. + all_rec_per_port[record_index]; + + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + if (!(rec->guid_indexes & + mlx4_ib_get_aguid_comp_mask_from_ix(i))) + continue; + + if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] == + cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) + delete_guid_indexes |= + mlx4_ib_get_aguid_comp_mask_from_ix(i); + else + set_guid_indexes |= + mlx4_ib_get_aguid_comp_mask_from_ix(i); + + if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <= + lowest_time) { + lowset_time_entry = i; + lowest_time = rec->guids_retry_schedule[i]; + } + } + + memcpy(&next_rec->rec_det, rec, sizeof(*rec)); + next_rec->port = port; + next_rec->block_num = record_index; + + if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] == + cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) { + next_rec->rec_det.guid_indexes = delete_guid_indexes; + next_rec->method = MLX4_GUID_INFO_RECORD_DELETE; + } else { + next_rec->rec_det.guid_indexes = set_guid_indexes; + next_rec->method = MLX4_GUID_INFO_RECORD_SET; + } +} + +/* return index of record that should be updated based on lowest + * rescheduled time + */ +static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port, + int *resched_delay_sec) +{ + int record_index = -1; + u64 low_record_time = 0; + struct mlx4_sriov_alias_guid_info_rec_det rec; + int j; + + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + rec = dev->sriov.alias_guid.ports_guid[port]. + all_rec_per_port[j]; + if (rec.status == MLX4_GUID_INFO_STATUS_IDLE && + rec.guid_indexes) { + if (record_index == -1 || + rec.time_to_run < low_record_time) { + record_index = j; + low_record_time = rec.time_to_run; + } + } + } + if (resched_delay_sec) { + u64 curr_time = ktime_get_boot_ns(); + + *resched_delay_sec = (low_record_time < curr_time) ? 0 : + div_u64((low_record_time - curr_time), NSEC_PER_SEC); + } + + return record_index; +} + +/* The function returns the next record that was + * not configured (or failed to be configured) */ +static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, + struct mlx4_next_alias_guid_work *rec) +{ + unsigned long flags; + int record_index; + int ret = 0; + + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); + record_index = get_low_record_time_index(dev, port, NULL); + + if (record_index < 0) { + ret = -ENOENT; + goto out; + } + + set_required_record(dev, port, rec, record_index); +out: + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + return ret; +} + +static void alias_guid_work(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + int ret = 0; + struct mlx4_next_alias_guid_work *rec; + struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port = + container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det, + alias_guid_work); + struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent; + struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid, + struct mlx4_ib_sriov, + alias_guid); + struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); + + rec = kzalloc(sizeof *rec, GFP_KERNEL); + if (!rec) + return; + + pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); + ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); + if (ret) { + pr_debug("No more records to update.\n"); + goto out; + } + + set_guid_rec(&dev->ib_dev, rec); +out: + kfree(rec); +} + + +void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) +{ + unsigned long flags, flags1; + + if (!mlx4_is_master(dev->dev)) + return; + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + if (!dev->sriov.is_going_down) { + /* If there is pending one should cancel then run, otherwise + * won't run till previous one is ended as same work + * struct is used. + */ + cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port]. + alias_guid_work); + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq, + &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) +{ + int i; + struct mlx4_ib_sriov *sriov = &dev->sriov; + struct mlx4_alias_guid_work_context *cb_ctx; + struct mlx4_sriov_alias_guid_port_rec_det *det; + struct ib_sa_query *sa_query; + unsigned long flags; + + for (i = 0 ; i < dev->num_ports; i++) { + det = &sriov->alias_guid.ports_guid[i]; + cancel_delayed_work_sync(&det->alias_guid_work); + spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); + while (!list_empty(&det->cb_list)) { + cb_ctx = list_entry(det->cb_list.next, + struct mlx4_alias_guid_work_context, + list); + sa_query = cb_ctx->sa_query; + cb_ctx->sa_query = NULL; + list_del(&cb_ctx->list); + spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); + ib_sa_cancel_query(cb_ctx->query_id, sa_query); + wait_for_completion(&cb_ctx->done); + kfree(cb_ctx); + spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); + } + spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); + } + for (i = 0 ; i < dev->num_ports; i++) { + flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + } + ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); + kfree(dev->sriov.alias_guid.sa_client); +} + +int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) +{ + char alias_wq_name[15]; + int ret = 0; + int i, j; + union ib_gid gid; + + if (!mlx4_is_master(dev->dev)) + return 0; + dev->sriov.alias_guid.sa_client = + kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL); + if (!dev->sriov.alias_guid.sa_client) + return -ENOMEM; + + ib_sa_register_client(dev->sriov.alias_guid.sa_client); + + spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); + + for (i = 1; i <= dev->num_ports; ++i) { + if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + ret = -EFAULT; + goto err_unregister; + } + } + + for (i = 0 ; i < dev->num_ports; i++) { + memset(&dev->sriov.alias_guid.ports_guid[i], 0, + sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); + dev->sriov.alias_guid.ports_guid[i].state_flags |= + GUID_STATE_NEED_PORT_INIT; + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + /* mark each val as it was deleted */ + memset(dev->sriov.alias_guid.ports_guid[i]. + all_rec_per_port[j].all_recs, 0xFF, + sizeof(dev->sriov.alias_guid.ports_guid[i]. + all_rec_per_port[j].all_recs)); + } + INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); + /*prepare the records, set them to be allocated by sm*/ + if (mlx4_ib_sm_guid_assign) + for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++) + mlx4_set_admin_guid(dev->dev, 0, j, i + 1); + for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) + invalidate_guid_record(dev, i + 1, j); + + dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; + dev->sriov.alias_guid.ports_guid[i].port = i; + + snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); + dev->sriov.alias_guid.ports_guid[i].wq = + alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM); + if (!dev->sriov.alias_guid.ports_guid[i].wq) { + ret = -ENOMEM; + goto err_thread; + } + INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work, + alias_guid_work); + } + return 0; + +err_thread: + for (--i; i >= 0; i--) { + destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + dev->sriov.alias_guid.ports_guid[i].wq = NULL; + } + +err_unregister: + ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); + kfree(dev->sriov.alias_guid.sa_client); + dev->sriov.alias_guid.sa_client = NULL; + pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret); + return ret; +} |