diff options
Diffstat (limited to 'net/smc')
-rw-r--r-- | net/smc/af_smc.c | 121 | ||||
-rw-r--r-- | net/smc/smc.h | 11 | ||||
-rw-r--r-- | net/smc/smc_clc.c | 319 | ||||
-rw-r--r-- | net/smc/smc_clc.h | 67 | ||||
-rw-r--r-- | net/smc/smc_core.c | 43 | ||||
-rw-r--r-- | net/smc/smc_core.h | 18 | ||||
-rw-r--r-- | net/smc/smc_diag.c | 9 | ||||
-rw-r--r-- | net/smc/smc_ib.c | 2 | ||||
-rw-r--r-- | net/smc/smc_ism.c | 50 | ||||
-rw-r--r-- | net/smc/smc_ism.h | 30 | ||||
-rw-r--r-- | net/smc/smc_pnet.c | 14 | ||||
-rw-r--r-- | net/smc/smc_sysctl.c | 24 | ||||
-rw-r--r-- | net/smc/smc_sysctl.h | 2 | ||||
-rw-r--r-- | net/smc/smc_tx.c | 30 |
14 files changed, 461 insertions, 279 deletions
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 73eebddbbf..0f53a5c6fd 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -677,8 +677,6 @@ static bool smc_isascii(char *hostname) static void smc_conn_save_peer_info_fce(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)clc; struct smc_clc_first_contact_ext *fce; int clc_v2_len; @@ -687,17 +685,15 @@ static void smc_conn_save_peer_info_fce(struct smc_sock *smc, return; if (smc->conn.lgr->is_smcd) { - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid, + memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid, SMC_MAX_EID_LEN); - clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, - d1); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1); } else { - memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid, + memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid, SMC_MAX_EID_LEN); - clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2, - r1); + clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1); } - fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len); + fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len); smc->conn.lgr->peer_os = fce->os_type; smc->conn.lgr->peer_smc_release = fce->release; if (smc_isascii(fce->hostname)) @@ -928,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code) smc->clcsock->file->private_data = smc->clcsock; smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; + smc->sk.sk_socket->wq.fasync_list = NULL; /* There might be some wait entries remaining * in smc sk->sk_wq and they should be woken up @@ -1048,7 +1045,8 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, { int rc = SMC_CLC_DECL_NOSMCDDEV; struct smcd_dev *smcd; - int i = 1; + int i = 1, entry = 1; + bool is_virtual; u16 chid; if (smcd_indicated(ini->smc_type_v1)) @@ -1060,14 +1058,23 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, chid = smc_ism_get_chid(smcd); if (!smc_find_ism_v2_is_unique_chid(chid, ini, i)) continue; + is_virtual = __smc_ism_is_virtual(chid); if (!smc_pnet_is_pnetid_set(smcd->pnetid) || smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { + if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES) + /* It's the last GID-CHID entry left in CLC + * Proposal SMC-Dv2 extension, but a virtual + * ISM device will take two entries. So give + * up it and try the next potential ISM device. + */ + continue; ini->ism_dev[i] = smcd; ini->ism_chid[i] = chid; ini->is_smcd = true; rc = 0; i++; - if (i > SMC_MAX_ISM_DEVS) + entry = is_virtual ? entry + 2 : entry + 1; + if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES) break; } } @@ -1149,13 +1156,13 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, } #define SMC_CLC_MAX_ACCEPT_LEN \ - (sizeof(struct smc_clc_msg_accept_confirm_v2) + \ + (sizeof(struct smc_clc_msg_accept_confirm) + \ sizeof(struct smc_clc_first_contact_ext_v2x) + \ sizeof(struct smc_clc_msg_trail)) /* CLC handshake during connect */ static int smc_connect_clc(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm_v2 *aclc2, + struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { int rc = 0; @@ -1165,7 +1172,7 @@ static int smc_connect_clc(struct smc_sock *smc, if (rc) return rc; /* receive SMC Accept CLC message */ - return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN, + return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN, SMC_CLC_ACCEPT, CLC_WAIT_TIME); } @@ -1201,10 +1208,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(clc_v2, false); + smc_get_clc_first_contact_ext(aclc, false); struct net *net = sock_net(&smc->sk); int rc; @@ -1327,10 +1332,7 @@ static int smc_connect_rdma(struct smc_sock *smc, } if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->r1.eid; + eid = aclc->r1.eid; if (ini->first_contact_local) smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist, link->smcibdev, link->gid); @@ -1371,7 +1373,7 @@ connect_abort: * Determine from the CHID of the received CLC ACCEPT the ISM device chosen. */ static int -smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, +smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini) { int i; @@ -1398,12 +1400,9 @@ static int smc_connect_ism(struct smc_sock *smc, ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; if (aclc->hdr.version == SMC_V2) { - struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - if (ini->first_contact_peer) { struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(aclc_v2, true); + smc_get_clc_first_contact_ext(aclc, true); ini->release_nr = fce->release; rc = smc_clc_clnt_v2x_features_validate(fce, ini); @@ -1411,11 +1410,16 @@ static int smc_connect_ism(struct smc_sock *smc, return rc; } - rc = smc_v2_determine_accepted_chid(aclc_v2, ini); + rc = smc_v2_determine_accepted_chid(aclc, ini); if (rc) return rc; + + if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected])) + ini->ism_peer_gid[ini->ism_selected].gid_ext = + ntohll(aclc->d1.gid_ext); + /* for non-virtual ISM devices, peer gid_ext remains 0. */ } - ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid); + ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid); /* there is only one lgr role for SMC-D; use server lock */ mutex_lock(&smc_server_lgr_pending); @@ -1437,12 +1441,8 @@ static int smc_connect_ism(struct smc_sock *smc, smc_rx_init(smc); smc_tx_init(smc); - if (aclc->hdr.version > SMC_V1) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)aclc; - - eid = clc_v2->d1.eid; - } + if (aclc->hdr.version > SMC_V1) + eid = aclc->d1.eid; rc = smc_clc_send_confirm(smc, ini->first_contact_local, aclc->hdr.version, eid, ini); @@ -1493,7 +1493,6 @@ static int smc_connect_check_aclc(struct smc_init_info *ini, static int __smc_connect(struct smc_sock *smc) { u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1; - struct smc_clc_msg_accept_confirm_v2 *aclc2; struct smc_clc_msg_accept_confirm *aclc; struct smc_init_info *ini = NULL; u8 *buf = NULL; @@ -1541,11 +1540,10 @@ static int __smc_connect(struct smc_sock *smc) rc = SMC_CLC_DECL_MEM; goto fallback; } - aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; - aclc = (struct smc_clc_msg_accept_confirm *)aclc2; + aclc = (struct smc_clc_msg_accept_confirm *)buf; /* perform CLC handshake */ - rc = smc_connect_clc(smc, aclc2, ini); + rc = smc_connect_clc(smc, aclc, ini); if (rc) { /* -EAGAIN on timeout, see tcp_recvmsg() */ if (rc == -EAGAIN) { @@ -2106,7 +2104,8 @@ static bool smc_is_already_selected(struct smcd_dev *smcd, /* check for ISM devices matching proposed ISM devices */ static void smc_check_ism_v2_match(struct smc_init_info *ini, - u16 proposed_chid, u64 proposed_gid, + u16 proposed_chid, + struct smcd_gid *proposed_gid, unsigned int *matches) { struct smcd_dev *smcd; @@ -2118,7 +2117,11 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini, continue; if (smc_ism_get_chid(smcd) == proposed_chid && !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { - ini->ism_peer_gid[*matches] = proposed_gid; + ini->ism_peer_gid[*matches].gid = proposed_gid->gid; + if (__smc_ism_is_virtual(proposed_chid)) + ini->ism_peer_gid[*matches].gid_ext = + proposed_gid->gid_ext; + /* non-virtual ISM's peer gid_ext remains 0. */ ini->ism_dev[*matches] = smcd; (*matches)++; break; @@ -2140,9 +2143,11 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, struct smc_clc_v2_extension *smc_v2_ext; struct smc_clc_msg_smcd *pclc_smcd; unsigned int matches = 0; + struct smcd_gid smcd_gid; u8 smcd_version; u8 *eid = NULL; int i, rc; + u16 chid; if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) goto not_found; @@ -2152,18 +2157,35 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); mutex_lock(&smcd_dev_list.mutex); - if (pclc_smcd->ism.chid) + if (pclc_smcd->ism.chid) { /* check for ISM device matching proposed native ISM device */ + smcd_gid.gid = ntohll(pclc_smcd->ism.gid); + smcd_gid.gid_ext = 0; smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), - ntohll(pclc_smcd->ism.gid), &matches); - for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) { + &smcd_gid, &matches); + } + for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) { /* check for ISM devices matching proposed non-native ISM * devices */ - smc_check_ism_v2_match(ini, - ntohs(smcd_v2_ext->gidchid[i - 1].chid), - ntohll(smcd_v2_ext->gidchid[i - 1].gid), - &matches); + smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid); + smcd_gid.gid_ext = 0; + chid = ntohs(smcd_v2_ext->gidchid[i].chid); + if (__smc_ism_is_virtual(chid)) { + if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt || + chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid)) + /* each virtual ISM device takes two GID-CHID + * entries and CHID of the second entry repeats + * that of the first entry. + * + * So check if the next GID-CHID entry exists + * and both two entries' CHIDs are the same. + */ + continue; + smcd_gid.gid_ext = + ntohll(smcd_v2_ext->gidchid[++i].gid); + } + smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches); } mutex_unlock(&smcd_dev_list.mutex); @@ -2212,7 +2234,8 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) goto not_found; ini->is_smcd = true; /* prepare ISM check */ - ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); + ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid); + ini->ism_peer_gid[0].gid_ext = 0; rc = smc_find_ism_device(new_smc, ini); if (rc) goto not_found; @@ -2461,7 +2484,7 @@ static void smc_listen_work(struct work_struct *work) if (rc) goto out_decl; - rc = smc_clc_srv_v2x_features_validate(pclc, ini); + rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini); if (rc) goto out_decl; diff --git a/net/smc/smc.h b/net/smc/smc.h index e377980b84..df64efd2de 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -29,9 +29,6 @@ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ -#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM - * devices - */ #define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; @@ -58,6 +55,13 @@ enum smc_state { /* possible states of an SMC socket */ SMC_PROCESSABORT = 27, }; +enum smc_supplemental_features { + SMC_SPF_VIRT_ISM_DEV = 0, +}; + +#define SMC_FEATURE_MASK \ + (BIT(SMC_SPF_VIRT_ISM_DEV)) + struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ @@ -196,7 +200,6 @@ struct smc_connection { * - dec on polled tx cqe */ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ - atomic_t tx_pushing; /* nr_threads trying tx push */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1489a8421d..9a13709bea 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -387,9 +387,9 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) /* check arriving CLC accept or confirm */ static bool -smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2) +smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc) { - struct smc_clc_msg_hdr *hdr = &clc_v2->hdr; + struct smc_clc_msg_hdr *hdr = &clc->hdr; if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) return false; @@ -428,15 +428,16 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) return true; } -static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, - struct smc_init_info *ini) +static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_init_info *ini) { - int ret = sizeof(*fce); + int ret = sizeof(*fce_v2x); - memset(fce, 0, sizeof(*fce)); - fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX; - fce->fce_v2_base.release = ini->release_nr; - memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname)); + memset(fce_v2x, 0, sizeof(*fce_v2x)); + fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX; + fce_v2x->fce_v2_base.release = ini->release_nr; + memcpy(fce_v2x->fce_v2_base.hostname, + smc_hostname, sizeof(smc_hostname)); if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) { ret = sizeof(struct smc_clc_first_contact_ext); goto out; @@ -444,9 +445,10 @@ static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce, if (ini->release_nr >= SMC_RELEASE_1) { if (!ini->is_smcd) { - fce->max_conns = ini->max_conns; - fce->max_links = ini->max_links; + fce_v2x->max_conns = ini->max_conns; + fce_v2x->max_links = ini->max_links; } + fce_v2x->feature_mask = htons(ini->feature_mask); } out: @@ -458,7 +460,7 @@ out: */ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2; + struct smc_clc_msg_accept_confirm *clc; struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; @@ -476,12 +478,11 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: - clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm; - if (!smc_clc_msg_acc_conf_valid(clc_v2)) + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (!smc_clc_msg_acc_conf_valid(clc)) return false; trl = (struct smc_clc_msg_trail *) - ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) - - sizeof(*trl)); + ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; @@ -834,6 +835,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) struct smc_clc_smcd_gid_chid *gidchids; struct smc_clc_msg_proposal_area *pclc; struct smc_clc_ipv6_prefix *ipv6_prfx; + struct net *net = sock_net(&smc->sk); struct smc_clc_v2_extension *v2_ext; struct smc_clc_msg_smcd *pclc_smcd; struct smc_clc_msg_trail *trl; @@ -891,11 +893,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) ETH_ALEN); } if (smcd_indicated(ini->smc_type_v1)) { + struct smcd_gid smcd_gid; + /* add SMC-D specifics */ if (ini->ism_dev[0]) { smcd = ini->ism_dev[0]; - pclc_smcd->ism.gid = - htonll(smcd->ops->get_local_gid(smcd)); + smcd->ops->get_local_gid(smcd, &smcd_gid); + pclc_smcd->ism.gid = htonll(smcd_gid.gid); pclc_smcd->ism.chid = htons(smc_ism_get_chid(ini->ism_dev[0])); } @@ -916,6 +920,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) pclc_smcd->v2_ext_offset = htons(v2_ext_offset); plen += sizeof(*v2_ext); + v2_ext->feature_mask = htons(SMC_FEATURE_MASK); read_lock(&smc_clc_eid_table.lock); v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; @@ -927,10 +932,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) read_unlock(&smc_clc_eid_table.lock); } if (smcd_indicated(ini->smc_type_v2)) { + struct smcd_gid smcd_gid; u8 *eid = NULL; + int entry = 0; v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; - v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + @@ -942,19 +948,31 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { smcd = ini->ism_dev[i]; - gidchids[i - 1].gid = - htonll(smcd->ops->get_local_gid(smcd)); - gidchids[i - 1].chid = + smcd->ops->get_local_gid(smcd, &smcd_gid); + gidchids[entry].chid = htons(smc_ism_get_chid(ini->ism_dev[i])); + gidchids[entry].gid = htonll(smcd_gid.gid); + if (smc_ism_is_virtual(smcd)) { + /* a virtual ISM device takes two + * entries. CHID of the second entry + * repeats that of the first entry. + */ + gidchids[entry + 1].chid = + gidchids[entry].chid; + gidchids[entry + 1].gid = + htonll(smcd_gid.gid_ext); + entry++; + } + entry++; } - plen += ini->ism_offered_cnt * - sizeof(struct smc_clc_smcd_gid_chid); + plen += entry * sizeof(struct smc_clc_smcd_gid_chid); } + v2_ext->hdr.ism_gid_cnt = entry; } if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); - v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER; - v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER; + v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr; + v2_ext->max_links = net->smc.sysctl_max_links_per_lgr; } pclc_base->hdr.length = htons(plen); @@ -985,7 +1003,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) vec[i++].iov_len = sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { vec[i].iov_base = gidchids; - vec[i++].iov_len = ini->ism_offered_cnt * + vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt * sizeof(struct smc_clc_smcd_gid_chid); } } @@ -1006,109 +1024,143 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return reason_code; } +static void +smcd_clc_prep_confirm_accept(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc, + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini, + int *fce_len, + struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_clc_msg_trail *trl) +{ + struct smcd_dev *smcd = conn->lgr->smcd; + struct smcd_gid smcd_gid; + u16 chid; + int len; + + /* SMC-D specific settings */ + memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); + smcd->ops->get_local_gid(smcd, &smcd_gid); + clc->hdr.typev1 = SMC_TYPE_D; + clc->d0.gid = htonll(smcd_gid.gid); + clc->d0.token = htonll(conn->rmb_desc->token); + clc->d0.dmbe_size = conn->rmbe_size_comp; + clc->d0.dmbe_idx = 0; + memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); + } else { + chid = smc_ism_get_chid(smcd); + clc->d1.chid = htons(chid); + if (eid && eid[0]) + memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN); + if (__smc_ism_is_virtual(chid)) + clc->d1.gid_ext = htonll(smcd_gid.gid_ext); + len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); + len += *fce_len; + } + clc->hdr.length = htons(len); + } + memcpy(trl->eyecatcher, SMCD_EYECATCHER, + sizeof(SMCD_EYECATCHER)); +} + +static void +smcr_clc_prep_confirm_accept(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc, + int first_contact, u8 version, + u8 *eid, struct smc_init_info *ini, + int *fce_len, + struct smc_clc_first_contact_ext_v2x *fce_v2x, + struct smc_clc_fce_gid_ext *gle, + struct smc_clc_msg_trail *trl) +{ + struct smc_link *link = conn->lnk; + int len; + + /* SMC-R specific settings */ + memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, + sizeof(SMC_EYECATCHER)); + clc->hdr.typev1 = SMC_TYPE_R; + memcpy(clc->r0.lcl.id_for_peer, local_systemid, + sizeof(local_systemid)); + memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); + memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], + ETH_ALEN); + hton24(clc->r0.qpn, link->roce_qp->qp_num); + clc->r0.rmb_rkey = + htonl(conn->rmb_desc->mr[link->link_idx]->rkey); + clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ + clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); + switch (clc->hdr.type) { + case SMC_CLC_ACCEPT: + clc->r0.qp_mtu = link->path_mtu; + break; + case SMC_CLC_CONFIRM: + clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); + break; + } + clc->r0.rmbe_size = conn->rmbe_size_comp; + clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? + cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : + cpu_to_be64((u64)sg_dma_address + (conn->rmb_desc->sgt[link->link_idx].sgl)); + hton24(clc->r0.psn, link->psn_initial); + if (version == SMC_V1) { + clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); + } else { + if (eid && eid[0]) + memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN); + len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; + if (first_contact) { + *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); + len += *fce_len; + fce_v2x->fce_v2_base.v2_direct = + !link->lgr->uses_gateway; + if (clc->hdr.type == SMC_CLC_CONFIRM) { + memset(gle, 0, sizeof(*gle)); + gle->gid_cnt = ini->smcrv2.gidlist.len; + len += sizeof(*gle); + len += gle->gid_cnt * sizeof(gle->gid[0]); + } + } + clc->hdr.length = htons(len); + } + memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); +} + /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, - struct smc_clc_msg_accept_confirm_v2 *clc_v2, + struct smc_clc_msg_accept_confirm *clc, int first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { + struct smc_clc_first_contact_ext_v2x fce_v2x; struct smc_connection *conn = &smc->conn; - struct smc_clc_first_contact_ext_v2x fce; - struct smcd_dev *smcd = conn->lgr->smcd; - struct smc_clc_msg_accept_confirm *clc; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; - int i, len, fce_len; + int i, fce_len; struct kvec vec[5]; struct msghdr msg; /* send SMC Confirm CLC msg */ - clc = (struct smc_clc_msg_accept_confirm *)clc_v2; clc->hdr.version = version; /* SMC version */ if (first_contact) clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; - if (conn->lgr->is_smcd) { - /* SMC-D specific settings */ - memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - clc->hdr.typev1 = SMC_TYPE_D; - clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd)); - clc->d0.token = htonll(conn->rmb_desc->token); - clc->d0.dmbe_size = conn->rmbe_size_comp; - clc->d0.dmbe_idx = 0; - memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); - } else { - clc_v2->d1.chid = htons(smc_ism_get_chid(smcd)); - if (eid && eid[0]) - memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN); - len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - fce_len = smc_clc_fill_fce(&fce, ini); - len += fce_len; - } - clc_v2->hdr.length = htons(len); - } - memcpy(trl.eyecatcher, SMCD_EYECATCHER, - sizeof(SMCD_EYECATCHER)); - } else { - struct smc_link *link = conn->lnk; - - /* SMC-R specific settings */ - memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, - sizeof(SMC_EYECATCHER)); - clc->hdr.typev1 = SMC_TYPE_R; - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - memcpy(clc->r0.lcl.id_for_peer, local_systemid, - sizeof(local_systemid)); - memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); - memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], - ETH_ALEN); - hton24(clc->r0.qpn, link->roce_qp->qp_num); - clc->r0.rmb_rkey = - htonl(conn->rmb_desc->mr[link->link_idx]->rkey); - clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ - clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); - switch (clc->hdr.type) { - case SMC_CLC_ACCEPT: - clc->r0.qp_mtu = link->path_mtu; - break; - case SMC_CLC_CONFIRM: - clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); - break; - } - clc->r0.rmbe_size = conn->rmbe_size_comp; - clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? - cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : - cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[link->link_idx].sgl)); - hton24(clc->r0.psn, link->psn_initial); - if (version == SMC_V1) { - clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); - } else { - if (eid && eid[0]) - memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN); - len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; - if (first_contact) { - fce_len = smc_clc_fill_fce(&fce, ini); - len += fce_len; - fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway; - if (clc->hdr.type == SMC_CLC_CONFIRM) { - memset(&gle, 0, sizeof(gle)); - gle.gid_cnt = ini->smcrv2.gidlist.len; - len += sizeof(gle); - len += gle.gid_cnt * sizeof(gle.gid[0]); - } - } - clc_v2->hdr.length = htons(len); - } - memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); - } - + if (conn->lgr->is_smcd) + smcd_clc_prep_confirm_accept(conn, clc, first_contact, + version, eid, ini, &fce_len, + &fce_v2x, &trl); + else + smcr_clc_prep_confirm_accept(conn, clc, first_contact, + version, eid, ini, &fce_len, + &fce_v2x, &gle, &trl); memset(&msg, 0, sizeof(msg)); i = 0; - vec[i].iov_base = clc_v2; + vec[i].iov_base = clc; if (version > SMC_V1) vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : @@ -1120,7 +1172,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, SMCR_CLC_ACCEPT_CONFIRM_LEN) - sizeof(trl); if (version > SMC_V1 && first_contact) { - vec[i].iov_base = &fce; + vec[i].iov_base = &fce_v2x; vec[i++].iov_len = fce_len; if (!conn->lgr->is_smcd) { if (clc->hdr.type == SMC_CLC_CONFIRM) { @@ -1142,16 +1194,16 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc, int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 cclc_v2; + struct smc_clc_msg_accept_confirm cclc; int reason_code = 0; int len; /* send SMC Confirm CLC msg */ - memset(&cclc_v2, 0, sizeof(cclc_v2)); - cclc_v2.hdr.type = SMC_CLC_CONFIRM; - len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact, + memset(&cclc, 0, sizeof(cclc)); + cclc.hdr.type = SMC_CLC_CONFIRM; + len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact, version, eid, ini); - if (len < ntohs(cclc_v2.hdr.length)) { + if (len < ntohs(cclc.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; @@ -1167,26 +1219,29 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 aclc_v2; + struct smc_clc_msg_accept_confirm aclc; int len; - memset(&aclc_v2, 0, sizeof(aclc_v2)); - aclc_v2.hdr.type = SMC_CLC_ACCEPT; - len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact, + memset(&aclc, 0, sizeof(aclc)); + aclc.hdr.type = SMC_CLC_ACCEPT; + len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact, version, negotiated_eid, ini); - if (len < ntohs(aclc_v2.hdr.length)) + if (len < ntohs(aclc.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; } -int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, +int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, + struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { struct smc_clc_v2_extension *pclc_v2_ext; + struct net *net = sock_net(&smc->sk); ini->max_conns = SMC_CONN_PER_LGR_MAX; ini->max_links = SMC_LINKS_ADD_LNK_MAX; + ini->feature_mask = SMC_FEATURE_MASK; if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) || ini->release_nr < SMC_RELEASE_1) @@ -1197,11 +1252,13 @@ int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, return SMC_CLC_DECL_NOV2EXT; if (ini->smcr_version & SMC_V2) { - ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER); + ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, + net->smc.sysctl_max_conns_per_lgr); if (ini->max_conns < SMC_CONN_PER_LGR_MIN) return SMC_CLC_DECL_MAXCONNERR; - ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER); + ini->max_links = min_t(u8, pclc_v2_ext->max_links, + net->smc.sysctl_max_links_per_lgr); if (ini->max_links < SMC_LINKS_ADD_LNK_MIN) return SMC_CLC_DECL_MAXLINKERR; } @@ -1228,6 +1285,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, return SMC_CLC_DECL_MAXLINKERR; ini->max_links = fce_v2x->max_links; } + /* common supplemental features of server and client */ + ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK; return 0; } @@ -1235,10 +1294,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, struct smc_init_info *ini) { - struct smc_clc_msg_accept_confirm_v2 *clc_v2 = - (struct smc_clc_msg_accept_confirm_v2 *)cclc; struct smc_clc_first_contact_ext *fce = - smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd); + smc_get_clc_first_contact_ext(cclc, ini->is_smcd); struct smc_clc_first_contact_ext_v2x *fce_v2x = (struct smc_clc_first_contact_ext_v2x *)fce; @@ -1258,6 +1315,8 @@ int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, if (fce_v2x->max_links != ini->max_links) return SMC_CLC_DECL_MAXLINKERR; } + /* common supplemental features returned by client */ + ini->feature_mask = ntohs(fce_v2x->feature_mask); return 0; } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 08155a96a0..a9f9bdd26d 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -138,7 +138,8 @@ struct smc_clc_v2_extension { u8 roce[16]; /* RoCEv2 GID */ u8 max_conns; u8 max_links; - u8 reserved[14]; + __be16 feature_mask; + u8 reserved[12]; u8 user_eids[][SMC_MAX_EID_LEN]; }; @@ -171,6 +172,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ #define SMC_CLC_MAX_V6_PREFIX 8 #define SMC_CLC_MAX_UEID 8 +#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC + * proposal SMC-Dv2 extension. + * each ISM device takes one entry and + * each virtual ISM takes two entries. + */ struct smc_clc_msg_proposal_area { struct smc_clc_msg_proposal pclc_base; @@ -180,7 +186,8 @@ struct smc_clc_msg_proposal_area { struct smc_clc_v2_extension pclc_v2_ext; u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN]; struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext; - struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS]; + struct smc_clc_smcd_gid_chid + pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES]; struct smc_clc_msg_trail pclc_trl; }; @@ -240,9 +247,14 @@ struct smc_clc_first_contact_ext { struct smc_clc_first_contact_ext_v2x { struct smc_clc_first_contact_ext fce_v2_base; - u8 max_conns; /* for SMC-R only */ - u8 max_links; /* for SMC-R only */ - u8 reserved3[2]; + union { + struct { + u8 max_conns; /* for SMC-R only */ + u8 max_links; /* for SMC-R only */ + }; + u8 reserved3[2]; /* for SMC-D only */ + }; + __be16 feature_mask; __be32 vendor_exp_options; u8 reserved4[8]; } __packed; /* format defined in @@ -259,28 +271,21 @@ struct smc_clc_fce_gid_ext { struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ struct smc_clc_msg_hdr hdr; union { - struct smcr_clc_msg_accept_confirm r0; /* SMC-R */ - struct { /* SMC-D */ - struct smcd_clc_msg_accept_confirm_common d0; - u32 reserved5[3]; - }; - }; -} __packed; /* format defined in RFC7609 */ - -struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */ - struct smc_clc_msg_hdr hdr; - union { struct { /* SMC-R */ struct smcr_clc_msg_accept_confirm r0; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved6[8]; - } r1; + struct { /* v2 only */ + u8 eid[SMC_MAX_EID_LEN]; + u8 reserved6[8]; + } __packed r1; + }; struct { /* SMC-D */ struct smcd_clc_msg_accept_confirm_common d0; - __be16 chid; - u8 eid[SMC_MAX_EID_LEN]; - u8 reserved5[8]; - } d1; + struct { /* v2 only, but 12 bytes reserved in v1 */ + __be16 chid; + u8 eid[SMC_MAX_EID_LEN]; + __be64 gid_ext; + } __packed d1; + }; }; }; @@ -389,24 +394,23 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext) } static inline struct smc_clc_first_contact_ext * -smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2, +smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm *clc, bool is_smcd) { int clc_v2_len; - if (clc_v2->hdr.version == SMC_V1 || - !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) + if (clc->hdr.version == SMC_V1 || + !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) return NULL; if (is_smcd) clc_v2_len = - offsetofend(struct smc_clc_msg_accept_confirm_v2, d1); + offsetofend(struct smc_clc_msg_accept_confirm, d1); else clc_v2_len = - offsetofend(struct smc_clc_msg_accept_confirm_v2, r1); + offsetofend(struct smc_clc_msg_accept_confirm, r1); - return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + - clc_v2_len); + return (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len); } struct smcd_dev; @@ -422,7 +426,8 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini); -int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, +int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, + struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini); int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, struct smc_init_info *ini); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d520ee62c8..e4c8584112 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -506,6 +506,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, { char smc_pnet[SMC_MAX_PNETID_LEN + 1]; struct smcd_dev *smcd = lgr->smcd; + struct smcd_gid smcd_gid; struct nlattr *attrs; void *nlh; @@ -521,13 +522,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id))) goto errattr; + smcd->ops->get_local_gid(smcd, &smcd_gid); if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, - smcd->ops->get_local_gid(smcd), - SMC_NLA_LGR_D_PAD)) + smcd_gid.gid, SMC_NLA_LGR_D_PAD)) goto errattr; - if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid, + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID, + smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD)) + goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid, SMC_NLA_LGR_D_PAD)) goto errattr; + if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID, + lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD)) + goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id)) goto errattr; if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num)) @@ -876,7 +883,10 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) /* SMC-D specific settings */ smcd = ini->ism_dev[ini->ism_selected]; get_device(smcd->ops->get_dev(smcd)); - lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected]; + lgr->peer_gid.gid = + ini->ism_peer_gid[ini->ism_selected].gid; + lgr->peer_gid.gid_ext = + ini->ism_peer_gid[ini->ism_selected].gid_ext; lgr->smcd = ini->ism_dev[ini->ism_selected]; lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; @@ -1514,7 +1524,8 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr) } /* Called when peer lgr shutdown (regularly or abnormally) is received */ -void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) +void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, + unsigned short vlan) { struct smc_link_group *lgr, *l; LIST_HEAD(lgr_free_list); @@ -1522,9 +1533,12 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) /* run common cleanup function and build free list */ spin_lock_bh(&dev->lgr_lock); list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { - if ((!peer_gid || lgr->peer_gid == peer_gid) && + if ((!peer_gid->gid || + (lgr->peer_gid.gid == peer_gid->gid && + !smc_ism_is_virtual(dev) ? 1 : + lgr->peer_gid.gid_ext == peer_gid->gid_ext)) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { - if (peer_gid) /* peer triggered termination */ + if (peer_gid->gid) /* peer triggered termination */ lgr->peer_shutdown = 1; list_move(&lgr->list, &lgr_free_list); lgr->freeing = 1; @@ -1860,9 +1874,18 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, } static bool smcd_lgr_match(struct smc_link_group *lgr, - struct smcd_dev *smcismdev, u64 peer_gid) + struct smcd_dev *smcismdev, + struct smcd_gid *peer_gid) { - return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; + if (lgr->peer_gid.gid != peer_gid->gid || + lgr->smcd != smcismdev) + return false; + + if (smc_ism_is_virtual(smcismdev) && + lgr->peer_gid.gid_ext != peer_gid->gid_ext) + return false; + + return true; } /* create a new SMC connection (and a new link group if necessary) */ @@ -1892,7 +1915,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) write_lock_bh(&lgr->conns_lock); if ((ini->is_smcd ? smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected], - ini->ism_peer_gid[ini->ism_selected]) : + &ini->ism_peer_gid[ini->ism_selected]) : smcr_lgr_match(lgr, ini->smcr_version, ini->peer_systemid, ini->peer_gid, ini->peer_mac, role, diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 120027d404..1f17537603 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -17,9 +17,11 @@ #include <linux/pci.h> #include <rdma/ib_verbs.h> #include <net/genetlink.h> +#include <net/smc.h> #include "smc.h" #include "smc_ib.h" +#include "smc_clc.h" #define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */ #define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */ @@ -355,7 +357,7 @@ struct smc_link_group { /* max links can be added in lgr */ }; struct { /* SMC-D */ - u64 peer_gid; + struct smcd_gid peer_gid; /* Peer GID (remote) */ struct smcd_dev *smcd; /* ISM device for VLAN reg. */ @@ -392,6 +394,11 @@ struct smc_init_info_smcrv2 { struct smc_gidlist gidlist; }; +#define SMC_MAX_V2_ISM_DEVS SMCD_CLC_MAX_V2_GID_ENTRIES + /* max # of proposed non-native ISM devices, + * which can't exceed the max # of CHID-GID + * entries in CLC proposal SMC-Dv2 extension. + */ struct smc_init_info { u8 is_smcd; u8 smc_type_v1; @@ -401,6 +408,7 @@ struct smc_init_info { u8 max_links; u8 first_contact_peer; u8 first_contact_local; + u16 feature_mask; unsigned short vlan_id; u32 rc; u8 negotiated_eid[SMC_MAX_EID_LEN]; @@ -416,9 +424,9 @@ struct smc_init_info { u32 ib_clcqpn; struct smc_init_info_smcrv2 smcrv2; /* SMC-D */ - u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1]; - struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1]; - u16 ism_chid[SMC_MAX_ISM_DEVS + 1]; + struct smcd_gid ism_peer_gid[SMC_MAX_V2_ISM_DEVS + 1]; + struct smcd_dev *ism_dev[SMC_MAX_V2_ISM_DEVS + 1]; + u16 ism_chid[SMC_MAX_V2_ISM_DEVS + 1]; u8 ism_offered_cnt; /* # of ISM devices offered */ u8 ism_selected; /* index of selected ISM dev*/ u8 smcd_version; @@ -544,7 +552,7 @@ void smc_lgr_hold(struct smc_link_group *lgr); void smc_lgr_put(struct smc_link_group *lgr); void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport); void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport); -void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, +void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, unsigned short vlan); void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index fb9e5cc128..5a33908015 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -21,6 +21,7 @@ #include "smc.h" #include "smc_core.h" +#include "smc_ism.h" struct smc_diag_dump_ctx { int pos[2]; @@ -167,12 +168,16 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, struct smc_connection *conn = &smc->conn; struct smcd_diag_dmbinfo dinfo; struct smcd_dev *smcd = conn->lgr->smcd; + struct smcd_gid smcd_gid; memset(&dinfo, 0, sizeof(dinfo)); dinfo.linkid = *((u32 *)conn->lgr->id); - dinfo.peer_gid = conn->lgr->peer_gid; - dinfo.my_gid = smcd->ops->get_local_gid(smcd); + dinfo.peer_gid = conn->lgr->peer_gid.gid; + dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext; + smcd->ops->get_local_gid(smcd, &smcd_gid); + dinfo.my_gid = smcd_gid.gid; + dinfo.my_gid_ext = smcd_gid.gid_ext; dinfo.token = conn->rmb_desc->token; dinfo.peer_token = conn->peer_token; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 89981dbe46..97704a9e84 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -844,7 +844,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; - smc_order = MAX_ORDER - cqe_size_order; + smc_order = MAX_PAGE_ORDER - cqe_size_order; if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE) cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2; smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev, diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index fbee249309..ac88de2a06 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -43,8 +43,30 @@ static struct ism_client smc_ism_client = { }; #endif +static void smc_ism_create_system_eid(void) +{ + struct smc_ism_seid *seid = + (struct smc_ism_seid *)smc_ism_v2_system_eid; +#if IS_ENABLED(CONFIG_S390) + struct cpuid id; + u16 ident_tail; + char tmp[5]; + + memcpy(seid->seid_string, "IBM-SYSZ-ISMSEID00000000", 24); + get_cpu_id(&id); + ident_tail = (u16)(id.ident & SMC_ISM_IDENT_MASK); + snprintf(tmp, 5, "%04X", ident_tail); + memcpy(seid->serial_number, tmp, 4); + snprintf(tmp, 5, "%04X", id.machine); + memcpy(seid->type, tmp, 4); +#else + memset(seid, 0, SMC_MAX_EID_LEN); +#endif +} + /* Test if an ISM communication is possible - same CPC */ -int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd) +int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id, + struct smcd_dev *smcd) { return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0, vlan_id); @@ -208,7 +230,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, dmb.dmb_len = dmb_len; dmb.sba_idx = dmb_desc->sba_idx; dmb.vlan_id = lgr->vlan_id; - dmb.rgid = lgr->peer_gid; + dmb.rgid = lgr->peer_gid.gid; rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client); if (!rc) { dmb_desc->sba_idx = dmb.sba_idx; @@ -340,18 +362,20 @@ union smcd_sw_event_info { static void smcd_handle_sw_event(struct smc_ism_event_work *wrk) { + struct smcd_gid peer_gid = { .gid = wrk->event.tok, + .gid_ext = 0 }; union smcd_sw_event_info ev_info; ev_info.info = wrk->event.info; switch (wrk->event.code) { case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */ - smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id); + smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id); break; case ISM_EVENT_CODE_TESTLINK: /* Activity timer */ if (ev_info.code == ISM_EVENT_REQUEST) { ev_info.code = ISM_EVENT_RESPONSE; wrk->smcd->ops->signal_event(wrk->smcd, - wrk->event.tok, + &peer_gid, ISM_EVENT_REQUEST_IR, ISM_EVENT_CODE_TESTLINK, ev_info.info); @@ -365,10 +389,12 @@ static void smc_ism_event_work(struct work_struct *work) { struct smc_ism_event_work *wrk = container_of(work, struct smc_ism_event_work, work); + struct smcd_gid smcd_gid = { .gid = wrk->event.tok, + .gid_ext = 0 }; switch (wrk->event.type) { case ISM_EVENT_GID: /* GID event, token is peer GID */ - smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK); + smc_smcd_terminate(wrk->smcd, &smcd_gid, VLAN_VID_MASK); break; case ISM_EVENT_DMB: break; @@ -426,14 +452,8 @@ static void smcd_register_dev(struct ism_dev *ism) mutex_lock(&smcd_dev_list.mutex); if (list_empty(&smcd_dev_list.list)) { - u8 *system_eid = NULL; - - system_eid = smcd->ops->get_system_eid(); - if (smcd->ops->supports_v2()) { + if (smcd->ops->supports_v2()) smc_ism_v2_capable = true; - memcpy(smc_ism_v2_system_eid, system_eid, - SMC_MAX_EID_LEN); - } } /* sort list: devices without pnetid before devices with pnetid */ if (smcd->pnetid[0]) @@ -525,7 +545,7 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr) memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE); ev_info.vlan_id = lgr->vlan_id; ev_info.code = ISM_EVENT_REQUEST; - rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid, + rc = lgr->smcd->ops->signal_event(lgr->smcd, &lgr->peer_gid, ISM_EVENT_REQUEST_IR, ISM_EVENT_CODE_SHUTDOWN, ev_info.info); @@ -537,10 +557,10 @@ int smc_ism_init(void) { int rc = 0; -#if IS_ENABLED(CONFIG_ISM) smc_ism_v2_capable = false; - memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN); + smc_ism_create_system_eid(); +#if IS_ENABLED(CONFIG_ISM) rc = ism_register_client(&smc_ism_client); #endif return rc; diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h index 832b2f42d7..ffff40c30a 100644 --- a/net/smc/smc_ism.h +++ b/net/smc/smc_ism.h @@ -15,6 +15,9 @@ #include "smc.h" +#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00 +#define SMC_ISM_IDENT_MASK 0x00FFFF + struct smcd_dev_list { /* List of SMCD devices */ struct list_head list; struct mutex mutex; /* Protects list of devices */ @@ -28,9 +31,16 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */ refcount_t refcnt; /* Reference count */ }; +struct smc_ism_seid { + u8 seid_string[24]; + u8 serial_number[4]; + u8 type[4]; +}; + struct smcd_dev; -int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev); +int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id, + struct smcd_dev *dev); void smc_ism_set_conn(struct smc_connection *conn); void smc_ism_unset_conn(struct smc_connection *conn); int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id); @@ -56,4 +66,22 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok, return rc < 0 ? rc : 0; } +static inline bool __smc_ism_is_virtual(u16 chid) +{ + /* CHIDs in range of 0xFF00 to 0xFFFF are reserved + * for virtual ISM device. + * + * loopback-ism: 0xFFFF + * virtio-ism: 0xFF00 ~ 0xFFFE + */ + return ((chid & 0xFF00) == 0xFF00); +} + +static inline bool smc_ism_is_virtual(struct smcd_dev *smcd) +{ + u16 chid = smcd->ops->get_chid(smcd); + + return __smc_ism_is_virtual(chid); +} + #endif diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 11775401df..2adb92b8c4 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -806,6 +806,16 @@ static void smc_pnet_create_pnetids_list(struct net *net) u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; struct net_device *dev; + /* Newly created netns do not have devices. + * Do not even acquire rtnl. + */ + if (list_empty(&net->dev_base_head)) + return; + + /* Note: This might not be needed, because smc_pnet_netdev_event() + * is also calling smc_pnet_add_base_pnetid() when handling + * NETDEV_UP event. + */ rtnl_lock(); for_each_netdev(net, dev) smc_pnet_add_base_pnetid(net, dev, ndev_pnetid); @@ -1103,8 +1113,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, list_for_each_entry(ismdev, &smcd_dev_list.list, list) { if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) && !ismdev->going_away && - (!ini->ism_peer_gid[0] || - !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id, + (!ini->ism_peer_gid[0].gid || + !smc_ism_cantalk(&ini->ism_peer_gid[0], ini->vlan_id, ismdev))) { ini->ism_dev[0] = ismdev; break; diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 5cbc18c6e6..a5946d1b9d 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -25,6 +25,10 @@ static int max_sndbuf = INT_MAX / 2; static int max_rcvbuf = INT_MAX / 2; static const int net_smc_wmem_init = (64 * 1024); static const int net_smc_rmem_init = (64 * 1024); +static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; +static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; +static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; +static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static struct ctl_table smc_table[] = { { @@ -68,6 +72,24 @@ static struct ctl_table smc_table[] = { .extra1 = &min_rcvbuf, .extra2 = &max_rcvbuf, }, + { + .procname = "smcr_max_links_per_lgr", + .data = &init_net.smc.sysctl_max_links_per_lgr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &links_per_lgr_min, + .extra2 = &links_per_lgr_max, + }, + { + .procname = "smcr_max_conns_per_lgr", + .data = &init_net.smc.sysctl_max_conns_per_lgr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &conns_per_lgr_min, + .extra2 = &conns_per_lgr_max, + }, { } }; @@ -97,6 +119,8 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); + net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; + net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; return 0; diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index 0becc11bd2..eb2465ae1e 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -23,6 +23,8 @@ void __net_exit smc_sysctl_net_exit(struct net *net); static inline int smc_sysctl_net_init(struct net *net) { net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; + net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; + net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; return 0; } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 3b0ff3b589..214ac3cbcf 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -621,7 +621,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) return rc; } -static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn) +int smc_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); int rc = 0; @@ -655,34 +655,6 @@ out: return rc; } -int smc_tx_sndbuf_nonempty(struct smc_connection *conn) -{ - int rc; - - /* This make sure only one can send simultaneously to prevent wasting - * of CPU and CDC slot. - * Record whether someone has tried to push while we are pushing. - */ - if (atomic_inc_return(&conn->tx_pushing) > 1) - return 0; - -again: - atomic_set(&conn->tx_pushing, 1); - smp_wmb(); /* Make sure tx_pushing is 1 before real send */ - rc = __smc_tx_sndbuf_nonempty(conn); - - /* We need to check whether someone else have added some data into - * the send queue and tried to push but failed after the atomic_set() - * when we are pushing. - * If so, we need to push again to prevent those data hang in the send - * queue. - */ - if (unlikely(!atomic_dec_and_test(&conn->tx_pushing))) - goto again; - - return rc; -} - /* Wakeup sndbuf consumers from process context * since there is more data to transmit. The caller * must hold sock lock. |