summaryrefslogtreecommitdiffstats
path: root/src/nvme/mi.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvme/mi.c')
-rw-r--r--src/nvme/mi.c351
1 files changed, 328 insertions, 23 deletions
diff --git a/src/nvme/mi.c b/src/nvme/mi.c
index 6ff0a6f..adf1753 100644
--- a/src/nvme/mi.c
+++ b/src/nvme/mi.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <stdlib.h>
#include <stdio.h>
+#include <time.h>
#include <ccan/array_size/array_size.h>
#include <ccan/endian/endian.h>
@@ -21,6 +22,20 @@
static const int default_timeout = 1000; /* milliseconds; endpoints may
override */
/* Default for root->mi_probe_enabled, taken from the environment.
 * Probing is enabled unless LIBNVME_MI_PROBE_ENABLED is set to "0",
 * "false" (case-insensitive), or any value starting with "disable"
 * (case-insensitive).
 */
+static bool nvme_mi_probe_enabled_default(void)
+{
+ char *val;
+
+ val = getenv("LIBNVME_MI_PROBE_ENABLED");
+ if (!val)
+ return true;
+
/* all three comparisons non-zero => val is not a disable value */
+ return strcmp(val, "0") &&
+ strcasecmp(val, "false") &&
+ strncasecmp(val, "disable", 7);
+
+}
+
/* MI-equivalent of nvme_create_root, but avoids clashing symbol names
* when linking against both libnvme and libnvme-mi.
*/
@@ -33,6 +48,7 @@ nvme_root_t nvme_mi_create_root(FILE *fp, int log_level)
}
r->log_level = log_level;
r->fp = stderr;
+ r->mi_probe_enabled = nvme_mi_probe_enabled_default();
if (fp)
r->fp = fp;
list_head_init(&r->hosts);
@@ -50,6 +66,180 @@ void nvme_mi_free_root(nvme_root_t root)
free(root);
}
/* Public API: override the endpoint-probe (quirk detection) setting for
 * all endpoints subsequently created under @root.
 */
+void nvme_mi_set_probe_enabled(nvme_root_t root, bool enabled)
+{
+ root->mi_probe_enabled = enabled;
+}
+
/* Stamp ep->last_resp_time with the current CLOCK_MONOTONIC time, used
 * by the inter-command-delay quirk. On clock_gettime() failure the
 * stored time is marked invalid, so no delay will be inserted.
 */
+static void nvme_mi_record_resp_time(struct nvme_mi_ep *ep)
+{
+ int rc;
+
+ rc = clock_gettime(CLOCK_MONOTONIC, &ep->last_resp_time);
+ ep->last_resp_time_valid = !rc;
+}
+
/* Match an Identify Controller result against a (PCI vendor id, model
 * number prefix) pair for quirk detection. Returns false (and logs an
 * internal error) if the caller-supplied model string cannot fit in
 * id->mn; the mn comparison is a prefix match over strlen(mn) bytes.
 * NOTE(review): len is a signed int compared against an unsigned
 * sizeof — fine for sane mn strings, but worth confirming.
 */
+static bool nvme_mi_compare_vid_mn(struct nvme_mi_ep *ep,
+ struct nvme_id_ctrl *id,
+ __u16 vid, const char *mn)
+
+{
+ int len;
+
+ len = strlen(mn);
+ if (len >= sizeof(id->mn)) {
+ nvme_msg(ep->root, LOG_ERR,
+ "Internal error: invalid model number for %s\n",
+ __func__);
+ return false;
+ }
+
+ return le16_to_cpu(id->vid) == vid && !strncmp(id->mn, mn, len);
+}
+
/* Copy the fixed-width, space-padded id->mn field into a C string:
 * NUL-terminate, then strip trailing spaces/NULs. Aborts if the
 * destination buffer cannot hold sizeof(id->mn) plus a terminator
 * (internal-contract check; callers use the macro below with a
 * correctly-sized array).
 */
+static void __nvme_mi_format_mn(struct nvme_id_ctrl *id,
+ char *mn, size_t mn_len)
+{
+ const size_t id_mn_size = sizeof(id->mn);
+ int i;
+
+ /* A BUILD_ASSERT() would be nice here, but we're not const enough for
+ * that
+ */
+ if (mn_len <= id_mn_size)
+ abort();
+
+ memcpy(mn, id->mn, id_mn_size);
+ mn[id_mn_size] = '\0';
+
/* walk backwards, trimming the space padding */
+ for (i = id_mn_size - 1; i >= 0; i--) {
+ if (mn[i] != '\0' && mn[i] != ' ')
+ break;
+ mn[i] = '\0';
+ }
+}
+
/* convenience wrapper: infers the destination size from the array argument */
+#define nvme_mi_format_mn(id, m) __nvme_mi_format_mn(id, m, sizeof(m))
+
/* Probe a newly-created endpoint for known device quirks, using a
 * partial Identify Controller (controller id 0) to read the vid/mn
 * fields. No-op when probing is disabled on the root. Failures are
 * non-fatal: the endpoint simply runs with no quirks applied.
 */
+void nvme_mi_ep_probe(struct nvme_mi_ep *ep)
+{
+ struct nvme_identify_args id_args = { 0 };
+ struct nvme_id_ctrl id = { 0 };
+ struct nvme_mi_ctrl *ctrl;
+ int rc;
+
+ if (!ep->root->mi_probe_enabled)
+ return;
+
+ /* start with no quirks, detect as we go */
+ ep->quirks = 0;
+
+ ctrl = nvme_mi_init_ctrl(ep, 0);
+ if (!ctrl)
+ return;
+
+ /* Do enough of an identify (assuming controller 0) to retrieve
+ * device and firmware identification information. This gives us the
+ * following fields in id:
+ *
+ * - vid (PCI vendor ID)
+ * - ssvid (PCI subsystem vendor ID)
+ * - sn (Serial number)
+ * - mn (Model number)
+ * - fr (Firmware revision)
+ *
+ * all other fields - rab and onwards - will be zero!
+ */
+ id_args.args_size = sizeof(id_args);
+ id_args.data = &id;
+ id_args.cns = NVME_IDENTIFY_CNS_CTRL;
+ id_args.nsid = NVME_NSID_NONE;
+ id_args.cntid = 0;
+ id_args.csi = NVME_CSI_NVM;
+
/* only fetch up to (not including) the rab field: offset 0..rab */
+ rc = nvme_mi_admin_identify_partial(ctrl, &id_args, 0,
+ offsetof(struct nvme_id_ctrl, rab));
+ if (rc) {
+ nvme_msg(ep->root, LOG_WARNING,
+ "Identify Controller failed, no quirks applied\n");
+ goto out_close;
+ }
+
+ /* Samsung MZUL2512: cannot receive commands sent within ~1ms of
+ * the previous response. Set an inter-command delay of 1.2ms for
+ * a little extra tolerance.
+ */
+ if (nvme_mi_compare_vid_mn(ep, &id, 0x144d, "MZUL2512HCJQ")) {
+ ep->quirks |= NVME_QUIRK_MIN_INTER_COMMAND_TIME;
+ ep->inter_command_us = 1200;
+ }
+
+ /* If we're quirking for the inter-command time, record the last
+ * command time now, so we don't conflict with the just-sent identify.
+ */
+ if (ep->quirks & NVME_QUIRK_MIN_INTER_COMMAND_TIME)
+ nvme_mi_record_resp_time(ep);
+
+ if (ep->quirks) {
+ char tmp[sizeof(id.mn) + 1];
+
+ nvme_mi_format_mn(&id, tmp);
/* NOTE(review): id.vid is a little-endian wire value; it is logged here
 * without le16_to_cpu() — confirm whether the raw value is intended. */
+ nvme_msg(ep->root, LOG_DEBUG,
+ "device %02x:%s: applying quirks 0x%08lx\n",
+ id.vid, tmp, ep->quirks);
+ }
+
+out_close:
+ nvme_mi_close_ctrl(ctrl);
+}
+
+static const int nsec_per_sec = 1000 * 1000 * 1000;
+/* timercmp and timersub, but for struct timespec */
/* timespec_cmp(a, b, CMP): apply comparison operator CMP to two
 * struct timespec pointers; compares tv_nsec only when seconds match. */
+#define timespec_cmp(a, b, CMP) \
+ (((a)->tv_sec == (b)->tv_sec) \
+ ? ((a)->tv_nsec CMP (b)->tv_nsec) \
+ : ((a)->tv_sec CMP (b)->tv_sec))
+
/* timespec_sub(a, b, result): *result = *a - *b, normalising a negative
 * nanosecond field by borrowing one second. Assumes a >= b. */
+#define timespec_sub(a, b, result) \
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_nsec = (a)->tv_nsec - (b)->tv_nsec; \
+ if ((result)->tv_nsec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_nsec += nsec_per_sec; \
+ } \
+ } while (0)
+
/* Sleep until ep->inter_command_us microseconds have elapsed since the
 * last recorded response (the MIN_INTER_COMMAND_TIME quirk). Returns
 * immediately if no response time is recorded or the clock read fails.
 * NOTE(review): the nsec carry uses '>' rather than '>=', and performs a
 * single carry — assumes inter_command_us < 1,000,000 (under a second);
 * true for current quirk values, but worth confirming if new quirks are
 * added.
 */
+static void nvme_mi_insert_delay(struct nvme_mi_ep *ep)
+{
+ struct timespec now, next, delay;
+ int rc;
+
+ if (!ep->last_resp_time_valid)
+ return;
+
+ /* calculate earliest next command time */
+ next.tv_nsec = ep->last_resp_time.tv_nsec + ep->inter_command_us * 1000;
+ next.tv_sec = ep->last_resp_time.tv_sec;
+ if (next.tv_nsec > nsec_per_sec) {
+ next.tv_nsec -= nsec_per_sec;
+ next.tv_sec += 1;
+ }
+
+ rc = clock_gettime(CLOCK_MONOTONIC, &now);
+ if (rc) {
+ /* not much we can do; continue immediately */
+ return;
+ }
+
/* already past the earliest allowed send time: no sleep needed */
+ if (timespec_cmp(&now, &next, >=))
+ return;
+
+ timespec_sub(&next, &now, &delay);
+
/* remaining interruption (EINTR) is ignored; best-effort delay */
+ nanosleep(&delay, NULL);
+}
+
struct nvme_mi_ep *nvme_mi_init_ep(nvme_root_t root)
{
struct nvme_mi_ep *ep;
@@ -93,6 +283,11 @@ unsigned int nvme_mi_ep_get_timeout(nvme_mi_ep_t ep)
return ep->timeout;
}
/* Test whether the endpoint has (any of) the given quirk bit(s) set. */
+static bool nvme_mi_ep_has_quirk(nvme_mi_ep_t ep, unsigned long quirk)
+{
+ return ep->quirks & quirk;
+}
+
struct nvme_mi_ctrl *nvme_mi_init_ctrl(nvme_mi_ep_t ep, __u16 ctrl_id)
{
struct nvme_mi_ctrl *ctrl;
@@ -139,9 +334,7 @@ int nvme_mi_scan_ep(nvme_mi_ep_t ep, bool force_rescan)
struct nvme_mi_ctrl *ctrl;
__u16 id;
- id = le32_to_cpu(list.identifier[i]);
- if (!id)
- continue;
+ id = le16_to_cpu(list.identifier[i]);
ctrl = nvme_mi_init_ctrl(ep, id);
if (!ctrl)
@@ -223,7 +416,14 @@ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req,
if (ep->transport->mic_enabled)
nvme_mi_calc_req_mic(req);
+ if (nvme_mi_ep_has_quirk(ep, NVME_QUIRK_MIN_INTER_COMMAND_TIME))
+ nvme_mi_insert_delay(ep);
+
rc = ep->transport->submit(ep, req, resp);
+
+ if (nvme_mi_ep_has_quirk(ep, NVME_QUIRK_MIN_INTER_COMMAND_TIME))
+ nvme_mi_record_resp_time(ep);
+
if (rc) {
nvme_msg(ep->root, LOG_INFO, "transport failure\n");
return rc;
@@ -333,7 +533,12 @@ static int nvme_mi_admin_parse_status(struct nvme_mi_resp *resp, __u32 *result)
admin_hdr = (struct nvme_mi_admin_resp_hdr *)resp->hdr;
nvme_result = le32_to_cpu(admin_hdr->cdw0);
- nvme_status = le32_to_cpu(admin_hdr->cdw3) >> 16;
+
+ /* Shift down 17 here: the SC starts at bit 17, and the NVME_SC_*
+ * definitions align to this bit (and up). The CRD, MORE and DNR
+ * bits are defined accordingly (eg., DNR is 0x4000).
+ */
+ nvme_status = le32_to_cpu(admin_hdr->cdw3) >> 17;
/* the result pointer, optionally stored if the caller needs it */
if (result)
@@ -419,6 +624,96 @@ int nvme_mi_admin_xfer(nvme_mi_ctrl_t ctrl,
return 0;
}
/* Generic Admin command passthrough over MI. Mirrors the ioctl-style
 * passthru signature; data direction is derived from the opcode's low
 * two bits. Bidirectional commands and transfers over 4096 bytes are
 * rejected with EINVAL. Returns 0 on success, negative errno-style
 * failure, or a positive NVMe status from the parsed response.
 * NOTE(review): a caller-supplied timeout_ms larger than the current
 * endpoint timeout raises the endpoint timeout permanently — it is not
 * restored after this command; confirm this is intended.
 */
+int nvme_mi_admin_admin_passthru(nvme_mi_ctrl_t ctrl, __u8 opcode, __u8 flags,
+ __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3,
+ __u32 cdw10, __u32 cdw11, __u32 cdw12,
+ __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout_ms, __u32 *result)
+{
+ /* Input parameters flags, rsvd, metadata, metadata_len are not used */
+ struct nvme_mi_admin_resp_hdr resp_hdr;
+ struct nvme_mi_admin_req_hdr req_hdr;
+ struct nvme_mi_resp resp;
+ struct nvme_mi_req req;
+ int rc;
+ int direction = opcode & 0x3;
+ bool has_write_data = false;
+ bool has_read_data = false;
+
+ if (direction == NVME_DATA_TFR_BIDIRECTIONAL) {
+ nvme_msg(ctrl->ep->root, LOG_ERR,
+ "nvme_mi_admin_admin_passthru doesn't support bidirectional commands\n");
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (data_len > 4096) {
+ nvme_msg(ctrl->ep->root, LOG_ERR,
+ "nvme_mi_admin_admin_passthru doesn't support data_len over 4096 bytes.\n");
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (data != NULL && data_len != 0) {
+ if (direction == NVME_DATA_TFR_HOST_TO_CTRL)
+ has_write_data = true;
+ if (direction == NVME_DATA_TFR_CTRL_TO_HOST)
+ has_read_data = true;
+ }
+
+ if (timeout_ms > nvme_mi_ep_get_timeout(ctrl->ep)) {
+ /* Set timeout if user needs a bigger timeout */
+ nvme_mi_ep_set_timeout(ctrl->ep, timeout_ms);
+ }
+
/* assumes nvme_mi_admin_init_req() fully initialises req/req_hdr,
 * including zeroing req.data/data_len — TODO confirm */
+ nvme_mi_admin_init_req(&req, &req_hdr, ctrl->id, opcode);
+ req_hdr.cdw1 = cpu_to_le32(nsid);
+ req_hdr.cdw2 = cpu_to_le32(cdw2);
+ req_hdr.cdw3 = cpu_to_le32(cdw3);
+ req_hdr.cdw10 = cpu_to_le32(cdw10);
+ req_hdr.cdw11 = cpu_to_le32(cdw11);
+ req_hdr.cdw12 = cpu_to_le32(cdw12);
+ req_hdr.cdw13 = cpu_to_le32(cdw13);
+ req_hdr.cdw14 = cpu_to_le32(cdw14);
+ req_hdr.cdw15 = cpu_to_le32(cdw15);
+ req_hdr.doff = 0;
+ if (data_len != 0) {
+ req_hdr.dlen = cpu_to_le32(data_len);
+ /* Bit 0 set to 1 means DLEN contains a value */
+ req_hdr.flags = 0x1;
+ }
+
+ if (has_write_data) {
+ req.data = data;
+ req.data_len = data_len;
+ }
+
+ nvme_mi_calc_req_mic(&req);
+
+ nvme_mi_admin_init_resp(&resp, &resp_hdr);
+
+ if (has_read_data) {
+ resp.data = data;
+ resp.data_len = data_len;
+ }
+
+ rc = nvme_mi_submit(ctrl->ep, &req, &resp);
+ if (rc)
+ return rc;
+
+ rc = nvme_mi_admin_parse_status(&resp, result);
+ if (rc)
+ return rc;
+
/* short read: the device returned less data than requested */
+ if (has_read_data && (resp.data_len != data_len)) {
+ errno = EPROTO;
+ return -1;
+ }
+
+ return 0;
+}
+
int nvme_mi_admin_identify_partial(nvme_mi_ctrl_t ctrl,
struct nvme_identify_args *args,
off_t offset, size_t size)
@@ -477,11 +772,19 @@ int nvme_mi_admin_identify_partial(nvme_mi_ctrl_t ctrl,
}
/* retrieves an MCTP-message-sized chunk of log page data. offset and len are
- * specified within the args->data area */
+ * specified within the args->data area. The `offset` parameter is a relative
+ * offset to the args->lpo !
+ *
+ * What's more, we change the LPO of original command to chunk the request
+ * message into proper size which is allowed by MI interface. One reason is that
+ * this option seems to be supported better by devices. For more information
+ * about this option, please check https://github.com/linux-nvme/libnvme/pull/539
+ * */
static int __nvme_mi_admin_get_log(nvme_mi_ctrl_t ctrl,
const struct nvme_get_log_args *args,
off_t offset, size_t *lenp, bool final)
{
+ __u64 log_page_offset = args->lpo + offset;
struct nvme_mi_admin_resp_hdr resp_hdr;
struct nvme_mi_admin_req_hdr req_hdr;
struct nvme_mi_resp resp;
@@ -513,17 +816,13 @@ static int __nvme_mi_admin_get_log(nvme_mi_ctrl_t ctrl,
(args->lid & 0xff));
req_hdr.cdw11 = cpu_to_le32(args->lsi << 16 |
ndw >> 16);
- req_hdr.cdw12 = cpu_to_le32(args->lpo & 0xffffffff);
- req_hdr.cdw13 = cpu_to_le32(args->lpo >> 32);
+ req_hdr.cdw12 = cpu_to_le32(log_page_offset & 0xffffffff);
+ req_hdr.cdw13 = cpu_to_le32(log_page_offset >> 32);
req_hdr.cdw14 = cpu_to_le32(args->csi << 24 |
(args->ot ? 1 : 0) << 23 |
args->uuidx);
req_hdr.flags = 0x1;
req_hdr.dlen = cpu_to_le32(len & 0xffffffff);
- if (offset) {
- req_hdr.flags |= 0x2;
- req_hdr.doff = cpu_to_le32(offset);
- }
nvme_mi_calc_req_mic(&req);
@@ -544,7 +843,7 @@ static int __nvme_mi_admin_get_log(nvme_mi_ctrl_t ctrl,
int nvme_mi_admin_get_log(nvme_mi_ctrl_t ctrl, struct nvme_get_log_args *args)
{
- const size_t xfer_size = 4096;
+ const size_t max_xfer_size = 4096;
off_t xfer_offset;
int rc = 0;
@@ -553,26 +852,32 @@ int nvme_mi_admin_get_log(nvme_mi_ctrl_t ctrl, struct nvme_get_log_args *args)
return -1;
}
+ if (args->ot && (args->len > max_xfer_size)) {
+ errno = EINVAL;
+ return -1;
+ }
+
for (xfer_offset = 0; xfer_offset < args->len;) {
- size_t tmp, cur_xfer_size = xfer_size;
+ size_t xfered_size, cur_xfer_size = max_xfer_size;
bool final;
if (xfer_offset + cur_xfer_size > args->len)
cur_xfer_size = args->len - xfer_offset;
- tmp = cur_xfer_size;
+ xfered_size = cur_xfer_size;
final = xfer_offset + cur_xfer_size >= args->len;
+ /* xfered_size is used as both input and output parameter */
rc = __nvme_mi_admin_get_log(ctrl, args, xfer_offset,
- &tmp, final);
+ &xfered_size, final);
if (rc)
break;
- xfer_offset += tmp;
+ xfer_offset += xfered_size;
/* if we returned less data than expected, consider that
* the end of the log page */
- if (tmp != cur_xfer_size)
+ if (xfered_size != cur_xfer_size)
break;
}
@@ -606,8 +911,8 @@ int nvme_mi_admin_security_send(nvme_mi_ctrl_t ctrl,
nvme_admin_security_send);
req_hdr.cdw10 = cpu_to_le32(args->secp << 24 |
- args->spsp0 << 16 |
- args->spsp1 << 8 |
+ args->spsp1 << 16 |
+ args->spsp0 << 8 |
args->nssf);
req_hdr.cdw11 = cpu_to_le32(args->data_len & 0xffffffff);
@@ -652,8 +957,8 @@ int nvme_mi_admin_security_recv(nvme_mi_ctrl_t ctrl,
nvme_admin_security_recv);
req_hdr.cdw10 = cpu_to_le32(args->secp << 24 |
- args->spsp0 << 16 |
- args->spsp1 << 8 |
+ args->spsp1 << 16 |
+ args->spsp0 << 8 |
args->nssf);
req_hdr.cdw11 = cpu_to_le32(args->data_len & 0xffffffff);
@@ -985,7 +1290,7 @@ static int nvme_mi_read_data(nvme_mi_ep_t ep, __u32 cdw0,
req_hdr.hdr.nmp = (NVME_MI_ROR_REQ << 7) |
(NVME_MI_MT_MI << 3); /* we always use command slot 0 */
req_hdr.opcode = nvme_mi_mi_opcode_mi_data_read;
- req_hdr.cdw0 = cdw0;
+ req_hdr.cdw0 = cpu_to_le32(cdw0);
memset(&req, 0, sizeof(req));
req.hdr = &req_hdr.hdr;
@@ -1222,7 +1527,7 @@ void nvme_mi_close(nvme_mi_ep_t ep)
nvme_mi_for_each_ctrl_safe(ep, ctrl, tmp)
nvme_mi_close_ctrl(ctrl);
- if (ep->transport->close)
+ if (ep->transport && ep->transport->close)
ep->transport->close(ep);
list_del(&ep->root_entry);
free(ep);