diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 08:38:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 08:38:39 +0000 |
commit | 80b126032c7e73d273bc883e973b5f1a94aac581 (patch) | |
tree | e7d13cf4f02585c239665a6c5465735ffde429c8 /plugins/solidigm | |
parent | Releasing debian version 2.9.1-3. (diff) | |
download | nvme-cli-80b126032c7e73d273bc883e973b5f1a94aac581.tar.xz nvme-cli-80b126032c7e73d273bc883e973b5f1a94aac581.zip |
Merging upstream version 2.10.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'plugins/solidigm')
-rw-r--r-- | plugins/solidigm/meson.build | 1 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-garbage-collection.c | 2 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-get-drive-info.c | 2 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-id-ctrl.c | 67 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-internal-logs.c | 671 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-latency-tracking.c | 2 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-log-page-dir.c | 2 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-market-log.c | 1 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-nvme.c | 7 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-nvme.h | 4 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-smart.c | 2 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-workload-tracker.c | 536 | ||||
-rw-r--r-- | plugins/solidigm/solidigm-workload-tracker.h | 8 |
13 files changed, 1118 insertions, 187 deletions
diff --git a/plugins/solidigm/meson.build b/plugins/solidigm/meson.build index 052afa1..df2dc57 100644 --- a/plugins/solidigm/meson.build +++ b/plugins/solidigm/meson.build @@ -11,6 +11,7 @@ sources += [ 'plugins/solidigm/solidigm-temp-stats.c', 'plugins/solidigm/solidigm-get-drive-info.c', 'plugins/solidigm/solidigm-ocp-version.c', + 'plugins/solidigm/solidigm-workload-tracker.c', ] subdir('solidigm-telemetry') diff --git a/plugins/solidigm/solidigm-garbage-collection.c b/plugins/solidigm/solidigm-garbage-collection.c index 002b187..3c046b0 100644 --- a/plugins/solidigm/solidigm-garbage-collection.c +++ b/plugins/solidigm/solidigm-garbage-collection.c @@ -68,7 +68,7 @@ static void vu_gc_log_show(struct garbage_control_collection_log *payload, const int solidigm_get_garbage_collection_log(int argc, char **argv, struct command *cmd, struct plugin *plugin) { const char *desc = "Get and parse Solidigm vendor specific garbage collection event log."; - enum nvme_print_flags flags; + nvme_print_flags_t flags; struct nvme_dev *dev; int err; __u8 uuid_index; diff --git a/plugins/solidigm/solidigm-get-drive-info.c b/plugins/solidigm/solidigm-get-drive-info.c index 21f59bb..c783fa8 100644 --- a/plugins/solidigm/solidigm-get-drive-info.c +++ b/plugins/solidigm/solidigm-get-drive-info.c @@ -16,7 +16,7 @@ int sldgm_get_drive_info(int argc, char **argv, struct command *cmd, struct plug const char *desc = "Get drive HW information"; const char *FTL_unit_size_str = "FTL_unit_size"; char *output_format = "normal"; - enum nvme_print_flags flags; + nvme_print_flags_t flags; nvme_root_t r; nvme_ctrl_t c; nvme_ns_t n; diff --git a/plugins/solidigm/solidigm-id-ctrl.c b/plugins/solidigm/solidigm-id-ctrl.c index f45e758..67dc7b7 100644 --- a/plugins/solidigm/solidigm-id-ctrl.c +++ b/plugins/solidigm/solidigm-id-ctrl.c @@ -9,7 +9,7 @@ #include "common.h" #include "solidigm-id-ctrl.h" -struct __packed nvme_vu_id_ctrl_field { /* CDR MR5 */ +struct __packed nvme_vu_id_ctrl_field { // CPC __u8 
rsvd1[3]; __u8 ss; char health[20]; @@ -22,6 +22,26 @@ struct __packed nvme_vu_id_ctrl_field { /* CDR MR5 */ __le64 ww; char mic_bl[4]; char mic_fw[4]; + __u8 rsvd3[678]; + __u32 signature; + __u8 version; + __u8 product_type; + __u8 nand_type; + __u8 form_factor; + __u32 fw_status; + __u32 p4_revision; // git hash first 8 characters + __u32 customer_id; + __u32 usage_model; + struct{ + __u32 zns_nvme : 1; // bit 0 + __u32 mfnd_nvme : 1; // bit 1 + __u32 cdw1413 : 1; // bit 2: CDW14 remapping into CDW13 + __u32 vpd_avail : 1; // bit 3: VPD EEPROM is available + //at moment of id-ctrl response + __u32 rsvd : 28; // bit 4..31 are unused + } + command_set; + }; void sldgm_id_ctrl(uint8_t *vs, struct json_object *root) @@ -37,6 +57,19 @@ void sldgm_id_ctrl(uint8_t *vs, struct json_object *root) const char *str_ww = "wwid"; const char *str_mic_bl = "bwLimGran"; const char *str_mic_fw = "ioLimGran"; + const char *str_signature = "signature"; + const char *str_version = "version"; + const char *str_product_type = "prodType"; + const char *str_nand_type = "nandType"; + const char *str_form_factor = "formFactor"; + const char *str_fw_status = "fwStatus"; + const char *str_p4_revision = "P4Revision"; + const char *str_customer_id = "customerID"; + const char *str_usage_model = "usageModel"; + const char *str_zns_nvme = "znsNVMe"; + const char *str_mfnd_nvme = "mfndNVMe"; + const char *str_cdw14_cdw13 = "cdw14map13"; + const char *str_vpd_avail = "vpdAvail"; struct nvme_vu_id_ctrl_field *id = (struct nvme_vu_id_ctrl_field *)vs; @@ -54,12 +87,25 @@ void sldgm_id_ctrl(uint8_t *vs, struct json_object *root) printf("%-10s: 0x%016"PRIx64"\n", str_ww, le64_to_cpu(id->ww)); printf("%-10s: %.*s\n", str_mic_bl, (int)sizeof(id->mic_bl), id->mic_bl); printf("%-10s: %.*s\n", str_mic_fw, (int)sizeof(id->mic_fw), id->mic_fw); + printf("%-10s: 0x%08X\n", str_signature, id->signature); + printf("%-10s: 0x%02X\n", str_version, id->version); + printf("%-10s: %u\n", str_product_type, 
id->product_type); + printf("%-10s: %u\n", str_nand_type, id->nand_type); + printf("%-10s: %u\n", str_form_factor, id->form_factor); + printf("%-10s: %u\n", str_fw_status, id->fw_status); + printf("%-10s: 0x%08X\n", str_p4_revision, id->p4_revision); + printf("%-10s: 0x%08X\n", str_customer_id, id->customer_id); + printf("%-10s: %u\n", str_usage_model, id->usage_model); + printf("%-10s: %u\n", str_zns_nvme, id->command_set.zns_nvme); + printf("%-10s: %u\n", str_mfnd_nvme, id->command_set.mfnd_nvme); + printf("%-10s: %u\n", str_cdw14_cdw13, id->command_set.cdw1413); + printf("%-10s: %u\n", str_vpd_avail, id->command_set.vpd_avail); return; } json_object_add_value_uint(root, str_ss, id->ss); json_object_object_add(root, str_health, - json_object_new_string_len(health, sizeof(id->health))); + json_object_new_string_len(health, sizeof(id->health))); json_object_add_value_uint(root, str_cls, id->cls); json_object_add_value_uint(root, str_nlw, id->nlw); json_object_add_value_uint(root, str_scap, id->scap); @@ -67,7 +113,20 @@ void sldgm_id_ctrl(uint8_t *vs, struct json_object *root) json_object_object_add(root, str_bl, json_object_new_string_len(id->bl, sizeof(id->bl))); json_object_add_value_uint64(root, str_ww, le64_to_cpu(id->ww)); json_object_object_add(root, str_mic_bl, - json_object_new_string_len(id->mic_bl, sizeof(id->mic_bl))); + json_object_new_string_len(id->mic_bl, sizeof(id->mic_bl))); json_object_object_add(root, str_mic_fw, - json_object_new_string_len(id->mic_fw, sizeof(id->mic_fw))); + json_object_new_string_len(id->mic_fw, sizeof(id->mic_fw))); + json_object_add_value_uint(root, str_signature, id->signature); + json_object_add_value_uint(root, str_version, id->version); + json_object_add_value_uint(root, str_product_type, id->product_type); + json_object_add_value_uint(root, str_nand_type, id->nand_type); + json_object_add_value_uint(root, str_form_factor, id->form_factor); + json_object_add_value_uint(root, str_fw_status, id->fw_status); + 
json_object_add_value_uint(root, str_p4_revision, id->p4_revision); + json_object_add_value_uint(root, str_customer_id, id->customer_id); + json_object_add_value_uint(root, str_usage_model, id->usage_model); + json_object_add_value_uint(root, str_zns_nvme, id->command_set.zns_nvme); + json_object_add_value_uint(root, str_mfnd_nvme, id->command_set.mfnd_nvme); + json_object_add_value_uint(root, str_cdw14_cdw13, id->command_set.cdw1413); + json_object_add_value_uint(root, str_vpd_avail, id->command_set.vpd_avail); } diff --git a/plugins/solidigm/solidigm-internal-logs.c b/plugins/solidigm/solidigm-internal-logs.c index c604761..f5b57f3 100644 --- a/plugins/solidigm/solidigm-internal-logs.c +++ b/plugins/solidigm/solidigm-internal-logs.c @@ -12,7 +12,6 @@ #include <stdlib.h> #include <unistd.h> #include <inttypes.h> -#include <linux/limits.h> #include <time.h> #include "common.h" @@ -23,11 +22,15 @@ #include "solidigm-util.h" #define DWORD_SIZE 4 +#define LOG_FILE_PERMISSION 0644 enum log_type { NLOG = 0, EVENTLOG = 1, ASSERTLOG = 2, + HIT, + CIT, + ALL }; #pragma pack(push, internal_logs, 1) @@ -122,12 +125,20 @@ struct nlog_dump_header4_1 { #pragma pack(pop, internal_logs) struct config { - __u32 namespace_id; - char *dir_prefix; + char *out_dir; char *type; bool verbose; }; +struct ilog { + struct nvme_dev *dev; + struct config *cfg; + int count; + struct nvme_id_ctrl id_ctrl; + enum nvme_telemetry_da max_da; + __u32 max_tx; +}; + static void print_nlog_header(__u8 *buffer) { struct nlog_dump_header_common *nlog_header = (struct nlog_dump_header_common *) buffer; @@ -218,29 +229,29 @@ static int get_serial_number(char *str, int fd) return err; } -static int dump_assert_logs(struct nvme_dev *dev, struct config cfg) +static int ilog_dump_assert_logs(struct ilog *ilog) { __u8 buf[INTERNAL_LOG_MAX_BYTE_TRANSFER]; __u8 head_buf[INTERNAL_LOG_MAX_BYTE_TRANSFER]; - char file_path[PATH_MAX]; + char file_path[PATH_MAX] = {0}; char file_name[] = "AssertLog.bin"; struct 
assert_dump_header *ad = (struct assert_dump_header *) head_buf; struct nvme_passthru_cmd cmd = { .opcode = 0xd2, - .nsid = cfg.namespace_id, + .nsid = NVME_NSID_ALL, .addr = (unsigned long)(void *)head_buf, .cdw12 = ASSERTLOG, .cdw13 = 0, }; int output, err; - err = read_header(&cmd, dev_fd(dev)); + err = read_header(&cmd, dev_fd(ilog->dev)); if (err) return err; snprintf(file_path, sizeof(file_path), "%.*s/%s", - (int) (sizeof(file_path) - sizeof(file_name) - 1), cfg.dir_prefix, file_name); - output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + (int) (sizeof(file_path) - sizeof(file_name) - 1), ilog->cfg->out_dir, file_name); + output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, LOG_FILE_PERMISSION); if (output < 0) return -errno; err = write_header((__u8 *)ad, output, ad->header.header_size * DWORD_SIZE); @@ -251,7 +262,7 @@ static int dump_assert_logs(struct nvme_dev *dev, struct config cfg) } cmd.addr = (unsigned long)(void *)buf; - if (cfg.verbose) { + if (ilog->cfg->verbose) { printf("Assert Log, cores: %d log size: %d header size: %d\n", ad->header.numcores, ad->header.log_size * DWORD_SIZE, ad->header.header_size * DWORD_SIZE); for (__u32 i = 0; i < ad->header.numcores; i++) @@ -262,28 +273,27 @@ static int dump_assert_logs(struct nvme_dev *dev, struct config cfg) if (!ad->core[i].assertvalid) continue; cmd.cdw13 = ad->core[i].coreoffset; - err = cmd_dump_repeat(&cmd, ad->core[i].assertsize, - output, - dev_fd(dev), false); + err = cmd_dump_repeat(&cmd, ad->core[i].assertsize, output, + dev_fd(ilog->dev), false); if (err) { close(output); return err; } } close(output); - printf("Successfully wrote log to %s\n", file_path); + printf("Successfully wrote Assert to %s\n", file_path); return err; } -static int dump_event_logs(struct nvme_dev *dev, struct config cfg) +static int ilog_dump_event_logs(struct ilog *ilog) { __u8 buf[INTERNAL_LOG_MAX_BYTE_TRANSFER]; __u8 head_buf[INTERNAL_LOG_MAX_BYTE_TRANSFER]; - char file_path[PATH_MAX]; + char 
file_path[PATH_MAX] = {0}; struct event_dump_header *ehdr = (struct event_dump_header *) head_buf; struct nvme_passthru_cmd cmd = { .opcode = 0xd2, - .nsid = cfg.namespace_id, + .nsid = NVME_NSID_ALL, .addr = (unsigned long)(void *)head_buf, .cdw12 = EVENTLOG, .cdw13 = 0, @@ -291,11 +301,11 @@ static int dump_event_logs(struct nvme_dev *dev, struct config cfg) int output; int core_num, err; - err = read_header(&cmd, dev_fd(dev)); + err = read_header(&cmd, dev_fd(ilog->dev)); if (err) return err; - snprintf(file_path, sizeof(file_path), "%s/EventLog.bin", cfg.dir_prefix); - output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + snprintf(file_path, sizeof(file_path) - 1, "%s/EventLog.bin", ilog->cfg->out_dir); + output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, LOG_FILE_PERMISSION); if (output < 0) return -errno; err = write_header(head_buf, output, INTERNAL_LOG_MAX_BYTE_TRANSFER); @@ -308,11 +318,11 @@ static int dump_event_logs(struct nvme_dev *dev, struct config cfg) } cmd.addr = (unsigned long)(void *)buf; - if (cfg.verbose) + if (ilog->cfg->verbose) printf("Event Log, cores: %d log size: %d\n", core_num, ehdr->header.log_size * 4); for (__u32 j = 0; j < core_num; j++) { - if (cfg.verbose) { + if (ilog->cfg->verbose) { for (int k = 0 ; k < 16; k++) { printf("core: %d event: %d ", j, k); printf("validity: %d ", ehdr->edumps[j].eventIdValidity[k]); @@ -321,14 +331,14 @@ static int dump_event_logs(struct nvme_dev *dev, struct config cfg) } cmd.cdw13 = ehdr->edumps[j].coreoffset; err = cmd_dump_repeat(&cmd, ehdr->edumps[j].coresize, - output, dev_fd(dev), false); + output, dev_fd(ilog->dev), false); if (err) { close(output); return err; } } close(output); - printf("Successfully wrote log to %s\n", file_path); + printf("Successfully wrote Events to %s\n", file_path); return err; } @@ -348,16 +358,16 @@ static size_t get_nlog_header_size(struct nlog_dump_header_common *nlog_header) } /* dumps nlogs from specified core or all cores when core = -1 */ 
-static int dump_nlogs(struct nvme_dev *dev, struct config cfg, int core) +static int ilog_dump_nlogs(struct ilog *ilog, int core) { int err = 0; __u32 count, core_num; __u8 buf[INTERNAL_LOG_MAX_BYTE_TRANSFER]; - char file_path[PATH_MAX]; + char file_path[PATH_MAX] = {0}; struct nlog_dump_header_common *nlog_header = (struct nlog_dump_header_common *)buf; struct nvme_passthru_cmd cmd = { .opcode = 0xd2, - .nsid = cfg.namespace_id, + .nsid = NVME_NSID_ALL, .addr = (unsigned long)(void *)buf }; @@ -381,7 +391,7 @@ static int dump_nlogs(struct nvme_dev *dev, struct config cfg, int core) do { cmd.cdw13 = 0; cmd.cdw12 = log_select.raw; - err = read_header(&cmd, dev_fd(dev)); + err = read_header(&cmd, dev_fd(ilog->dev)); if (err) { if (is_open) close(output); @@ -390,9 +400,10 @@ static int dump_nlogs(struct nvme_dev *dev, struct config cfg, int core) count = nlog_header->totalnlogs; core_num = core < 0 ? nlog_header->corecount : 0; if (!header_size) { - snprintf(file_path, sizeof(file_path), "%s/NLog.bin", - cfg.dir_prefix); - output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, 0666); + snprintf(file_path, sizeof(file_path) - 1, "%s/NLog.bin", + ilog->cfg->out_dir); + output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, + LOG_FILE_PERMISSION); if (output < 0) return -errno; header_size = get_nlog_header_size(nlog_header); @@ -401,11 +412,11 @@ static int dump_nlogs(struct nvme_dev *dev, struct config cfg, int core) err = write_header(buf, output, header_size); if (err) break; - if (cfg.verbose) + if (ilog->cfg->verbose) print_nlog_header(buf); cmd.cdw13 = 0x400; err = cmd_dump_repeat(&cmd, nlog_header->nlogbytesize / 4, - output, dev_fd(dev), true); + output, dev_fd(ilog->dev), true); if (err) break; } while (++log_select.selectNlog < count); @@ -414,244 +425,552 @@ static int dump_nlogs(struct nvme_dev *dev, struct config cfg, int core) } while (++log_select.selectCore < core_num); if (is_open) { close(output); - printf("Successfully wrote log to %s\n", 
file_path); + printf("Successfully wrote Nlog to %s\n", file_path); } return err; } -enum telemetry_type { - HOSTGENOLD, - HOSTGENNEW, - CONTROLLER +int ensure_dir(const char *parent_dir_name, const char *name) +{ + char file_path[PATH_MAX] = {0}; + struct stat sb; + + snprintf(file_path, sizeof(file_path) - 1, "%s/%s", parent_dir_name, name); + if (!(stat(file_path, &sb) == 0 && S_ISDIR(sb.st_mode))) { + if (mkdir(file_path, 777) != 0) { + perror(file_path); + return -errno; + } + } + return 0; +} + +struct log { + __u8 id; + const char *desc; + size_t buffer_size; + __u8 *buffer; }; -static int dump_telemetry(struct nvme_dev *dev, struct config cfg, enum telemetry_type ttype) +static int log_save(struct log *log, const char *parent_dir_name, const char *subdir_name, + const char *file_name, __u8 *buffer, size_t buf_size) { - _cleanup_free_ struct nvme_telemetry_log *log = NULL; - size_t log_size = 0; - int err = 0; - __u8 *buffer = NULL; + _cleanup_fd_ int output = -1; + char file_path[PATH_MAX] = {0}; size_t bytes_remaining = 0; + int err = 0; + + ensure_dir(parent_dir_name, subdir_name); + + snprintf(file_path, sizeof(file_path) - 1, "%s/%s/%s", parent_dir_name, subdir_name, + file_name); + output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, LOG_FILE_PERMISSION); + if (output < 0) + return -errno; + + bytes_remaining = buf_size; + + while (bytes_remaining) { + ssize_t bytes_written = write(output, buffer, bytes_remaining); + + if (bytes_written < 0) { + err = -errno; + goto log_save_close_output; + } + + bytes_remaining -= bytes_written; + buffer += bytes_written; + } + printf("Successfully wrote %s to %s\n", log->desc, file_path); + +log_save_close_output: + close(output); + return err; +} + +static int ilog_dump_identify_page(struct ilog *ilog, struct log *cns, __u32 nsid) +{ + __u8 data[NVME_IDENTIFY_DATA_SIZE]; + __u8 *buff = cns->buffer ? 
cns->buffer : data; + char filename[sizeof( + "cntid_XXXXX_cns_XXX_nsid_XXXXXXXXXX_nvmsetid_XXXXX_csi_XXX.bin")] = {0}; + int err = nvme_identify_cns_nsid(dev_fd(ilog->dev), cns->id, nsid, buff); + + if (err) + return err; + + snprintf(filename, sizeof(filename) - 1, "cntid_0_cns_%d_nsid_%d_nvmsetid_0_csi_0.bin", + cns->id, nsid); + return log_save(cns, ilog->cfg->out_dir, "identify", filename, buff, sizeof(data)); +} + +static int ilog_ensure_dump_id_ctrl(struct ilog *ilog) +{ + static bool first = true; + static int err; + struct log idctrl = {NVME_IDENTIFY_CNS_CTRL, "Id Controller Data", sizeof(ilog->id_ctrl), + (__u8 *) &ilog->id_ctrl}; + + if (!first) + return err; + + first = false; + err = ilog_dump_identify_page(ilog, &idctrl, 0); + + if (err) + return err; + + ilog->count++; + + if (ilog->id_ctrl.lpa & 0x8) + ilog->max_da = NVME_TELEMETRY_DA_3; + if (ilog->id_ctrl.lpa & 0x40) + ilog->max_da = NVME_TELEMETRY_DA_4; + + /* assuming CAP.MPSMIN is zero minimum Memory Page Size is at least 4096 bytes */ + ilog->max_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE; + if (ilog->max_tx > DRIVER_MAX_TX_256K) + ilog->max_tx = DRIVER_MAX_TX_256K; + + return err; +} + +static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype) +{ + int err = 0; enum nvme_telemetry_da da; size_t max_data_tx; - char file_path[PATH_MAX]; - char *file_name; - char *log_descr; - struct stat sb; + const char *file_name; + struct nvme_feat_host_behavior prev = {0}; + bool host_behavior_changed = false; + struct log log = {0}; + + err = ilog_ensure_dump_id_ctrl(ilog); + if (err) + return err; - _cleanup_file_ int output = -1; + da = ilog->max_da; + max_data_tx = ilog->max_tx; + + if (da == 4) { + __u32 result; + int err = nvme_get_features_host_behavior(dev_fd(ilog->dev), 0, &prev, &result); + + if (!err && !prev.etdas) { + struct nvme_feat_host_behavior da4_enable = prev; + + da4_enable.etdas = 1; + nvme_set_features_host_behavior(dev_fd(ilog->dev), 0, &da4_enable); + 
host_behavior_changed = true; + } + } switch (ttype) { - case HOSTGENNEW: + case HIT: file_name = "lid_0x07_lsp_0x01_lsi_0x0000.bin"; - log_descr = "Generated Host Initiated"; - break; - case HOSTGENOLD: - file_name = "lid_0x07_lsp_0x00_lsi_0x0000.bin"; - log_descr = "Existing Host Initiated"; + log.desc = "Host Initiated Telemetry"; + err = nvme_get_telemetry_log(dev_fd(ilog->dev), true, false, false, max_data_tx, da, + (struct nvme_telemetry_log **) &log.buffer, + &log.buffer_size); break; - case CONTROLLER: + case CIT: file_name = "lid_0x08_lsp_0x00_lsi_0x0000.bin"; - log_descr = "Controller Initiated"; + log.desc = "Controller Initiated Telemetry"; + err = nvme_get_telemetry_log(dev_fd(ilog->dev), false, true, true, max_data_tx, da, + (struct nvme_telemetry_log **) &log.buffer, + &log.buffer_size); break; default: return -EINVAL; } - err = nvme_get_telemetry_max(dev_fd(dev), &da, &max_data_tx); + + if (host_behavior_changed) + nvme_set_features_host_behavior(dev_fd(ilog->dev), 0, &prev); + if (err) return err; - if (max_data_tx > DRIVER_MAX_TX_256K) - max_data_tx = DRIVER_MAX_TX_256K; + err = log_save(&log, ilog->cfg->out_dir, "log_pages", file_name, log.buffer, + log.buffer_size); + return err; +} - switch (ttype) { - case HOSTGENNEW: - err = nvme_get_telemetry_log(dev_fd(dev), true, false, false, max_data_tx, da, - &log, &log_size); - break; - case HOSTGENOLD: - err = nvme_get_telemetry_log(dev_fd(dev), false, false, false, max_data_tx, da, - &log, &log_size); - break; - case CONTROLLER: - err = nvme_get_telemetry_log(dev_fd(dev), false, true, true, max_data_tx, da, &log, - &log_size); - break; +static int ilog_dump_identify_pages(struct ilog *ilog) +{ + struct nvme_ns_list ns_list; + __u32 j = 0; + struct log identify_base_list[] = { + {NVME_IDENTIFY_CNS_NS_ACTIVE_LIST, "Id Active Namespace ID list", + sizeof(ns_list), (__u8 *) &ns_list}, + {NVME_IDENTIFY_CNS_NVMSET_LIST, "Id NVM Set List"}, + {NVME_IDENTIFY_CNS_CSI_CTRL, "Id I/O Command Set specific"}, + 
{NVME_IDENTIFY_CNS_ALLOCATED_NS_LIST, "Id Allocated Namespace ID list"}, + {NVME_IDENTIFY_CNS_CTRL_LIST, "Id Controller List"} + }; + struct log identify_ns_required_list[] = { + {NVME_IDENTIFY_CNS_NS, "Id Namespace data"}, + {NVME_IDENTIFY_CNS_NS_DESC_LIST, "Id Namespace Id Descriptor list"}, + {NVME_IDENTIFY_CNS_CSI_NS, "Id Namespace ID I/O Command Set specific"}, + {NVME_IDENTIFY_CNS_CSI_INDEPENDENT_ID_NS, + "I/O Command Set Independent Identify Namespace Data"}, + {NVME_IDENTIFY_CNS_ALLOCATED_NS, "Id Namespace data "}, + {NVME_IDENTIFY_CNS_NS_CTRL_LIST, "Id Namespace Id Controller List"}, + }; + + ilog_ensure_dump_id_ctrl(ilog); + + for (int i = 0; i < ARRAY_SIZE(identify_base_list); i++) { + int err = ilog_dump_identify_page(ilog, &identify_base_list[i], 0); + + if (err == 0) + ilog->count++; } - if (err) - return err; + while (ns_list.ns[j]) { + for (int i = 0; i < ARRAY_SIZE(identify_ns_required_list); i++) { + int err = ilog_dump_identify_page(ilog, &identify_ns_required_list[i], + ns_list.ns[j]); - snprintf(file_path, sizeof(file_path), "%s/log_pages", cfg.dir_prefix); - if (!(stat(file_path, &sb) == 0 && S_ISDIR(sb.st_mode))) { - if (mkdir(file_path, 777) != 0) { - perror(file_path); - return -errno; + if (err == 0) + ilog->count++; } + j++; } - snprintf(file_path, sizeof(file_path), "%s/log_pages/%s", cfg.dir_prefix, file_name); - output = open(file_path, O_WRONLY | O_CREAT | O_TRUNC, 0644); - if (output < 0) - return -errno; + return 0; +} - bytes_remaining = log_size; - buffer = (__u8 *)log; +static int ilog_dump_log_page(struct ilog *ilog, struct log *lp, __u32 nsid) +{ + __u8 *buff = lp->buffer; + char filename[sizeof("lid_0xXX_lsp_0xXX_lsi_0xXXXX.bin")] = {0}; + int err; - while (bytes_remaining) { - ssize_t bytes_written = write(output, buffer, bytes_remaining); + if (!lp->buffer_size) + return -EINVAL; + if (!buff) { + buff = nvme_alloc(lp->buffer_size); + if (!buff) + return -ENOMEM; + } + err = nvme_get_nsid_log(dev_fd(ilog->dev), 0, lp->id, 0, 
lp->buffer_size, buff); + if (err) + return err; - if (bytes_written < 0) { - err = -errno; - goto tele_close_output; - } + snprintf(filename, sizeof(filename), "lid_0x%02x_lsp_0x00_lsi_0x0000.bin", + lp->id); + return log_save(lp, ilog->cfg->out_dir, "log_pages", filename, buff, lp->buffer_size); +} - bytes_remaining -= bytes_written; - buffer += bytes_written; +static int ilog_dump_no_lsp_log_pages(struct ilog *ilog) +{ + struct lba_status_info { + __u32 lslplen; + __u32 nlslne; + __u32 estulb; + __u16 rsvd; + __u16 lsgc; + } lba_status = {}; + __u64 num_entries = 0; + struct log log_page_dependent_list[] = { + {NVME_LOG_LID_LBA_STATUS}, + {NVME_LOG_LID_ENDURANCE_GRP_EVT}, + }; + struct log log_page_base_list[] = { + {NVME_LOG_LID_SUPPORTED_LOG_PAGES, NULL, sizeof(struct nvme_supported_log_pages)}, + {NVME_LOG_LID_ERROR, NULL, + (ilog->id_ctrl.elpe + 1) * sizeof(struct nvme_error_log_page)}, + {NVME_LOG_LID_SMART, NULL, sizeof(struct nvme_smart_log)}, + {NVME_LOG_LID_FW_SLOT, NULL, sizeof(struct nvme_firmware_slot)}, + {NVME_LOG_LID_CHANGED_NS, NULL, sizeof(struct nvme_ns_list)}, + {NVME_LOG_LID_CMD_EFFECTS, NULL, sizeof(struct nvme_cmd_effects_log)}, + {NVME_LOG_LID_DEVICE_SELF_TEST, NULL, sizeof(struct nvme_self_test_log)}, + {NVME_LOG_LID_LBA_STATUS, NULL, sizeof(lba_status), (__u8 *) &lba_status}, + {NVME_LOG_LID_ENDURANCE_GRP_EVT, NULL, sizeof(num_entries), (__u8 *) &num_entries}, + {NVME_LOG_LID_FID_SUPPORTED_EFFECTS, NULL, + sizeof(struct nvme_fid_supported_effects_log)}, + {NVME_LOG_LID_MI_CMD_SUPPORTED_EFFECTS, NULL, + sizeof(struct nvme_mi_cmd_supported_effects_log)}, + {NVME_LOG_LID_CMD_AND_FEAT_LOCKDOWN, NULL, 512}, + {NVME_LOG_LID_PHY_RX_EOM, NULL, 512}, + {NVME_LOG_LID_SANITIZE, NULL, sizeof(struct nvme_sanitize_log_page)}, + {0xC0, "OCP or VU SMART / Health Information Extended", 512}, + {0xC1, "OCP Error Recovery or VU Latency Reads", 512}, + {0xC2, "OCP Firmware Activation History or VU Latency Writes", 4096}, + {0xC3, "OCP Latency Monitor", 
512}, + {0xC4, "OCP Device Capabilities or VU Endurance Manager Statistics", 4096}, + {0xC5, "OCP Unsupported Requirements or VU Tempeture Statistics", 4096}, + {0xC7, "OCP TCG Configuration", 512}, + {0xCA, "SMART Attributes", 512}, + {0xd5, "Tempeture Statistics", 512}, + {0xfe, "Latency Outlier", 8192}, + }; + + for (int i = 0; i < ARRAY_SIZE(log_page_base_list); i++) { + log_page_base_list[i].desc = log_page_base_list[i].desc ? + log_page_base_list[i].desc : + nvme_log_to_string(log_page_base_list[i].id); + if (!ilog_dump_log_page(ilog, &log_page_base_list[i], 0)) + ilog->count++; } - printf("Successfully wrote %s Telemetry log to %s\n", log_descr, file_path); -tele_close_output: - close(output); + /* if needed, patch logs based on retrieved log size */ + if (lba_status.lslplen > sizeof(lba_status)) + log_page_dependent_list[0].buffer_size = lba_status.lslplen; + if (num_entries) + log_page_dependent_list[1].buffer_size = sizeof(num_entries) + + (num_entries * sizeof(__u16)); + + for (int i = 0; i < ARRAY_SIZE(log_page_dependent_list); i++) { + log_page_dependent_list[i].desc = log_page_dependent_list[i].desc ? 
+ log_page_dependent_list[i].desc : + nvme_log_to_string(log_page_dependent_list[i].id); + ilog_dump_log_page(ilog, &log_page_dependent_list[i], 0); + } + + return 0; +} + +static int ilog_dump_pel(struct ilog *ilog) +{ + struct log lp = { + NVME_LOG_LID_PERSISTENT_EVENT, + nvme_log_to_string(NVME_LOG_LID_PERSISTENT_EVENT) + }; + void *pevent_log_full; + int err; + struct nvme_get_log_args args; + + _cleanup_free_ struct nvme_persistent_event_log *pevent = NULL; + + _cleanup_huge_ struct nvme_mem_huge mh = {0}; + + err = nvme_get_log_persistent_event(dev_fd(ilog->dev), NVME_PEVENT_LOG_RELEASE_CTX, + sizeof(*pevent), pevent); + if (err) + return err; + + + pevent = nvme_alloc(sizeof(*pevent)); + if (!pevent) + return -ENOMEM; + + err = nvme_get_log_persistent_event(dev_fd(ilog->dev), NVME_PEVENT_LOG_EST_CTX_AND_READ, + sizeof(*pevent), pevent); + if (err) + return err; + + lp.buffer_size = le64_to_cpu(pevent->tll); + + pevent_log_full = nvme_alloc_huge(lp.buffer_size, &mh); + if (!pevent_log_full) + return -ENOMEM; + + err = nvme_get_log_persistent_event(dev_fd(ilog->dev), NVME_PEVENT_LOG_READ, + lp.buffer_size, pevent_log_full); + args = (struct nvme_get_log_args) { + .lpo = 0, + .result = NULL, + .log = pevent_log_full, + .args_size = sizeof(args), + .fd = dev_fd(ilog->dev), + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + .lid = NVME_LOG_LID_PERSISTENT_EVENT, + .len = lp.buffer_size, + .nsid = NVME_NSID_ALL, + .csi = NVME_CSI_NVM, + .lsi = NVME_LOG_LSI_NONE, + .lsp = NVME_PEVENT_LOG_READ, + .uuidx = NVME_UUID_NONE, + .rae = false, + .ot = false, + }; + err = nvme_get_log_page(dev_fd(ilog->dev), ilog->max_tx, &args); + if (err) + return err; + + err = log_save(&lp, ilog->cfg->out_dir, "log_pages", "lid_0x0d_lsp_0x00_lsi_0x0000.bin", + pevent_log_full, lp.buffer_size); + + nvme_get_log_persistent_event(dev_fd(ilog->dev), NVME_PEVENT_LOG_RELEASE_CTX, + sizeof(*pevent), pevent); + return err; } int solidigm_get_internal_log(int argc, char **argv, struct command *command, 
struct plugin *plugin) { - char folder[PATH_MAX]; - char zip_name[PATH_MAX]; - char *output_path; char sn_prefix[sizeof(((struct nvme_id_ctrl *)0)->sn)+1]; - int log_count = 0; + char date_str[sizeof("-YYYYMMDDHHMMSS")]; + char full_folder[PATH_MAX] = {0}; + char unique_folder[sizeof(sn_prefix)+sizeof(date_str)-1] = {0}; + char *initial_folder; + char zip_name[PATH_MAX] = {0}; + char *output_path; + struct ilog ilog = {0}; int err; _cleanup_nvme_dev_ struct nvme_dev *dev = NULL; - bool all = false; - time_t t; - struct tm tm; + enum log_type log_type = ALL; + char type_ALL[] = "ALL"; + time_t current_time; + DIR *dir; const char *desc = "Get Debug Firmware Logs and save them."; - const char *type = - "Log type: ALL, CONTROLLERINITTELEMETRY, HOSTINITTELEMETRY, HOSTINITTELEMETRYNOGEN, NLOG, ASSERT, EVENT. Defaults to ALL."; - const char *prefix = "Output dir prefix; defaults to device serial number."; + const char *type = "Log type; Defaults to ALL."; + const char *out_dir = "Output directory; defaults to current working directory."; const char *verbose = "To print out verbose info."; - const char *namespace_id = "Namespace to get logs from."; - struct config cfg = { - .namespace_id = NVME_NSID_ALL, - .dir_prefix = NULL, - .type = NULL, + .out_dir = ".", + .type = type_ALL, }; OPT_ARGS(opts) = { - OPT_STR("type", 't', &cfg.type, type), - OPT_UINT("namespace-id", 'n', &cfg.namespace_id, namespace_id), - OPT_FILE("dir-prefix", 'p', &cfg.dir_prefix, prefix), - OPT_FLAG("verbose", 'v', &cfg.verbose, verbose), + OPT_STRING("type", 't', "ALL|CIT|HIT|NLOG|ASSERT|EVENT", &cfg.type, type), + OPT_STRING("dir-name", 'd', "DIRECTORY", &cfg.out_dir, out_dir), + OPT_FLAG("verbose", 'v', &cfg.verbose, verbose), OPT_END() }; err = parse_and_open(&dev, argc, argv, desc, opts); if (err) return err; - - if (!cfg.dir_prefix) { - err = get_serial_number(sn_prefix, dev_fd(dev)); - if (err) - return err; - cfg.dir_prefix = sn_prefix; + ilog.dev = dev; + ilog.cfg = &cfg; + + for (char *p = 
cfg.type; *p; ++p) + *p = toupper(*p); + + if (!strcmp(cfg.type, "ALL")) + log_type = ALL; + else if (!strcmp(cfg.type, "HIT")) + log_type = HIT; + else if (!strcmp(cfg.type, "CIT")) + log_type = CIT; + else if (!strcmp(cfg.type, "NLOG")) + log_type = NLOG; + else if (!strcmp(cfg.type, "ASSERT")) + log_type = ASSERTLOG; + else if (!strcmp(cfg.type, "EVENT")) + log_type = EVENTLOG; + else { + fprintf(stderr, "Invalid log type: %s\n", cfg.type); + return -EINVAL; } - t = time(NULL); - tm = *localtime(&t); - snprintf(folder, sizeof(folder), "%s-%d%02d%02d%02d%02d%02d", cfg.dir_prefix, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); - if (mkdir(folder, 0777) != 0) { - perror("mkdir"); + + dir = opendir(cfg.out_dir); + if (dir) + closedir(dir); + else { + perror(cfg.out_dir); return -errno; } - cfg.dir_prefix = folder; - output_path = folder; - if (!cfg.type) - cfg.type = "ALL"; - else { - for (char *p = cfg.type; *p; ++p) - *p = toupper(*p); - } + initial_folder = cfg.out_dir; - if (!strcmp(cfg.type, "ALL")) { - all = true; - } - if (all || !strcmp(cfg.type, "ASSERT")) { - err = dump_assert_logs(dev, cfg); - if (err == 0) - log_count++; - else if (err < 0) - perror("Error retrieving Assert log"); + err = get_serial_number(sn_prefix, dev_fd(dev)); + if (err) + return err; + + current_time = time(NULL); + strftime(date_str, sizeof(date_str), "-%Y%m%d%H%M%S", localtime(&current_time)); + snprintf(unique_folder, sizeof(unique_folder), "%s%s", sn_prefix, date_str); + snprintf(full_folder, sizeof(full_folder) - 1, "%s/%s", cfg.out_dir, unique_folder); + if (mkdir(full_folder, 0755) != 0) { + perror("mkdir"); + return -errno; } - if (all || !strcmp(cfg.type, "EVENT")) { - err = dump_event_logs(dev, cfg); + cfg.out_dir = full_folder; + output_path = full_folder; + + /* Retrieve first logs that records actions to retrieve other logs */ + if (log_type == ALL || log_type == HIT) { + err = ilog_dump_telemetry(&ilog, HIT); if (err == 0) - log_count++; + 
ilog.count++; else if (err < 0) - perror("Error retrieving Event log"); + perror("Error retrieving Host Initiated Telemetry"); } - if (all || !strcmp(cfg.type, "NLOG")) { - err = dump_nlogs(dev, cfg, -1); + if (log_type == ALL || log_type == NLOG) { + err = ilog_dump_nlogs(&ilog, -1); if (err == 0) - log_count++; + ilog.count++; else if (err < 0) perror("Error retrieving Nlog"); } - if (all || !strcmp(cfg.type, "CONTROLLERINITTELEMETRY")) { - err = dump_telemetry(dev, cfg, CONTROLLER); + if (log_type == ALL || log_type == CIT) { + err = ilog_dump_telemetry(&ilog, CIT); if (err == 0) - log_count++; + ilog.count++; else if (err < 0) - perror("Error retrieving Telemetry Controller Initiated"); + perror("Error retrieving Controller Initiated Telemetry"); } - if (all || !strcmp(cfg.type, "HOSTINITTELEMETRYNOGEN")) { - err = dump_telemetry(dev, cfg, HOSTGENOLD); + if (log_type == ALL || log_type == ASSERTLOG) { + err = ilog_dump_assert_logs(&ilog); if (err == 0) - log_count++; + ilog.count++; else if (err < 0) - perror("Error retrieving previously existing Telemetry Host Initiated"); + perror("Error retrieving Assert log"); } - if (all || !strcmp(cfg.type, "HOSTINITTELEMETRY")) { - err = dump_telemetry(dev, cfg, HOSTGENNEW); + if (log_type == ALL || log_type == EVENTLOG) { + err = ilog_dump_event_logs(&ilog); if (err == 0) - log_count++; + ilog.count++; else if (err < 0) - perror("Error retrieving Telemetry Host Initiated"); + perror("Error retrieving Event log"); + } + if (log_type == ALL) { + err = ilog_dump_identify_pages(&ilog); + if (err < 0) + perror("Error retrieving Identify pages"); + + err = ilog_dump_pel(&ilog); + if (err < 0) + perror("Error retrieving Persistent Event Log page"); + + err = ilog_dump_no_lsp_log_pages(&ilog); + if (err < 0) + perror("Error retrieving no LSP Log pages"); } - if (log_count > 0) { + if (ilog.count > 0) { int ret_cmd; - char cmd[ARG_MAX]; - char *where_err = cfg.verbose ? 
"" : ">/dev/null 2>&1"; - - snprintf(zip_name, sizeof(zip_name), "%s.zip", cfg.dir_prefix); - snprintf(cmd, sizeof(cmd), "cd \"%s\" && zip -r \"../%s\" ./* %s", cfg.dir_prefix, - zip_name, where_err); + char *cmd; + char *quiet = cfg.verbose ? "" : " -q"; + + snprintf(zip_name, sizeof(zip_name) - 1, "%s.zip", unique_folder); + if (asprintf(&cmd, "cd \"%s\" && zip -MM -r \"../%s\" ./* %s", cfg.out_dir, + zip_name, quiet) < 0) { + err = errno; + perror("Can't allocate string for zip command"); + goto out; + } printf("Compressing logs to %s\n", zip_name); ret_cmd = system(cmd); - if (ret_cmd == -1) + if (ret_cmd) perror(cmd); else { output_path = zip_name; - snprintf(cmd, sizeof(cmd), "rm -rf %s", cfg.dir_prefix); - printf("Removing %s\n", cfg.dir_prefix); + free(cmd); + if (asprintf(&cmd, "rm -rf %s", cfg.out_dir) < 0) { + err = errno; + perror("Can't allocate string for cleanup"); + goto out; + } if (system(cmd) != 0) perror("Failed removing logs folder"); } + free(cmd); } - if (log_count == 0) { +out: + if (ilog.count == 0) { if (err > 0) nvme_show_status(err); - } else if ((log_count > 1) || cfg.verbose) - printf("Total: %d log files in %s\n", log_count, output_path); + + } else if ((ilog.count > 1) || cfg.verbose) + printf("Total: %d log files in %s/%s\n", ilog.count, initial_folder, output_path); return err; } diff --git a/plugins/solidigm/solidigm-latency-tracking.c b/plugins/solidigm/solidigm-latency-tracking.c index c6c3315..899075d 100644 --- a/plugins/solidigm/solidigm-latency-tracking.c +++ b/plugins/solidigm/solidigm-latency-tracking.c @@ -45,7 +45,7 @@ struct latency_tracker { int fd; __u8 uuid_index; struct config cfg; - enum nvme_print_flags print_flags; + nvme_print_flags_t print_flags; struct latency_statistics stats; struct json_object *bucket_list; __u32 bucket_list_size; diff --git a/plugins/solidigm/solidigm-log-page-dir.c b/plugins/solidigm/solidigm-log-page-dir.c index 7d7c027..f8d1974 100644 --- a/plugins/solidigm/solidigm-log-page-dir.c +++ 
b/plugins/solidigm/solidigm-log-page-dir.c @@ -241,7 +241,7 @@ int solidigm_get_log_page_directory_log(int argc, char **argv, struct command *c } if (!err) { - enum nvme_print_flags print_flag; + nvme_print_flags_t print_flag; err = validate_output_format(format, &print_flag); if (err < 0) { diff --git a/plugins/solidigm/solidigm-market-log.c b/plugins/solidigm/solidigm-market-log.c index d7d38da..e7e8728 100644 --- a/plugins/solidigm/solidigm-market-log.c +++ b/plugins/solidigm/solidigm-market-log.c @@ -12,7 +12,6 @@ #include <stdlib.h> #include <unistd.h> #include <inttypes.h> -#include <linux/limits.h> #include "common.h" #include "nvme.h" diff --git a/plugins/solidigm/solidigm-nvme.c b/plugins/solidigm/solidigm-nvme.c index 3fb86f5..8a7db07 100644 --- a/plugins/solidigm/solidigm-nvme.c +++ b/plugins/solidigm/solidigm-nvme.c @@ -21,6 +21,7 @@ #include "solidigm-temp-stats.h" #include "solidigm-get-drive-info.h" #include "solidigm-ocp-version.h" +#include "solidigm-workload-tracker.h" #include "plugins/ocp/ocp-clear-features.h" #include "plugins/ocp/ocp-smart-extended-log.h" @@ -107,3 +108,9 @@ static int get_cloud_SSDplugin_version(int argc, char **argv, struct command *cm { return sldgm_ocp_version(argc, argv, cmd, plugin); } + +static int get_workload_tracker(int argc, char **argv, struct command *cmd, + struct plugin *plugin) +{ + return sldgm_get_workload_tracker(argc, argv, cmd, plugin); +} diff --git a/plugins/solidigm/solidigm-nvme.h b/plugins/solidigm/solidigm-nvme.h index a639fd2..2b74a02 100644 --- a/plugins/solidigm/solidigm-nvme.h +++ b/plugins/solidigm/solidigm-nvme.h @@ -13,7 +13,7 @@ #include "cmd.h" -#define SOLIDIGM_PLUGIN_VERSION "1.2" +#define SOLIDIGM_PLUGIN_VERSION "1.6" PLUGIN(NAME("solidigm", "Solidigm vendor specific extensions", SOLIDIGM_PLUGIN_VERSION), COMMAND_LIST( @@ -32,6 +32,8 @@ PLUGIN(NAME("solidigm", "Solidigm vendor specific extensions", SOLIDIGM_PLUGIN_V ENTRY("temp-stats", "Retrieve Temperature Statistics log", 
get_temp_stats_log) ENTRY("vs-drive-info", "Retrieve drive information", get_drive_info) ENTRY("cloud-SSDplugin-version", "Prints plug-in OCP version", get_cloud_SSDplugin_version) + ENTRY("workload-tracker", "Real Time capture Workload Tracker samples", + get_workload_tracker) ) ); diff --git a/plugins/solidigm/solidigm-smart.c b/plugins/solidigm/solidigm-smart.c index a97abe2..002753a 100644 --- a/plugins/solidigm/solidigm-smart.c +++ b/plugins/solidigm/solidigm-smart.c @@ -197,7 +197,7 @@ int solidigm_get_additional_smart_log(int argc, char **argv, struct command *cmd "Get Solidigm vendor specific smart log (optionally, for the specified namespace), and show it."; const int solidigm_vu_smart_log_id = 0xCA; struct vu_smart_log smart_log_payload; - enum nvme_print_flags flags; + nvme_print_flags_t flags; struct nvme_dev *dev; int err; __u8 uuid_index; diff --git a/plugins/solidigm/solidigm-workload-tracker.c b/plugins/solidigm/solidigm-workload-tracker.c new file mode 100644 index 0000000..73bb3c3 --- /dev/null +++ b/plugins/solidigm/solidigm-workload-tracker.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2024 Solidigm. 
+ * + * Authors: leonardo.da.cunha@solidigm.com + */ + +#include "common.h" +#include "nvme-print.h" +#include <errno.h> +#include <time.h> + +#define LID 0xf9 +#define FID 0xf1 +#define WLT2MS 25000 +#define MAX_WORKLOAD_LOG_ENTRIES 126 +#define MAX_WORKLOAD_LOG_ENTRY_SIZE 32 +#define MAX_FIELDS 15 + +char const *samplet[] = { + "default", + "1ms", + "5ms", + "10ms", + "50ms", + "100ms", + "500ms", + "1s", + "5s", + "10s", + "30s", + "1m", + "5m", + "10m", + "30m", + "1h" +}; + +char const *trk_types[] = { + "Base", + "CmdQ", + "Pattern", + "RandSeq", + "Throttle", + "Power", + "Defrag" +}; + +struct field { + __u8 size; + char *name; + char *desc; +}; + +struct field group_fields[][MAX_FIELDS] = { +{ // Base, group 0 + {4, "hostReads", "Host Read Count in Sectors"}, + {4, "hostWrites", "Host Write Count in Sectors"}, + {4, "nandWrites", "Nand Write Count in Sectors"}, + {1, "misalignment%", "% of Misaligned Sectors"}, + {1, "collision%", "% of Colliding Sectors"}, + {1, "randomWrite%", "% of Random Write Sectors vs. Sequential"}, + {1, "randomRead%", "% of Random Read Sectors vs. Sequential"}, + {4, "xorInvokedCount", "Count of XOR Operations Invoked"}, + {4, "hostSoftReadSuccess", "Count of Soft Reads Completed Successfully."}, + {4, "bandDefragRelocation", "Count of BDRs"}, + {1, "pwrThrottle%", "% of Throttle Period due to Power Regulation"}, + {1, "thmThrottle%", "% of Throttle Period due to Thermal Levels"}, + {1, "tbufBg%", "% of Background TBUF Work vs. All Available Work"}, + {1, "tbufHost%", "% of Host Requested TBUF Work vs. 
All Available Work"}, + {0} +}, +{ //CmdQ stats, group 1 + {4, "CmdQ_InternalReadQDepth", "Snapshot of the Internal Read Queue Depth"}, + {4, "CmdQ_DetectedWriteQDepth", "Snapshot of the Internal Write Queue Depth"}, + {4, "CmdQ_ReadCmdsPending", "Snapshot of the Internal Read Commands Pending"}, + {1, "misalignment%", "% of Misaligned Sectors"}, + {1, "collision%", "% of Colliding Sectors"}, + {1, "randomWrite%", "% of Random Write Sectors vs. Sequential"}, + {1, "randomRead%", "% of Random Read Sectors vs. Sequential"}, + {4, "CmdQ_WriteCmdsPending", "Snapshot of the Internal Write Commands Pending"}, + {4, "CmdQ_ReadCmdsOutstanding", "Snapshot of the Internal Read Commands Outstanding"}, + {4, "CmdQ_WriteCmdsOutstanding", "Snapshot of the Internal Read Commands Outstanding"}, + {1, "pwrThrottle%", "% of Throttle Period due to Power Regulation"}, + {1, "thmThrottle%", "% of Throttle Period due to Thermal Levels"}, + {1, "tbufBg%", "% of Background TBUF Work vs. All Available Work"}, + {1, "tbufHost%", "% of Host Requested TBUF Work vs. All Available Work"}, + {0} +}, +{ // test pattern, group 2 + {4, "x11223300"}, + {4, "x44556600_"}, + {4, "x77889900_"}, + {4, "xAABBCC00_"}, + {2, "xDD00"}, + {2, "xEE00"}, + {2, "xFF00"}, + {2, "x0_"}, + {1, "x00"}, + {1, "x80"}, + {1, "x__"}, + {1, "x8_"}, + {4, "x33322100"}, + {0} +}, +{ // Random vs. Sequential Data, group 3 + {4, "hostReads", "Host Read Count in Sectors"}, + {4, "hostWrites", "Host Write Count in Sectors"}, + {4, "nandWrites", "Nand Write Count in Sectors"}, + {4, "randomReadCmd", "Count of Random Read Commands (vs. Sequential)"}, + {4, "randomWriteCmd", "Count of Random Write Commands (vs. Sequential)"}, + {4, "hostReadCmd", "Count of Total Host Read Commands (vs. Sequential)"}, + {4, "hostWriteCmd", "Count of Total Host Read Commands (vs. Sequential)"}, + {1, NULL}, + {1, NULL}, + {1, "randomWrite%", "% of Random Write Sectors vs. Sequential"}, + {1, "randomThrottleRead%", "% of Random Read Sectors vs. 
Sequential"}, + {0} +}, +{ //Detailed Throttle Data, group 4 + {4, "pwrThrottleOn_ms", "Duration of Power Throttling in mS."}, + {4, "thmThrottleOn_ms", "Duration of Thermal Throttling in mS."}, + {4, "powerOn_us", "Duration of Power-on in uS."}, + {4, NULL}, + {4, NULL}, + {4, NULL}, + {4, NULL}, + {1, "pwrThrottle%", "% of Throttle Period due to Power Regulation"}, + {1, "thmThrottle%", "% of Throttle Period due to Thermal Levels"}, + {0} +}, +{ // Detailed Power Data, group 5 + // PMIC and/or Input Voltage Power + {4, "vin1Power", "in uW"}, + {4, "vin2Power"}, + // NAND Workload + {4, "nandWrites", "Nand Write Count in Sectors"}, + {4, "nandReads", "Nand Read Count in Sectors"}, + // Power Governor (if not enabled, all-0s) + {4, "lastNandAvgPwr"}, + {4, "lastDriveAvgPwr"}, + {4, "NscPwgSysCreditCnt"}, + {4, "burstPowerBudget"}, + {0} +}, +{ // Defrag, group 6 + {4, "hostReads", "Host Read Count in Sectors"}, + {4, "hostWrites", "Host Write Count in Sectors"}, + {4, "nandWrites", "Nand Write Count in Sectors"}, + {4, "defragSlots", "Current defragSlots"}, + {4, "hostSlots", "hostSlots"}, + {4, "totalSlots", "Total slots"}, + {1, "hostBufferUse%", "% of WCM_GetHostBuffersInUse to WCM_GetDesiredHostBuffer"}, + {1, "defragBufferUse%", "% of defragBuffer to Desired defrag buffer %"}, + {1, "defragSlotsUse%", "defragSlots to Total defrag slots %"}, + {1, "hostSlotsUse%", "hostSlots to Total defrag slots %"}, + {1, "aiuUse%", "% of AvailableIndirectionUnits to Start Setpoint IU"}, + {1, "isImminentFRorWL", "defrag/Wear leveling is imminent"}, + {1, "defragType", "defrag type"}, + {0} +}}; + +#pragma pack(push, 1) +union WorkloadLogEnable { + struct { + __u32 trackerEnable : 1; + __u32 triggerEnable : 1; + __u32 triggerSynchronous : 1; // trigger mode, 1=Synchronous,0=ASynchronous(Latency) + __u32 triggerDelta : 1; // trigger value mode, 1=delta, 0=current value + __u32 triggerDwordIndex : 3; // trigger dword index, 0~7 of a log entry + __u32 triggerByteWordIndex : 2; 
// trigger byte or word index,byte=0~3, word=0~1 + __u32 triggerSize : 2; // trigger size, 1=byte, 2=word, 3=dword as a trigger + __u32 sampleTime : 4; // trigger sample time + __u32 contentGroup : 4; // content group select + __u32 stopCount : 12;// event limit,if<>0,stop tracker after stopCount events + __u32 eventDumpEnable : 1; // trigger event dump enable + } field; + __u32 dword; +}; + +struct workloadLogHeader { + __u16 majorVersion; // Major Version + __u16 minorVersion; // Minor Version + __u32 workloadLogCount; // Number of Entries in the Workload Log + __u32 reserved; // reserve for future + __u32 triggeredEvents; // Count of events triggered + __u32 samplePeriodInMilliseconds; // Sample Period In Milliseconds + __u64 timestamp_lastEntry; // Timestamp for the last full entry + __u64 timestamp_triggered; // Timestamp at the point of trigger + __u32 trackerEnable; // Workload trigger and enable settings + __u32 triggerthreshold; // Trigger threshold + __u32 triggeredValue; // Actual value fired the trigger +}; + + +struct workloadLog { // Full WL Log Structure + struct workloadLogHeader header; + __u8 entry[MAX_WORKLOAD_LOG_ENTRIES][MAX_WORKLOAD_LOG_ENTRY_SIZE]; +}; +#pragma pack(pop) + +struct wltracker { + int fd; + struct workloadLog workload_log; + size_t entry_count; + unsigned int verbose; +}; + +static void wltracker_print_field_names(struct wltracker *wlt) +{ + struct workloadLog *log = &wlt->workload_log; + __u8 cnt = log->header.workloadLogCount; + union WorkloadLogEnable workloadEnable = (union WorkloadLogEnable)log->header.trackerEnable; + __u8 content_group = workloadEnable.field.contentGroup; + + if (cnt == 0) + return; + + printf("%-16s", "timestamp"); + + for (int i = 0 ; i < MAX_FIELDS; i++) { + struct field f = group_fields[content_group][i]; + + if (f.size == 0) + break; + if (f.name == NULL) + continue; + printf("%s ", f.name); + } + + if (wlt->verbose > 1) + printf("%s", "entry#"); + + printf("\n"); +} + +static void 
wltracker_print_header(struct wltracker *wlt) +{ + struct workloadLog *log = &wlt->workload_log; + __u8 cnt = log->header.workloadLogCount; + union WorkloadLogEnable workloadEnable = (union WorkloadLogEnable)log->header.trackerEnable; + __u8 content_group = workloadEnable.field.contentGroup; + + printf("%-20s %u.%u\n", "Log page version:", le16_to_cpu(log->header.majorVersion), + le16_to_cpu(log->header.minorVersion)); + printf("%-20s %u\n", "Sample period(ms):", + le32_to_cpu(log->header.samplePeriodInMilliseconds)); + printf("%-20s %lu\n", "timestamp_lastEntry:", + le64_to_cpu(log->header.timestamp_lastEntry) / WLT2MS); + printf("%-20s %lu\n", "timestamp_triggered:", + le64_to_cpu(log->header.timestamp_triggered/1000)); + printf("%-20s 0x%x\n", "trackerEnable:", le32_to_cpu(log->header.trackerEnable)); + printf("%-20s %u\n", "Triggerthreshold:", + le32_to_cpu(log->header.triggerthreshold)); + printf("%-20s %u\n", "ValueTriggered:", le32_to_cpu(log->header.triggeredValue)); + printf("%-20s %s\n", "Tracker Type:", trk_types[content_group]); + printf("%-30s %u\n", "Total workload log entries:", le16_to_cpu(cnt)); + printf("%-20s %ld\n\n", "Sample count:", wlt->entry_count); + if (wlt->entry_count != 0) + wltracker_print_field_names(wlt); +} + +static int wltracker_show_newer_entries(struct wltracker *wlt) +{ + struct workloadLog *log = &wlt->workload_log; + __u8 cnt; + __u8 content_group; + static __u64 last_timestamp_ms; + __u64 timestamp = 0; + union WorkloadLogEnable workloadEnable; + + int err = nvme_get_log_simple(wlt->fd, LID, sizeof(struct workloadLog), log); + + if (err > 0) { + nvme_show_status(err); + return err; + } + if (err < 0) + return err; + + if (wlt->verbose) + wltracker_print_header(wlt); + + cnt = log->header.workloadLogCount; + workloadEnable = (union WorkloadLogEnable)log->header.trackerEnable; + content_group = workloadEnable.field.contentGroup; + + if (cnt == 0) { + nvme_show_error("Warning : No valid workload log data\n"); + return 0; + } + 
+ timestamp = (le64_to_cpu(log->header.timestamp_lastEntry) / WLT2MS) - + (log->header.samplePeriodInMilliseconds * (cnt - 1)); + + + if (wlt->entry_count == 0) + wltracker_print_field_names(wlt); + + for (int i = cnt - 1; i >= 0; i--) { + int offset = 0; + __u8 *entry = (__u8 *) &log->entry[i]; + bool is_old = timestamp <= last_timestamp_ms; + + if (is_old) { + timestamp += log->header.samplePeriodInMilliseconds; + continue; + } + printf("%-16llu", timestamp); + for (int j = 0; j < MAX_FIELDS; j++) { + __u32 val = 0; + struct field f = group_fields[content_group][j]; + + if (f.size == 0) { + if (wlt->verbose > 1) + printf("%-*i", (int)sizeof("entry#"), i); + printf("\n"); + break; + } + if (f.name == NULL) + continue; + + switch (f.size) { + case 1: + val = *(entry+offset); + break; + case 2: + val = *(__u16 *)(entry + offset); + break; + case 4: + val = *(__u32 *)(entry + offset); + break; + default: + nvme_show_error("Bad field size"); + } + offset += f.size; + + printf("%-*u ", (int)strlen(f.name), val); + } + wlt->entry_count++; + timestamp += log->header.samplePeriodInMilliseconds; + } + last_timestamp_ms = log->header.timestamp_lastEntry / WLT2MS; + return 0; +} + +int wltracker_config(struct wltracker *wlt, union WorkloadLogEnable *we) +{ + struct nvme_set_features_args args = { + .args_size = sizeof(args), + .fd = wlt->fd, + .fid = FID, + .cdw11 = we->dword, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + }; + + return nvme_set_features(&args); +} + +static int stricmp(char const *a, char const *b) +{ + for (; *a || *b; a++, b++) + if (tolower((unsigned char)*a) != tolower((unsigned char)*b)) + return 1; + return 0; +} + +static int find_option(char const *list[], int size, const char *val) +{ + for (int i = 0; i < size; i++) { + if (!stricmp(val, list[i])) + return i; + } + return -EINVAL; +} + +static void join(char *dest, char const *list[], size_t list_size) +{ + strcat(dest, list[0]); + for (int i = 1; i < list_size; i++) { + strcat(dest, "|"); + 
strcat(dest, list[i]); + } +} + +__u64 micros(void) +{ + struct timespec ts; + __u64 us; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + us = (((__u64)ts.tv_sec)*1000000) + (((__u64)ts.tv_nsec)/1000); + return us; +} + +int sldgm_get_workload_tracker(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + struct wltracker wlt = {0}; + union WorkloadLogEnable we = {0}; + + _cleanup_nvme_dev_ struct nvme_dev *dev = NULL; + const char *desc = "Real Time capture Workload Tracker samples"; + const char *sample_interval = "Sample interval"; + const char *run_time = "Limit runtime capture time in seconds"; + const char *flush_frequency = + "Samples (1 to 126) to wait for extracting data. Default 100 samples"; + char type_options[80] = {0}; + char sample_options[80] = {0}; + __u64 us_start; + __u64 run_time_us; + __u64 elapsed_run_time_us = 0; + __u64 next_sample_us = 0; + int opt; + int err; + + struct config { + bool enable; + bool disable; + const char *tracker_type; + const char *sample_time; + int run_time_s; + int flush_frequency; + }; + + struct config cfg = { + .sample_time = samplet[0], + .flush_frequency = 100, + .tracker_type = trk_types[0], + }; + + join(type_options, trk_types, ARRAY_SIZE(trk_types)); + join(sample_options, samplet, ARRAY_SIZE(samplet)); + + OPT_ARGS(opts) = { + OPT_FLAG("enable", 'e', &cfg.enable, "tracker enable"), + OPT_FLAG("disable", 'd', &cfg.disable, "tracker disable"), + OPT_STRING("sample-time", 's', sample_options, &cfg.sample_time, sample_interval), + OPT_STRING("type", 't', type_options, &cfg.tracker_type, "Tracker type"), + OPT_INT("run-time", 'r', &cfg.run_time_s, run_time), + OPT_INT("flush-freq", 'f', &cfg.flush_frequency, flush_frequency), + OPT_INCR("verbose", 'v', &wlt.verbose, "Increase logging verbosity"), + OPT_END() + }; + + err = parse_and_open(&dev, argc, argv, desc, opts); + if (err) + return err; + + wlt.fd = dev_fd(dev); + + if ((cfg.flush_frequency < 1) || (cfg.flush_frequency > 
MAX_WORKLOAD_LOG_ENTRIES)) { + nvme_show_error("Invalid number of samples: %s. Valid values: 1-%d", + cfg.flush_frequency, MAX_WORKLOAD_LOG_ENTRIES); + return -EINVAL; + } + + opt = find_option(samplet, ARRAY_SIZE(samplet), cfg.sample_time); + if (opt < 0) { + nvme_show_error("invalid Sample interval: %s. Valid values: %s", + cfg.sample_time, sample_options); + return -EINVAL; + } + we.field.sampleTime = opt; + + opt = find_option(trk_types, ARRAY_SIZE(trk_types), cfg.tracker_type); + if (opt < 0) { + nvme_show_error("Invalid tracker type: %s. Valid types: %s", + cfg.tracker_type, type_options); + return -EINVAL; + } + we.field.contentGroup = opt; + + if (cfg.enable && cfg.disable) { + nvme_show_error("Can't enable disable simultaneously"); + return -EINVAL; + } + + if (cfg.enable || cfg.disable) { + we.field.trackerEnable = cfg.enable; + err = wltracker_config(&wlt, &we); + if (err < 0) { + nvme_show_error("tracker set-feature: %s", nvme_strerror(errno)); + return err; + } else if (err > 0) { + nvme_show_status(err); + return err; + } + } + + if (cfg.disable && !cfg.enable) { + printf("Tracker disabled\n"); + return 0; + } + + us_start = micros(); + run_time_us = cfg.run_time_s * 1000000; + while (elapsed_run_time_us < run_time_us) { + __u64 interval; + __u64 elapsed; + __u64 prev_elapsed_run_time_us = elapsed_run_time_us; + + err = wltracker_show_newer_entries(&wlt); + + if (err > 0) { + nvme_show_status(err); + return err; + } + interval = ((__u64)wlt.workload_log.header.samplePeriodInMilliseconds) * 1000 * + cfg.flush_frequency; + next_sample_us += interval; + elapsed_run_time_us = micros() - us_start; + elapsed = elapsed_run_time_us - prev_elapsed_run_time_us; + if (wlt.verbose > 1) + printf("elapsed_run_time: %lluus\n", elapsed_run_time_us); + if (interval > elapsed) { + __u64 period_us = min(next_sample_us - elapsed_run_time_us, + run_time_us - elapsed_run_time_us); + if (wlt.verbose > 1) + printf("Sleeping %lluus..\n", period_us); + usleep(period_us); + } + 
elapsed_run_time_us = micros() - us_start; + } + + err = wltracker_show_newer_entries(&wlt); + + elapsed_run_time_us = micros() - us_start; + if (wlt.verbose > 0) + printf("elapsed_run_time: %lluus\n", elapsed_run_time_us); + + if (err > 0) { + nvme_show_status(err); + return err; + } + return err; +} diff --git a/plugins/solidigm/solidigm-workload-tracker.h b/plugins/solidigm/solidigm-workload-tracker.h new file mode 100644 index 0000000..d3ecc16 --- /dev/null +++ b/plugins/solidigm/solidigm-workload-tracker.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2024 Solidigm. + * + * Author: leonardo.da.cunha@solidigm.com + */ + +int sldgm_get_workload_tracker(int argc, char **argv, struct command *cmd, struct plugin *plugin); |