From 5b7ddc4bd2dcdde201ffa2681ede9a0a029bad96 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 2 Jul 2021 22:47:46 +0200 Subject: Adding upstream version 1.14. Signed-off-by: Daniel Baumann --- plugins/intel/intel-nvme.c | 513 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 427 insertions(+), 86 deletions(-) (limited to 'plugins/intel/intel-nvme.c') diff --git a/plugins/intel/intel-nvme.c b/plugins/intel/intel-nvme.c index 8217c46..27b065d 100644 --- a/plugins/intel/intel-nvme.c +++ b/plugins/intel/intel-nvme.c @@ -11,7 +11,6 @@ #include "nvme.h" #include "nvme-print.h" #include "nvme-ioctl.h" -#include "json.h" #include "plugin.h" #include "argconfig.h" @@ -54,6 +53,13 @@ struct nvme_additional_smart_log { struct nvme_additional_smart_log_item pll_lock_loss_cnt; struct nvme_additional_smart_log_item nand_bytes_written; struct nvme_additional_smart_log_item host_bytes_written; + struct nvme_additional_smart_log_item host_ctx_wear_used; + struct nvme_additional_smart_log_item perf_stat_indicator; + struct nvme_additional_smart_log_item re_alloc_sectr_cnt; + struct nvme_additional_smart_log_item soft_ecc_err_rate; + struct nvme_additional_smart_log_item unexp_power_loss; + struct nvme_additional_smart_log_item media_bytes_read; + struct nvme_additional_smart_log_item avail_fw_downgrades; }; struct nvme_vu_id_ctrl_field { /* CDR MR5 */ @@ -218,60 +224,118 @@ static void show_intel_smart_log_jsn(struct nvme_additional_smart_log *smart, json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->host_bytes_written.raw)); json_object_add_value_object(dev_stats, "host_bytes_written", entry_stats); + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->host_ctx_wear_used.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->host_ctx_wear_used.raw)); + json_object_add_value_object(dev_stats, "host_ctx_wear_used", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->perf_stat_indicator.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->perf_stat_indicator.raw)); + json_object_add_value_object(dev_stats, "perf_stat_indicator", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->re_alloc_sectr_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->re_alloc_sectr_cnt.raw)); + json_object_add_value_object(dev_stats, "re_alloc_sectr_cnt", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->soft_ecc_err_rate.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->soft_ecc_err_rate.raw)); + json_object_add_value_object(dev_stats, "soft_ecc_err_rate", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->unexp_power_loss.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->unexp_power_loss.raw)); + json_object_add_value_object(dev_stats, "unexp_power_loss", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->media_bytes_read.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->media_bytes_read.raw)); + json_object_add_value_object(dev_stats, "media_bytes_read", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->avail_fw_downgrades.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->avail_fw_downgrades.raw)); + json_object_add_value_object(dev_stats, "avail_fw_downgrades", entry_stats); + json_object_add_value_object(root, "Device stats", dev_stats); json_print_object(root, NULL); json_free_object(root); } +static char *id_to_key(__u8 id) +{ + switch (id) { + case 0xAB: + return "program_fail_count"; + case 0xAC: + return "erase_fail_count"; + case 0xAD: + return "wear_leveling_count"; + case 0xB8: + return "e2e_error_detect_count"; + case 0xC7: + return "crc_error_count"; + case 0xE2: + return "media_wear_percentage"; + case 0xE3: + return "host_reads"; + case 0xE4: + return "timed_work_load"; + case 0xEA: + return "thermal_throttle_status"; + case 0xF0: + return "retry_buff_overflow_count"; + case 0xF3: + return "pll_lock_loss_counter"; + case 0xF4: + return "nand_bytes_written"; + case 0xF5: + return "host_bytes_written"; + case 0xF6: + return "host_context_wear_used"; + case 0xF7: + return "performance_status_indicator"; + case 0xF8: + return "media_bytes_read"; + case 0xF9: + return "available_fw_downgrades"; + case 0x05: + return "re-allocated_sector_count"; + case 0x0D: + return "soft_ecc_error_rate"; + case 0xAE: + return "unexpected_power_loss"; + default: + return "Invalid ID"; + } +} + +static void print_intel_smart_log_items(struct nvme_additional_smart_log_item *item) +{ + if (!item->key) + return; + + printf("%#x %-45s %3d %"PRIu64"\n", + item->key, id_to_key(item->key), + item->norm, int48_to_long(item->raw)); +} + static void show_intel_smart_log(struct nvme_additional_smart_log *smart, unsigned int nsid, const char *devname) { + struct nvme_additional_smart_log_item *iter = &smart->program_fail_cnt; + int num_items = sizeof(struct nvme_additional_smart_log) / + sizeof(struct nvme_additional_smart_log_item); + printf("Additional Smart Log for NVME device:%s namespace-id:%x\n", devname, nsid); - printf("key normalized raw\n"); - printf("program_fail_count : %3d%% %"PRIu64"\n", - smart->program_fail_cnt.norm, - int48_to_long(smart->program_fail_cnt.raw)); - printf("erase_fail_count : %3d%% %"PRIu64"\n", - smart->erase_fail_cnt.norm, - int48_to_long(smart->erase_fail_cnt.raw)); - printf("wear_leveling : %3d%% min: %u, max: %u, avg: %u\n", - smart->wear_leveling_cnt.norm, - le16_to_cpu(smart->wear_leveling_cnt.wear_level.min), - le16_to_cpu(smart->wear_leveling_cnt.wear_level.max), - le16_to_cpu(smart->wear_leveling_cnt.wear_level.avg)); - printf("end_to_end_error_detection_count: %3d%% %"PRIu64"\n", - smart->e2e_err_cnt.norm, - int48_to_long(smart->e2e_err_cnt.raw)); - printf("crc_error_count : %3d%% %"PRIu64"\n", - smart->crc_err_cnt.norm, - int48_to_long(smart->crc_err_cnt.raw)); - printf("timed_workload_media_wear : %3d%% %.3f%%\n", - smart->timed_workload_media_wear.norm, - ((float)int48_to_long(smart->timed_workload_media_wear.raw)) / 1024); - printf("timed_workload_host_reads : %3d%% %"PRIu64"%%\n", - smart->timed_workload_host_reads.norm, - int48_to_long(smart->timed_workload_host_reads.raw)); - printf("timed_workload_timer : %3d%% %"PRIu64" min\n", - smart->timed_workload_timer.norm, - int48_to_long(smart->timed_workload_timer.raw)); - printf("thermal_throttle_status : %3d%% %u%%, cnt: %u\n", - smart->thermal_throttle_status.norm, - smart->thermal_throttle_status.thermal_throttle.pct, - smart->thermal_throttle_status.thermal_throttle.count); - printf("retry_buffer_overflow_count : %3d%% %"PRIu64"\n", - smart->retry_buffer_overflow_cnt.norm, - int48_to_long(smart->retry_buffer_overflow_cnt.raw)); - printf("pll_lock_loss_count : %3d%% %"PRIu64"\n", - smart->pll_lock_loss_cnt.norm, - int48_to_long(smart->pll_lock_loss_cnt.raw)); - printf("nand_bytes_written : %3d%% sectors: %"PRIu64"\n", - smart->nand_bytes_written.norm, - int48_to_long(smart->nand_bytes_written.raw)); - printf("host_bytes_written : %3d%% sectors: %"PRIu64"\n", - smart->host_bytes_written.norm, - int48_to_long(smart->host_bytes_written.raw)); + printf("ID KEY Normalized Raw\n"); + + for (int i = 0; i < num_items; i++, iter++) + print_intel_smart_log_items(iter); } static int get_additional_smart_log(int argc, char **argv, struct command *cmd, struct plugin *plugin) @@ -307,7 +371,7 @@ static int get_additional_smart_log(int argc, char **argv, struct command *cmd, return fd; err = nvme_get_log(fd, cfg.namespace_id, 0xca, false, - sizeof(smart_log), &smart_log); + NVME_NO_LOG_LSP, sizeof(smart_log), &smart_log); if (!err) { if (cfg.json) show_intel_smart_log_jsn(&smart_log, cfg.namespace_id, devicename); @@ -347,7 +411,7 @@ static int get_market_log(int argc, char **argv, struct command *cmd, struct plu return fd; err = nvme_get_log(fd, NVME_NSID_ALL, 0xdd, false, - sizeof(log), log); + NVME_NO_LOG_LSP, sizeof(log), log); if (!err) { if (!cfg.raw_binary) printf("Intel Marketing Name Log:\n%s\n", log); @@ -409,7 +473,7 @@ static int get_temp_stats_log(int argc, char **argv, struct command *cmd, struct return fd; err = nvme_get_log(fd, NVME_NSID_ALL, 0xc5, false, - sizeof(stats), &stats); + NVME_NO_LOG_LSP, sizeof(stats), &stats); if (!err) { if (!cfg.raw_binary) show_temp_stats(&stats); @@ -427,6 +491,28 @@ struct intel_lat_stats { __u32 data[1216]; }; +struct __attribute__((__packed__)) optane_lat_stats { + __u16 maj; + __u16 min; + __u64 data[9]; +}; + +#define MEDIA_MAJOR_IDX 0 +#define MEDIA_MINOR_IDX 1 +#define MEDIA_MAX_LEN 2 +#define OPTANE_V1000_BUCKET_LEN 8 + +static struct intel_lat_stats stats; +static struct optane_lat_stats v1000_stats; + +struct v1000_thresholds { + __u32 read[OPTANE_V1000_BUCKET_LEN]; + __u32 write[OPTANE_V1000_BUCKET_LEN]; +}; + +static struct v1000_thresholds v1000_bucket; +static __u16 media_version[MEDIA_MAX_LEN] = {0}; + enum FormatUnit { US, MS, @@ -536,6 +622,28 @@ static void show_lat_stats_bucket(struct intel_lat_stats *stats, printf("%-*d\n", COL_WIDTH, stats->data[i]); } +static void show_optane_lat_stats_bucket(struct optane_lat_stats *stats, + __u32 lower_us, enum inf_bound_type start_type, + __u32 upper_us, enum inf_bound_type end_type, int i) +{ + enum FormatUnit fu = S; + char buffer[BUFSIZE]; + + init_buffer(buffer, BUFSIZE); + printf("%-*d", COL_WIDTH, i); + + fu = get_seconds_magnitude(lower_us); + set_unit_string(buffer, lower_us, fu, start_type); + printf("%-*s", COL_WIDTH, buffer); + + fu = get_seconds_magnitude(upper_us); + set_unit_string(buffer, upper_us, fu, end_type); + printf("%-*s", COL_WIDTH, buffer); + + printf("%-*lu\n", COL_WIDTH, (long unsigned int)stats->data[i]); +} + + static void show_lat_stats_linear(struct intel_lat_stats *stats, __u32 start_offset, __u32 end_offset, __u32 bytes_per, __u32 us_step, bool nonzero_print) @@ -622,6 +730,31 @@ static void json_add_bucket(struct intel_lat_stats *stats, json_object_add_value_int(bucket, "value", val); } +static void json_add_bucket_optane(struct json_object *bucket_list, __u32 id, + __u32 lower_us, enum inf_bound_type start_type, + __u32 upper_us, enum inf_bound_type end_type, __u32 val) +{ + char buffer[BUFSIZE]; + struct json_object *bucket = json_create_object(); + + init_buffer(buffer, BUFSIZE); + + json_object_add_value_object(bucket_list, + "bucket", bucket); + json_object_add_value_int(bucket, "id", id); + + set_unit_string(buffer, lower_us, + get_seconds_magnitude(lower_us), start_type); + json_object_add_value_string(bucket, "start", buffer); + + set_unit_string(buffer, upper_us, + get_seconds_magnitude(upper_us), end_type); + json_object_add_value_string(bucket, "end", buffer); + + json_object_add_value_uint(bucket, "value", val); + +} + static void json_lat_stats_linear(struct intel_lat_stats *stats, struct json_object *bucket_list, __u32 start_offset, __u32 end_offset, __u32 bytes_per, @@ -714,32 +847,121 @@ static void show_lat_stats_4_0(struct intel_lat_stats *stats) } } -static void json_lat_stats(struct intel_lat_stats *stats, int write) +static void jason_lat_stats_v1000_0(struct optane_lat_stats *stats, int write) +{ + int i; + struct json_object *root = json_create_object(); + struct json_object *bucket_list = json_create_object(); + + lat_stats_make_json_root(root, bucket_list, write); + + if (write) { + for (i = 0; i < OPTANE_V1000_BUCKET_LEN - 1; i++) + json_add_bucket_optane(bucket_list, i, + v1000_bucket.write[i], NOINF, + v1000_bucket.write[i + 1] - 1, + NOINF, + stats->data[i]); + + json_add_bucket_optane(bucket_list, + OPTANE_V1000_BUCKET_LEN - 1, + v1000_bucket.write[i], + NOINF, + v1000_bucket.write[i], + POSINF, stats->data[i]); + } else { + for (i = 0; i < OPTANE_V1000_BUCKET_LEN - 1; i++) + json_add_bucket_optane(bucket_list, i, + v1000_bucket.read[i], NOINF, + v1000_bucket.read[i + 1] - 1, + NOINF, + stats->data[i]); + + json_add_bucket_optane(bucket_list, + OPTANE_V1000_BUCKET_LEN - 1, + v1000_bucket.read[i], + NOINF, + v1000_bucket.read[i], + POSINF, stats->data[i]); + } + + struct json_object *subroot = json_create_object(); + json_object_add_value_object(root, "Average latency since last reset", subroot); + + json_object_add_value_uint(subroot, "value in us", stats->data[8]); + + json_print_object(root, NULL); + json_free_object(root); + +} + +static void show_lat_stats_v1000_0(struct optane_lat_stats *stats, int write) +{ + int i; + if (write) { + for (i = 0; i < OPTANE_V1000_BUCKET_LEN - 1; i++) + show_optane_lat_stats_bucket(stats, + v1000_bucket.write[i], + NOINF, + v1000_bucket.write[i + 1] -1, + NOINF, i); + + show_optane_lat_stats_bucket(stats, v1000_bucket.write[i], + NOINF, v1000_bucket.write[i], + POSINF, i); + } else { + for (i = 0; i < OPTANE_V1000_BUCKET_LEN - 1; i++) + show_optane_lat_stats_bucket(stats, + v1000_bucket.read[i], + NOINF, + v1000_bucket.read[i + 1] -1, + NOINF, i); + + show_optane_lat_stats_bucket(stats, v1000_bucket.read[i], + NOINF, v1000_bucket.read[i], + POSINF, i); + } + + printf("Average latency since last reset: %lu us\n", (long unsigned int)stats->data[8]); + +} + +static void json_lat_stats(int write) { - switch (stats->maj) { + switch (media_version[MEDIA_MAJOR_IDX]) { case 3: - json_lat_stats_3_0(stats, write); + json_lat_stats_3_0(&stats, write); break; case 4: - switch (stats->min) { + switch (media_version[MEDIA_MINOR_IDX]) { case 0: case 1: case 2: case 3: case 4: case 5: - json_lat_stats_4_0(stats, write); + json_lat_stats_4_0(&stats, write); break; default: - printf(("Unsupported minor revision (%u.%u)\n" - "Defaulting to format for rev4.0"), - stats->maj, stats->min); + printf("Unsupported minor revision (%u.%u)\n", + stats.maj, stats.min); + break; + } + break; + case 1000: + switch (media_version[MEDIA_MINOR_IDX]) { + case 0: + jason_lat_stats_v1000_0(&v1000_stats, write); + break; + default: + printf("Unsupported minor revision (%u.%u)\n", + stats.maj, stats.min); break; } break; default: printf("Unsupported revision (%u.%u)\n", - stats->maj, stats->min); + stats.maj, stats.min); break; } printf("\n"); @@ -752,69 +974,79 @@ static void print_dash_separator(int count) putchar('\n'); } -static void show_lat_stats(struct intel_lat_stats *stats, int write) +static void show_lat_stats(int write) { static const int separator_length = 50; printf("Intel IO %s Command Latency Statistics\n", write ? "Write" : "Read"); printf("Major Revision : %u\nMinor Revision : %u\n", - stats->maj, stats->min); + media_version[MEDIA_MAJOR_IDX], media_version[MEDIA_MINOR_IDX]); print_dash_separator(separator_length); printf("%-12s%-12s%-12s%-20s\n", "Bucket", "Start", "End", "Value"); print_dash_separator(separator_length); - switch (stats->maj) { + switch (media_version[MEDIA_MAJOR_IDX]) { case 3: - show_lat_stats_3_0(stats); + show_lat_stats_3_0(&stats); break; case 4: - switch (stats->min) { + switch (media_version[MEDIA_MINOR_IDX]) { case 0: case 1: case 2: case 3: case 4: case 5: - show_lat_stats_4_0(stats); + show_lat_stats_4_0(&stats); break; default: - printf(("Unsupported minor revision (%u.%u)\n" - "Defaulting to format for rev4.0"), - stats->maj, stats->min); + printf("Unsupported minor revision (%u.%u)\n", + stats.maj, stats.min); + break; + } + break; + case 1000: + switch (media_version[MEDIA_MINOR_IDX]) { + case 0: + show_lat_stats_v1000_0(&v1000_stats, write); + break; + default: + printf("Unsupported minor revision (%u.%u)\n", + stats.maj, stats.min); break; } break; default: printf("Unsupported revision (%u.%u)\n", - stats->maj, stats->min); + stats.maj, stats.min); break; } } static int get_lat_stats_log(int argc, char **argv, struct command *cmd, struct plugin *plugin) { - struct intel_lat_stats stats; - enum nvme_print_flags flags; + int err, fd; const char *desc = "Get Intel Latency Statistics log and show it."; - const char *raw = "dump output in binary format"; + const char *raw = "Dump output in binary format"; + const char *json= "Dump output in json format"; const char *write = "Get write statistics (read default)"; + struct config { - char *output_format; int raw_binary; + int json; int write; }; struct config cfg = { - .output_format = "normal", }; OPT_ARGS(opts) = { - OPT_FLAG("write", 'w', &cfg.write, write), - OPT_FMT("output-format", 'o', &cfg.output_format, "Output format: normal|json|binary"), - OPT_FLAG("raw-binary", 'b', &cfg.raw_binary, raw), + OPT_FLAG("write", 'w', &cfg.write, write), + OPT_FLAG("raw-binary", 'b', &cfg.raw_binary, raw), + OPT_FLAG("json", 'j', &cfg.json, json), OPT_END() }; @@ -822,26 +1054,54 @@ static int get_lat_stats_log(int argc, char **argv, struct command *cmd, struct if (fd < 0) return fd; - err = flags = validate_output_format(cfg.output_format); - if (flags < 0) + /* Query maj and minor version first */ + err = nvme_get_log(fd, NVME_NSID_ALL, cfg.write ? 0xc2 : 0xc1, + false, NVME_NO_LOG_LSP, sizeof(media_version), + media_version); + if (err) goto close_fd; - if (cfg.raw_binary) - flags = BINARY; + if (media_version[0] == 1000) { + __u32 thresholds[OPTANE_V1000_BUCKET_LEN] = {0}; + __u32 result; + + err = nvme_get_feature(fd, 0, 0xf7, 0, cfg.write ? 0x1 : 0x0, + sizeof(thresholds), thresholds, &result); + if (err) { + fprintf(stderr, "Quering thresholds failed. NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + goto close_fd; + } + + /* Update bucket thresholds to be printed */ + if (cfg.write) { + for (int i = 0; i < OPTANE_V1000_BUCKET_LEN; i++) + v1000_bucket.write[i] = thresholds[i]; + } else { + for (int i = 0; i < OPTANE_V1000_BUCKET_LEN; i++) + v1000_bucket.read[i] = thresholds[i]; + + } + + err = nvme_get_log(fd, NVME_NSID_ALL, cfg.write ? 0xc2 : 0xc1, + false, NVME_NO_LOG_LSP, sizeof(v1000_stats), + &v1000_stats); + } else { + err = nvme_get_log(fd, NVME_NSID_ALL, cfg.write ? 0xc2 : 0xc1, + false, NVME_NO_LOG_LSP, sizeof(stats), + &stats); + } - err = nvme_get_log(fd, NVME_NSID_ALL, cfg.write ? 0xc2 : 0xc1, - false, sizeof(stats), &stats); if (!err) { - if (flags & JSON) - json_lat_stats(&stats, cfg.write); - else if (flags & BINARY) - d_raw((unsigned char *)&stats, sizeof(stats)); + if (cfg.json) + json_lat_stats(cfg.write); + else if (!cfg.raw_binary) + show_lat_stats(cfg.write); else - show_lat_stats(&stats, cfg.write); + d_raw((unsigned char *)&stats, sizeof(stats)); } else if (err > 0) fprintf(stderr, "NVMe Status:%s(%x)\n", nvme_status_to_string(err), err); - close_fd: close(fd); return err; @@ -1314,3 +1574,84 @@ static int enable_lat_stats_tracking(int argc, char **argv, } return fd; } + +static int set_lat_stats_thresholds(int argc, char **argv, + struct command *command, struct plugin *plugin) +{ + int err, fd, num; + const char *desc = "Write Intel Bucket Thresholds for Latency Statistics Tracking"; + const char *bucket_thresholds = "Bucket Threshold List, comma separated list: 0, 10, 20 ..."; + const char *write = "Set write bucket Thresholds for latency tracking (read default)"; + + const __u32 nsid = 0; + const __u8 fid = 0xf7; + const __u32 cdw12 = 0x0; + const __u32 save = 0; + __u32 result; + + struct config { + int write; + char *bucket_thresholds; + }; + + struct config cfg = { + .write = 0, + .bucket_thresholds = "", + }; + + OPT_ARGS(opts) = { + OPT_FLAG("write", 'w', &cfg.write, write), + OPT_LIST("bucket-thresholds", 't', &cfg.bucket_thresholds, + bucket_thresholds), + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + + if (fd < 0) + return fd; + + /* Query maj and minor version first to figure out the amount of + * valid buckets a user is allowed to modify. Read or write doesn't + * matter + */ + err = nvme_get_log(fd, NVME_NSID_ALL, 0xc2, + false, NVME_NO_LOG_LSP, sizeof(media_version), + media_version); + if (err) { + fprintf(stderr, "Querying media version failed. NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + goto close_fd; + } + + if (media_version[0] == 1000) { + int thresholds[OPTANE_V1000_BUCKET_LEN] = {0}; + num = argconfig_parse_comma_sep_array(cfg.bucket_thresholds, + thresholds, + sizeof(thresholds)); + if (num == -1) { + fprintf(stderr, "ERROR: Bucket list is malformed\n"); + goto close_fd; + + } + + err = nvme_set_feature(fd, nsid, fid, cfg.write ? 0x1 : 0x0, + cdw12, save, OPTANE_V1000_BUCKET_LEN, + thresholds, &result); + + if (err > 0) { + fprintf(stderr, "NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + } else if (err < 0) { + perror("Enable latency tracking"); + fprintf(stderr, "Command failed while parsing.\n"); + } + } else { + fprintf(stderr, "Unsupported command\n"); + } + +close_fd: + close(fd); + return err; +} + -- cgit v1.2.3