diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 08:38:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 08:38:39 +0000 |
commit | 80b126032c7e73d273bc883e973b5f1a94aac581 (patch) | |
tree | e7d13cf4f02585c239665a6c5465735ffde429c8 /plugins/solidigm/solidigm-workload-tracker.c | |
parent | Releasing debian version 2.9.1-3. (diff) | |
download | nvme-cli-80b126032c7e73d273bc883e973b5f1a94aac581.tar.xz nvme-cli-80b126032c7e73d273bc883e973b5f1a94aac581.zip |
Merging upstream version 2.10.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'plugins/solidigm/solidigm-workload-tracker.c')
-rw-r--r-- | plugins/solidigm/solidigm-workload-tracker.c | 536 |
1 files changed, 536 insertions, 0 deletions
diff --git a/plugins/solidigm/solidigm-workload-tracker.c b/plugins/solidigm/solidigm-workload-tracker.c new file mode 100644 index 0000000..73bb3c3 --- /dev/null +++ b/plugins/solidigm/solidigm-workload-tracker.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2024 Solidigm. + * + * Authors: leonardo.da.cunha@solidigm.com + */ + +#include "common.h" +#include "nvme-print.h" +#include <errno.h> +#include <time.h> + +#define LID 0xf9 +#define FID 0xf1 +#define WLT2MS 25000 +#define MAX_WORKLOAD_LOG_ENTRIES 126 +#define MAX_WORKLOAD_LOG_ENTRY_SIZE 32 +#define MAX_FIELDS 15 + +char const *samplet[] = { + "default", + "1ms", + "5ms", + "10ms", + "50ms", + "100ms", + "500ms", + "1s", + "5s", + "10s", + "30s", + "1m", + "5m", + "10m", + "30m", + "1h" +}; + +char const *trk_types[] = { + "Base", + "CmdQ", + "Pattern", + "RandSeq", + "Throttle", + "Power", + "Defrag" +}; + +struct field { + __u8 size; + char *name; + char *desc; +}; + +struct field group_fields[][MAX_FIELDS] = { +{ // Base, group 0 + {4, "hostReads", "Host Read Count in Sectors"}, + {4, "hostWrites", "Host Write Count in Sectors"}, + {4, "nandWrites", "Nand Write Count in Sectors"}, + {1, "misalignment%", "% of Misaligned Sectors"}, + {1, "collision%", "% of Colliding Sectors"}, + {1, "randomWrite%", "% of Random Write Sectors vs. Sequential"}, + {1, "randomRead%", "% of Random Read Sectors vs. Sequential"}, + {4, "xorInvokedCount", "Count of XOR Operations Invoked"}, + {4, "hostSoftReadSuccess", "Count of Soft Reads Completed Successfully."}, + {4, "bandDefragRelocation", "Count of BDRs"}, + {1, "pwrThrottle%", "% of Throttle Period due to Power Regulation"}, + {1, "thmThrottle%", "% of Throttle Period due to Thermal Levels"}, + {1, "tbufBg%", "% of Background TBUF Work vs. All Available Work"}, + {1, "tbufHost%", "% of Host Requested TBUF Work vs. All Available Work"}, + {0} +}, +{ //CmdQ stats, group 1 + {4, "CmdQ_InternalReadQDepth", "Snapshot of the Internal Read Queue Depth"}, + {4, "CmdQ_DetectedWriteQDepth", "Snapshot of the Internal Write Queue Depth"}, + {4, "CmdQ_ReadCmdsPending", "Snapshot of the Internal Read Commands Pending"}, + {1, "misalignment%", "% of Misaligned Sectors"}, + {1, "collision%", "% of Colliding Sectors"}, + {1, "randomWrite%", "% of Random Write Sectors vs. Sequential"}, + {1, "randomRead%", "% of Random Read Sectors vs. Sequential"}, + {4, "CmdQ_WriteCmdsPending", "Snapshot of the Internal Write Commands Pending"}, + {4, "CmdQ_ReadCmdsOutstanding", "Snapshot of the Internal Read Commands Outstanding"}, + {4, "CmdQ_WriteCmdsOutstanding", "Snapshot of the Internal Read Commands Outstanding"}, + {1, "pwrThrottle%", "% of Throttle Period due to Power Regulation"}, + {1, "thmThrottle%", "% of Throttle Period due to Thermal Levels"}, + {1, "tbufBg%", "% of Background TBUF Work vs. All Available Work"}, + {1, "tbufHost%", "% of Host Requested TBUF Work vs. All Available Work"}, + {0} +}, +{ // test pattern, group 2 + {4, "x11223300"}, + {4, "x44556600_"}, + {4, "x77889900_"}, + {4, "xAABBCC00_"}, + {2, "xDD00"}, + {2, "xEE00"}, + {2, "xFF00"}, + {2, "x0_"}, + {1, "x00"}, + {1, "x80"}, + {1, "x__"}, + {1, "x8_"}, + {4, "x33322100"}, + {0} +}, +{ // Random vs. Sequential Data, group 3 + {4, "hostReads", "Host Read Count in Sectors"}, + {4, "hostWrites", "Host Write Count in Sectors"}, + {4, "nandWrites", "Nand Write Count in Sectors"}, + {4, "randomReadCmd", "Count of Random Read Commands (vs. Sequential)"}, + {4, "randomWriteCmd", "Count of Random Write Commands (vs. Sequential)"}, + {4, "hostReadCmd", "Count of Total Host Read Commands (vs. Sequential)"}, + {4, "hostWriteCmd", "Count of Total Host Read Commands (vs. Sequential)"}, + {1, NULL}, + {1, NULL}, + {1, "randomWrite%", "% of Random Write Sectors vs. Sequential"}, + {1, "randomThrottleRead%", "% of Random Read Sectors vs. Sequential"}, + {0} +}, +{ //Detailed Throttle Data, group 4 + {4, "pwrThrottleOn_ms", "Duration of Power Throttling in mS."}, + {4, "thmThrottleOn_ms", "Duration of Thermal Throttling in mS."}, + {4, "powerOn_us", "Duration of Power-on in uS."}, + {4, NULL}, + {4, NULL}, + {4, NULL}, + {4, NULL}, + {1, "pwrThrottle%", "% of Throttle Period due to Power Regulation"}, + {1, "thmThrottle%", "% of Throttle Period due to Thermal Levels"}, + {0} +}, +{ // Detailed Power Data, group 5 + // PMIC and/or Input Voltage Power + {4, "vin1Power", "in uW"}, + {4, "vin2Power"}, + // NAND Workload + {4, "nandWrites", "Nand Write Count in Sectors"}, + {4, "nandReads", "Nand Read Count in Sectors"}, + // Power Governor (if not enabled, all-0s) + {4, "lastNandAvgPwr"}, + {4, "lastDriveAvgPwr"}, + {4, "NscPwgSysCreditCnt"}, + {4, "burstPowerBudget"}, + {0} +}, +{ // Defrag, group 6 + {4, "hostReads", "Host Read Count in Sectors"}, + {4, "hostWrites", "Host Write Count in Sectors"}, + {4, "nandWrites", "Nand Write Count in Sectors"}, + {4, "defragSlots", "Current defragSlots"}, + {4, "hostSlots", "hostSlots"}, + {4, "totalSlots", "Total slots"}, + {1, "hostBufferUse%", "% of WCM_GetHostBuffersInUse to WCM_GetDesiredHostBuffer"}, + {1, "defragBufferUse%", "% of defragBuffer to Desired defrag buffer %"}, + {1, "defragSlotsUse%", "defragSlots to Total defrag slots %"}, + {1, "hostSlotsUse%", "hostSlots to Total defrag slots %"}, + {1, "aiuUse%", "% of AvailableIndirectionUnits to Start Setpoint IU"}, + {1, "isImminentFRorWL", "defrag/Wear leveling is imminent"}, + {1, "defragType", "defrag type"}, + {0} +}}; + +#pragma pack(push, 1) +union WorkloadLogEnable { + struct { + __u32 trackerEnable : 1; + __u32 triggerEnable : 1; + __u32 triggerSynchronous : 1; // trigger mode, 1=Synchronous,0=ASynchronous(Latency) + __u32 triggerDelta : 1; // trigger value mode, 1=delta, 0=current value + __u32 triggerDwordIndex : 3; // trigger dword index, 0~7 of a log entry + __u32 triggerByteWordIndex : 2; // trigger byte or word index,byte=0~3, word=0~1 + __u32 triggerSize : 2; // trigger size, 1=byte, 2=word, 3=dword as a trigger + __u32 sampleTime : 4; // trigger sample time + __u32 contentGroup : 4; // content group select + __u32 stopCount : 12;// event limit,if<>0,stop tracker after stopCount events + __u32 eventDumpEnable : 1; // trigger event dump enable + } field; + __u32 dword; +}; + +struct workloadLogHeader { + __u16 majorVersion; // Major Version + __u16 minorVersion; // Minor Version + __u32 workloadLogCount; // Number of Entries in the Workload Log + __u32 reserved; // reserve for future + __u32 triggeredEvents; // Count of events triggered + __u32 samplePeriodInMilliseconds; // Sample Period In Milliseconds + __u64 timestamp_lastEntry; // Timestamp for the last full entry + __u64 timestamp_triggered; // Timestamp at the point of trigger + __u32 trackerEnable; // Workload trigger and enable settings + __u32 triggerthreshold; // Trigger threshold + __u32 triggeredValue; // Actual value fired the trigger +}; + + +struct workloadLog { // Full WL Log Structure + struct workloadLogHeader header; + __u8 entry[MAX_WORKLOAD_LOG_ENTRIES][MAX_WORKLOAD_LOG_ENTRY_SIZE]; +}; +#pragma pack(pop) + +struct wltracker { + int fd; + struct workloadLog workload_log; + size_t entry_count; + unsigned int verbose; +}; + +static void wltracker_print_field_names(struct wltracker *wlt) +{ + struct workloadLog *log = &wlt->workload_log; + __u8 cnt = log->header.workloadLogCount; + union WorkloadLogEnable workloadEnable = (union WorkloadLogEnable)log->header.trackerEnable; + __u8 content_group = workloadEnable.field.contentGroup; + + if (cnt == 0) + return; + + printf("%-16s", "timestamp"); + + for (int i = 0 ; i < MAX_FIELDS; i++) { + struct field f = group_fields[content_group][i]; + + if (f.size == 0) + break; + if (f.name == NULL) + continue; + printf("%s ", f.name); + } + + if (wlt->verbose > 1) + printf("%s", "entry#"); + + printf("\n"); +} + +static void wltracker_print_header(struct wltracker *wlt) +{ + struct workloadLog *log = &wlt->workload_log; + __u8 cnt = log->header.workloadLogCount; + union WorkloadLogEnable workloadEnable = (union WorkloadLogEnable)log->header.trackerEnable; + __u8 content_group = workloadEnable.field.contentGroup; + + printf("%-20s %u.%u\n", "Log page version:", le16_to_cpu(log->header.majorVersion), + le16_to_cpu(log->header.minorVersion)); + printf("%-20s %u\n", "Sample period(ms):", + le32_to_cpu(log->header.samplePeriodInMilliseconds)); + printf("%-20s %lu\n", "timestamp_lastEntry:", + le64_to_cpu(log->header.timestamp_lastEntry) / WLT2MS); + printf("%-20s %lu\n", "timestamp_triggered:", + le64_to_cpu(log->header.timestamp_triggered/1000)); + printf("%-20s 0x%x\n", "trackerEnable:", le32_to_cpu(log->header.trackerEnable)); + printf("%-20s %u\n", "Triggerthreshold:", + le32_to_cpu(log->header.triggerthreshold)); + printf("%-20s %u\n", "ValueTriggered:", le32_to_cpu(log->header.triggeredValue)); + printf("%-20s %s\n", "Tracker Type:", trk_types[content_group]); + printf("%-30s %u\n", "Total workload log entries:", le16_to_cpu(cnt)); + printf("%-20s %ld\n\n", "Sample count:", wlt->entry_count); + if (wlt->entry_count != 0) + wltracker_print_field_names(wlt); +} + +static int wltracker_show_newer_entries(struct wltracker *wlt) +{ + struct workloadLog *log = &wlt->workload_log; + __u8 cnt; + __u8 content_group; + static __u64 last_timestamp_ms; + __u64 timestamp = 0; + union WorkloadLogEnable workloadEnable; + + int err = nvme_get_log_simple(wlt->fd, LID, sizeof(struct workloadLog), log); + + if (err > 0) { + nvme_show_status(err); + return err; + } + if (err < 0) + return err; + + if (wlt->verbose) + wltracker_print_header(wlt); + + cnt = log->header.workloadLogCount; + workloadEnable = (union WorkloadLogEnable)log->header.trackerEnable; + content_group = workloadEnable.field.contentGroup; + + if (cnt == 0) { + nvme_show_error("Warning : No valid workload log data\n"); + return 0; + } + + timestamp = (le64_to_cpu(log->header.timestamp_lastEntry) / WLT2MS) - + (log->header.samplePeriodInMilliseconds * (cnt - 1)); + + + if (wlt->entry_count == 0) + wltracker_print_field_names(wlt); + + for (int i = cnt - 1; i >= 0; i--) { + int offset = 0; + __u8 *entry = (__u8 *) &log->entry[i]; + bool is_old = timestamp <= last_timestamp_ms; + + if (is_old) { + timestamp += log->header.samplePeriodInMilliseconds; + continue; + } + printf("%-16llu", timestamp); + for (int j = 0; j < MAX_FIELDS; j++) { + __u32 val = 0; + struct field f = group_fields[content_group][j]; + + if (f.size == 0) { + if (wlt->verbose > 1) + printf("%-*i", (int)sizeof("entry#"), i); + printf("\n"); + break; + } + if (f.name == NULL) + continue; + + switch (f.size) { + case 1: + val = *(entry+offset); + break; + case 2: + val = *(__u16 *)(entry + offset); + break; + case 4: + val = *(__u32 *)(entry + offset); + break; + default: + nvme_show_error("Bad field size"); + } + offset += f.size; + + printf("%-*u ", (int)strlen(f.name), val); + } + wlt->entry_count++; + timestamp += log->header.samplePeriodInMilliseconds; + } + last_timestamp_ms = log->header.timestamp_lastEntry / WLT2MS; + return 0; +} + +int wltracker_config(struct wltracker *wlt, union WorkloadLogEnable *we) +{ + struct nvme_set_features_args args = { + .args_size = sizeof(args), + .fd = wlt->fd, + .fid = FID, + .cdw11 = we->dword, + .timeout = NVME_DEFAULT_IOCTL_TIMEOUT, + }; + + return nvme_set_features(&args); +} + +static int stricmp(char const *a, char const *b) +{ + for (; *a || *b; a++, b++) + if (tolower((unsigned char)*a) != tolower((unsigned char)*b)) + return 1; + return 0; +} + +static int find_option(char const *list[], int size, const char *val) +{ + for (int i = 0; i < size; i++) { + if (!stricmp(val, list[i])) + return i; + } + return -EINVAL; +} + +static void join(char *dest, char const *list[], size_t list_size) +{ + strcat(dest, list[0]); + for (int i = 1; i < list_size; i++) { + strcat(dest, "|"); + strcat(dest, list[i]); + } +} + +__u64 micros(void) +{ + struct timespec ts; + __u64 us; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + us = (((__u64)ts.tv_sec)*1000000) + (((__u64)ts.tv_nsec)/1000); + return us; +} + +int sldgm_get_workload_tracker(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + struct wltracker wlt = {0}; + union WorkloadLogEnable we = {0}; + + _cleanup_nvme_dev_ struct nvme_dev *dev = NULL; + const char *desc = "Real Time capture Workload Tracker samples"; + const char *sample_interval = "Sample interval"; + const char *run_time = "Limit runtime capture time in seconds"; + const char *flush_frequency = + "Samples (1 to 126) to wait for extracting data. Default 100 samples"; + char type_options[80] = {0}; + char sample_options[80] = {0}; + __u64 us_start; + __u64 run_time_us; + __u64 elapsed_run_time_us = 0; + __u64 next_sample_us = 0; + int opt; + int err; + + struct config { + bool enable; + bool disable; + const char *tracker_type; + const char *sample_time; + int run_time_s; + int flush_frequency; + }; + + struct config cfg = { + .sample_time = samplet[0], + .flush_frequency = 100, + .tracker_type = trk_types[0], + }; + + join(type_options, trk_types, ARRAY_SIZE(trk_types)); + join(sample_options, samplet, ARRAY_SIZE(samplet)); + + OPT_ARGS(opts) = { + OPT_FLAG("enable", 'e', &cfg.enable, "tracker enable"), + OPT_FLAG("disable", 'd', &cfg.disable, "tracker disable"), + OPT_STRING("sample-time", 's', sample_options, &cfg.sample_time, sample_interval), + OPT_STRING("type", 't', type_options, &cfg.tracker_type, "Tracker type"), + OPT_INT("run-time", 'r', &cfg.run_time_s, run_time), + OPT_INT("flush-freq", 'f', &cfg.flush_frequency, flush_frequency), + OPT_INCR("verbose", 'v', &wlt.verbose, "Increase logging verbosity"), + OPT_END() + }; + + err = parse_and_open(&dev, argc, argv, desc, opts); + if (err) + return err; + + wlt.fd = dev_fd(dev); + + if ((cfg.flush_frequency < 1) || (cfg.flush_frequency > MAX_WORKLOAD_LOG_ENTRIES)) { + nvme_show_error("Invalid number of samples: %s. Valid values: 1-%d", + cfg.flush_frequency, MAX_WORKLOAD_LOG_ENTRIES); + return -EINVAL; + } + + opt = find_option(samplet, ARRAY_SIZE(samplet), cfg.sample_time); + if (opt < 0) { + nvme_show_error("invalid Sample interval: %s. Valid values: %s", + cfg.sample_time, sample_options); + return -EINVAL; + } + we.field.sampleTime = opt; + + opt = find_option(trk_types, ARRAY_SIZE(trk_types), cfg.tracker_type); + if (opt < 0) { + nvme_show_error("Invalid tracker type: %s. Valid types: %s", + cfg.tracker_type, type_options); + return -EINVAL; + } + we.field.contentGroup = opt; + + if (cfg.enable && cfg.disable) { + nvme_show_error("Can't enable disable simultaneously"); + return -EINVAL; + } + + if (cfg.enable || cfg.disable) { + we.field.trackerEnable = cfg.enable; + err = wltracker_config(&wlt, &we); + if (err < 0) { + nvme_show_error("tracker set-feature: %s", nvme_strerror(errno)); + return err; + } else if (err > 0) { + nvme_show_status(err); + return err; + } + } + + if (cfg.disable && !cfg.enable) { + printf("Tracker disabled\n"); + return 0; + } + + us_start = micros(); + run_time_us = cfg.run_time_s * 1000000; + while (elapsed_run_time_us < run_time_us) { + __u64 interval; + __u64 elapsed; + __u64 prev_elapsed_run_time_us = elapsed_run_time_us; + + err = wltracker_show_newer_entries(&wlt); + + if (err > 0) { + nvme_show_status(err); + return err; + } + interval = ((__u64)wlt.workload_log.header.samplePeriodInMilliseconds) * 1000 * + cfg.flush_frequency; + next_sample_us += interval; + elapsed_run_time_us = micros() - us_start; + elapsed = elapsed_run_time_us - prev_elapsed_run_time_us; + if (wlt.verbose > 1) + printf("elapsed_run_time: %lluus\n", elapsed_run_time_us); + if (interval > elapsed) { + __u64 period_us = min(next_sample_us - elapsed_run_time_us, + run_time_us - elapsed_run_time_us); + if (wlt.verbose > 1) + printf("Sleeping %lluus..\n", period_us); + usleep(period_us); + } + elapsed_run_time_us = micros() - us_start; + } + + err = wltracker_show_newer_entries(&wlt); + + elapsed_run_time_us = micros() - us_start; + if (wlt.verbose > 0) + printf("elapsed_run_time: %lluus\n", elapsed_run_time_us); + + if (err > 0) { + nvme_show_status(err); + return err; + } + return err; +} |