diff options
Diffstat (limited to '')
-rw-r--r-- | plugins/scaleflux/sfx-nvme.c | 873 | ||||
-rw-r--r-- | plugins/scaleflux/sfx-nvme.h | 25 |
2 files changed, 898 insertions, 0 deletions
diff --git a/plugins/scaleflux/sfx-nvme.c b/plugins/scaleflux/sfx-nvme.c new file mode 100644 index 0000000..846ca77 --- /dev/null +++ b/plugins/scaleflux/sfx-nvme.c @@ -0,0 +1,873 @@ +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <linux/fs.h> +#include <inttypes.h> +#include <asm/byteorder.h> +#include <sys/ioctl.h> +#include <sys/sysinfo.h> + +#include "linux/nvme_ioctl.h" + +#include "nvme.h" +#include "nvme-print.h" +#include "nvme-ioctl.h" +#include "nvme-status.h" +#include "json.h" +#include "plugin.h" + +#include "argconfig.h" +#include "suffix.h" + +#define CREATE_CMD +#include "sfx-nvme.h" + +#define SFX_PAGE_SHIFT 12 +#define SECTOR_SHIFT 9 + +#define SFX_GET_FREESPACE _IOWR('N', 0x240, struct sfx_freespace_ctx) +#define IDEMA_CAP(exp_GB) (((__u64)exp_GB - 50ULL) * 1953504ULL + 97696368ULL) + + +enum { + SFX_LOG_LATENCY_READ_STATS = 0xc1, + SFX_LOG_SMART = 0xc2, + SFX_LOG_LATENCY_WRITE_STATS = 0xc3, + SFX_LOG_QUAL = 0xc4, + SFX_LOG_MISMATCHLBA = 0xc5, + SFX_LOG_MEDIA = 0xc6, + SFX_LOG_BBT = 0xc7, + SFX_LOG_IDENTIFY = 0xcc, + SFX_FEAT_ATOMIC = 0x01, +}; + +enum sfx_nvme_admin_opcode { + nvme_admin_query_cap_info = 0xd3, + nvme_admin_change_cap = 0xd4, + nvme_admin_sfx_set_features = 0xd5, + nvme_admin_sfx_get_features = 0xd6, +}; + +struct sfx_freespace_ctx +{ + __u64 free_space; + __u64 phy_cap; /* physical capacity, in unit of sector */ + __u64 phy_space; /* physical space considering OP, in unit of sector */ + __u64 user_space; /* user required space, in unit of sector*/ + __u64 hw_used; /* hw space used in 4K */ + __u64 app_written; /* app data written in 4K */ +}; + +struct nvme_capacity_info { + __u64 lba_sec_sz; + __u64 phy_sec_sz; + __u64 used_space; + __u64 free_space; +}; +struct __attribute__((packed)) nvme_additional_smart_log_item { + uint8_t key; + uint8_t _kp[2]; + uint8_t norm; + uint8_t _np; + union { + uint8_t raw[6]; + struct wear_level { + uint16_t min; + uint16_t max; + uint16_t avg; + } wear_level ; + struct thermal_throttle { + uint8_t pct; + uint32_t count; + } thermal_throttle; + }; + uint8_t _rp; +}; + +struct nvme_additional_smart_log { + struct nvme_additional_smart_log_item program_fail_cnt; + struct nvme_additional_smart_log_item erase_fail_cnt; + struct nvme_additional_smart_log_item wear_leveling_cnt; + struct nvme_additional_smart_log_item e2e_err_cnt; + struct nvme_additional_smart_log_item crc_err_cnt; + struct nvme_additional_smart_log_item timed_workload_media_wear; + struct nvme_additional_smart_log_item timed_workload_host_reads; + struct nvme_additional_smart_log_item timed_workload_timer; + struct nvme_additional_smart_log_item thermal_throttle_status; + struct nvme_additional_smart_log_item retry_buffer_overflow_cnt; + struct nvme_additional_smart_log_item pll_lock_loss_cnt; + struct nvme_additional_smart_log_item nand_bytes_written; + struct nvme_additional_smart_log_item host_bytes_written; + struct nvme_additional_smart_log_item raid_recover_cnt; // errors which can be recovered by RAID + struct nvme_additional_smart_log_item prog_timeout_cnt; + struct nvme_additional_smart_log_item erase_timeout_cnt; + struct nvme_additional_smart_log_item read_timeout_cnt; + struct nvme_additional_smart_log_item read_ecc_cnt;//retry cnt +}; + +int nvme_change_cap(int fd, __u32 nsid, __u64 capacity) +{ + struct nvme_admin_cmd cmd = { + .opcode = nvme_admin_change_cap, + .nsid = nsid, + .cdw10 = (capacity & 0xffffffff), + .cdw11 = (capacity >> 32), + }; + + + return nvme_submit_passthru(fd, NVME_IOCTL_ADMIN_CMD,&cmd); +} + +int nvme_sfx_set_features(int fd, __u32 nsid, __u32 fid, __u32 value) +{ + struct nvme_admin_cmd cmd = { + .opcode = nvme_admin_sfx_set_features, + .nsid = nsid, + .cdw10 = fid, + .cdw11 = value, + }; + + return nvme_submit_passthru(fd, NVME_IOCTL_ADMIN_CMD,&cmd); +} + +int nvme_sfx_get_features(int fd, __u32 nsid, __u32 fid, __u32 *result) +{ + int err = 0; + struct nvme_admin_cmd cmd = { + .opcode = nvme_admin_sfx_get_features, + .nsid = nsid, + .cdw10 = fid, + }; + + err = nvme_submit_passthru(fd, NVME_IOCTL_ADMIN_CMD,&cmd); + if (!err && result) { + *result = cmd.result; + } + + return err; +} + +static void show_sfx_smart_log_jsn(struct nvme_additional_smart_log *smart, + unsigned int nsid, const char *devname) +{ + struct json_object *root, *entry_stats, *dev_stats, *multi; + + root = json_create_object(); + json_object_add_value_string(root, "Intel Smart log", devname); + + dev_stats = json_create_object(); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->program_fail_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->program_fail_cnt.raw)); + json_object_add_value_object(dev_stats, "program_fail_count", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->erase_fail_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->erase_fail_cnt.raw)); + json_object_add_value_object(dev_stats, "erase_fail_count", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->wear_leveling_cnt.norm); + multi = json_create_object(); + json_object_add_value_int(multi, "min", le16_to_cpu(smart->wear_leveling_cnt.wear_level.min)); + json_object_add_value_int(multi, "max", le16_to_cpu(smart->wear_leveling_cnt.wear_level.max)); + json_object_add_value_int(multi, "avg", le16_to_cpu(smart->wear_leveling_cnt.wear_level.avg)); + json_object_add_value_object(entry_stats, "raw", multi); + json_object_add_value_object(dev_stats, "wear_leveling", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->e2e_err_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->e2e_err_cnt.raw)); + json_object_add_value_object(dev_stats, "end_to_end_error_detection_count", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->crc_err_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->crc_err_cnt.raw)); + json_object_add_value_object(dev_stats, "crc_error_count", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->timed_workload_media_wear.norm); + json_object_add_value_float(entry_stats, "raw", ((float)int48_to_long(smart->timed_workload_media_wear.raw)) / 1024); + json_object_add_value_object(dev_stats, "timed_workload_media_wear", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->timed_workload_host_reads.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->timed_workload_host_reads.raw)); + json_object_add_value_object(dev_stats, "timed_workload_host_reads", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->timed_workload_timer.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->timed_workload_timer.raw)); + json_object_add_value_object(dev_stats, "timed_workload_timer", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->thermal_throttle_status.norm); + multi = json_create_object(); + json_object_add_value_int(multi, "pct", smart->thermal_throttle_status.thermal_throttle.pct); + json_object_add_value_int(multi, "cnt", smart->thermal_throttle_status.thermal_throttle.count); + json_object_add_value_object(entry_stats, "raw", multi); + json_object_add_value_object(dev_stats, "thermal_throttle_status", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->retry_buffer_overflow_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->retry_buffer_overflow_cnt.raw)); + json_object_add_value_object(dev_stats, "retry_buffer_overflow_count", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->pll_lock_loss_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->pll_lock_loss_cnt.raw)); + json_object_add_value_object(dev_stats, "pll_lock_loss_count", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->nand_bytes_written.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->nand_bytes_written.raw)); + json_object_add_value_object(dev_stats, "nand_bytes_written", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->host_bytes_written.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->host_bytes_written.raw)); + json_object_add_value_object(dev_stats, "host_bytes_written", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->raid_recover_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->raid_recover_cnt.raw)); + json_object_add_value_object(dev_stats, "raid_recover_cnt", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->prog_timeout_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->prog_timeout_cnt.raw)); + json_object_add_value_object(dev_stats, "prog_timeout_cnt", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->erase_timeout_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->erase_timeout_cnt.raw)); + json_object_add_value_object(dev_stats, "erase_timeout_cnt", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->read_timeout_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->read_timeout_cnt.raw)); + json_object_add_value_object(dev_stats, "read_timeout_cnt", entry_stats); + + entry_stats = json_create_object(); + json_object_add_value_int(entry_stats, "normalized", smart->read_ecc_cnt.norm); + json_object_add_value_int(entry_stats, "raw", int48_to_long(smart->read_ecc_cnt.raw)); + json_object_add_value_object(dev_stats, "read_ecc_cnt", entry_stats); + + json_object_add_value_object(root, "Device stats", dev_stats); + + json_print_object(root, NULL); + printf("/n"); + json_free_object(root); +} + +static void show_sfx_smart_log(struct nvme_additional_smart_log *smart, + unsigned int nsid, const char *devname) +{ + printf("Additional Smart Log for ScaleFlux device:%s namespace-id:%x\n", + devname, nsid); + printf("key normalized raw\n"); + printf("program_fail_count : %3d%% %"PRIu64"\n", + smart->program_fail_cnt.norm, + int48_to_long(smart->program_fail_cnt.raw)); + printf("erase_fail_count : %3d%% %"PRIu64"\n", + smart->erase_fail_cnt.norm, + int48_to_long(smart->erase_fail_cnt.raw)); + printf("wear_leveling : %3d%% min: %u, max: %u, avg: %u\n", + smart->wear_leveling_cnt.norm, + le16_to_cpu(smart->wear_leveling_cnt.wear_level.min), + le16_to_cpu(smart->wear_leveling_cnt.wear_level.max), + le16_to_cpu(smart->wear_leveling_cnt.wear_level.avg)); + printf("end_to_end_error_detection_count: %3d%% %"PRIu64"\n", + smart->e2e_err_cnt.norm, + int48_to_long(smart->e2e_err_cnt.raw)); + printf("crc_error_count : %3d%% %"PRIu64"\n", + smart->crc_err_cnt.norm, + int48_to_long(smart->crc_err_cnt.raw)); + printf("timed_workload_media_wear : %3d%% %.3f%%\n", + smart->timed_workload_media_wear.norm, + ((float)int48_to_long(smart->timed_workload_media_wear.raw)) / 1024); + printf("timed_workload_host_reads : %3d%% %"PRIu64"%%\n", + smart->timed_workload_host_reads.norm, + int48_to_long(smart->timed_workload_host_reads.raw)); + printf("timed_workload_timer : %3d%% %"PRIu64" min\n", + smart->timed_workload_timer.norm, + int48_to_long(smart->timed_workload_timer.raw)); + printf("thermal_throttle_status : %3d%% %u%%, cnt: %u\n", + smart->thermal_throttle_status.norm, + smart->thermal_throttle_status.thermal_throttle.pct, + smart->thermal_throttle_status.thermal_throttle.count); + printf("retry_buffer_overflow_count : %3d%% %"PRIu64"\n", + smart->retry_buffer_overflow_cnt.norm, + int48_to_long(smart->retry_buffer_overflow_cnt.raw)); + printf("pll_lock_loss_count : %3d%% %"PRIu64"\n", + smart->pll_lock_loss_cnt.norm, + int48_to_long(smart->pll_lock_loss_cnt.raw)); + printf("nand_bytes_written : %3d%% sectors: %"PRIu64"\n", + smart->nand_bytes_written.norm, + int48_to_long(smart->nand_bytes_written.raw)); + printf("host_bytes_written : %3d%% sectors: %"PRIu64"\n", + smart->host_bytes_written.norm, + int48_to_long(smart->host_bytes_written.raw)); + printf("raid_recover_cnt : %3d%% %"PRIu64"\n", + smart->raid_recover_cnt.norm, + int48_to_long(smart->raid_recover_cnt.raw)); + printf("read_ecc_cnt : %3d%% %"PRIu64"\n", + smart->read_ecc_cnt.norm, + int48_to_long(smart->read_ecc_cnt.raw)); + printf("prog_timeout_cnt : %3d%% %"PRIu64"\n", + smart->prog_timeout_cnt.norm, + int48_to_long(smart->prog_timeout_cnt.raw)); + printf("erase_timeout_cnt : %3d%% %"PRIu64"\n", + smart->erase_timeout_cnt.norm, + int48_to_long(smart->erase_timeout_cnt.raw)); + printf("read_timeout_cnt : %3d%% %"PRIu64"\n", + smart->read_timeout_cnt.norm, + int48_to_long(smart->read_timeout_cnt.raw)); +} + +static int get_additional_smart_log(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + struct nvme_additional_smart_log smart_log; + int err, fd; + char *desc = "Get ScaleFlux vendor specific additional smart log (optionally, "\ + "for the specified namespace), and show it."; + const char *namespace = "(optional) desired namespace"; + const char *raw = "dump output in binary format"; + const char *json= "Dump output in json format"; + struct config { + __u32 namespace_id; + int raw_binary; + int json; + }; + + struct config cfg = { + .namespace_id = 0xffffffff, + }; + + OPT_ARGS(opts) = { + OPT_UINT("namespace-id", 'n', &cfg.namespace_id, namespace), + OPT_FLAG("raw-binary", 'b', &cfg.raw_binary, raw), + OPT_FLAG("json", 'j', &cfg.json, json), + OPT_END() + }; + + + fd = parse_and_open(argc, argv, desc, opts); + + err = nvme_get_log(fd, cfg.namespace_id, 0xca, false, sizeof(smart_log), + (void *)&smart_log); + if (!err) { + if (cfg.json) + show_sfx_smart_log_jsn(&smart_log, cfg.namespace_id, devicename); + else if (!cfg.raw_binary) + show_sfx_smart_log(&smart_log, cfg.namespace_id, devicename); + else + d_raw((unsigned char *)&smart_log, sizeof(smart_log)); + } + else if (err > 0) + fprintf(stderr, "NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + return err; +} + + +struct sfx_lat_stats { + __u16 maj; + __u16 min; + __u32 bucket_1[32]; /* 0~1ms, step 32us */ + __u32 bucket_2[31]; /* 1~32ms, step 1ms */ + __u32 bucket_3[31]; /* 32ms~1s, step 32ms */ + __u32 bucket_4[1]; /* 1s~2s, specifically 1024ms~2047ms */ + __u32 bucket_5[1]; /* 2s~4s, specifically 2048ms~4095ms */ + __u32 bucket_6[1]; /* 4s+, specifically 4096ms+ */ +}; + +static void show_lat_stats(struct sfx_lat_stats *stats, int write) +{ + int i; + + printf(" ScaleFlux IO %s Command Latency Statistics\n", write ? "Write" : "Read"); + printf("-------------------------------------\n"); + printf("Major Revision : %u\n", stats->maj); + printf("Minor Revision : %u\n", stats->min); + + printf("\nGroup 1: Range is 0-1ms, step is 32us\n"); + for (i = 0; i < 32; i++) + printf("Bucket %2d: %u\n", i, stats->bucket_1[i]); + + printf("\nGroup 2: Range is 1-32ms, step is 1ms\n"); + for (i = 0; i < 31; i++) + printf("Bucket %2d: %u\n", i, stats->bucket_2[i]); + + printf("\nGroup 3: Range is 32ms-1s, step is 32ms:\n"); + for (i = 0; i < 31; i++) + printf("Bucket %2d: %u\n", i, stats->bucket_3[i]); + + printf("\nGroup 4: Range is 1s-2s:\n"); + printf("Bucket %2d: %u\n", 0, stats->bucket_4[0]); + + printf("\nGroup 5: Range is 2s-4s:\n"); + printf("Bucket %2d: %u\n", 0, stats->bucket_5[0]); + + printf("\nGroup 6: Range is 4s+:\n"); + printf("Bucket %2d: %u\n", 0, stats->bucket_6[0]); +} + +static int get_lat_stats_log(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + struct sfx_lat_stats stats; + int err, fd; + + char *desc = "Get ScaleFlux Latency Statistics log and show it."; + const char *raw = "dump output in binary format"; + const char *write = "Get write statistics (read default)"; + struct config { + int raw_binary; + int write; + }; + + struct config cfg = { + }; + + OPT_ARGS(opts) = { + OPT_FLAG("write", 'w', &cfg.write, write), + OPT_FLAG("raw-binary", 'b', &cfg.raw_binary, raw), + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + + err = nvme_get_log(fd, 0xffffffff, cfg.write ? 0xc3 : 0xc1, false, sizeof(stats), (void *)&stats); + if (!err) { + if (!cfg.raw_binary) + show_lat_stats(&stats, cfg.write); + else + d_raw((unsigned char *)&stats, sizeof(stats)); + } else if (err > 0) + fprintf(stderr, "NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + return err; +} + +int sfx_nvme_get_log(int fd, __u32 nsid, __u8 log_id, __u32 data_len, void *data) +{ + struct nvme_admin_cmd cmd = { + .opcode = nvme_admin_get_log_page, + .nsid = nsid, + .addr = (__u64)(uintptr_t) data, + .data_len = data_len, + }; + __u32 numd = (data_len >> 2) - 1; + __u16 numdu = numd >> 16, numdl = numd & 0xffff; + + cmd.cdw10 = log_id | (numdl << 16); + cmd.cdw11 = numdu; + + return nvme_submit_admin_passthru(fd, &cmd); +} + +/** + * @brief get bb table through admin_passthru + * + * @param fd + * @param buf + * @param size + * + * @return -1 fail ; 0 success + */ +static int get_bb_table(int fd, __u32 nsid, unsigned char *buf, __u64 size) +{ + if (fd < 0 || !buf || size != 256*4096*sizeof(unsigned char)) { + fprintf(stderr, "Invalid Param \r\n"); + return EINVAL; + } + + return sfx_nvme_get_log(fd, nsid, SFX_LOG_BBT, size, (void *)buf); +} + +/** + * @brief display bb table + * + * @param bd_table buffer that contain bb table dumped from drvier + * @param table_size buffer size (BYTES), should at least has 8 bytes for mf_bb_count and grown_bb_count + */ +static void bd_table_show(unsigned char *bd_table, __u64 table_size) +{ + __u32 mf_bb_count = 0; + __u32 grown_bb_count = 0; + __u32 total_bb_count = 0; + __u32 remap_mfbb_count = 0; + __u32 remap_gbb_count = 0; + __u64 *bb_elem; + __u64 *elem_end = (__u64 *)(bd_table + table_size); + __u64 i; + + /*buf should at least have 8bytes for mf_bb_count & total_bb_count*/ + if (!bd_table || table_size < sizeof(__u64)) + return; + + mf_bb_count = *((__u32 *)bd_table); + grown_bb_count = *((__u32 *)(bd_table + sizeof(__u32))); + total_bb_count = *((__u32 *)(bd_table + 2 * sizeof(__u32))); + remap_mfbb_count = *((__u32 *)(bd_table + 3 * sizeof(__u32))); + remap_gbb_count = *((__u32 *)(bd_table + 4 * sizeof(__u32))); + bb_elem = (__u64 *)(bd_table + 5 * sizeof(__u32)); + + printf("Bad Block Table \n"); + printf("MF_BB_COUNT: %u\n", mf_bb_count); + printf("GROWN_BB_COUNT: %u\n", grown_bb_count); + printf("TOTAL_BB_COUNT: %u\n", total_bb_count); + printf("REMAP_MFBB_COUNT: %u\n", remap_mfbb_count); + printf("REMAP_GBB_COUNT: %u\n", remap_gbb_count); + + printf("REMAP_MFBB_TABLE ["); + i = 0; + while (bb_elem < elem_end && i < remap_mfbb_count) { + printf(" 0x%llx", *(bb_elem++)); + i++; + } + printf(" ]\n"); + + printf("REMAP_GBB_TABLE ["); + i = 0; + while (bb_elem < elem_end && i < remap_gbb_count) { + printf(" 0x%llx",*(bb_elem++)); + i++; + } + printf(" ]\n"); +} + +/** + * @brief "hooks of sfx get-bad-block" + * + * @param argc + * @param argv + * @param cmd + * @param plugin + * + * @return + */ +static int sfx_get_bad_block(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + int fd; + unsigned char *data_buf; + const __u64 buf_size = 256*4096*sizeof(unsigned char); + int err = 0; + + char *desc = "Get bad block table of sfx block device."; + + OPT_ARGS(opts) = { + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + + if (fd < 0) { + return fd; + } + + data_buf = malloc(buf_size); + if (!data_buf) { + fprintf(stderr, "malloc fail, errno %d\r\n", errno); + return -1; + } + + err = get_bb_table(fd, 0xffffffff, data_buf, buf_size); + if (err < 0) { + perror("get-bad-block"); + } else if (err != 0) { + fprintf(stderr, "NVMe IO command error:%s(%x)\n", + nvme_status_to_string(err), err); + } else { + bd_table_show(data_buf, buf_size); + printf("ScaleFlux get bad block table: success\n"); + } + + free(data_buf); + return 0; +} + +static void show_cap_info(struct sfx_freespace_ctx *ctx) +{ + printf("user sectors: %#llx\n", ctx->user_space); + printf("totl physical sectors: %#llx\n", ctx->phy_space); + printf("free physical sectors: %#llx\n", ctx->free_space); + printf("used physical sectors: %#llx\n", ctx->phy_space - ctx->free_space); +} + +static int query_cap_info(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + struct sfx_freespace_ctx ctx = { 0 }; + int err = 0, fd; + char *desc = "query current capacity info of vanda"; + const char *raw = "dump output in binary format"; + const char *json= "Dump output in json format"; + struct config { + int raw_binary; + int json; + }; + struct config cfg; + + OPT_ARGS(opts) = { + OPT_FLAG("raw-binary", 'b', &cfg.raw_binary, raw), + OPT_FLAG("json", 'j', &cfg.json, json), + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + if (fd < 0) { + return fd; + } + + if (ioctl(fd, SFX_GET_FREESPACE, &ctx)) { + fprintf(stderr, "vu ioctl fail, errno %d\r\n", errno); + return -1; + } + + show_cap_info(&ctx); + return err; +} + +static int change_cap_mem_check(int fd, __u64 trg_in_4k) +{ + struct sfx_freespace_ctx freespace_ctx = { 0 }; + struct sysinfo s_info; + __u64 mem_need = 0; + __u64 cur_in_4k = 0; + __u32 cnt_ms = 0; + + while (ioctl(fd, SFX_GET_FREESPACE, &freespace_ctx)) { + if (cnt_ms++ > 600) {//1min + fprintf(stderr, "vu ioctl fail, errno %d\r\n", errno); + return -1; + } + usleep(100000); + } + + cur_in_4k = freespace_ctx.user_space >> (SFX_PAGE_SHIFT - SECTOR_SHIFT); + if (cur_in_4k > trg_in_4k) { + return 0; + } + + if (sysinfo(&s_info) < 0) { + printf("change-cap query mem info fail\n"); + return -1; + } + + mem_need = (trg_in_4k - cur_in_4k) * 8; + if (s_info.freeram <= 10 || mem_need > s_info.freeram) { + fprintf(stderr, "WARNING: mem needed is %llu, free mem is %lu\n" + "Insufficient memory, please drop cache or add free memory and retry\n", + mem_need, s_info.freeram); + return -1; + } + return 0; +} + +static int change_cap(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + int err = -1, fd; + char *desc = "query current capacity info of vanda"; + const char *raw = "dump output in binary format"; + const char *json= "Dump output in json format"; + const char *cap_gb = "cap size in GB"; + const char *cap_byte = "cap size in byte"; + const char *force = "The \"I know what I'm doing\" flag, skip confirmation before sending command"; + __u64 cap_in_4k = 0; + __u64 cap_in_sec = 0; + struct config { + __u64 cap_in_byte; + __u32 capacity_in_gb; + int raw_binary; + int json; + int force; + }; + + struct config cfg = { + .cap_in_byte = 0, + .capacity_in_gb = 0, + .force = 0, + }; + + OPT_ARGS(opts) = { + OPT_UINT("cap", 'c', &cfg.capacity_in_gb, cap_gb), + OPT_UINT("cap-byte", 'z', &cfg.cap_in_byte, cap_byte), + OPT_FLAG("force", 'f', &cfg.force, force), + OPT_FLAG("raw-binary", 'b', &cfg.raw_binary, raw), + OPT_FLAG("json", 'j', &cfg.json, json), + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + if (fd < 0) { + return fd; + } + + if (!cfg.force) { + fprintf(stderr, "WARNING: Changing capacity may irrevocably delete user data.\n" + "You have 10 seconds to press Ctrl-C to cancel this operation.\n\n" + "Use the force [--force|-f] option to suppress this warning.\n"); + sleep(10); + fprintf(stderr, "Sending operation ... \n"); + } + + cap_in_sec = IDEMA_CAP(cfg.capacity_in_gb); + cap_in_4k = cap_in_sec >> 3; + if (cfg.cap_in_byte) + cap_in_4k = cfg.cap_in_byte >> 12; + printf("%dG %lluB %llu 4K\n", + cfg.capacity_in_gb, cfg.cap_in_byte, cap_in_4k); + if (change_cap_mem_check(fd, cap_in_4k)) + return err; + + err = nvme_change_cap(fd, 0xffffffff, cap_in_4k); + if (err < 0) + perror("sfx-change-cap"); + else if (err != 0) + fprintf(stderr, "NVMe IO command error:%s(%x)\n", + nvme_status_to_string(err), err); + else { + printf("ScaleFlux change-capacity: success\n"); + if(ioctl(fd, BLKRRPART) < 0) { + fprintf(stderr, "failed to re-read partition table\n"); + err = EFAULT; + } + } + return err; +} + +char *sfx_feature_to_string(int feature) +{ + switch (feature) { + case SFX_FEAT_ATOMIC: return "ATOMIC"; + + default: return "Unknown"; + } +} + +static int sfx_set_feature(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + int err = 0, fd; + char *desc = "ScaleFlux internal set features\n" + "feature id 1: ATOMIC"; + const char *value = "new value of feature (required)"; + const char *feature_id = "hex feature name (required)"; + const char *namespace_id = "desired namespace"; + struct nvme_id_ns ns; + + struct config { + __u32 namespace_id; + __u32 feature_id; + __u32 value; + }; + struct config cfg = { + .namespace_id = 1, + .feature_id = 0, + .value = 0, + }; + + OPT_ARGS(opts) = { + OPT_UINT("namespace-id", 'n', &cfg.namespace_id, namespace_id), + OPT_UINT("feature-id", 'f', &cfg.feature_id, feature_id), + OPT_UINT("value", 'v', &cfg.value, value), + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + if (fd < 0) { + return fd; + } + + if (!cfg.feature_id) { + fprintf(stderr, "feature-id required param\n"); + return EINVAL; + } + + if (cfg.feature_id == SFX_FEAT_ATOMIC) { + if (cfg.namespace_id != 0xffffffff) { + err = nvme_identify_ns(fd, cfg.namespace_id, 0, &ns); + if (err) { + if (err < 0) + perror("identify-namespace"); + else + fprintf(stderr, + "NVMe Admin command error:%s(%x)\n", + nvme_status_to_string(err), err); + return err; + } + /* + * atomic only support with sector-size = 4k now + */ + if ((ns.flbas & 0xf) != 1) { + printf("Please change-sector size to 4K, then retry\n"); + return EFAULT; + } + } + } + + err = nvme_sfx_set_features(fd, cfg.namespace_id, cfg.feature_id, cfg.value); + if (err < 0) { + perror("ScaleFlux-set-feature"); + return errno; + } else if (!err) { + printf("ScaleFlux set-feature:%02x (%s), value:%#08x\n", cfg.feature_id, + sfx_feature_to_string(cfg.feature_id), cfg.value); + } else if (err > 0) + fprintf(stderr, "NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + + return err; +} + +static int sfx_get_feature(int argc, char **argv, struct command *cmd, struct plugin *plugin) +{ + int err = 0, fd; + char *desc = "ScaleFlux internal set features\n" + "feature id 1: ATOMIC"; + const char *feature_id = "hex feature name (required)"; + const char *namespace_id = "desired namespace"; + __u32 result = 0; + + struct config { + __u32 namespace_id; + __u32 feature_id; + }; + struct config cfg = { + .namespace_id = 0, + .feature_id = 0, + }; + + OPT_ARGS(opts) = { + OPT_UINT("namespace-id", 'n', &cfg.namespace_id, namespace_id), + OPT_UINT("feature-id", 'f', &cfg.feature_id, feature_id), + OPT_END() + }; + + fd = parse_and_open(argc, argv, desc, opts); + + if (fd < 0) { + return fd; + } + + if (!cfg.feature_id) { + fprintf(stderr, "feature-id required param\n"); + return EINVAL; + } + + err = nvme_sfx_get_features(fd, cfg.namespace_id, cfg.feature_id, &result); + if (err < 0) { + perror("ScaleFlux-get-feature"); + return errno; + } else if (!err) { + printf("ScaleFlux get-feature:%02x (%s), value:%d\n", cfg.feature_id, + sfx_feature_to_string(cfg.feature_id), result); + } else if (err > 0) + fprintf(stderr, "NVMe Status:%s(%x)\n", + nvme_status_to_string(err), err); + + return err; + +} diff --git a/plugins/scaleflux/sfx-nvme.h b/plugins/scaleflux/sfx-nvme.h new file mode 100644 index 0000000..daf9c33 --- /dev/null +++ b/plugins/scaleflux/sfx-nvme.h @@ -0,0 +1,25 @@ +#undef CMD_INC_FILE +#define CMD_INC_FILE plugins/scaleflux/sfx-nvme + +#if !defined(SFX_NVME) || defined(CMD_HEADER_MULTI_READ) +#define SFX_NVME + +#include "cmd.h" + +PLUGIN(NAME("sfx", "ScaleFlux vendor specific extensions"), + COMMAND_LIST( + ENTRY("smart-log-add", "Retrieve ScaleFlux SMART Log, show it", get_additional_smart_log) + ENTRY("lat-stats", "Retrieve ScaleFlux IO Latency Statistics log, show it", get_lat_stats_log) + ENTRY("get-bad-block", "Retrieve bad block table of block device, show it", sfx_get_bad_block) + ENTRY("query-cap", "Query current capacity info", query_cap_info) + ENTRY("change-cap", "Dynamic change capacity", change_cap) + ENTRY("set-feature", "Set a feature", sfx_set_feature) + ENTRY("get-feature", "get a feature", sfx_get_feature) + ) +); + +#endif + +#include "define_cmd.h" + + |