diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/seastar/dpdk/examples/vhost_scsi | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/examples/vhost_scsi')
-rw-r--r-- | src/seastar/dpdk/examples/vhost_scsi/Makefile | 67 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/vhost_scsi/meson.build | 21 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/vhost_scsi/scsi.c | 515 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/vhost_scsi/scsi_spec.h | 464 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.c | 488 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.h | 79 |
6 files changed, 1634 insertions, 0 deletions
diff --git a/src/seastar/dpdk/examples/vhost_scsi/Makefile b/src/seastar/dpdk/examples/vhost_scsi/Makefile new file mode 100644 index 000000000..0a3450bae --- /dev/null +++ b/src/seastar/dpdk/examples/vhost_scsi/Makefile @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation + +# binary name +APP = vhost-scsi + +# all source are stored in SRCS-y +SRCS-y := scsi.c vhost_scsi.c + +# Build using pkg-config variables if possible +$(shell pkg-config --exists libdpdk) +ifeq ($(.SHELLSTATUS),0) + +all: shared +.PHONY: shared static +shared: build/$(APP)-shared + ln -sf $(APP)-shared build/$(APP) +static: build/$(APP)-static + ln -sf $(APP)-static build/$(APP) + +CFLAGS += -D_FILE_OFFSET_BITS=64 +LDFLAGS += -pthread + +PC_FILE := $(shell pkg-config --path libdpdk) +CFLAGS += -O3 $(shell pkg-config --cflags libdpdk) +LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk) +LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk) + +build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build + $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) + +build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build + $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) + +build: + @mkdir -p $@ + +.PHONY: clean +clean: + rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared + rmdir --ignore-fail-on-non-empty build + +else # Build using legacy build system + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, detect a build directory, by looking for a path with a .config +RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config))))) + +include $(RTE_SDK)/mk/rte.vars.mk + +ifneq ($(CONFIG_RTE_EXEC_ENV_LINUX),y) +$(info This application can only operate in a linux environment, \ +please change the definition of the RTE_TARGET environment variable) +all: +else + +CFLAGS += -D_FILE_OFFSET_BITS=64 +CFLAGS += -O2 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk + +endif +endif diff --git a/src/seastar/dpdk/examples/vhost_scsi/meson.build b/src/seastar/dpdk/examples/vhost_scsi/meson.build new file mode 100644 index 000000000..2972e4d61 --- /dev/null +++ b/src/seastar/dpdk/examples/vhost_scsi/meson.build @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +# meson file, for building this example as part of a main DPDK build. +# +# To build this example as a standalone application with an already-installed +# DPDK instance, use 'make' + +if not is_linux + build = false +endif + +if not cc.has_header('linux/virtio_scsi.h') + build = false +endif + +deps += 'vhost' +cflags += ['-D_FILE_OFFSET_BITS=64'] +sources = files( + 'scsi.c', 'vhost_scsi.c' +) diff --git a/src/seastar/dpdk/examples/vhost_scsi/scsi.c b/src/seastar/dpdk/examples/vhost_scsi/scsi.c new file mode 100644 index 000000000..b1529afdc --- /dev/null +++ b/src/seastar/dpdk/examples/vhost_scsi/scsi.c @@ -0,0 +1,515 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +/** + * This work is largely based on the "vhost-user-scsi" implementation by + * SPDK(https://github.com/spdk/spdk). + */ + +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <assert.h> +#include <ctype.h> +#include <string.h> +#include <stddef.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_byteorder.h> +#include <rte_string_fns.h> + +#include "vhost_scsi.h" +#include "scsi_spec.h" + +#define INQ_OFFSET(field) (offsetof(struct scsi_cdb_inquiry_data, field) + \ + sizeof(((struct scsi_cdb_inquiry_data *)0x0)->field)) + +static void +vhost_strcpy_pad(void *dst, const char *src, size_t size, int pad) +{ + size_t len; + + len = strlen(src); + if (len < size) { + memcpy(dst, src, len); + memset((char *)dst + len, pad, size - len); + } else { + memcpy(dst, src, size); + } +} + +static int +vhost_hex2bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + return (int)ch; +} + +static void +vhost_bdev_scsi_set_naa_ieee_extended(const char *name, uint8_t *buf) +{ + int i, value, count = 0; + uint64_t *temp64, local_value; + + for (i = 0; (i < 16) && (name[i] != '\0'); i++) { + value = vhost_hex2bin(name[i]); + if (i % 2) + buf[count++] |= value << 4; + else + buf[count] = value; + } + + local_value = *(uint64_t *)buf; + /* + * see spc3r23 7.6.3.6.2, + * NAA IEEE Extended identifer format + */ + local_value &= 0x0fff000000ffffffull; + /* NAA 02, and 00 03 47 for IEEE Intel */ + local_value |= 0x2000000347000000ull; + + temp64 = (uint64_t *)buf; + *temp64 = rte_cpu_to_be_64(local_value); +} + +static void +scsi_task_build_sense_data(struct vhost_scsi_task *task, int sk, + int asc, int ascq) +{ + uint8_t *cp; + int resp_code; + + resp_code = 0x70; /* Current + Fixed format */ + + /* Sense Data */ + cp = (uint8_t *)task->resp->sense; + + /* VALID(7) RESPONSE CODE(6-0) */ + cp[0] = 0x80 | resp_code; + /* Obsolete */ + cp[1] = 0; + /* FILEMARK(7) EOM(6) ILI(5) SENSE KEY(3-0) */ + cp[2] = sk & 0xf; + /* INFORMATION */ + memset(&cp[3], 0, 4); + + /* ADDITIONAL SENSE LENGTH */ + cp[7] = 10; + + /* COMMAND-SPECIFIC INFORMATION */ + memset(&cp[8], 0, 4); + /* ADDITIONAL SENSE CODE */ + cp[12] = asc; + /* ADDITIONAL SENSE CODE QUALIFIER */ + cp[13] = ascq; + /* FIELD REPLACEABLE UNIT CODE */ + cp[14] = 0; + + /* SKSV(7) SENSE KEY SPECIFIC(6-0,7-0,7-0) */ + cp[15] = 0; + cp[16] = 0; + cp[17] = 0; + + /* SenseLength */ + task->resp->sense_len = 18; +} + +static void +scsi_task_set_status(struct vhost_scsi_task *task, int sc, int sk, + int asc, int ascq) +{ + if (sc == SCSI_STATUS_CHECK_CONDITION) + scsi_task_build_sense_data(task, sk, asc, ascq); + task->resp->status = sc; +} + +static int +vhost_bdev_scsi_inquiry_command(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task) +{ + int hlen = 0; + uint32_t alloc_len = 0; + uint16_t len = 0; + uint16_t *temp16; + int pc; + int pd; + int evpd; + int i; + uint8_t *buf; + struct scsi_cdb_inquiry *inq; + + inq = (struct scsi_cdb_inquiry *)task->req->cdb; + + assert(task->iovs_cnt == 1); + + /* At least 36Bytes for inquiry command */ + if (task->data_len < 0x24) + goto inq_error; + + pd = SPC_PERIPHERAL_DEVICE_TYPE_DISK; + pc = inq->page_code; + evpd = inq->evpd & 0x1; + + if (!evpd && pc) + goto inq_error; + + if (evpd) { + struct scsi_vpd_page *vpage = (struct scsi_vpd_page *) + task->iovs[0].iov_base; + + /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ + vpage->peripheral = pd; + /* PAGE CODE */ + vpage->page_code = pc; + + switch (pc) { + case SPC_VPD_SUPPORTED_VPD_PAGES: + hlen = 4; + vpage->params[0] = SPC_VPD_SUPPORTED_VPD_PAGES; + vpage->params[1] = SPC_VPD_UNIT_SERIAL_NUMBER; + vpage->params[2] = SPC_VPD_DEVICE_IDENTIFICATION; + len = 3; + /* PAGE LENGTH */ + vpage->alloc_len = rte_cpu_to_be_16(len); + break; + case SPC_VPD_UNIT_SERIAL_NUMBER: + hlen = 4; + strlcpy((char *)vpage->params, bdev->name, + sizeof(vpage->params)); + vpage->alloc_len = rte_cpu_to_be_16(32); + break; + case SPC_VPD_DEVICE_IDENTIFICATION: + buf = vpage->params; + struct scsi_desig_desc *desig; + + hlen = 4; + /* NAA designator */ + desig = (struct scsi_desig_desc *)buf; + desig->code_set = SPC_VPD_CODE_SET_BINARY; + desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; + desig->type = SPC_VPD_IDENTIFIER_TYPE_NAA; + desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 8; + vhost_bdev_scsi_set_naa_ieee_extended(bdev->name, + desig->desig); + len = sizeof(struct scsi_desig_desc) + 8; + + buf += sizeof(struct scsi_desig_desc) + desig->len; + + /* T10 Vendor ID designator */ + desig = (struct scsi_desig_desc *)buf; + desig->code_set = SPC_VPD_CODE_SET_ASCII; + desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; + desig->type = SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID; + desig->association = SPC_VPD_ASSOCIATION_LOGICAL_UNIT; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = 8 + 16 + 32; + strlcpy((char *)desig->desig, "INTEL", 8); + vhost_strcpy_pad((char *)&desig->desig[8], + bdev->product_name, 16, ' '); + strlcpy((char *)&desig->desig[24], bdev->name, 32); + len += sizeof(struct scsi_desig_desc) + 8 + 16 + 32; + + buf += sizeof(struct scsi_desig_desc) + desig->len; + + /* SCSI Device Name designator */ + desig = (struct scsi_desig_desc *)buf; + desig->code_set = SPC_VPD_CODE_SET_UTF8; + desig->protocol_id = SPC_PROTOCOL_IDENTIFIER_ISCSI; + desig->type = SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME; + desig->association = SPC_VPD_ASSOCIATION_TARGET_DEVICE; + desig->reserved0 = 0; + desig->piv = 1; + desig->reserved1 = 0; + desig->len = strlcpy((char *)desig->desig, bdev->name, + 255); + len += sizeof(struct scsi_desig_desc) + desig->len; + + buf += sizeof(struct scsi_desig_desc) + desig->len; + vpage->alloc_len = rte_cpu_to_be_16(len); + break; + default: + goto inq_error; + } + + } else { + struct scsi_cdb_inquiry_data *inqdata = + (struct scsi_cdb_inquiry_data *)task->iovs[0].iov_base; + /* Standard INQUIRY data */ + /* PERIPHERAL QUALIFIER(7-5) PERIPHERAL DEVICE TYPE(4-0) */ + inqdata->peripheral = pd; + + /* RMB(7) */ + inqdata->rmb = 0; + + /* VERSION */ + /* See SPC3/SBC2/MMC4/SAM2 for more details */ + inqdata->version = SPC_VERSION_SPC3; + + /* NORMACA(5) HISUP(4) RESPONSE DATA FORMAT(3-0) */ + /* format 2 */ /* hierarchical support */ + inqdata->response = 2 | 1 << 4; + + hlen = 5; + + /* SCCS(7) ACC(6) TPGS(5-4) 3PC(3) PROTECT(0) */ + /* Not support TPGS */ + inqdata->flags = 0; + + /* MULTIP */ + inqdata->flags2 = 0x10; + + /* WBUS16(5) SYNC(4) LINKED(3) CMDQUE(1) VS(0) */ + /* CMDQUE */ + inqdata->flags3 = 0x2; + + /* T10 VENDOR IDENTIFICATION */ + strlcpy((char *)inqdata->t10_vendor_id, "INTEL", + sizeof(inqdata->t10_vendor_id)); + + /* PRODUCT IDENTIFICATION */ + strlcpy((char *)inqdata->product_id, bdev->product_name, + RTE_DIM(inqdata->product_id)); + + /* PRODUCT REVISION LEVEL */ + strlcpy((char *)inqdata->product_rev, "0001", + sizeof(inqdata->product_rev)); + + /* Standard inquiry data ends here. Only populate + * remaining fields if alloc_len indicates enough + * space to hold it. + */ + len = INQ_OFFSET(product_rev) - 5; + + if (alloc_len >= INQ_OFFSET(vendor)) { + /* Vendor specific */ + memset(inqdata->vendor, 0x20, 20); + len += sizeof(inqdata->vendor); + } + + if (alloc_len >= INQ_OFFSET(ius)) { + /* CLOCKING(3-2) QAS(1) IUS(0) */ + inqdata->ius = 0; + len += sizeof(inqdata->ius); + } + + if (alloc_len >= INQ_OFFSET(reserved)) { + /* Reserved */ + inqdata->reserved = 0; + len += sizeof(inqdata->reserved); + } + + /* VERSION DESCRIPTOR 1-8 */ + if (alloc_len >= INQ_OFFSET(reserved) + 2) { + temp16 = (uint16_t *)&inqdata->desc[0]; + *temp16 = rte_cpu_to_be_16(0x0960); + len += 2; + } + + if (alloc_len >= INQ_OFFSET(reserved) + 4) { + /* SPC-3 (no version claimed) */ + temp16 = (uint16_t *)&inqdata->desc[2]; + *temp16 = rte_cpu_to_be_16(0x0300); + len += 2; + } + + if (alloc_len >= INQ_OFFSET(reserved) + 6) { + /* SBC-2 (no version claimed) */ + temp16 = (uint16_t *)&inqdata->desc[4]; + *temp16 = rte_cpu_to_be_16(0x0320); + len += 2; + } + + if (alloc_len >= INQ_OFFSET(reserved) + 8) { + /* SAM-2 (no version claimed) */ + temp16 = (uint16_t *)&inqdata->desc[6]; + *temp16 = rte_cpu_to_be_16(0x0040); + len += 2; + } + + if (alloc_len > INQ_OFFSET(reserved) + 8) { + i = alloc_len - (INQ_OFFSET(reserved) + 8); + if (i > 30) + i = 30; + memset(&inqdata->desc[8], 0, i); + len += i; + } + + /* ADDITIONAL LENGTH */ + inqdata->add_len = len; + } + + /* STATUS GOOD */ + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + return hlen + len; + +inq_error: + scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION, + SCSI_SENSE_ILLEGAL_REQUEST, + SCSI_ASC_INVALID_FIELD_IN_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return 0; +} + +static int +vhost_bdev_scsi_readwrite(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task, + uint64_t lba, __rte_unused uint32_t xfer_len) +{ + uint32_t i; + uint64_t offset; + uint32_t nbytes = 0; + + offset = lba * bdev->blocklen; + + for (i = 0; i < task->iovs_cnt; i++) { + if (task->dxfer_dir == SCSI_DIR_TO_DEV) + memcpy(bdev->data + offset, task->iovs[i].iov_base, + task->iovs[i].iov_len); + else + memcpy(task->iovs[i].iov_base, bdev->data + offset, + task->iovs[i].iov_len); + offset += task->iovs[i].iov_len; + nbytes += task->iovs[i].iov_len; + } + + return nbytes; +} + +static int +vhost_bdev_scsi_process_block(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task) +{ + uint64_t lba, *temp64; + uint32_t xfer_len, *temp32; + uint16_t *temp16; + uint8_t *cdb = (uint8_t *)task->req->cdb; + + switch (cdb[0]) { + case SBC_READ_6: + case SBC_WRITE_6: + lba = (uint64_t)cdb[1] << 16; + lba |= (uint64_t)cdb[2] << 8; + lba |= (uint64_t)cdb[3]; + xfer_len = cdb[4]; + if (xfer_len == 0) + xfer_len = 256; + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_10: + case SBC_WRITE_10: + temp32 = (uint32_t *)&cdb[2]; + lba = rte_be_to_cpu_32(*temp32); + temp16 = (uint16_t *)&cdb[7]; + xfer_len = rte_be_to_cpu_16(*temp16); + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_12: + case SBC_WRITE_12: + temp32 = (uint32_t *)&cdb[2]; + lba = rte_be_to_cpu_32(*temp32); + temp32 = (uint32_t *)&cdb[6]; + xfer_len = rte_be_to_cpu_32(*temp32); + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_16: + case SBC_WRITE_16: + temp64 = (uint64_t *)&cdb[2]; + lba = rte_be_to_cpu_64(*temp64); + temp32 = (uint32_t *)&cdb[10]; + xfer_len = rte_be_to_cpu_32(*temp32); + return vhost_bdev_scsi_readwrite(bdev, task, lba, xfer_len); + + case SBC_READ_CAPACITY_10: { + uint8_t buffer[8]; + + if (bdev->blockcnt - 1 > 0xffffffffULL) + memset(buffer, 0xff, 4); + else { + temp32 = (uint32_t *)buffer; + *temp32 = rte_cpu_to_be_32(bdev->blockcnt - 1); + } + temp32 = (uint32_t *)&buffer[4]; + *temp32 = rte_cpu_to_be_32(bdev->blocklen); + memcpy(task->iovs[0].iov_base, buffer, sizeof(buffer)); + task->resp->status = SCSI_STATUS_GOOD; + return sizeof(buffer); + } + + case SBC_SYNCHRONIZE_CACHE_10: + case SBC_SYNCHRONIZE_CACHE_16: + task->resp->status = SCSI_STATUS_GOOD; + return 0; + } + + scsi_task_set_status(task, SCSI_STATUS_CHECK_CONDITION, + SCSI_SENSE_ILLEGAL_REQUEST, + SCSI_ASC_INVALID_FIELD_IN_CDB, + SCSI_ASCQ_CAUSE_NOT_REPORTABLE); + return 0; +} + +int +vhost_bdev_process_scsi_commands(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task) +{ + int len; + uint8_t *data; + uint64_t *temp64, fmt_lun = 0; + uint32_t *temp32; + const uint8_t *lun; + uint8_t *cdb = (uint8_t *)task->req->cdb; + + lun = (const uint8_t *)task->req->lun; + /* only 1 LUN supported */ + if (lun[0] != 1 || lun[1] >= 1) + return -1; + + switch (cdb[0]) { + case SPC_INQUIRY: + len = vhost_bdev_scsi_inquiry_command(bdev, task); + task->data_len = len; + break; + case SPC_REPORT_LUNS: + data = (uint8_t *)task->iovs[0].iov_base; + fmt_lun |= (0x0ULL & 0x00ffULL) << 48; + temp64 = (uint64_t *)&data[8]; + *temp64 = rte_cpu_to_be_64(fmt_lun); + temp32 = (uint32_t *)data; + *temp32 = rte_cpu_to_be_32(8); + task->data_len = 16; + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + case SPC_MODE_SELECT_6: + case SPC_MODE_SELECT_10: + /* don't support it now */ + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + case SPC_MODE_SENSE_6: + case SPC_MODE_SENSE_10: + /* don't support it now */ + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + case SPC_TEST_UNIT_READY: + scsi_task_set_status(task, SCSI_STATUS_GOOD, 0, 0, 0); + break; + default: + len = vhost_bdev_scsi_process_block(bdev, task); + task->data_len = len; + } + + return 0; +} diff --git a/src/seastar/dpdk/examples/vhost_scsi/scsi_spec.h b/src/seastar/dpdk/examples/vhost_scsi/scsi_spec.h new file mode 100644 index 000000000..27be02688 --- /dev/null +++ b/src/seastar/dpdk/examples/vhost_scsi/scsi_spec.h @@ -0,0 +1,464 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +/** + * SCSI specification definition + * refer http://www.t10.org/drafts.htm#SPC_Family for SPC-3 and SBC-3 + */ + +#ifndef _SCSI_SPEC_H +#define _SCSI_SPEC_H + +#include <stdint.h> + +enum scsi_group_code { + SCSI_6BYTE_CMD = 0x00, + SCSI_10BYTE_CMD = 0x20, + SCSI_10BYTE_CMD2 = 0x40, + SCSI_16BYTE_CMD = 0x80, + SCSI_12BYTE_CMD = 0xa0, +}; + +#define SCSI_GROUP_MASK 0xe0 +#define SCSI_OPCODE_MASK 0x1f + +enum scsi_status { + SCSI_STATUS_GOOD = 0x00, + SCSI_STATUS_CHECK_CONDITION = 0x02, + SCSI_STATUS_CONDITION_MET = 0x04, + SCSI_STATUS_BUSY = 0x08, + SCSI_STATUS_INTERMEDIATE = 0x10, + SCSI_STATUS_INTERMEDIATE_CONDITION_MET = 0x14, + SCSI_STATUS_RESERVATION_CONFLICT = 0x18, + SCSI_STATUS_Obsolete = 0x22, + SCSI_STATUS_TASK_SET_FULL = 0x28, + SCSI_STATUS_ACA_ACTIVE = 0x30, + SCSI_STATUS_TASK_ABORTED = 0x40, +}; + +enum scsi_sense { + SCSI_SENSE_NO_SENSE = 0x00, + SCSI_SENSE_RECOVERED_ERROR = 0x01, + SCSI_SENSE_NOT_READY = 0x02, + SCSI_SENSE_MEDIUM_ERROR = 0x03, + SCSI_SENSE_HARDWARE_ERROR = 0x04, + SCSI_SENSE_ILLEGAL_REQUEST = 0x05, + SCSI_SENSE_UNIT_ATTENTION = 0x06, + SCSI_SENSE_DATA_PROTECT = 0x07, + SCSI_SENSE_BLANK_CHECK = 0x08, + SCSI_SENSE_VENDOR_SPECIFIC = 0x09, + SCSI_SENSE_COPY_ABORTED = 0x0a, + SCSI_SENSE_ABORTED_COMMAND = 0x0b, + SCSI_SENSE_VOLUME_OVERFLOW = 0x0d, + SCSI_SENSE_MISCOMPARE = 0x0e, +}; + +enum scsi_asc { + SCSI_ASC_NO_ADDITIONAL_SENSE = 0x00, + SCSI_ASC_PERIPHERAL_DEVICE_WRITE_FAULT = 0x03, + SCSI_ASC_LOGICAL_UNIT_NOT_READY = 0x04, + SCSI_ASC_WARNING = 0x0b, + SCSI_ASC_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x10, + SCSI_ASC_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x10, + SCSI_ASC_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x10, + SCSI_ASC_UNRECOVERED_READ_ERROR = 0x11, + SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION = 0x1d, + SCSI_ASC_INVALID_COMMAND_OPERATION_CODE = 0x20, + SCSI_ASC_ACCESS_DENIED = 0x20, + SCSI_ASC_LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE = 0x21, + SCSI_ASC_INVALID_FIELD_IN_CDB = 0x24, + SCSI_ASC_LOGICAL_UNIT_NOT_SUPPORTED = 0x25, + SCSI_ASC_WRITE_PROTECTED = 0x27, + SCSI_ASC_FORMAT_COMMAND_FAILED = 0x31, + SCSI_ASC_INTERNAL_TARGET_FAILURE = 0x44, +}; + +enum scsi_ascq { + SCSI_ASCQ_CAUSE_NOT_REPORTABLE = 0x00, + SCSI_ASCQ_BECOMING_READY = 0x01, + SCSI_ASCQ_FORMAT_COMMAND_FAILED = 0x01, + SCSI_ASCQ_LOGICAL_BLOCK_GUARD_CHECK_FAILED = 0x01, + SCSI_ASCQ_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = 0x02, + SCSI_ASCQ_NO_ACCESS_RIGHTS = 0x02, + SCSI_ASCQ_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = 0x03, + SCSI_ASCQ_POWER_LOSS_EXPECTED = 0x08, + SCSI_ASCQ_INVALID_LU_IDENTIFIER = 0x09, +}; + +enum spc_opcode { + /* SPC3 related */ + SPC_ACCESS_CONTROL_IN = 0x86, + SPC_ACCESS_CONTROL_OUT = 0x87, + SPC_EXTENDED_COPY = 0x83, + SPC_INQUIRY = 0x12, + SPC_LOG_SELECT = 0x4c, + SPC_LOG_SENSE = 0x4d, + SPC_MODE_SELECT_6 = 0x15, + SPC_MODE_SELECT_10 = 0x55, + SPC_MODE_SENSE_6 = 0x1a, + SPC_MODE_SENSE_10 = 0x5a, + SPC_PERSISTENT_RESERVE_IN = 0x5e, + SPC_PERSISTENT_RESERVE_OUT = 0x5f, + SPC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, + SPC_READ_ATTRIBUTE = 0x8c, + SPC_READ_BUFFER = 0x3c, + SPC_RECEIVE_COPY_RESULTS = 0x84, + SPC_RECEIVE_DIAGNOSTIC_RESULTS = 0x1c, + SPC_REPORT_LUNS = 0xa0, + SPC_REQUEST_SENSE = 0x03, + SPC_SEND_DIAGNOSTIC = 0x1d, + SPC_TEST_UNIT_READY = 0x00, + SPC_WRITE_ATTRIBUTE = 0x8d, + SPC_WRITE_BUFFER = 0x3b, + + SPC_SERVICE_ACTION_IN_12 = 0xab, + SPC_SERVICE_ACTION_OUT_12 = 0xa9, + SPC_SERVICE_ACTION_IN_16 = 0x9e, + SPC_SERVICE_ACTION_OUT_16 = 0x9f, + + SPC_VARIABLE_LENGTH = 0x7f, + + SPC_MO_CHANGE_ALIASES = 0x0b, + SPC_MO_SET_DEVICE_IDENTIFIER = 0x06, + SPC_MO_SET_PRIORITY = 0x0e, + SPC_MO_SET_TARGET_PORT_GROUPS = 0x0a, + SPC_MO_SET_TIMESTAMP = 0x0f, + SPC_MI_REPORT_ALIASES = 0x0b, + SPC_MI_REPORT_DEVICE_IDENTIFIER = 0x05, + SPC_MI_REPORT_PRIORITY = 0x0e, + SPC_MI_REPORT_SUPPORTED_OPERATION_CODES = 0x0c, + SPC_MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS = 0x0d, + SPC_MI_REPORT_TARGET_PORT_GROUPS = 0x0a, + SPC_MI_REPORT_TIMESTAMP = 0x0f, + + /* SPC2 related (Obsolete) */ + SPC2_RELEASE_6 = 0x17, + SPC2_RELEASE_10 = 0x57, + SPC2_RESERVE_6 = 0x16, + SPC2_RESERVE_10 = 0x56, +}; + +enum scc_opcode { + SCC_MAINTENANCE_IN = 0xa3, + SCC_MAINTENANCE_OUT = 0xa4, +}; + +enum sbc_opcode { + SBC_COMPARE_AND_WRITE = 0x89, + SBC_FORMAT_UNIT = 0x04, + SBC_GET_LBA_STATUS = 0x0012009e, + SBC_ORWRITE_16 = 0x8b, + SBC_PRE_FETCH_10 = 0x34, + SBC_PRE_FETCH_16 = 0x90, + SBC_READ_6 = 0x08, + SBC_READ_10 = 0x28, + SBC_READ_12 = 0xa8, + SBC_READ_16 = 0x88, + SBC_READ_ATTRIBUTE = 0x8c, + SBC_READ_BUFFER = 0x3c, + SBC_READ_CAPACITY_10 = 0x25, + SBC_READ_DEFECT_DATA_10 = 0x37, + SBC_READ_DEFECT_DATA_12 = 0xb7, + SBC_READ_LONG_10 = 0x3e, + SBC_REASSIGN_BLOCKS = 0x07, + SBC_SANITIZE = 0x48, + SBC_START_STOP_UNIT = 0x1b, + SBC_SYNCHRONIZE_CACHE_10 = 0x35, + SBC_SYNCHRONIZE_CACHE_16 = 0x91, + SBC_UNMAP = 0x42, + SBC_VERIFY_10 = 0x2f, + SBC_VERIFY_12 = 0xaf, + SBC_VERIFY_16 = 0x8f, + SBC_WRITE_6 = 0x0a, + SBC_WRITE_10 = 0x2a, + SBC_WRITE_12 = 0xaa, + SBC_WRITE_16 = 0x8a, + SBC_WRITE_AND_VERIFY_10 = 0x2e, + SBC_WRITE_AND_VERIFY_12 = 0xae, + SBC_WRITE_AND_VERIFY_16 = 0x8e, + SBC_WRITE_LONG_10 = 0x3f, + SBC_WRITE_SAME_10 = 0x41, + SBC_WRITE_SAME_16 = 0x93, + SBC_XDREAD_10 = 0x52, + SBC_XDWRITE_10 = 0x50, + SBC_XDWRITEREAD_10 = 0x53, + SBC_XPWRITE_10 = 0x51, + + SBC_SAI_READ_CAPACITY_16 = 0x10, + SBC_SAI_READ_LONG_16 = 0x11, + SBC_SAO_WRITE_LONG_16 = 0x11, + + SBC_VL_READ_32 = 0x0009, + SBC_VL_VERIFY_32 = 0x000a, + SBC_VL_WRITE_32 = 0x000b, + SBC_VL_WRITE_AND_VERIFY_32 = 0x000c, + SBC_VL_WRITE_SAME_32 = 0x000d, + SBC_VL_XDREAD_32 = 0x0003, + SBC_VL_XDWRITE_32 = 0x0004, + SBC_VL_XDWRITEREAD_32 = 0x0007, + SBC_VL_XPWRITE_32 = 0x0006, +}; + +enum mmc_opcode { + /* MMC6 */ + MMC_READ_DISC_STRUCTURE = 0xad, + + /* MMC4 */ + MMC_BLANK = 0xa1, + MMC_CLOSE_TRACK_SESSION = 0x5b, + MMC_ERASE_10 = 0x2c, + MMC_FORMAT_UNIT = 0x04, + MMC_GET_CONFIGURATION = 0x46, + MMC_GET_EVENT_STATUS_NOTIFICATION = 0x4a, + MMC_GET_PERFORMANCE = 0xac, + MMC_INQUIRY = 0x12, + MMC_LOAD_UNLOAD_MEDIUM = 0xa6, + MMC_MECHANISM_STATUS = 0xbd, + MMC_MODE_SELECT_10 = 0x55, + MMC_MODE_SENSE_10 = 0x5a, + MMC_PAUSE_RESUME = 0x4b, + MMC_PLAY_AUDIO_10 = 0x45, + MMC_PLAY_AUDIO_12 = 0xa5, + MMC_PLAY_AUDIO_MSF = 0x47, + MMC_PREVENT_ALLOW_MEDIUM_REMOVAL = 0x1e, + MMC_READ_10 = 0x28, + MMC_READ_12 = 0xa8, + MMC_READ_BUFFER = 0x3c, + MMC_READ_BUFFER_CAPACITY = 0x5c, + MMC_READ_CAPACITY = 0x25, + MMC_READ_CD = 0xbe, + MMC_READ_CD_MSF = 0xb9, + MMC_READ_DISC_INFORMATION = 0x51, + MMC_READ_DVD_STRUCTURE = 0xad, + MMC_READ_FORMAT_CAPACITIES = 0x23, + MMC_READ_SUB_CHANNEL = 0x42, + MMC_READ_TOC_PMA_ATIP = 0x43, + MMC_READ_TRACK_INFORMATION = 0x52, + MMC_REPAIR_TRACK = 0x58, + MMC_REPORT_KEY = 0xa4, + MMC_REQUEST_SENSE = 0x03, + MMC_RESERVE_TRACK = 0x53, + MMC_SCAN = 0xba, + MMC_SEEK_10 = 0x2b, + MMC_SEND_CUE_SHEET = 0x5d, + MMC_SEND_DVD_STRUCTURE = 0xbf, + MMC_SEND_KEY = 0xa3, + MMC_SEND_OPC_INFORMATION = 0x54, + MMC_SET_CD_SPEED = 0xbb, + MMC_SET_READ_AHEAD = 0xa7, + MMC_SET_STREAMING = 0xb6, + MMC_START_STOP_UNIT = 0x1b, + MMC_STOP_PLAY_SCAN = 0x4e, + MMC_SYNCHRONIZE_CACHE = 0x35, + MMC_TEST_UNIT_READY = 0x00, + MMC_VERIFY_10 = 0x2f, + MMC_WRITE_10 = 0xa2, + MMC_WRITE_12 = 0xaa, + MMC_WRITE_AND_VERIFY_10 = 0x2e, + MMC_WRITE_BUFFER = 0x3b, +}; + +enum ssc_opcode { + SSC_ERASE_6 = 0x19, + SSC_FORMAT_MEDIUM = 0x04, + SSC_LOAD_UNLOAD = 0x1b, + SSC_LOCATE_10 = 0x2b, + SSC_LOCATE_16 = 0x92, + SSC_MOVE_MEDIUM_ATTACHED = 0xa7, + SSC_READ_6 = 0x08, + SSC_READ_BLOCK_LIMITS = 0x05, + SSC_READ_ELEMENT_STATUS_ATTACHED = 0xb4, + SSC_READ_POSITION = 0x34, + SSC_READ_REVERSE_6 = 0x0f, + SSC_RECOVER_BUFFERED_DATA = 0x14, + SSC_REPORT_DENSITY_SUPPORT = 0x44, + SSC_REWIND = 0x01, + SSC_SET_CAPACITY = 0x0b, + SSC_SPACE_6 = 0x11, + SSC_SPACE_16 = 0x91, + SSC_VERIFY_6 = 0x13, + SSC_WRITE_6 = 0x0a, + SSC_WRITE_FILEMARKS_6 = 0x10, +}; + +enum spc_vpd { + SPC_VPD_DEVICE_IDENTIFICATION = 0x83, + SPC_VPD_EXTENDED_INQUIRY_DATA = 0x86, + SPC_VPD_MANAGEMENT_NETWORK_ADDRESSES = 0x85, + SPC_VPD_MODE_PAGE_POLICY = 0x87, + SPC_VPD_SCSI_PORTS = 0x88, + SPC_VPD_SOFTWARE_INTERFACE_IDENTIFICATION = 0x84, + SPC_VPD_SUPPORTED_VPD_PAGES = 0x00, + SPC_VPD_UNIT_SERIAL_NUMBER = 0x80, + SPC_VPD_BLOCK_LIMITS = 0xb0, + SPC_VPD_BLOCK_DEV_CHARS = 0xb1, + SPC_VPD_BLOCK_THIN_PROVISION = 0xb2, +}; + +enum { + SPC_PERIPHERAL_DEVICE_TYPE_DISK = 0x00, + SPC_PERIPHERAL_DEVICE_TYPE_TAPE = 0x01, + SPC_PERIPHERAL_DEVICE_TYPE_DVD = 0x05, + SPC_PERIPHERAL_DEVICE_TYPE_CHANGER = 0x08, + + SPC_VERSION_NONE = 0x00, + SPC_VERSION_SPC = 0x03, + SPC_VERSION_SPC2 = 0x04, + SPC_VERSION_SPC3 = 0x05, + SPC_VERSION_SPC4 = 0x06, + + SPC_PROTOCOL_IDENTIFIER_FC = 0x00, + SPC_PROTOCOL_IDENTIFIER_PSCSI = 0x01, + SPC_PROTOCOL_IDENTIFIER_SSA = 0x02, + SPC_PROTOCOL_IDENTIFIER_IEEE1394 = 0x03, + SPC_PROTOCOL_IDENTIFIER_RDMA = 0x04, + SPC_PROTOCOL_IDENTIFIER_ISCSI = 0x05, + SPC_PROTOCOL_IDENTIFIER_SAS = 0x06, + SPC_PROTOCOL_IDENTIFIER_ADT = 0x07, + SPC_PROTOCOL_IDENTIFIER_ATA = 0x08, + + SPC_VPD_CODE_SET_BINARY = 0x01, + SPC_VPD_CODE_SET_ASCII = 0x02, + SPC_VPD_CODE_SET_UTF8 = 0x03, + + SPC_VPD_ASSOCIATION_LOGICAL_UNIT = 0x00, + SPC_VPD_ASSOCIATION_TARGET_PORT = 0x01, + SPC_VPD_ASSOCIATION_TARGET_DEVICE = 0x02, + + SPC_VPD_IDENTIFIER_TYPE_VENDOR_SPECIFIC = 0x00, + SPC_VPD_IDENTIFIER_TYPE_T10_VENDOR_ID = 0x01, + SPC_VPD_IDENTIFIER_TYPE_EUI64 = 0x02, + SPC_VPD_IDENTIFIER_TYPE_NAA = 0x03, + SPC_VPD_IDENTIFIER_TYPE_RELATIVE_TARGET_PORT = 0x04, + SPC_VPD_IDENTIFIER_TYPE_TARGET_PORT_GROUP = 0x05, + SPC_VPD_IDENTIFIER_TYPE_LOGICAL_UNIT_GROUP = 0x06, + SPC_VPD_IDENTIFIER_TYPE_MD5_LOGICAL_UNIT = 0x07, + SPC_VPD_IDENTIFIER_TYPE_SCSI_NAME = 0x08, +}; + +struct scsi_cdb_inquiry { + uint8_t opcode; + uint8_t evpd; + uint8_t page_code; + uint16_t alloc_len; + uint8_t control; +}; + +struct scsi_cdb_inquiry_data { + uint8_t peripheral; + uint8_t rmb; + uint8_t version; + uint8_t response; + uint8_t add_len; + uint8_t flags; + uint8_t flags2; + uint8_t flags3; + uint8_t t10_vendor_id[8]; + uint8_t product_id[16]; + uint8_t product_rev[4]; + uint8_t vendor[20]; + uint8_t ius; + uint8_t reserved; + uint8_t desc[]; +}; + +struct scsi_vpd_page { + uint8_t peripheral; + uint8_t page_code; + uint16_t alloc_len; + uint8_t params[32]; +}; + +#define SCSI_VEXT_REF_CHK 0x01 +#define SCSI_VEXT_APP_CHK 0x02 +#define SCSI_VEXT_GRD_CHK 0x04 +#define SCSI_VEXT_SIMPSUP 0x01 +#define SCSI_VEXT_ORDSUP 0x02 +#define SCSI_VEXT_HEADSUP 0x04 +#define SCSI_VEXT_PRIOR_SUP 0x08 +#define SCSI_VEXT_GROUP_SUP 0x10 +#define SCSI_VEXT_UASK_SUP 0x20 +#define SCSI_VEXT_V_SUP 0x01 +#define SCSI_VEXT_NV_SUP 0x02 +#define SCSI_VEXT_CRD_SUP 0x04 +#define SCSI_VEXT_WU_SUP 0x08 + +struct scsi_vpd_ext_inquiry { + uint8_t peripheral; + uint8_t page_code; + uint16_t alloc_len; + uint8_t check; + uint8_t sup; + uint8_t sup2; + uint8_t luiclr; + uint8_t cbcs; + uint8_t micro_dl; + uint8_t reserved[54]; +}; + +#define SPC_VPD_DESIG_PIV 0x80 + +/* designation descriptor */ +struct scsi_desig_desc { + uint8_t code_set : 4; + uint8_t protocol_id : 4; + uint8_t type : 4; + uint8_t association : 2; + uint8_t reserved0 : 1; + uint8_t piv : 1; + uint8_t reserved1; + uint8_t len; + uint8_t desig[]; +}; + +/* mode page policy descriptor */ +struct scsi_mpage_policy_desc { + uint8_t page_code; + uint8_t sub_page_code; + uint8_t policy; + uint8_t reserved; +}; + +/* target port descriptor */ +struct scsi_tgt_port_desc { + uint8_t code_set; + uint8_t desig_type; + uint8_t reserved; + uint8_t len; + uint8_t designator[]; +}; + +/* SCSI port designation descriptor */ +struct scsi_port_desc { + uint16_t reserved; + uint16_t rel_port_id; + uint16_t reserved2; + uint16_t init_port_len; + uint16_t init_port_id; + uint16_t reserved3; + uint16_t tgt_desc_len; + uint8_t tgt_desc[]; +}; + +/* SCSI UNMAP block descriptor */ +struct scsi_unmap_bdesc { + /* UNMAP LOGICAL BLOCK ADDRESS */ + uint64_t lba; + + /* NUMBER OF LOGICAL BLOCKS */ + uint32_t block_count; + + /* RESERVED */ + uint32_t reserved; +}; + +#define SCSI_UNMAP_LBPU (1 << 7) +#define SCSI_UNMAP_LBPWS (1 << 6) +#define SCSI_UNMAP_LBPWS10 (1 << 5) + +#define SCSI_UNMAP_FULL_PROVISIONING 0x00 +#define SCSI_UNMAP_RESOURCE_PROVISIONING 0x01 +#define SCSI_UNMAP_THIN_PROVISIONING 0x02 + +#endif /* _SCSI_SPEC_H */ diff --git a/src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.c b/src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.c new file mode 100644 index 000000000..513af0cca --- /dev/null +++ b/src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.c @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#include <stdint.h> +#include <unistd.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <semaphore.h> +#include <linux/virtio_scsi.h> +#include <linux/virtio_ring.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_vhost.h> + +#include "vhost_scsi.h" +#include "scsi_spec.h" + +#define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\ + (1 << VIRTIO_SCSI_F_INOUT) |\ + (1 << VIRTIO_SCSI_F_CHANGE)) + +/* Path to folder where character device will be created. Can be set by user. */ +static char dev_pathname[PATH_MAX] = ""; + +static struct vhost_scsi_ctrlr *g_vhost_ctrlr; +static int g_should_stop; +static sem_t exit_sem; + +static struct vhost_scsi_ctrlr * +vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name) +{ + /* currently we only support 1 socket file fd */ + return g_vhost_ctrlr; +} + +static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + int ret = 0; + + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Cannot get socket name\n"); + assert(ret != 0); + } + + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Controller is not ready\n"); + assert(ctrlr != NULL); + } + + assert(ctrlr->mem != NULL); + + return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len); +} + +static struct vring_desc * +descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc) +{ + return &vq_desc[cur_desc->next]; +} + +static bool +descriptor_has_next(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_NEXT); +} + +static bool +descriptor_is_wr(struct vring_desc *cur_desc) +{ + return !!(cur_desc->flags & VRING_DESC_F_WRITE); +} + +static void +submit_completion(struct vhost_scsi_task *task, uint32_t q_idx) +{ + struct rte_vhost_vring *vq; + struct vring_used *used; + + vq = task->vq; + used = vq->used; + /* Fill out the next entry in the "used" ring. id = the + * index of the descriptor that contained the SCSI request. + * len = the total amount of data transferred for the SCSI + * request. We must report the correct len, for variable + * length SCSI CDBs, where we may return less data than + * allocated by the guest VM. + */ + used->ring[used->idx & (vq->size - 1)].id = task->req_idx; + used->ring[used->idx & (vq->size - 1)].len = task->data_len; + used->idx++; + + /* Send an interrupt back to the guest VM so that it knows + * a completion is ready to be processed. + */ + rte_vhost_vring_call(task->bdev->vid, q_idx); +} + +static void +vhost_process_read_payload_chain(struct vhost_scsi_task *task) +{ + void *data; + uint64_t chunck_len; + + task->iovs_cnt = 0; + chunck_len = task->desc->len; + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr, + &chunck_len); + if (!task->resp || chunck_len != task->desc->len) { + fprintf(stderr, "failed to translate desc address.\n"); + return; + } + + while (descriptor_has_next(task->desc)) { + task->desc = descriptor_get_next(task->vq->desc, task->desc); + chunck_len = task->desc->len; + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr, + &chunck_len); + if (!data || chunck_len != task->desc->len) { + fprintf(stderr, "failed to translate desc address.\n"); + return; + } + + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; + task->data_len += task->desc->len; + task->iovs_cnt++; + } +} + +static void +vhost_process_write_payload_chain(struct vhost_scsi_task *task) +{ + void *data; + uint64_t chunck_len; + + task->iovs_cnt = 0; + + do { + chunck_len = task->desc->len; + data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr, + &chunck_len); + if (!data || chunck_len != task->desc->len) { + fprintf(stderr, "failed to translate desc address.\n"); + return; + } + + task->iovs[task->iovs_cnt].iov_base = data; + task->iovs[task->iovs_cnt].iov_len = task->desc->len; + task->data_len += task->desc->len; + task->iovs_cnt++; + task->desc = descriptor_get_next(task->vq->desc, task->desc); + } while (descriptor_has_next(task->desc)); + + chunck_len = task->desc->len; + task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr, + &chunck_len); + if (!task->resp || chunck_len != task->desc->len) + fprintf(stderr, "failed to translate desc address.\n"); +} + +static struct vhost_block_dev * +vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial, + uint32_t blk_size, uint64_t blk_cnt, + bool wce_enable) +{ + struct vhost_block_dev *bdev; + + bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE); + if (!bdev) + return NULL; + + strncpy(bdev->name, bdev_name, sizeof(bdev->name)); + strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name)); + bdev->blocklen = blk_size; + bdev->blockcnt = blk_cnt; + bdev->write_cache = wce_enable; + + /* use memory as disk storage space */ + bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0); + if (!bdev->data) { + fprintf(stderr, "no enough reseverd huge memory for disk\n"); + return NULL; + } + + return bdev; +} + +static void +process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx) +{ + int ret; + struct vhost_scsi_queue *scsi_vq; + struct rte_vhost_vring *vq; + + scsi_vq = &ctrlr->bdev->queues[q_idx]; + vq = &scsi_vq->vq; + ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq); + assert(ret == 0); + + while (vq->avail->idx != scsi_vq->last_used_idx) { + int req_idx; + uint16_t last_idx; + struct vhost_scsi_task *task; + uint64_t chunck_len; + + last_idx = scsi_vq->last_used_idx & (vq->size - 1); + req_idx = vq->avail->ring[last_idx]; + + task = rte_zmalloc(NULL, sizeof(*task), 0); + assert(task != NULL); + + task->ctrlr = ctrlr; + task->bdev = ctrlr->bdev; + task->vq = vq; + task->req_idx = req_idx; + task->desc = &task->vq->desc[task->req_idx]; + + /* does not support indirect descriptors */ + assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0); + scsi_vq->last_used_idx++; + + chunck_len = task->desc->len; + task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, + task->desc->addr, + &chunck_len); + if (!task->req || chunck_len != task->desc->len) { + fprintf(stderr, "failed to translate desc address.\n"); + return; + } + + task->desc = descriptor_get_next(task->vq->desc, task->desc); + if (!descriptor_has_next(task->desc)) { + task->dxfer_dir = SCSI_DIR_NONE; + chunck_len = task->desc->len; + task->resp = (void *)(uintptr_t) + gpa_to_vva(task->bdev->vid, + task->desc->addr, + &chunck_len); + if (!task->resp || chunck_len != task->desc->len) { + fprintf(stderr, "failed to translate desc address.\n"); + return; + } + } else if (!descriptor_is_wr(task->desc)) { + task->dxfer_dir = SCSI_DIR_TO_DEV; + vhost_process_write_payload_chain(task); + } else { + task->dxfer_dir = SCSI_DIR_FROM_DEV; + vhost_process_read_payload_chain(task); + } + + ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task); + if (ret) { + /* invalid response */ + task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; + } else { + /* successfully */ + task->resp->response = VIRTIO_SCSI_S_OK; + task->resp->status = 0; + task->resp->resid = 0; + } + submit_completion(task, q_idx); + rte_free(task); + } +} + +/* Main framework for processing IOs */ +static void * +ctrlr_worker(void *arg) +{ + uint32_t idx, num; + struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg; + cpu_set_t cpuset; + pthread_t thread; + + if (ctrlr == NULL || ctrlr->bdev == NULL) { + fprintf(stderr, "%s: Error, invalid argument passed to worker thread\n", + __func__); + exit(0); + } + + thread = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + + num = rte_vhost_get_vring_num(ctrlr->bdev->vid); + fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num); + + if (num != NUM_OF_SCSI_QUEUES) { + fprintf(stderr, "Only 1 IO queue are supported\n"); + exit(0); + } + + while (!g_should_stop && ctrlr->bdev != NULL) { + /* At least 3 vrings, currently only can support 1 IO queue + * Queue 2 for IO queue, does not support TMF and hotplug + * for the example application now + */ + for (idx = 2; idx < num; idx++) + process_requestq(ctrlr, idx); + } + + fprintf(stdout, "Ctrlr Worker Thread Exiting\n"); + sem_post(&exit_sem); + return NULL; +} + +static int +new_device(int vid) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + struct vhost_scsi_queue *scsi_vq; + struct rte_vhost_vring *vq; + pthread_t tid; + int i, ret; + + ret = rte_vhost_get_ifname(vid, path, PATH_MAX); + if (ret) { + fprintf(stderr, "Cannot get socket name\n"); + return -1; + } + + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Controller is not ready\n"); + return -1; + } + + ret = rte_vhost_get_mem_table(vid, &ctrlr->mem); + if (ret) { + fprintf(stderr, "Get Controller memory region failed\n"); + return -1; + } + assert(ctrlr->mem != NULL); + + /* hardcoded block device information with 128MiB */ + ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0", + 4096, 32768, 0); + if (!ctrlr->bdev) + return -1; + + ctrlr->bdev->vid = vid; + + /* Disable Notifications */ + for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) { + rte_vhost_enable_guest_notification(vid, i, 0); + /* restore used index */ + scsi_vq = &ctrlr->bdev->queues[i]; + vq = &scsi_vq->vq; + ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq); + assert(ret == 0); + scsi_vq->last_used_idx = vq->used->idx; + scsi_vq->last_avail_idx = vq->used->idx; + } + + g_should_stop = 0; + fprintf(stdout, "New Device %s, Device ID %d\n", path, vid); + if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) { + fprintf(stderr, "Worker Thread Started Failed\n"); + return -1; + } + pthread_detach(tid); + return 0; +} + +static void +destroy_device(int vid) +{ + char path[PATH_MAX]; + struct vhost_scsi_ctrlr *ctrlr; + + rte_vhost_get_ifname(vid, path, PATH_MAX); + fprintf(stdout, "Destroy %s Device ID %d\n", path, vid); + ctrlr = vhost_scsi_ctrlr_find(path); + if (!ctrlr) { + fprintf(stderr, "Destroy Ctrlr Failed\n"); + return; + } + ctrlr->bdev = NULL; + g_should_stop = 1; + + sem_wait(&exit_sem); +} + +static const struct vhost_device_ops vhost_scsi_device_ops = { + .new_device = new_device, + .destroy_device = destroy_device, +}; + +static struct vhost_scsi_ctrlr * +vhost_scsi_ctrlr_construct(const char *ctrlr_name) +{ + int ret; + struct vhost_scsi_ctrlr *ctrlr; + char *path; + char cwd[PATH_MAX]; + + /* always use current directory */ + path = getcwd(cwd, PATH_MAX); + if (!path) { + fprintf(stderr, "Cannot get current working directory\n"); + return NULL; + } + snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name); + + if (access(dev_pathname, F_OK) != -1) { + if (unlink(dev_pathname) != 0) + rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", + dev_pathname); + } + + if (rte_vhost_driver_register(dev_pathname, 0) != 0) { + fprintf(stderr, "socket %s already exists\n", dev_pathname); + return NULL; + } + + fprintf(stdout, "socket file: %s created\n", dev_pathname); + + ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES); + if (ret != 0) { + fprintf(stderr, "Set vhost driver features failed\n"); + return NULL; + } + + ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE); + if (!ctrlr) + return NULL; + + rte_vhost_driver_callback_register(dev_pathname, + &vhost_scsi_device_ops); + + return ctrlr; +} + +static void +signal_handler(__rte_unused int signum) +{ + + if (access(dev_pathname, F_OK) == 0) + unlink(dev_pathname); + exit(0); +} + +int main(int argc, char *argv[]) +{ + int ret; + + signal(SIGINT, signal_handler); + + /* init EAL */ + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); + + g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket"); + if (g_vhost_ctrlr == NULL) { + fprintf(stderr, "Construct vhost scsi controller failed\n"); + return 0; + } + + if (sem_init(&exit_sem, 0, 0) < 0) { + fprintf(stderr, "Error init exit_sem\n"); + return -1; + } + + rte_vhost_driver_start(dev_pathname); + + /* loop for exit the application */ + while (1) + sleep(1); + + return 0; +} + diff --git a/src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.h b/src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.h new file mode 100644 index 000000000..7f98d8d10 --- /dev/null +++ b/src/seastar/dpdk/examples/vhost_scsi/vhost_scsi.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#ifndef _VHOST_SCSI_H_ +#define _VHOST_SCSI_H_ + +#include <sys/uio.h> +#include <stdint.h> +#include <linux/virtio_scsi.h> +#include <linux/virtio_ring.h> + +#include <rte_vhost.h> + +struct vhost_scsi_queue { + struct rte_vhost_vring vq; + uint16_t last_avail_idx; + uint16_t last_used_idx; +}; + +#define NUM_OF_SCSI_QUEUES 3 + +struct vhost_block_dev { + /** ID for vhost library. */ + int vid; + /** Queues for the block device */ + struct vhost_scsi_queue queues[NUM_OF_SCSI_QUEUES]; + /** Unique name for this block device. */ + char name[64]; + + /** Unique product name for this kind of block device. */ + char product_name[256]; + + /** Size in bytes of a logical block for the backend */ + uint32_t blocklen; + + /** Number of blocks */ + uint64_t blockcnt; + + /** write cache enabled, not used at the moment */ + int write_cache; + + /** use memory as disk storage space */ + uint8_t *data; +}; + +struct vhost_scsi_ctrlr { + /** Only support 1 LUN for the example */ + struct vhost_block_dev *bdev; + /** VM memory region */ + struct rte_vhost_memory *mem; +} __rte_cache_aligned; + +#define VHOST_SCSI_MAX_IOVS 128 + +enum scsi_data_dir { + SCSI_DIR_NONE = 0, + SCSI_DIR_TO_DEV = 1, + SCSI_DIR_FROM_DEV = 2, +}; + +struct vhost_scsi_task { + int req_idx; + uint32_t dxfer_dir; + uint32_t data_len; + struct virtio_scsi_cmd_req *req; + struct virtio_scsi_cmd_resp *resp; + struct iovec iovs[VHOST_SCSI_MAX_IOVS]; + uint32_t iovs_cnt; + struct vring_desc *desc; + struct rte_vhost_vring *vq; + struct vhost_block_dev *bdev; + struct vhost_scsi_ctrlr *ctrlr; +}; + +int vhost_bdev_process_scsi_commands(struct vhost_block_dev *bdev, + struct vhost_scsi_task *task); + +#endif /* _VHOST_SCSI_H_ */ |