Diffstat (limited to 'src/spdk/include/spdk_internal/nvme_tcp.h')
-rw-r--r-- | src/spdk/include/spdk_internal/nvme_tcp.h | 633 |
1 files changed, 633 insertions, 0 deletions
diff --git a/src/spdk/include/spdk_internal/nvme_tcp.h b/src/spdk/include/spdk_internal/nvme_tcp.h
new file mode 100644
index 000000000..7065bc060
--- /dev/null
+++ b/src/spdk/include/spdk_internal/nvme_tcp.h
@@ -0,0 +1,633 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation. All rights reserved.
+ *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_INTERNAL_NVME_TCP_H
+#define SPDK_INTERNAL_NVME_TCP_H
+
+#include "spdk/likely.h"
+#include "spdk/sock.h"
+#include "spdk/dif.h"
+
+#define SPDK_CRC32C_XOR				0xffffffffUL
+#define SPDK_NVME_TCP_DIGEST_LEN		4
+#define SPDK_NVME_TCP_DIGEST_ALIGNMENT		4
+#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT	30
+#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR	8
+
+/*
+ * Maximum number of SGL elements.
+ */
+#define NVME_TCP_MAX_SGL_DESCRIPTORS	(16)
+
+#define MAKE_DIGEST_WORD(BUF, CRC32C) \
+	(((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
+	 ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
+	 ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
+	 ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))
+
+#define MATCH_DIGEST_WORD(BUF, CRC32C) \
+	(((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \
+	  | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \
+	  | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \
+	  | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \
+	 == (CRC32C))
+
+#define DGET32(B) \
+	((((uint32_t) *((uint8_t *)(B)+0)) << 0) \
+	 | (((uint32_t) *((uint8_t *)(B)+1)) << 8) \
+	 | (((uint32_t) *((uint8_t *)(B)+2)) << 16) \
+	 | (((uint32_t) *((uint8_t *)(B)+3)) << 24))
+
+#define DSET32(B,D) \
+	(((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)), \
+	 ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)), \
+	 ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)), \
+	 ((*((uint8_t *)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24)))
+
+typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg);
+
+struct _nvme_tcp_sgl {
+	struct iovec	*iov;
+	int		iovcnt;
+	uint32_t	iov_offset;
+	uint32_t	total_size;
+};
+
+struct nvme_tcp_pdu {
+	union {
+		/* to hold error pdu data */
+		uint8_t					raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
+		struct spdk_nvme_tcp_common_pdu_hdr	common;
+		struct spdk_nvme_tcp_ic_req		ic_req;
+		struct spdk_nvme_tcp_term_req_hdr	term_req;
+		struct spdk_nvme_tcp_cmd		capsule_cmd;
+		struct spdk_nvme_tcp_h2c_data_hdr	h2c_data;
+		struct spdk_nvme_tcp_ic_resp		ic_resp;
+		struct spdk_nvme_tcp_rsp		capsule_resp;
+		struct spdk_nvme_tcp_c2h_data_hdr	c2h_data;
+		struct spdk_nvme_tcp_r2t_hdr		r2t;
+
+	} hdr;
+
+	bool						has_hdgst;
+	bool						ddgst_enable;
+	uint8_t						data_digest[SPDK_NVME_TCP_DIGEST_LEN];
+
+	uint8_t						ch_valid_bytes;
+	uint8_t						psh_valid_bytes;
+	uint8_t						psh_len;
+
+	nvme_tcp_qpair_xfer_complete_cb			cb_fn;
+	void						*cb_arg;
+
+	/* The sock request ends with a 0 length iovec. Place the actual iovec immediately
+	 * after it. There is a static assert below to check if the compiler inserted
+	 * any unwanted padding */
+	struct spdk_sock_request			sock_req;
+	struct iovec					iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];
+
+	struct iovec					data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
+	uint32_t					data_iovcnt;
+	uint32_t					data_len;
+
+	uint32_t					readv_offset;
+	TAILQ_ENTRY(nvme_tcp_pdu)			tailq;
+	uint32_t					remaining;
+	uint32_t					padding_len;
+	struct _nvme_tcp_sgl				sgl;
+
+	struct spdk_dif_ctx				*dif_ctx;
+
+	void						*req; /* data tied to a tcp request */
+	void						*qpair;
+};
+SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
+			    sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),
+		   "Compiler inserted padding between iov and sock_req");
+
+enum nvme_tcp_pdu_recv_state {
+	/* Ready to wait for PDU */
+	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY,
+
+	/* Active tqpair waiting for any PDU common header */
+	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH,
+
+	/* Active tqpair waiting for any PDU specific header */
+	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH,
+
+	/* Active tqpair waiting for a tcp request, only use in target side */
+	NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,
+
+	/* Active tqpair waiting for payload */
+	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,
+
+	/* Active tqpair does not wait for payload */
+	NVME_TCP_PDU_RECV_STATE_ERROR,
+};
+
+enum nvme_tcp_error_codes {
+	NVME_TCP_PDU_IN_PROGRESS	= 0,
+	NVME_TCP_CONNECTION_FATAL	= -1,
+	NVME_TCP_PDU_FATAL		= -2,
+};
+
+enum nvme_tcp_qpair_state {
+	NVME_TCP_QPAIR_STATE_INVALID = 0,
+	NVME_TCP_QPAIR_STATE_INITIALIZING = 1,
+	NVME_TCP_QPAIR_STATE_RUNNING = 2,
+	NVME_TCP_QPAIR_STATE_EXITING = 3,
+	NVME_TCP_QPAIR_STATE_EXITED = 4,
+};
+
+static const bool g_nvme_tcp_hdgst[] = {
+	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ]		= false,
+	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_R2T]		= true
+};
+
+static const bool g_nvme_tcp_ddgst[] = {
+	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ]		= false,
+	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP]	= false,
+	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA]	= true,
+	[SPDK_NVME_TCP_PDU_TYPE_R2T]		= false
+};
+
+static uint32_t
+nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t crc32c;
+	uint32_t hlen = pdu->hdr.common.hlen;
+
+	crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0);
+	crc32c = crc32c ^ SPDK_CRC32C_XOR;
+	return crc32c;
+}
+
+static uint32_t
+_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		assert(iov[i].iov_base != NULL);
+		assert(iov[i].iov_len != 0);
+		crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c);
+	}
+
+	return crc32c;
+}
+
+static uint32_t
+nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu)
+{
+	uint32_t crc32c = SPDK_CRC32C_XOR;
+	uint32_t mod;
+
+	assert(pdu->data_len != 0);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		crc32c = _update_crc32c_iov(pdu->data_iov, pdu->data_iovcnt, crc32c);
+	} else {
+		spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt,
+					      0, pdu->data_len, &crc32c, pdu->dif_ctx);
+	}
+
+	mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT;
+	if (mod != 0) {
+		uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod;
+		uint8_t pad[3] = {0, 0, 0};
+
+		assert(pad_length > 0);
+		assert(pad_length <= sizeof(pad));
+		crc32c = spdk_crc32c_update(pad, pad_length, crc32c);
+	}
+	crc32c = crc32c ^ SPDK_CRC32C_XOR;
+	return crc32c;
+}
+
+static inline void
+_nvme_tcp_sgl_init(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+		   uint32_t iov_offset)
+{
+	s->iov = iov;
+	s->iovcnt = iovcnt;
+	s->iov_offset = iov_offset;
+	s->total_size = 0;
+}
+
+static inline void
+_nvme_tcp_sgl_advance(struct _nvme_tcp_sgl *s, uint32_t step)
+{
+	s->iov_offset += step;
+	while (s->iovcnt > 0) {
+		if (s->iov_offset < s->iov->iov_len) {
+			break;
+		}
+
+		s->iov_offset -= s->iov->iov_len;
+		s->iov++;
+		s->iovcnt--;
+	}
+}
+
+static inline void
+_nvme_tcp_sgl_get_buf(struct _nvme_tcp_sgl *s, void **_buf, uint32_t *_buf_len)
+{
+	if (_buf != NULL) {
+		*_buf = s->iov->iov_base + s->iov_offset;
+	}
+	if (_buf_len != NULL) {
+		*_buf_len = s->iov->iov_len - s->iov_offset;
+	}
+}
+
+static inline bool
+_nvme_tcp_sgl_append(struct _nvme_tcp_sgl *s, uint8_t *data, uint32_t data_len)
+{
+	if (s->iov_offset >= data_len) {
+		s->iov_offset -= data_len;
+	} else {
+		assert(s->iovcnt > 0);
+		s->iov->iov_base = data + s->iov_offset;
+		s->iov->iov_len = data_len - s->iov_offset;
+		s->total_size += data_len - s->iov_offset;
+		s->iov_offset = 0;
+		s->iov++;
+		s->iovcnt--;
+		if (s->iovcnt == 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static inline bool
+_nvme_tcp_sgl_append_multi(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		if (!_nvme_tcp_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static inline uint32_t
+_get_iov_array_size(struct iovec *iov, int iovcnt)
+{
+	int i;
+	uint32_t size = 0;
+
+	for (i = 0; i < iovcnt; i++) {
+		size += iov[i].iov_len;
+	}
+
+	return size;
+}
+
+static inline bool
+_nvme_tcp_sgl_append_multi_with_md(struct _nvme_tcp_sgl *s, struct iovec *iov, int iovcnt,
+				   uint32_t data_len, const struct spdk_dif_ctx *dif_ctx)
+{
+	int rc;
+	uint32_t mapped_len = 0;
+
+	if (s->iov_offset >= data_len) {
+		s->iov_offset -= _get_iov_array_size(iov, iovcnt);
+	} else {
+		rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt,
+						     s->iov_offset, data_len - s->iov_offset,
+						     &mapped_len, dif_ctx);
+		if (rc < 0) {
+			SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n");
+			return false;
+		}
+
+		s->total_size += mapped_len;
+		s->iov_offset = 0;
+		assert(s->iovcnt >= rc);
+		s->iovcnt -= rc;
+		s->iov += rc;
+
+		if (s->iovcnt == 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int
+nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+		    bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length)
+{
+	uint32_t hlen, plen;
+	struct _nvme_tcp_sgl *sgl;
+
+	if (iovcnt == 0) {
+		return 0;
+	}
+
+	sgl = &pdu->sgl;
+	_nvme_tcp_sgl_init(sgl, iov, iovcnt, 0);
+	hlen = pdu->hdr.common.hlen;
+
+	/* Header Digest */
+	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+		hlen += SPDK_NVME_TCP_DIGEST_LEN;
+	}
+
+	plen = hlen;
+	if (!pdu->data_len) {
+		/* PDU header + possible header digest */
+		_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen);
+		goto end;
+	}
+
+	/* Padding */
+	if (pdu->padding_len > 0) {
+		hlen += pdu->padding_len;
+		plen = hlen;
+	}
+
+	if (!_nvme_tcp_sgl_append(sgl, (uint8_t *)&pdu->hdr.raw, hlen)) {
+		goto end;
+	}
+
+	/* Data Segment */
+	plen += pdu->data_len;
+	if (spdk_likely(!pdu->dif_ctx)) {
+		if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+			goto end;
+		}
+	} else {
+		if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+							pdu->data_len, pdu->dif_ctx)) {
+			goto end;
+		}
+	}
+
+	/* Data Digest */
+	if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) {
+		plen += SPDK_NVME_TCP_DIGEST_LEN;
+		_nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+	}
+
+	assert(plen == pdu->hdr.common.plen);
+
+end:
+	if (_mapped_length != NULL) {
+		*_mapped_length = sgl->total_size;
+	}
+
+	return iovcnt - sgl->iovcnt;
+}
+
+static int
+nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
+			    bool ddgst_enable, uint32_t *_mapped_length)
+{
+	struct _nvme_tcp_sgl *sgl;
+
+	if (iovcnt == 0) {
+		return 0;
+	}
+
+	sgl = &pdu->sgl;
+	_nvme_tcp_sgl_init(sgl, iov, iovcnt, pdu->readv_offset);
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
+			goto end;
+		}
+	} else {
+		if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
+							pdu->data_len, pdu->dif_ctx)) {
+			goto end;
+		}
+	}
+
+	/* Data Digest */
+	if (ddgst_enable) {
+		_nvme_tcp_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
+	}
+
+end:
+	if (_mapped_length != NULL) {
+		*_mapped_length = sgl->total_size;
+	}
+	return iovcnt - sgl->iovcnt;
+}
+
+static int
+nvme_tcp_read_data(struct spdk_sock *sock, int bytes,
+		   void *buf)
+{
+	int ret;
+
+	ret = spdk_sock_recv(sock, buf, bytes);
+
+	if (ret > 0) {
+		return ret;
+	}
+
+	if (ret < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		/* For connect reset issue, do not output error log */
+		if (errno != ECONNRESET) {
+			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
+				    errno, spdk_strerror(errno));
+		}
+	}
+
+	/* connection closed */
+	return NVME_TCP_CONNECTION_FATAL;
+}
+
+static int
+nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
+{
+	int ret;
+
+	assert(sock != NULL);
+	if (iov == NULL || iovcnt == 0) {
+		return 0;
+	}
+
+	if (iovcnt == 1) {
+		return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
+	}
+
+	ret = spdk_sock_readv(sock, iov, iovcnt);
+
+	if (ret > 0) {
+		return ret;
+	}
+
+	if (ret < 0) {
+		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+			return 0;
+		}
+
+		/* For connect reset issue, do not output error log */
+		if (errno != ECONNRESET) {
+			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
+				    errno, spdk_strerror(errno));
+		}
+	}
+
+	/* connection closed */
+	return NVME_TCP_CONNECTION_FATAL;
+}
+
+
+static int
+nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu)
+{
+	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
+	int iovcnt;
+
+	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
+					     pdu->ddgst_enable, NULL);
+	assert(iovcnt >= 0);
+
+	return nvme_tcp_readv_data(sock, iov, iovcnt);
+}
+
+static void
+_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+	pdu->data_iov[0].iov_base = data;
+	pdu->data_iov[0].iov_len = data_len;
+	pdu->data_iovcnt = 1;
+}
+
+static void
+nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
+{
+	_nvme_tcp_pdu_set_data(pdu, data, data_len);
+	pdu->data_len = data_len;
+}
+
+static void
+nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
+			  struct iovec *iov, int iovcnt,
+			  uint32_t data_offset, uint32_t data_len)
+{
+	uint32_t buf_offset, buf_len, remain_len, len;
+	uint8_t *buf;
+	struct _nvme_tcp_sgl *pdu_sgl, buf_sgl;
+
+	pdu->data_len = data_len;
+
+	if (spdk_likely(!pdu->dif_ctx)) {
+		buf_offset = data_offset;
+		buf_len = data_len;
+	} else {
+		spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
+		spdk_dif_get_range_with_md(data_offset, data_len,
+					   &buf_offset, &buf_len, pdu->dif_ctx);
+	}
+
+	if (iovcnt == 1) {
+		_nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len);
+	} else {
+		pdu_sgl = &pdu->sgl;
+
+		_nvme_tcp_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);
+		_nvme_tcp_sgl_init(&buf_sgl, iov, iovcnt, 0);
+
+		_nvme_tcp_sgl_advance(&buf_sgl, buf_offset);
+		remain_len = buf_len;
+
+		while (remain_len > 0) {
+			_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
+			len = spdk_min(len, remain_len);
+
+			_nvme_tcp_sgl_advance(&buf_sgl, len);
+			remain_len -= len;
+
+			if (!_nvme_tcp_sgl_append(pdu_sgl, buf, len)) {
+				break;
+			}
+		}
+
+		assert(remain_len == 0);
+		assert(pdu_sgl->total_size == buf_len);
+
+		pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt;
+	}
+}
+
+static void
+nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable)
+{
+	uint8_t psh_len, pdo, padding_len;
+
+	psh_len = pdu->hdr.common.hlen;
+
+	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
+		pdu->has_hdgst = true;
+		psh_len += SPDK_NVME_TCP_DIGEST_LEN;
+		if (pdu->hdr.common.plen > psh_len) {
+			pdo = pdu->hdr.common.pdo;
+			padding_len = pdo - psh_len;
+			if (padding_len > 0) {
+				psh_len = pdo;
+			}
+		}
+	}
+
+	psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
+	pdu->psh_len = psh_len;
+}
+
+#endif /* SPDK_INTERNAL_NVME_TCP_H */
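
The MAKE_DIGEST_WORD and MATCH_DIGEST_WORD macros added above serialize and verify the CRC32C digest in little-endian byte order, one byte at a time, so the wire format is the same regardless of host endianness. Below is a minimal stand-alone sketch of that round trip; the macro bodies are copied verbatim from the header, while the file name and the test value 0xDEADBEEF are made up for illustration.

/* digest_demo.c (hypothetical): cc -o digest_demo digest_demo.c */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAKE_DIGEST_WORD(BUF, CRC32C) \
	(((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
	 ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
	 ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
	 ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))

#define MATCH_DIGEST_WORD(BUF, CRC32C) \
	(((((uint32_t) *((uint8_t *)(BUF)+0)) << 0) \
	  | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8) \
	  | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16) \
	  | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24)) \
	 == (CRC32C))

int main(void)
{
	uint8_t trailer[4];
	uint32_t crc = 0xDEADBEEF; /* stand-in for a computed CRC32C */

	/* Sender side: serialize the digest into the PDU trailer. */
	MAKE_DIGEST_WORD(trailer, crc);
	/* Least significant byte goes first on the wire. */
	assert(trailer[0] == 0xEF && trailer[3] == 0xDE);

	/* Receiver side: compare a recomputed CRC against the trailer. */
	assert(MATCH_DIGEST_WORD(trailer, crc));
	printf("digest round-trip ok\n");
	return 0;
}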
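The _nvme_tcp_sgl helpers treat an iovec array as a cursor: _nvme_tcp_sgl_advance() consumes whole iovecs as the offset grows past them. The following self-contained sketch reproduces that walking logic with simplified copies of the struct and two helpers (names shortened, buffers and offsets are made-up test values); it is not the header's code itself, just a demonstration of the same technique.

/* sgl_walk_demo.c (hypothetical) */
#include <assert.h>
#include <stdint.h>
#include <sys/uio.h>

struct sgl_cursor {
	struct iovec	*iov;
	int		iovcnt;
	uint32_t	iov_offset;
};

/* Same logic as _nvme_tcp_sgl_advance(): skip every iovec that the
 * accumulated offset fully covers, leaving a residual offset. */
static void
sgl_advance(struct sgl_cursor *s, uint32_t step)
{
	s->iov_offset += step;
	while (s->iovcnt > 0 && s->iov_offset >= s->iov->iov_len) {
		s->iov_offset -= s->iov->iov_len;
		s->iov++;
		s->iovcnt--;
	}
}

int main(void)
{
	char a[8], b[8], c[8];
	struct iovec iov[3] = {{a, sizeof(a)}, {b, sizeof(b)}, {c, sizeof(c)}};
	struct sgl_cursor s = {iov, 3, 0};

	sgl_advance(&s, 10);	/* all 8 bytes of `a`, then 2 bytes into `b` */

	assert(s.iov->iov_base == (void *)b);
	assert(s.iov_offset == 2);
	assert(s.iovcnt == 2);
	return 0;
}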
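nvme_tcp_pdu_calc_psh_len() decides how many bytes of PDU-specific header remain to be read after the 8-byte common header: the declared hlen, plus 4 bytes if a header digest was negotiated, extended to the PDU data offset (pdo) when the gap is padding. A worked sketch of that arithmetic, with the header sizes inlined as plain integers so it compiles alone; the values hlen = 72 (a capsule command) and pdo = 80 are illustrative assumptions, not taken from the diff.

/* psh_len_demo.c (hypothetical) */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint8_t common_hdr_len = 8; /* spdk_nvme_tcp_common_pdu_hdr */
	uint8_t hlen = 72;                /* assumed capsule command hlen */
	uint8_t pdo = 80;                 /* assumed PDU data offset */
	uint8_t psh_len = hlen;

	/* Header digest, when negotiated, extends the PSH by 4 bytes. */
	psh_len += 4;                     /* SPDK_NVME_TCP_DIGEST_LEN -> 76 */

	/* If data starts past the digest, the gap is padding that must
	 * also be consumed as part of the PSH read. */
	if (pdo > psh_len) {
		psh_len = pdo;            /* 80: 4 bytes of padding */
	}

	/* The common header was already consumed separately. */
	psh_len -= common_hdr_len;
	printf("PDU-specific header to read: %u bytes\n", (unsigned)psh_len); /* 72 */
	return 0;
}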